diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,240601 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 17184, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9379844961240308e-10, + "logits/chosen": -2.3148956298828125, + "logits/rejected": -2.3116657733917236, + "logps/chosen": -0.9619483351707458, + "logps/rejected": -60.3209228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.8759689922480617e-10, + "logits/chosen": -2.173767328262329, + "logits/rejected": -2.1571481227874756, + "logps/chosen": -281.1432189941406, + "logps/rejected": -521.8209228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 5.813953488372093e-10, + "logits/chosen": -2.0815069675445557, + "logits/rejected": -1.9928966760635376, + "logps/chosen": -376.32708740234375, + "logps/rejected": -568.0127563476562, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 7.751937984496123e-10, + "logits/chosen": -2.0424838066101074, + "logits/rejected": -1.98953378200531, + "logps/chosen": -217.12767028808594, + "logps/rejected": -312.8235778808594, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 9.689922480620154e-10, + "logits/chosen": -2.1098685264587402, + "logits/rejected": -2.097724199295044, + "logps/chosen": -3.778204917907715, + "logps/rejected": -126.03208923339844, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.1627906976744186e-09, + "logits/chosen": -2.1457772254943848, + "logits/rejected": -2.1344656944274902, + "logps/chosen": -83.25022888183594, + "logps/rejected": -375.0087890625, + "loss": 0.6771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01841888390481472, + "rewards/margins": 0.04779510572552681, + "rewards/rejected": -0.02937622182071209, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.3565891472868216e-09, + "logits/chosen": -2.2394957542419434, + "logits/rejected": -2.239711284637451, + "logps/chosen": -163.40557861328125, + "logps/rejected": -238.2117919921875, + "loss": 0.6691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04796142503619194, + "rewards/margins": 0.04245605319738388, + "rewards/rejected": 0.005505371373146772, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.5503875968992247e-09, + "logits/chosen": -2.1073718070983887, + "logits/rejected": -2.0362727642059326, + "logps/chosen": -98.4739990234375, + "logps/rejected": -246.28912353515625, + "loss": 0.6798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03303375467658043, + "rewards/margins": 0.02403717115521431, + "rewards/rejected": 0.008996582590043545, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.7441860465116277e-09, + "logits/chosen": -2.1476328372955322, + "logits/rejected": -2.141610860824585, + "logps/chosen": -44.429256439208984, + "logps/rejected": -284.420166015625, + "loss": 0.6743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014646530151367188, + "rewards/margins": 0.06239128112792969, + "rewards/rejected": -0.0477447509765625, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 1.9379844961240307e-09, + "logits/chosen": -2.148139238357544, + "logits/rejected": -2.1399922370910645, + "logps/chosen": -8.343321800231934, + "logps/rejected": -79.12874603271484, + "loss": 0.6804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02447347715497017, + "rewards/margins": 0.03523321449756622, + "rewards/rejected": -0.010759735479950905, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.131782945736434e-09, + "logits/chosen": -2.1181116104125977, + "logits/rejected": -2.0785129070281982, + "logps/chosen": -249.86044311523438, + "logps/rejected": -361.5333251953125, + "loss": 0.705, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00911560095846653, + "rewards/margins": -0.0819091796875, + "rewards/rejected": 0.09102477878332138, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 2.3255813953488372e-09, + "logits/chosen": -2.302480459213257, + "logits/rejected": -2.301037073135376, + "logps/chosen": -74.32432556152344, + "logps/rejected": -140.6397705078125, + "loss": 0.6808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03035125695168972, + "rewards/margins": 0.01443328708410263, + "rewards/rejected": 0.01591796986758709, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 2.51937984496124e-09, + "logits/chosen": -2.038545608520508, + "logits/rejected": -2.039599657058716, + "logps/chosen": -72.5895767211914, + "logps/rejected": -171.87445068359375, + "loss": 0.6751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02878265455365181, + "rewards/margins": 0.03737488016486168, + "rewards/rejected": -0.008592224679887295, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 2.7131782945736433e-09, + "logits/chosen": -2.222468614578247, + "logits/rejected": -2.20499849319458, + "logps/chosen": -264.46563720703125, + "logps/rejected": -267.7445373535156, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02553710900247097, + "rewards/margins": 0.00981140322983265, + "rewards/rejected": -0.03534851223230362, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 2.9069767441860465e-09, + "logits/chosen": -2.256462335586548, + "logits/rejected": -2.255791425704956, + "logps/chosen": -0.5150231719017029, + "logps/rejected": -75.39102172851562, + "loss": 0.6874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003547418164089322, + "rewards/margins": 0.015638483688235283, + "rewards/rejected": -0.01209106482565403, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.1007751937984494e-09, + "logits/chosen": -2.2168092727661133, + "logits/rejected": -2.2080657482147217, + "logps/chosen": -43.79476547241211, + "logps/rejected": -180.26510620117188, + "loss": 0.6751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012454223819077015, + "rewards/margins": 0.05372772365808487, + "rewards/rejected": -0.04127350077033043, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 3.294573643410853e-09, + "logits/chosen": -2.1675055027008057, + "logits/rejected": -2.1333134174346924, + "logps/chosen": -199.805908203125, + "logps/rejected": -317.5986328125, + "loss": 0.6625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07163696736097336, + "rewards/margins": 0.0032714903354644775, + "rewards/rejected": 0.06836547702550888, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 3.4883720930232554e-09, + "logits/chosen": -2.3806512355804443, + "logits/rejected": -2.3651845455169678, + "logps/chosen": -0.0014163634041324258, + "logps/rejected": -133.23495483398438, + "loss": 0.7023, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.169190111089847e-06, + "rewards/margins": -0.036383941769599915, + "rewards/rejected": 0.03638610988855362, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 3.682170542635659e-09, + "logits/chosen": -2.275090217590332, + "logits/rejected": -2.270596504211426, + "logps/chosen": -71.586181640625, + "logps/rejected": -185.04522705078125, + "loss": 0.7106, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.040824126452207565, + "rewards/margins": -0.025955962017178535, + "rewards/rejected": -0.01486816443502903, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 3.8759689922480615e-09, + "logits/chosen": -2.170671224594116, + "logits/rejected": -2.1471750736236572, + "logps/chosen": -40.575225830078125, + "logps/rejected": -196.63238525390625, + "loss": 0.6949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04085998609662056, + "rewards/margins": 0.03107604756951332, + "rewards/rejected": -0.07193603366613388, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.0697674418604655e-09, + "logits/chosen": -2.062227725982666, + "logits/rejected": -2.023212194442749, + "logps/chosen": -193.46913146972656, + "logps/rejected": -394.58770751953125, + "loss": 0.7435, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03709259256720543, + "rewards/margins": -0.14914703369140625, + "rewards/rejected": 0.11205444484949112, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 4.263565891472868e-09, + "logits/chosen": -2.4106733798980713, + "logits/rejected": -2.3821308612823486, + "logps/chosen": -4.352597236633301, + "logps/rejected": -199.45217895507812, + "loss": 0.6898, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4536591303767636e-05, + "rewards/margins": 0.006998920347541571, + "rewards/rejected": -0.0070434571243822575, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 4.457364341085271e-09, + "logits/chosen": -2.2695364952087402, + "logits/rejected": -2.2694156169891357, + "logps/chosen": -0.0007359902374446392, + "logps/rejected": -92.66877746582031, + "loss": 0.6901, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.339029020338785e-06, + "rewards/margins": 0.012233918532729149, + "rewards/rejected": -0.012224579229950905, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 4.6511627906976744e-09, + "logits/chosen": -2.1647233963012695, + "logits/rejected": -2.1502225399017334, + "logps/chosen": -212.65341186523438, + "logps/rejected": -339.483154296875, + "loss": 0.7002, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0037551880814135075, + "rewards/margins": -0.04865875467658043, + "rewards/rejected": 0.04490356519818306, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 4.844961240310078e-09, + "logits/chosen": -2.034806251525879, + "logits/rejected": -2.0850095748901367, + "logps/chosen": -192.51303100585938, + "logps/rejected": -267.3485412597656, + "loss": 0.744, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.07314910739660263, + "rewards/margins": -0.11294707655906677, + "rewards/rejected": 0.03979797288775444, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.03875968992248e-09, + "logits/chosen": -1.9350206851959229, + "logits/rejected": -1.9827467203140259, + "logps/chosen": -304.0624694824219, + "logps/rejected": -247.01901245117188, + "loss": 0.6927, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01355590857565403, + "rewards/margins": -0.0097198486328125, + "rewards/rejected": -0.003836059710010886, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 5.232558139534884e-09, + "logits/chosen": -2.2444376945495605, + "logits/rejected": -2.2213566303253174, + "logps/chosen": -0.10457487404346466, + "logps/rejected": -245.8800506591797, + "loss": 0.6917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021017573308199644, + "rewards/margins": 0.007729480043053627, + "rewards/rejected": -0.009831237606704235, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 5.4263565891472866e-09, + "logits/chosen": -2.2715132236480713, + "logits/rejected": -2.2414767742156982, + "logps/chosen": -12.66240119934082, + "logps/rejected": -166.28463745117188, + "loss": 0.6701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01364974956959486, + "rewards/margins": 0.08430404216051102, + "rewards/rejected": -0.07065429538488388, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 5.62015503875969e-09, + "logits/chosen": -2.107072591781616, + "logits/rejected": -2.138511896133423, + "logps/chosen": -213.1365509033203, + "logps/rejected": -219.95144653320312, + "loss": 0.7126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05253753811120987, + "rewards/margins": 0.02253112941980362, + "rewards/rejected": -0.07506866753101349, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 5.813953488372093e-09, + "logits/chosen": -2.2746939659118652, + "logits/rejected": -2.25620174407959, + "logps/chosen": -52.51691436767578, + "logps/rejected": -145.33099365234375, + "loss": 0.6928, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.001532745431177318, + "rewards/margins": -0.0049461363814771175, + "rewards/rejected": 0.0064788819290697575, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.007751937984496e-09, + "logits/chosen": -2.027568817138672, + "logits/rejected": -2.0166985988616943, + "logps/chosen": -73.82940673828125, + "logps/rejected": -190.39736938476562, + "loss": 0.6915, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.017534637823700905, + "rewards/margins": -0.015557097271084785, + "rewards/rejected": 0.03309173509478569, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 6.201550387596899e-09, + "logits/chosen": -2.1398468017578125, + "logits/rejected": -2.1072983741760254, + "logps/chosen": -179.74087524414062, + "logps/rejected": -298.5606689453125, + "loss": 0.6834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02156677283346653, + "rewards/margins": 0.00586547888815403, + "rewards/rejected": 0.0157012939453125, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 6.395348837209302e-09, + "logits/chosen": -2.2609751224517822, + "logits/rejected": -2.196776866912842, + "logps/chosen": -177.2208251953125, + "logps/rejected": -405.36126708984375, + "loss": 0.7367, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1094818115234375, + "rewards/margins": -0.04557190090417862, + "rewards/rejected": -0.06390991061925888, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 6.589147286821706e-09, + "logits/chosen": -2.220968008041382, + "logits/rejected": -2.2155070304870605, + "logps/chosen": -195.90882873535156, + "logps/rejected": -252.4315948486328, + "loss": 0.726, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03707733377814293, + "rewards/margins": -0.05055695027112961, + "rewards/rejected": 0.01347961463034153, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 6.782945736434108e-09, + "logits/chosen": -2.2258615493774414, + "logits/rejected": -2.226794719696045, + "logps/chosen": -27.39501953125, + "logps/rejected": -141.67295837402344, + "loss": 0.6991, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00883331336081028, + "rewards/margins": -0.01457977294921875, + "rewards/rejected": 0.0057464600540697575, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 6.976744186046511e-09, + "logits/chosen": -2.171876907348633, + "logits/rejected": -2.165266275405884, + "logps/chosen": -229.91006469726562, + "logps/rejected": -261.27581787109375, + "loss": 0.6585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05157165601849556, + "rewards/margins": 0.04118652269244194, + "rewards/rejected": 0.010385132394731045, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 7.170542635658915e-09, + "logits/chosen": -2.2107040882110596, + "logits/rejected": -2.2037112712860107, + "logps/chosen": -198.80807495117188, + "logps/rejected": -432.0906066894531, + "loss": 0.6562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0044235228560864925, + "rewards/margins": 0.15294036269187927, + "rewards/rejected": -0.1573638916015625, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 7.364341085271318e-09, + "logits/chosen": -2.143949270248413, + "logits/rejected": -2.1030943393707275, + "logps/chosen": -236.91421508789062, + "logps/rejected": -342.7208251953125, + "loss": 0.6699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01597137562930584, + "rewards/margins": 0.08865204453468323, + "rewards/rejected": -0.07268066704273224, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 7.55813953488372e-09, + "logits/chosen": -2.160705089569092, + "logits/rejected": -2.1501173973083496, + "logps/chosen": -21.494260787963867, + "logps/rejected": -67.05029296875, + "loss": 0.6921, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.015834618359804153, + "rewards/margins": -0.0037775039672851562, + "rewards/rejected": 0.01961212232708931, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 7.751937984496123e-09, + "logits/chosen": -2.067978620529175, + "logits/rejected": -2.0610454082489014, + "logps/chosen": -189.68258666992188, + "logps/rejected": -280.46978759765625, + "loss": 0.7179, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0024810791946947575, + "rewards/margins": -0.11598511040210724, + "rewards/rejected": 0.11846619099378586, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 7.945736434108527e-09, + "logits/chosen": -2.0966808795928955, + "logits/rejected": -2.0967917442321777, + "logps/chosen": -3.1446661949157715, + "logps/rejected": -75.88737487792969, + "loss": 0.6847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023934388533234596, + "rewards/margins": 0.014975952915847301, + "rewards/rejected": 0.008958435617387295, + "step": 41 + }, + { + "epoch": 0.0, + "learning_rate": 8.139534883720931e-09, + "logits/chosen": -2.1224260330200195, + "logits/rejected": -2.1126837730407715, + "logps/chosen": -208.76336669921875, + "logps/rejected": -323.6079406738281, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0096893310546875, + "rewards/margins": 0.0613555908203125, + "rewards/rejected": -0.071044921875, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -1.9829554557800293, + "logits/rejected": -2.0047972202301025, + "logps/chosen": -326.3988342285156, + "logps/rejected": -499.3002624511719, + "loss": 0.7359, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08500061184167862, + "rewards/margins": -0.01849365234375, + "rewards/rejected": -0.06650695949792862, + "step": 43 + }, + { + "epoch": 0.0, + "learning_rate": 8.527131782945736e-09, + "logits/chosen": -2.180764675140381, + "logits/rejected": -2.138803243637085, + "logps/chosen": -192.7882080078125, + "logps/rejected": -253.8825225830078, + "loss": 0.7058, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02193603478372097, + "rewards/margins": -0.02557830698788166, + "rewards/rejected": 0.0036422729026526213, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 8.72093023255814e-09, + "logits/chosen": -2.2278523445129395, + "logits/rejected": -2.2088897228240967, + "logps/chosen": -12.757972717285156, + "logps/rejected": -175.82337951660156, + "loss": 0.6934, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0018859863048419356, + "rewards/margins": -0.00042572012171149254, + "rewards/rejected": -0.001460266183130443, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 8.914728682170542e-09, + "logits/chosen": -2.247387170791626, + "logits/rejected": -2.243746519088745, + "logps/chosen": -0.11950886249542236, + "logps/rejected": -109.5815200805664, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00132281263358891, + "rewards/margins": 0.021984225139021873, + "rewards/rejected": -0.023307038471102715, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 9.108527131782945e-09, + "logits/chosen": -2.1702847480773926, + "logits/rejected": -2.1352248191833496, + "logps/chosen": -225.24026489257812, + "logps/rejected": -282.30364990234375, + "loss": 0.7528, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10648193210363388, + "rewards/margins": -0.13701781630516052, + "rewards/rejected": 0.03053588978946209, + "step": 47 + }, + { + "epoch": 0.0, + "learning_rate": 9.302325581395349e-09, + "logits/chosen": -2.266887903213501, + "logits/rejected": -2.2613139152526855, + "logps/chosen": -7.422872543334961, + "logps/rejected": -107.74263000488281, + "loss": 0.7016, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.0326844353112392e-05, + "rewards/margins": -0.0334588997066021, + "rewards/rejected": 0.033489227294921875, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 9.496124031007751e-09, + "logits/chosen": -2.1734776496887207, + "logits/rejected": -2.177151918411255, + "logps/chosen": -256.2799377441406, + "logps/rejected": -419.996337890625, + "loss": 0.752, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0064849853515625, + "rewards/margins": -0.19297181069850922, + "rewards/rejected": 0.18648682534694672, + "step": 49 + }, + { + "epoch": 0.0, + "learning_rate": 9.689922480620155e-09, + "logits/chosen": -2.229743480682373, + "logits/rejected": -2.2213082313537598, + "logps/chosen": -3.4089090824127197, + "logps/rejected": -170.97764587402344, + "loss": 0.6844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005950689665041864, + "rewards/margins": 0.0361221544444561, + "rewards/rejected": -0.03671722486615181, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 9.883720930232558e-09, + "logits/chosen": -2.1992666721343994, + "logits/rejected": -2.2042598724365234, + "logps/chosen": -186.6855010986328, + "logps/rejected": -255.7127685546875, + "loss": 0.7458, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04282684251666069, + "rewards/margins": -0.1626327484846115, + "rewards/rejected": 0.11980590969324112, + "step": 51 + }, + { + "epoch": 0.0, + "learning_rate": 1.007751937984496e-08, + "logits/chosen": -2.2202346324920654, + "logits/rejected": -2.192445993423462, + "logps/chosen": -164.524658203125, + "logps/rejected": -351.094970703125, + "loss": 0.6955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00830078125, + "rewards/margins": 0.03927917405962944, + "rewards/rejected": -0.04757995530962944, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 1.0271317829457364e-08, + "logits/chosen": -2.1215295791625977, + "logits/rejected": -2.1205360889434814, + "logps/chosen": -27.991548538208008, + "logps/rejected": -102.5394287109375, + "loss": 0.6939, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02109546773135662, + "rewards/margins": 0.00897960551083088, + "rewards/rejected": -0.0300750732421875, + "step": 53 + }, + { + "epoch": 0.0, + "learning_rate": 1.0465116279069768e-08, + "logits/chosen": -1.9988274574279785, + "logits/rejected": -1.909447193145752, + "logps/chosen": -223.84402465820312, + "logps/rejected": -443.3267822265625, + "loss": 0.6847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0048614502884447575, + "rewards/margins": 0.02849426306784153, + "rewards/rejected": -0.02363281324505806, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 1.0658914728682169e-08, + "logits/chosen": -2.306286573410034, + "logits/rejected": -2.27531099319458, + "logps/chosen": -50.50160217285156, + "logps/rejected": -204.90127563476562, + "loss": 0.6815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015055847354233265, + "rewards/margins": 0.05105438455939293, + "rewards/rejected": -0.06611023098230362, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.0852713178294573e-08, + "logits/chosen": -2.2773449420928955, + "logits/rejected": -2.217329978942871, + "logps/chosen": -78.54661560058594, + "logps/rejected": -308.2908935546875, + "loss": 0.6469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.062256623059511185, + "rewards/margins": 0.12208328396081924, + "rewards/rejected": -0.05982666090130806, + "step": 56 + }, + { + "epoch": 0.0, + "learning_rate": 1.1046511627906977e-08, + "logits/chosen": -1.951431155204773, + "logits/rejected": -1.9654927253723145, + "logps/chosen": -20.867752075195312, + "logps/rejected": -222.45672607421875, + "loss": 0.6994, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0032505036797374487, + "rewards/margins": -0.03795280680060387, + "rewards/rejected": 0.04120330885052681, + "step": 57 + }, + { + "epoch": 0.0, + "learning_rate": 1.124031007751938e-08, + "logits/chosen": -2.1877403259277344, + "logits/rejected": -2.1860499382019043, + "logps/chosen": -40.376319885253906, + "logps/rejected": -177.95823669433594, + "loss": 0.7092, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.025664521381258965, + "rewards/margins": -0.029576875269412994, + "rewards/rejected": 0.0039123534224927425, + "step": 58 + }, + { + "epoch": 0.0, + "learning_rate": 1.1434108527131782e-08, + "logits/chosen": -2.212672472000122, + "logits/rejected": -2.1410951614379883, + "logps/chosen": -230.22312927246094, + "logps/rejected": -382.5626220703125, + "loss": 0.6988, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.001933288644067943, + "rewards/margins": -0.03896636888384819, + "rewards/rejected": 0.0370330810546875, + "step": 59 + }, + { + "epoch": 0.0, + "learning_rate": 1.1627906976744186e-08, + "logits/chosen": -2.405893325805664, + "logits/rejected": -2.396190881729126, + "logps/chosen": -51.071380615234375, + "logps/rejected": -243.70309448242188, + "loss": 0.652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06855469197034836, + "rewards/margins": 0.10332031548023224, + "rewards/rejected": -0.03476562723517418, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.1821705426356589e-08, + "logits/chosen": -2.0284013748168945, + "logits/rejected": -2.022871971130371, + "logps/chosen": -0.690685510635376, + "logps/rejected": -116.98268127441406, + "loss": 0.7038, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0056288959458470345, + "rewards/margins": -0.044893573969602585, + "rewards/rejected": 0.039264678955078125, + "step": 61 + }, + { + "epoch": 0.0, + "learning_rate": 1.2015503875968993e-08, + "logits/chosen": -2.01338529586792, + "logits/rejected": -2.006033420562744, + "logps/chosen": -35.53407669067383, + "logps/rejected": -194.33065795898438, + "loss": 0.7241, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.049687959253787994, + "rewards/margins": -0.08794937282800674, + "rewards/rejected": 0.03826141357421875, + "step": 62 + }, + { + "epoch": 0.0, + "learning_rate": 1.2209302325581395e-08, + "logits/chosen": -2.2694411277770996, + "logits/rejected": -2.260047197341919, + "logps/chosen": -196.5650634765625, + "logps/rejected": -276.741943359375, + "loss": 0.7382, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04139099270105362, + "rewards/margins": -0.11092224717140198, + "rewards/rejected": 0.06953125447034836, + "step": 63 + }, + { + "epoch": 0.0, + "learning_rate": 1.2403100775193797e-08, + "logits/chosen": -2.287919521331787, + "logits/rejected": -2.264690637588501, + "logps/chosen": -7.720747470855713, + "logps/rejected": -226.759033203125, + "loss": 0.6869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006555795669555664, + "rewards/margins": 0.013339852914214134, + "rewards/rejected": -0.0067840577103197575, + "step": 64 + }, + { + "epoch": 0.0, + "learning_rate": 1.2596899224806201e-08, + "logits/chosen": -2.169144868850708, + "logits/rejected": -2.135016679763794, + "logps/chosen": -195.32888793945312, + "logps/rejected": -351.4256591796875, + "loss": 0.744, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09679412841796875, + "rewards/margins": -0.09237213432788849, + "rewards/rejected": -0.004421997349709272, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.2790697674418604e-08, + "logits/chosen": -1.9971495866775513, + "logits/rejected": -1.9538509845733643, + "logps/chosen": -210.16888427734375, + "logps/rejected": -266.7147216796875, + "loss": 0.6676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08595886081457138, + "rewards/margins": 0.04064330831170082, + "rewards/rejected": 0.04531555250287056, + "step": 66 + }, + { + "epoch": 0.0, + "learning_rate": 1.2984496124031008e-08, + "logits/chosen": -2.072216510772705, + "logits/rejected": -2.0215227603912354, + "logps/chosen": -226.07962036132812, + "logps/rejected": -346.85693359375, + "loss": 0.7552, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06683807820081711, + "rewards/margins": -0.16255950927734375, + "rewards/rejected": 0.09572143852710724, + "step": 67 + }, + { + "epoch": 0.0, + "learning_rate": 1.3178294573643412e-08, + "logits/chosen": -2.294879198074341, + "logits/rejected": -2.292799711227417, + "logps/chosen": -0.45998474955558777, + "logps/rejected": -111.10633087158203, + "loss": 0.7013, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.004223254509270191, + "rewards/margins": -0.02834511175751686, + "rewards/rejected": 0.024121856316924095, + "step": 68 + }, + { + "epoch": 0.0, + "learning_rate": 1.3372093023255813e-08, + "logits/chosen": -2.074608325958252, + "logits/rejected": -2.0755183696746826, + "logps/chosen": -30.400876998901367, + "logps/rejected": -92.63516998291016, + "loss": 0.6918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012348175514489412, + "rewards/margins": 0.005270767025649548, + "rewards/rejected": -0.0065055848099291325, + "step": 69 + }, + { + "epoch": 0.0, + "learning_rate": 1.3565891472868215e-08, + "logits/chosen": -2.1969199180603027, + "logits/rejected": -2.196047306060791, + "logps/chosen": -22.469511032104492, + "logps/rejected": -246.0426788330078, + "loss": 0.7272, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.023714257404208183, + "rewards/margins": -0.10219784080982208, + "rewards/rejected": 0.07848358154296875, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.375968992248062e-08, + "logits/chosen": -2.1136341094970703, + "logits/rejected": -2.106001615524292, + "logps/chosen": -43.32163619995117, + "logps/rejected": -204.81210327148438, + "loss": 0.696, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0033958435524255037, + "rewards/margins": -0.01051254291087389, + "rewards/rejected": 0.0071166991256177425, + "step": 71 + }, + { + "epoch": 0.0, + "learning_rate": 1.3953488372093022e-08, + "logits/chosen": -2.148130416870117, + "logits/rejected": -2.1415226459503174, + "logps/chosen": -9.663826942443848, + "logps/rejected": -151.00784301757812, + "loss": 0.673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05596513673663139, + "rewards/margins": 0.020406052470207214, + "rewards/rejected": 0.03555908426642418, + "step": 72 + }, + { + "epoch": 0.0, + "learning_rate": 1.4147286821705426e-08, + "logits/chosen": -2.1278462409973145, + "logits/rejected": -2.1358797550201416, + "logps/chosen": -235.63160705566406, + "logps/rejected": -345.89898681640625, + "loss": 0.7119, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.005902099888771772, + "rewards/margins": -0.07859496772289276, + "rewards/rejected": 0.07269287109375, + "step": 73 + }, + { + "epoch": 0.0, + "learning_rate": 1.434108527131783e-08, + "logits/chosen": -2.130514144897461, + "logits/rejected": -2.0708582401275635, + "logps/chosen": -95.68710327148438, + "logps/rejected": -381.87158203125, + "loss": 0.6938, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01567230187356472, + "rewards/margins": -0.0205612201243639, + "rewards/rejected": 0.03623352199792862, + "step": 74 + }, + { + "epoch": 0.0, + "learning_rate": 1.4534883720930232e-08, + "logits/chosen": -2.0842957496643066, + "logits/rejected": -2.114494800567627, + "logps/chosen": -285.65509033203125, + "logps/rejected": -250.60781860351562, + "loss": 0.7061, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06787109375, + "rewards/margins": -0.01792297139763832, + "rewards/rejected": -0.04994812235236168, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.4728682170542636e-08, + "logits/chosen": -2.0717756748199463, + "logits/rejected": -2.09660005569458, + "logps/chosen": -217.3656005859375, + "logps/rejected": -303.5911865234375, + "loss": 0.7192, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02398681640625, + "rewards/margins": -0.07759399712085724, + "rewards/rejected": 0.05360717698931694, + "step": 76 + }, + { + "epoch": 0.0, + "learning_rate": 1.4922480620155037e-08, + "logits/chosen": -2.1007633209228516, + "logits/rejected": -2.076171636581421, + "logps/chosen": -236.3071746826172, + "logps/rejected": -339.91937255859375, + "loss": 0.6554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04782257229089737, + "rewards/margins": 0.09122467041015625, + "rewards/rejected": -0.04340210184454918, + "step": 77 + }, + { + "epoch": 0.0, + "learning_rate": 1.511627906976744e-08, + "logits/chosen": -2.143369436264038, + "logits/rejected": -2.1279871463775635, + "logps/chosen": -0.00021194780129007995, + "logps/rejected": -132.33189392089844, + "loss": 0.6953, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.2754986755680875e-06, + "rewards/margins": -0.008480585180222988, + "rewards/rejected": 0.008479309268295765, + "step": 78 + }, + { + "epoch": 0.0, + "learning_rate": 1.5310077519379845e-08, + "logits/chosen": -2.3869946002960205, + "logits/rejected": -2.388528347015381, + "logps/chosen": -27.88675308227539, + "logps/rejected": -139.73365783691406, + "loss": 0.6904, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0016828536754474044, + "rewards/margins": -0.001278877374716103, + "rewards/rejected": 0.0029617310501635075, + "step": 79 + }, + { + "epoch": 0.0, + "learning_rate": 1.5503875968992246e-08, + "logits/chosen": -2.13249135017395, + "logits/rejected": -2.132880210876465, + "logps/chosen": -0.0003675938060041517, + "logps/rejected": -29.51634407043457, + "loss": 0.6989, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3114186003804207e-07, + "rewards/margins": -0.014927041716873646, + "rewards/rejected": 0.014926910400390625, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.569767441860465e-08, + "logits/chosen": -2.3426144123077393, + "logits/rejected": -2.342311143875122, + "logps/chosen": -19.519304275512695, + "logps/rejected": -100.51524353027344, + "loss": 0.683, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9373170036706142e-05, + "rewards/margins": 0.04047813266515732, + "rewards/rejected": -0.04050750657916069, + "step": 81 + }, + { + "epoch": 0.0, + "learning_rate": 1.5891472868217054e-08, + "logits/chosen": -1.958461046218872, + "logits/rejected": -2.0367963314056396, + "logps/chosen": -318.0076904296875, + "logps/rejected": -383.6347961425781, + "loss": 0.8107, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2120361328125, + "rewards/margins": -0.23723144829273224, + "rewards/rejected": 0.02519531361758709, + "step": 82 + }, + { + "epoch": 0.0, + "learning_rate": 1.6085271317829458e-08, + "logits/chosen": -2.2755727767944336, + "logits/rejected": -2.2486512660980225, + "logps/chosen": -179.62960815429688, + "logps/rejected": -314.9161376953125, + "loss": 0.6897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01626281812787056, + "rewards/margins": 0.01110534742474556, + "rewards/rejected": 0.005157470703125, + "step": 83 + }, + { + "epoch": 0.0, + "learning_rate": 1.6279069767441862e-08, + "logits/chosen": -2.261662483215332, + "logits/rejected": -2.2593560218811035, + "logps/chosen": -0.18059581518173218, + "logps/rejected": -65.82353973388672, + "loss": 0.7038, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.001690740929916501, + "rewards/margins": -0.04398415610194206, + "rewards/rejected": 0.045674897730350494, + "step": 84 + }, + { + "epoch": 0.0, + "learning_rate": 1.6472868217054263e-08, + "logits/chosen": -2.1176669597625732, + "logits/rejected": -2.079845428466797, + "logps/chosen": -311.89996337890625, + "logps/rejected": -412.8753356933594, + "loss": 0.7585, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08783264458179474, + "rewards/margins": -0.17863769829273224, + "rewards/rejected": 0.0908050537109375, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -2.093197822570801, + "logits/rejected": -2.0712499618530273, + "logps/chosen": -112.02984619140625, + "logps/rejected": -277.2025146484375, + "loss": 0.6785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.054888155311346054, + "rewards/margins": 0.0028312690556049347, + "rewards/rejected": 0.05205688625574112, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 1.6860465116279068e-08, + "logits/chosen": -2.304257869720459, + "logits/rejected": -2.2713239192962646, + "logps/chosen": -234.09994506835938, + "logps/rejected": -384.47760009765625, + "loss": 0.6787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05797271803021431, + "rewards/margins": 0.02052154392004013, + "rewards/rejected": 0.03745117411017418, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 1.7054263565891472e-08, + "logits/chosen": -2.1396520137786865, + "logits/rejected": -2.1298749446868896, + "logps/chosen": -287.43115234375, + "logps/rejected": -367.6432800292969, + "loss": 0.6815, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01608581654727459, + "rewards/margins": -0.0031555164605379105, + "rewards/rejected": 0.0192413330078125, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 1.7248062015503876e-08, + "logits/chosen": -2.2426414489746094, + "logits/rejected": -2.2452008724212646, + "logps/chosen": -180.85699462890625, + "logps/rejected": -201.47177124023438, + "loss": 0.6909, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0006195068708620965, + "rewards/margins": -0.00817565992474556, + "rewards/rejected": 0.00879516638815403, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 1.744186046511628e-08, + "logits/chosen": -2.141282081604004, + "logits/rejected": -2.1240882873535156, + "logps/chosen": -0.0003020478179678321, + "logps/rejected": -189.14306640625, + "loss": 0.6869, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.342613000422716e-08, + "rewards/margins": 0.026794517412781715, + "rewards/rejected": -0.02679443359375, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.763565891472868e-08, + "logits/chosen": -2.2324142456054688, + "logits/rejected": -2.1991195678710938, + "logps/chosen": -22.861164093017578, + "logps/rejected": -207.5128173828125, + "loss": 0.7045, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.403259350103326e-05, + "rewards/margins": -0.0480831153690815, + "rewards/rejected": 0.04805908352136612, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 1.7829457364341085e-08, + "logits/chosen": -2.2480926513671875, + "logits/rejected": -2.241847515106201, + "logps/chosen": -53.91372299194336, + "logps/rejected": -161.06399536132812, + "loss": 0.6844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01293029822409153, + "rewards/margins": 0.02083740383386612, + "rewards/rejected": -0.007907104678452015, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 1.8023255813953486e-08, + "logits/chosen": -2.3065390586853027, + "logits/rejected": -2.3019587993621826, + "logps/chosen": -28.316373825073242, + "logps/rejected": -95.61224365234375, + "loss": 0.6839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0075931549072265625, + "rewards/margins": 0.029626846313476562, + "rewards/rejected": -0.02203369140625, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 1.821705426356589e-08, + "logits/chosen": -2.2120776176452637, + "logits/rejected": -2.2024519443511963, + "logps/chosen": -23.385879516601562, + "logps/rejected": -217.9351348876953, + "loss": 0.7044, + "rewards/accuracies": 0.0, + "rewards/chosen": 9.5367431640625e-07, + "rewards/margins": -0.0339239127933979, + "rewards/rejected": 0.03392486646771431, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 1.8410852713178294e-08, + "logits/chosen": -2.2401535511016846, + "logits/rejected": -2.2349188327789307, + "logps/chosen": -57.016788482666016, + "logps/rejected": -271.88739013671875, + "loss": 0.6501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0747455582022667, + "rewards/margins": 0.10802802443504333, + "rewards/rejected": -0.03328246995806694, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 1.8604651162790698e-08, + "logits/chosen": -2.447382688522339, + "logits/rejected": -2.424625873565674, + "logps/chosen": -0.005253523588180542, + "logps/rejected": -141.81268310546875, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.338460661936551e-05, + "rewards/margins": 0.03024374321103096, + "rewards/rejected": -0.03018035925924778, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.87984496124031e-08, + "logits/chosen": -2.2262380123138428, + "logits/rejected": -2.216157913208008, + "logps/chosen": -8.85405445098877, + "logps/rejected": -170.23934936523438, + "loss": 0.6754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04867878183722496, + "rewards/margins": 0.02229786105453968, + "rewards/rejected": 0.02638092078268528, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 1.8992248062015503e-08, + "logits/chosen": -2.2869646549224854, + "logits/rejected": -2.2820897102355957, + "logps/chosen": -58.494686126708984, + "logps/rejected": -188.43722534179688, + "loss": 0.7014, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01617431640625, + "rewards/margins": -0.01367950439453125, + "rewards/rejected": -0.00249481201171875, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 1.9186046511627907e-08, + "logits/chosen": -2.2632713317871094, + "logits/rejected": -2.1967482566833496, + "logps/chosen": -233.31207275390625, + "logps/rejected": -337.694091796875, + "loss": 0.6771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04447021707892418, + "rewards/margins": 0.012426760047674179, + "rewards/rejected": 0.03204345703125, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 1.937984496124031e-08, + "logits/chosen": -2.1321160793304443, + "logits/rejected": -2.1143267154693604, + "logps/chosen": -136.1132354736328, + "logps/rejected": -232.1945343017578, + "loss": 0.6804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0020797729957848787, + "rewards/margins": 0.05668945610523224, + "rewards/rejected": -0.05460968241095543, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.957364341085271e-08, + "logits/chosen": -2.060286283493042, + "logits/rejected": -2.0627734661102295, + "logps/chosen": -317.7697448730469, + "logps/rejected": -380.99072265625, + "loss": 0.7292, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10509033501148224, + "rewards/margins": -0.0019317641854286194, + "rewards/rejected": -0.10315857082605362, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 1.9767441860465116e-08, + "logits/chosen": -1.9842956066131592, + "logits/rejected": -1.9658697843551636, + "logps/chosen": -217.56654357910156, + "logps/rejected": -323.67236328125, + "loss": 0.685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05442657694220543, + "rewards/margins": 0.09326018393039703, + "rewards/rejected": -0.14768676459789276, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 1.9961240310077516e-08, + "logits/chosen": -2.1406612396240234, + "logits/rejected": -2.143317222595215, + "logps/chosen": -120.97405242919922, + "logps/rejected": -202.01898193359375, + "loss": 0.6889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022870635613799095, + "rewards/margins": 0.0070426929742097855, + "rewards/rejected": 0.01582794263958931, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 2.015503875968992e-08, + "logits/chosen": -2.150463342666626, + "logits/rejected": -2.1385364532470703, + "logps/chosen": -131.2736358642578, + "logps/rejected": -299.31329345703125, + "loss": 0.6641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05624084547162056, + "rewards/margins": 0.04731140285730362, + "rewards/rejected": 0.008929443545639515, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 2.0348837209302324e-08, + "logits/chosen": -2.044654607772827, + "logits/rejected": -1.990453839302063, + "logps/chosen": -313.288818359375, + "logps/rejected": -490.7568664550781, + "loss": 0.6629, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10269775241613388, + "rewards/margins": -0.02533569186925888, + "rewards/rejected": 0.12803344428539276, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.054263565891473e-08, + "logits/chosen": -2.0068745613098145, + "logits/rejected": -1.941979169845581, + "logps/chosen": -193.32386779785156, + "logps/rejected": -413.4327392578125, + "loss": 0.6817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013301086612045765, + "rewards/margins": 0.06203765794634819, + "rewards/rejected": -0.07533874362707138, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 2.0736434108527133e-08, + "logits/chosen": -2.2465035915374756, + "logits/rejected": -2.2149102687835693, + "logps/chosen": -92.85482788085938, + "logps/rejected": -383.6329040527344, + "loss": 0.6993, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01875610463321209, + "rewards/margins": -0.01117859035730362, + "rewards/rejected": -0.0075775147415697575, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 2.0930232558139537e-08, + "logits/chosen": -2.196261405944824, + "logits/rejected": -2.1575560569763184, + "logps/chosen": -63.079689025878906, + "logps/rejected": -287.2218322753906, + "loss": 0.7, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.021402740851044655, + "rewards/margins": -0.010395050048828125, + "rewards/rejected": -0.01100769080221653, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 2.1124031007751934e-08, + "logits/chosen": -2.0997817516326904, + "logits/rejected": -2.093613624572754, + "logps/chosen": -42.0269775390625, + "logps/rejected": -263.9812927246094, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00952835101634264, + "rewards/margins": 0.056864164769649506, + "rewards/rejected": -0.04733581468462944, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 2.1317829457364338e-08, + "logits/chosen": -2.2576513290405273, + "logits/rejected": -2.2193737030029297, + "logps/chosen": -242.56634521484375, + "logps/rejected": -430.20758056640625, + "loss": 0.7209, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04219665750861168, + "rewards/margins": -0.02505493350327015, + "rewards/rejected": -0.01714172400534153, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.1511627906976742e-08, + "logits/chosen": -2.044940948486328, + "logits/rejected": -2.008739948272705, + "logps/chosen": -62.36235046386719, + "logps/rejected": -328.0372619628906, + "loss": 0.7058, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.009875488467514515, + "rewards/margins": -0.04930419847369194, + "rewards/rejected": 0.0394287109375, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 2.1705426356589146e-08, + "logits/chosen": -2.2189388275146484, + "logits/rejected": -2.2304317951202393, + "logps/chosen": -270.73974609375, + "logps/rejected": -292.8398742675781, + "loss": 0.6767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0015106201171875, + "rewards/margins": 0.06307373195886612, + "rewards/rejected": -0.06156311184167862, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 2.189922480620155e-08, + "logits/chosen": -2.171511650085449, + "logits/rejected": -2.1307294368743896, + "logps/chosen": -271.6164245605469, + "logps/rejected": -444.32403564453125, + "loss": 0.7017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08930053561925888, + "rewards/margins": 0.061862193048000336, + "rewards/rejected": -0.15116272866725922, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 2.2093023255813954e-08, + "logits/chosen": -1.9344784021377563, + "logits/rejected": -1.8974343538284302, + "logps/chosen": -215.44154357910156, + "logps/rejected": -421.92730712890625, + "loss": 0.7279, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05871276929974556, + "rewards/margins": -0.08974914252758026, + "rewards/rejected": 0.031036376953125, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 2.2286821705426355e-08, + "logits/chosen": -2.160456657409668, + "logits/rejected": -2.1550798416137695, + "logps/chosen": -281.17047119140625, + "logps/rejected": -406.38018798828125, + "loss": 0.7481, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.053863525390625, + "rewards/margins": -0.12500610947608948, + "rewards/rejected": 0.07114257663488388, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.248062015503876e-08, + "logits/chosen": -2.05511474609375, + "logits/rejected": -2.0266714096069336, + "logps/chosen": -191.009521484375, + "logps/rejected": -305.81597900390625, + "loss": 0.6263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09808044880628586, + "rewards/margins": 0.17358703911304474, + "rewards/rejected": -0.07550659030675888, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 2.267441860465116e-08, + "logits/chosen": -2.0967442989349365, + "logits/rejected": -2.083115577697754, + "logps/chosen": -23.30881118774414, + "logps/rejected": -194.9810791015625, + "loss": 0.7072, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014406776987016201, + "rewards/margins": -0.048195842653512955, + "rewards/rejected": 0.03378906473517418, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 2.2868217054263564e-08, + "logits/chosen": -2.2377936840057373, + "logits/rejected": -2.23014760017395, + "logps/chosen": -44.13267135620117, + "logps/rejected": -270.3117370605469, + "loss": 0.6884, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.031035615131258965, + "rewards/margins": -0.014453886076807976, + "rewards/rejected": 0.04548950120806694, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 2.3062015503875968e-08, + "logits/chosen": -2.059849262237549, + "logits/rejected": -2.0013697147369385, + "logps/chosen": -194.4412841796875, + "logps/rejected": -288.04718017578125, + "loss": 0.6654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.052520751953125, + "rewards/margins": 0.04588622972369194, + "rewards/rejected": 0.006634521763771772, + "step": 119 + }, + { + "epoch": 0.01, + "learning_rate": 2.3255813953488372e-08, + "logits/chosen": -1.9237143993377686, + "logits/rejected": -1.9069525003433228, + "logps/chosen": -2.5616610050201416, + "logps/rejected": -242.6419677734375, + "loss": 0.6888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010224580764770508, + "rewards/margins": 0.01638958603143692, + "rewards/rejected": -0.015367127023637295, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.3449612403100773e-08, + "logits/chosen": -2.235161542892456, + "logits/rejected": -2.244630813598633, + "logps/chosen": -189.11459350585938, + "logps/rejected": -242.1538543701172, + "loss": 0.6925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02300415001809597, + "rewards/margins": 0.03614196926355362, + "rewards/rejected": -0.05914611741900444, + "step": 121 + }, + { + "epoch": 0.01, + "learning_rate": 2.3643410852713177e-08, + "logits/chosen": -1.9776339530944824, + "logits/rejected": -2.0178420543670654, + "logps/chosen": -270.2067565917969, + "logps/rejected": -291.2716369628906, + "loss": 0.6774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02795105054974556, + "rewards/margins": 0.03748779371380806, + "rewards/rejected": -0.0095367431640625, + "step": 122 + }, + { + "epoch": 0.01, + "learning_rate": 2.383720930232558e-08, + "logits/chosen": -2.1970407962799072, + "logits/rejected": -2.18719482421875, + "logps/chosen": -102.86848449707031, + "logps/rejected": -306.4842834472656, + "loss": 0.7056, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00542449951171875, + "rewards/margins": -0.04776458814740181, + "rewards/rejected": 0.04234008863568306, + "step": 123 + }, + { + "epoch": 0.01, + "learning_rate": 2.4031007751937985e-08, + "logits/chosen": -2.2842113971710205, + "logits/rejected": -2.2474496364593506, + "logps/chosen": -96.10472869873047, + "logps/rejected": -315.30084228515625, + "loss": 0.7216, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.021469879895448685, + "rewards/margins": -0.09566116333007812, + "rewards/rejected": 0.07419128715991974, + "step": 124 + }, + { + "epoch": 0.01, + "learning_rate": 2.4224806201550386e-08, + "logits/chosen": -2.031776189804077, + "logits/rejected": -1.985163927078247, + "logps/chosen": -238.27867126464844, + "logps/rejected": -298.9018859863281, + "loss": 0.6805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05366668850183487, + "rewards/margins": 0.00500946119427681, + "rewards/rejected": 0.04865722730755806, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.441860465116279e-08, + "logits/chosen": -2.1735517978668213, + "logits/rejected": -2.178011655807495, + "logps/chosen": -50.97659683227539, + "logps/rejected": -139.03189086914062, + "loss": 0.6807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008802413940429688, + "rewards/margins": 0.04737968370318413, + "rewards/rejected": -0.03857726976275444, + "step": 126 + }, + { + "epoch": 0.01, + "learning_rate": 2.461240310077519e-08, + "logits/chosen": -2.0536913871765137, + "logits/rejected": -2.0105738639831543, + "logps/chosen": -236.71385192871094, + "logps/rejected": -455.5118408203125, + "loss": 0.6745, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07910614460706711, + "rewards/margins": -0.022721856832504272, + "rewards/rejected": 0.10182800143957138, + "step": 127 + }, + { + "epoch": 0.01, + "learning_rate": 2.4806201550387595e-08, + "logits/chosen": -2.252440929412842, + "logits/rejected": -2.245352268218994, + "logps/chosen": -22.435546875, + "logps/rejected": -186.51922607421875, + "loss": 0.6818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015758706256747246, + "rewards/margins": 0.060695454478263855, + "rewards/rejected": -0.07645416259765625, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-08, + "logits/chosen": -2.218198776245117, + "logits/rejected": -2.201958417892456, + "logps/chosen": -94.10307312011719, + "logps/rejected": -240.0253448486328, + "loss": 0.7225, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04207305982708931, + "rewards/margins": -0.08489838242530823, + "rewards/rejected": 0.04282531887292862, + "step": 129 + }, + { + "epoch": 0.01, + "learning_rate": 2.5193798449612403e-08, + "logits/chosen": -1.9323276281356812, + "logits/rejected": -1.8153045177459717, + "logps/chosen": -229.43768310546875, + "logps/rejected": -493.2055969238281, + "loss": 0.6845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02827758900821209, + "rewards/margins": 0.0370025634765625, + "rewards/rejected": -0.06528015434741974, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.5387596899224807e-08, + "logits/chosen": -2.167231321334839, + "logits/rejected": -2.175071954727173, + "logps/chosen": -187.81158447265625, + "logps/rejected": -281.94439697265625, + "loss": 0.7031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03445587307214737, + "rewards/margins": 0.006315611302852631, + "rewards/rejected": -0.040771484375, + "step": 131 + }, + { + "epoch": 0.01, + "learning_rate": 2.5581395348837208e-08, + "logits/chosen": -2.1122946739196777, + "logits/rejected": -2.036062717437744, + "logps/chosen": -80.24554443359375, + "logps/rejected": -450.35870361328125, + "loss": 0.714, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02402801625430584, + "rewards/margins": -0.06200408935546875, + "rewards/rejected": 0.03797607496380806, + "step": 132 + }, + { + "epoch": 0.01, + "learning_rate": 2.5775193798449612e-08, + "logits/chosen": -2.3197362422943115, + "logits/rejected": -2.309018135070801, + "logps/chosen": -0.0019300860585644841, + "logps/rejected": -236.59925842285156, + "loss": 0.6917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7172133084386587e-05, + "rewards/margins": 0.0055599152110517025, + "rewards/rejected": -0.00557708740234375, + "step": 133 + }, + { + "epoch": 0.01, + "learning_rate": 2.5968992248062016e-08, + "logits/chosen": -2.1737091541290283, + "logits/rejected": -2.1616549491882324, + "logps/chosen": -0.01830245368182659, + "logps/rejected": -146.1711883544922, + "loss": 0.6948, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3113021850585938e-06, + "rewards/margins": -0.006337189581245184, + "rewards/rejected": 0.0063385008834302425, + "step": 134 + }, + { + "epoch": 0.01, + "learning_rate": 2.616279069767442e-08, + "logits/chosen": -2.1548898220062256, + "logits/rejected": -2.119224786758423, + "logps/chosen": -62.70216369628906, + "logps/rejected": -340.7146911621094, + "loss": 0.7208, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0026348114479333162, + "rewards/margins": -0.11248970776796341, + "rewards/rejected": 0.11512451618909836, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.6356589147286824e-08, + "logits/chosen": -2.2298240661621094, + "logits/rejected": -2.2223899364471436, + "logps/chosen": -46.0291748046875, + "logps/rejected": -186.49749755859375, + "loss": 0.6858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004891205113381147, + "rewards/margins": 0.052582550793886185, + "rewards/rejected": -0.05747375637292862, + "step": 136 + }, + { + "epoch": 0.01, + "learning_rate": 2.6550387596899225e-08, + "logits/chosen": -2.0843329429626465, + "logits/rejected": -1.8856031894683838, + "logps/chosen": -240.97158813476562, + "logps/rejected": -715.830078125, + "loss": 0.6791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0288238525390625, + "rewards/margins": 0.0050811767578125, + "rewards/rejected": 0.02374267578125, + "step": 137 + }, + { + "epoch": 0.01, + "learning_rate": 2.6744186046511626e-08, + "logits/chosen": -2.137162685394287, + "logits/rejected": -2.114732503890991, + "logps/chosen": -273.9532165527344, + "logps/rejected": -497.05767822265625, + "loss": 0.6874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04238586500287056, + "rewards/margins": 0.0558929480612278, + "rewards/rejected": -0.09827881306409836, + "step": 138 + }, + { + "epoch": 0.01, + "learning_rate": 2.6937984496124026e-08, + "logits/chosen": -2.282623291015625, + "logits/rejected": -2.278903007507324, + "logps/chosen": -196.8769073486328, + "logps/rejected": -282.09674072265625, + "loss": 0.6832, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06101074442267418, + "rewards/margins": -0.002615358680486679, + "rewards/rejected": 0.06362610310316086, + "step": 139 + }, + { + "epoch": 0.01, + "learning_rate": 2.713178294573643e-08, + "logits/chosen": -2.3252642154693604, + "logits/rejected": -2.3071882724761963, + "logps/chosen": -202.4792022705078, + "logps/rejected": -374.9632568359375, + "loss": 0.7777, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.132598876953125, + "rewards/margins": -0.14436034858226776, + "rewards/rejected": 0.011761474423110485, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.7325581395348834e-08, + "logits/chosen": -1.9626708030700684, + "logits/rejected": -1.8531423807144165, + "logps/chosen": -364.32818603515625, + "logps/rejected": -558.65869140625, + "loss": 0.726, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0706634521484375, + "rewards/margins": -0.0505218505859375, + "rewards/rejected": -0.0201416015625, + "step": 141 + }, + { + "epoch": 0.01, + "learning_rate": 2.751937984496124e-08, + "logits/chosen": -2.267178535461426, + "logits/rejected": -2.255632162094116, + "logps/chosen": -33.88450622558594, + "logps/rejected": -78.59884643554688, + "loss": 0.6934, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.009564208798110485, + "rewards/margins": -0.014329529367387295, + "rewards/rejected": 0.02389373816549778, + "step": 142 + }, + { + "epoch": 0.01, + "learning_rate": 2.7713178294573643e-08, + "logits/chosen": -2.113788604736328, + "logits/rejected": -2.109342575073242, + "logps/chosen": -26.15936279296875, + "logps/rejected": -162.38754272460938, + "loss": 0.6881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006200790521688759, + "rewards/margins": 0.023071860894560814, + "rewards/rejected": -0.02245178259909153, + "step": 143 + }, + { + "epoch": 0.01, + "learning_rate": 2.7906976744186043e-08, + "logits/chosen": -2.104773759841919, + "logits/rejected": -2.1037392616271973, + "logps/chosen": -18.062747955322266, + "logps/rejected": -217.64671325683594, + "loss": 0.6975, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.009369850158691406, + "rewards/margins": -0.011710548773407936, + "rewards/rejected": 0.002340698381885886, + "step": 144 + }, + { + "epoch": 0.01, + "learning_rate": 2.8100775193798447e-08, + "logits/chosen": -2.1716766357421875, + "logits/rejected": -2.1700093746185303, + "logps/chosen": -5.1518425941467285, + "logps/rejected": -97.41159057617188, + "loss": 0.6881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003114223654847592, + "rewards/margins": 0.018147896975278854, + "rewards/rejected": -0.018459320068359375, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.829457364341085e-08, + "logits/chosen": -2.393754720687866, + "logits/rejected": -2.367126226425171, + "logps/chosen": -1.347376823425293, + "logps/rejected": -231.91183471679688, + "loss": 0.6968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01648123376071453, + "rewards/margins": 0.002142118290066719, + "rewards/rejected": -0.01862335205078125, + "step": 146 + }, + { + "epoch": 0.01, + "learning_rate": 2.8488372093023256e-08, + "logits/chosen": -2.1433043479919434, + "logits/rejected": -2.130708932876587, + "logps/chosen": -62.80131912231445, + "logps/rejected": -239.41006469726562, + "loss": 0.706, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012506866827607155, + "rewards/margins": -0.047086335718631744, + "rewards/rejected": 0.03457946702837944, + "step": 147 + }, + { + "epoch": 0.01, + "learning_rate": 2.868217054263566e-08, + "logits/chosen": -2.285872459411621, + "logits/rejected": -2.2621281147003174, + "logps/chosen": -259.2626953125, + "logps/rejected": -339.0538024902344, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03607482835650444, + "rewards/margins": 0.07236023247241974, + "rewards/rejected": -0.10843505710363388, + "step": 148 + }, + { + "epoch": 0.01, + "learning_rate": 2.8875968992248064e-08, + "logits/chosen": -1.9679968357086182, + "logits/rejected": -2.007974863052368, + "logps/chosen": -209.37673950195312, + "logps/rejected": -196.74758911132812, + "loss": 0.6725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06166381761431694, + "rewards/margins": 0.01564636081457138, + "rewards/rejected": 0.04601745679974556, + "step": 149 + }, + { + "epoch": 0.01, + "learning_rate": 2.9069767441860464e-08, + "logits/chosen": -2.3523499965667725, + "logits/rejected": -2.3192667961120605, + "logps/chosen": -5.130951404571533, + "logps/rejected": -231.56216430664062, + "loss": 0.7193, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01242141705006361, + "rewards/margins": -0.089746855199337, + "rewards/rejected": 0.07732544094324112, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 2.926356589147287e-08, + "logits/chosen": -2.4001946449279785, + "logits/rejected": -2.388592481613159, + "logps/chosen": -1.2444372177124023, + "logps/rejected": -377.6877746582031, + "loss": 0.708, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.008958852849900723, + "rewards/margins": -0.07764698565006256, + "rewards/rejected": 0.08660583943128586, + "step": 151 + }, + { + "epoch": 0.01, + "learning_rate": 2.9457364341085273e-08, + "logits/chosen": -2.0804224014282227, + "logits/rejected": -2.0714125633239746, + "logps/chosen": -192.87933349609375, + "logps/rejected": -298.987548828125, + "loss": 0.6802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015686035621911287, + "rewards/margins": 0.02608642540872097, + "rewards/rejected": -0.02765502966940403, + "step": 152 + }, + { + "epoch": 0.01, + "learning_rate": 2.9651162790697677e-08, + "logits/chosen": -2.1432149410247803, + "logits/rejected": -2.1445820331573486, + "logps/chosen": -2.313002109527588, + "logps/rejected": -31.397165298461914, + "loss": 0.6893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00396153936162591, + "rewards/margins": 0.0101577527821064, + "rewards/rejected": -0.006196212954819202, + "step": 153 + }, + { + "epoch": 0.01, + "learning_rate": 2.9844961240310074e-08, + "logits/chosen": -2.165567636489868, + "logits/rejected": -2.1554954051971436, + "logps/chosen": -13.520564079284668, + "logps/rejected": -142.63619995117188, + "loss": 0.7096, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01428909320384264, + "rewards/margins": -0.056903839111328125, + "rewards/rejected": 0.04261474683880806, + "step": 154 + }, + { + "epoch": 0.01, + "learning_rate": 3.003875968992248e-08, + "logits/chosen": -2.33965802192688, + "logits/rejected": -2.316661834716797, + "logps/chosen": -39.979530334472656, + "logps/rejected": -216.6418914794922, + "loss": 0.7072, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0016887665260583162, + "rewards/margins": -0.06393852084875107, + "rewards/rejected": 0.06224975734949112, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.023255813953488e-08, + "logits/chosen": -2.1171958446502686, + "logits/rejected": -2.094247341156006, + "logps/chosen": -28.97739028930664, + "logps/rejected": -316.73663330078125, + "loss": 0.6727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030858993530273438, + "rewards/margins": 0.0512874610722065, + "rewards/rejected": -0.02042846754193306, + "step": 156 + }, + { + "epoch": 0.01, + "learning_rate": 3.0426356589147286e-08, + "logits/chosen": -2.257798194885254, + "logits/rejected": -2.2453577518463135, + "logps/chosen": -0.00020704853523056954, + "logps/rejected": -135.96401977539062, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.1130496103724e-06, + "rewards/margins": 0.01647545024752617, + "rewards/rejected": -0.01647033728659153, + "step": 157 + }, + { + "epoch": 0.01, + "learning_rate": 3.062015503875969e-08, + "logits/chosen": -2.054002523422241, + "logits/rejected": -2.0380358695983887, + "logps/chosen": -209.4263153076172, + "logps/rejected": -473.75018310546875, + "loss": 0.6205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10756378620862961, + "rewards/margins": 0.1925552487373352, + "rewards/rejected": -0.084991455078125, + "step": 158 + }, + { + "epoch": 0.01, + "learning_rate": 3.081395348837209e-08, + "logits/chosen": -2.04823637008667, + "logits/rejected": -2.039858341217041, + "logps/chosen": -274.9997253417969, + "logps/rejected": -296.78814697265625, + "loss": 0.6965, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02370910719037056, + "rewards/margins": -0.01593933254480362, + "rewards/rejected": -0.007769775576889515, + "step": 159 + }, + { + "epoch": 0.01, + "learning_rate": 3.100775193798449e-08, + "logits/chosen": -2.1658835411071777, + "logits/rejected": -2.1473045349121094, + "logps/chosen": -352.7749328613281, + "logps/rejected": -513.350341796875, + "loss": 0.7619, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10919494926929474, + "rewards/margins": -0.13739930093288422, + "rewards/rejected": 0.02820434607565403, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.1201550387596896e-08, + "logits/chosen": -2.0167410373687744, + "logits/rejected": -1.9800187349319458, + "logps/chosen": -306.04150390625, + "logps/rejected": -459.97332763671875, + "loss": 0.741, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03995361551642418, + "rewards/margins": -0.1679229885339737, + "rewards/rejected": 0.12796936929225922, + "step": 161 + }, + { + "epoch": 0.01, + "learning_rate": 3.13953488372093e-08, + "logits/chosen": -2.282059669494629, + "logits/rejected": -2.2384564876556396, + "logps/chosen": -6.818692054366693e-05, + "logps/rejected": -191.0909881591797, + "loss": 0.6819, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.152434022827947e-07, + "rewards/margins": 0.045847367495298386, + "rewards/rejected": -0.04584808275103569, + "step": 162 + }, + { + "epoch": 0.01, + "learning_rate": 3.1589147286821704e-08, + "logits/chosen": -2.3552396297454834, + "logits/rejected": -2.3582818508148193, + "logps/chosen": -2.265660047531128, + "logps/rejected": -83.89923858642578, + "loss": 0.7102, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02167212963104248, + "rewards/margins": -0.04608619213104248, + "rewards/rejected": 0.0244140625, + "step": 163 + }, + { + "epoch": 0.01, + "learning_rate": 3.178294573643411e-08, + "logits/chosen": -2.218118190765381, + "logits/rejected": -2.2158126831054688, + "logps/chosen": -8.356454782187939e-05, + "logps/rejected": -74.43985748291016, + "loss": 0.7019, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.536743306171047e-08, + "rewards/margins": -0.03485727682709694, + "rewards/rejected": 0.03485717996954918, + "step": 164 + }, + { + "epoch": 0.01, + "learning_rate": 3.197674418604651e-08, + "logits/chosen": -1.9607172012329102, + "logits/rejected": -1.9525467157363892, + "logps/chosen": -6.627063274383545, + "logps/rejected": -210.2218017578125, + "loss": 0.6877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01249013002961874, + "rewards/margins": 0.007856035605072975, + "rewards/rejected": 0.004634094424545765, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.2170542635658916e-08, + "logits/chosen": -2.1466259956359863, + "logits/rejected": -2.118985652923584, + "logps/chosen": -314.6904296875, + "logps/rejected": -383.4019470214844, + "loss": 0.6513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06878662109375, + "rewards/margins": 0.09302063286304474, + "rewards/rejected": -0.02423400990664959, + "step": 166 + }, + { + "epoch": 0.01, + "learning_rate": 3.236434108527132e-08, + "logits/chosen": -2.1708710193634033, + "logits/rejected": -2.1676149368286133, + "logps/chosen": -6.520617171190679e-05, + "logps/rejected": -103.97222900390625, + "loss": 0.6936, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.2648600861430168e-07, + "rewards/margins": -0.0019968391861766577, + "rewards/rejected": 0.0019966126419603825, + "step": 167 + }, + { + "epoch": 0.01, + "learning_rate": 3.2558139534883724e-08, + "logits/chosen": -2.1683661937713623, + "logits/rejected": -2.1622884273529053, + "logps/chosen": -106.90077209472656, + "logps/rejected": -228.25759887695312, + "loss": 0.7053, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0054267882369458675, + "rewards/margins": -0.029129792004823685, + "rewards/rejected": 0.02370300330221653, + "step": 168 + }, + { + "epoch": 0.01, + "learning_rate": 3.275193798449613e-08, + "logits/chosen": -2.2707581520080566, + "logits/rejected": -2.265011787414551, + "logps/chosen": -76.34844970703125, + "logps/rejected": -272.28228759765625, + "loss": 0.6683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05024414137005806, + "rewards/margins": 0.04502563551068306, + "rewards/rejected": 0.005218505859375, + "step": 169 + }, + { + "epoch": 0.01, + "learning_rate": 3.2945736434108526e-08, + "logits/chosen": -2.1325385570526123, + "logits/rejected": -2.118776321411133, + "logps/chosen": -0.530987024307251, + "logps/rejected": -167.39724731445312, + "loss": 0.7044, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.005277547519654036, + "rewards/margins": -0.03833724185824394, + "rewards/rejected": 0.03305969387292862, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.3139534883720923e-08, + "logits/chosen": -2.2210404872894287, + "logits/rejected": -2.2116520404815674, + "logps/chosen": -149.61785888671875, + "logps/rejected": -258.5964050292969, + "loss": 0.6722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02298889122903347, + "rewards/margins": 0.06783752143383026, + "rewards/rejected": -0.04484863206744194, + "step": 171 + }, + { + "epoch": 0.01, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -2.1821022033691406, + "logits/rejected": -2.1770615577697754, + "logps/chosen": -4.506070399656892e-05, + "logps/rejected": -53.987327575683594, + "loss": 0.6976, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.430504283916889e-07, + "rewards/margins": -0.017676401883363724, + "rewards/rejected": 0.017676545307040215, + "step": 172 + }, + { + "epoch": 0.01, + "learning_rate": 3.352713178294573e-08, + "logits/chosen": -2.1858742237091064, + "logits/rejected": -2.1899843215942383, + "logps/chosen": -53.01707077026367, + "logps/rejected": -180.2222442626953, + "loss": 0.7052, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0026264190673828125, + "rewards/margins": -0.03776207193732262, + "rewards/rejected": 0.04038849100470543, + "step": 173 + }, + { + "epoch": 0.01, + "learning_rate": 3.3720930232558136e-08, + "logits/chosen": -2.0548062324523926, + "logits/rejected": -2.0281805992126465, + "logps/chosen": -218.36915588378906, + "logps/rejected": -267.5460205078125, + "loss": 0.7124, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04471283033490181, + "rewards/margins": -0.03082122839987278, + "rewards/rejected": -0.01389160193502903, + "step": 174 + }, + { + "epoch": 0.01, + "learning_rate": 3.391472868217054e-08, + "logits/chosen": -2.3322885036468506, + "logits/rejected": -2.3041324615478516, + "logps/chosen": -193.36334228515625, + "logps/rejected": -335.94573974609375, + "loss": 0.7145, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04727935791015625, + "rewards/margins": -0.02680206298828125, + "rewards/rejected": -0.020477294921875, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.4108527131782944e-08, + "logits/chosen": -2.039363145828247, + "logits/rejected": -2.0155978202819824, + "logps/chosen": -7.250822067260742, + "logps/rejected": -135.34185791015625, + "loss": 0.7109, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.629394644936838e-07, + "rewards/margins": -0.06964187324047089, + "rewards/rejected": 0.06964111328125, + "step": 176 + }, + { + "epoch": 0.01, + "learning_rate": 3.430232558139535e-08, + "logits/chosen": -2.246102809906006, + "logits/rejected": -2.218740224838257, + "logps/chosen": -307.8260803222656, + "logps/rejected": -405.15301513671875, + "loss": 0.7411, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08873596042394638, + "rewards/margins": -0.08602599799633026, + "rewards/rejected": -0.0027099610306322575, + "step": 177 + }, + { + "epoch": 0.01, + "learning_rate": 3.449612403100775e-08, + "logits/chosen": -2.058795928955078, + "logits/rejected": -2.061558246612549, + "logps/chosen": -0.7232761383056641, + "logps/rejected": -87.38166809082031, + "loss": 0.6847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018025041208602488, + "rewards/margins": 0.03736552596092224, + "rewards/rejected": -0.03754577785730362, + "step": 178 + }, + { + "epoch": 0.01, + "learning_rate": 3.4689922480620156e-08, + "logits/chosen": -2.046463966369629, + "logits/rejected": -2.031262159347534, + "logps/chosen": -256.88946533203125, + "logps/rejected": -241.48565673828125, + "loss": 0.6815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01845703087747097, + "rewards/margins": 0.03622741997241974, + "rewards/rejected": -0.05468444898724556, + "step": 179 + }, + { + "epoch": 0.01, + "learning_rate": 3.488372093023256e-08, + "logits/chosen": -2.310964584350586, + "logits/rejected": -2.2886922359466553, + "logps/chosen": -68.11601257324219, + "logps/rejected": -208.03741455078125, + "loss": 0.7058, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02228698693215847, + "rewards/margins": -0.01923828013241291, + "rewards/rejected": -0.0030487061012536287, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.5077519379844964e-08, + "logits/chosen": -2.335885524749756, + "logits/rejected": -2.3284666538238525, + "logps/chosen": -0.5551044344902039, + "logps/rejected": -155.0160675048828, + "loss": 0.7062, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.180903725791723e-05, + "rewards/margins": -0.046664685010910034, + "rewards/rejected": 0.04657287523150444, + "step": 181 + }, + { + "epoch": 0.01, + "learning_rate": 3.527131782945736e-08, + "logits/chosen": -2.180562973022461, + "logits/rejected": -2.1102380752563477, + "logps/chosen": -235.25340270996094, + "logps/rejected": -476.0574951171875, + "loss": 0.6461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11750946193933487, + "rewards/margins": 0.09610442817211151, + "rewards/rejected": 0.02140503004193306, + "step": 182 + }, + { + "epoch": 0.01, + "learning_rate": 3.5465116279069766e-08, + "logits/chosen": -2.165520668029785, + "logits/rejected": -2.157492160797119, + "logps/chosen": -16.63150405883789, + "logps/rejected": -183.53280639648438, + "loss": 0.7042, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0036542892921715975, + "rewards/margins": -0.03270702436566353, + "rewards/rejected": 0.029052734375, + "step": 183 + }, + { + "epoch": 0.01, + "learning_rate": 3.565891472868217e-08, + "logits/chosen": -2.1353607177734375, + "logits/rejected": -2.13723087310791, + "logps/chosen": -0.0803840234875679, + "logps/rejected": -63.19398498535156, + "loss": 0.6949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006984934443607926, + "rewards/margins": 0.0023361430503427982, + "rewards/rejected": -0.0016376496059820056, + "step": 184 + }, + { + "epoch": 0.01, + "learning_rate": 3.5852713178294574e-08, + "logits/chosen": -1.9163317680358887, + "logits/rejected": -1.8760298490524292, + "logps/chosen": -207.54025268554688, + "logps/rejected": -400.66131591796875, + "loss": 0.7065, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01041259802877903, + "rewards/margins": -0.0487213134765625, + "rewards/rejected": 0.05913391336798668, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.604651162790697e-08, + "logits/chosen": -2.0563242435455322, + "logits/rejected": -2.0365374088287354, + "logps/chosen": -277.8426513671875, + "logps/rejected": -446.5187683105469, + "loss": 0.6792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010180664248764515, + "rewards/margins": 0.04000549390912056, + "rewards/rejected": -0.02982482872903347, + "step": 186 + }, + { + "epoch": 0.01, + "learning_rate": 3.6240310077519375e-08, + "logits/chosen": -2.2421910762786865, + "logits/rejected": -2.261706829071045, + "logps/chosen": -206.7100830078125, + "logps/rejected": -298.1659240722656, + "loss": 0.7039, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012666321359574795, + "rewards/margins": -0.06891326606273651, + "rewards/rejected": 0.05624694749712944, + "step": 187 + }, + { + "epoch": 0.01, + "learning_rate": 3.643410852713178e-08, + "logits/chosen": -1.9829277992248535, + "logits/rejected": -1.9794806241989136, + "logps/chosen": -55.056251525878906, + "logps/rejected": -190.58743286132812, + "loss": 0.693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021711349487304688, + "rewards/margins": 0.018538285046815872, + "rewards/rejected": -0.04024963453412056, + "step": 188 + }, + { + "epoch": 0.01, + "learning_rate": 3.6627906976744183e-08, + "logits/chosen": -2.069054365158081, + "logits/rejected": -2.0683159828186035, + "logps/chosen": -38.32255554199219, + "logps/rejected": -134.9126739501953, + "loss": 0.6724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04172363504767418, + "rewards/margins": 0.0455932654440403, + "rewards/rejected": -0.0038696289993822575, + "step": 189 + }, + { + "epoch": 0.01, + "learning_rate": 3.682170542635659e-08, + "logits/chosen": -2.2290079593658447, + "logits/rejected": -2.227144479751587, + "logps/chosen": -0.5782865285873413, + "logps/rejected": -184.00054931640625, + "loss": 0.705, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00011525153968250379, + "rewards/margins": -0.041895244270563126, + "rewards/rejected": 0.04201049730181694, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.701550387596899e-08, + "logits/chosen": -2.167393445968628, + "logits/rejected": -2.168240547180176, + "logps/chosen": -9.869325637817383, + "logps/rejected": -199.0597686767578, + "loss": 0.709, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.580352874152595e-06, + "rewards/margins": -0.06342820823192596, + "rewards/rejected": 0.06342162936925888, + "step": 191 + }, + { + "epoch": 0.01, + "learning_rate": 3.7209302325581396e-08, + "logits/chosen": -2.1919400691986084, + "logits/rejected": -2.1715543270111084, + "logps/chosen": -42.3360481262207, + "logps/rejected": -349.9906005859375, + "loss": 0.6832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008686447516083717, + "rewards/margins": 0.046019360423088074, + "rewards/rejected": -0.05470580980181694, + "step": 192 + }, + { + "epoch": 0.01, + "learning_rate": 3.74031007751938e-08, + "logits/chosen": -2.0635221004486084, + "logits/rejected": -2.064488410949707, + "logps/chosen": -7.306176662445068, + "logps/rejected": -80.02229309082031, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006744718644768, + "rewards/margins": 0.006634855177253485, + "rewards/rejected": 0.00010986328561557457, + "step": 193 + }, + { + "epoch": 0.01, + "learning_rate": 3.75968992248062e-08, + "logits/chosen": -2.170719623565674, + "logits/rejected": -2.167904853820801, + "logps/chosen": -6.255599498748779, + "logps/rejected": -90.14698791503906, + "loss": 0.6784, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.004738290561363e-05, + "rewards/margins": 0.05131850391626358, + "rewards/rejected": -0.05138855054974556, + "step": 194 + }, + { + "epoch": 0.01, + "learning_rate": 3.77906976744186e-08, + "logits/chosen": -2.2037911415100098, + "logits/rejected": -2.1963307857513428, + "logps/chosen": -178.47894287109375, + "logps/rejected": -316.7554016113281, + "loss": 0.6543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05238037183880806, + "rewards/margins": 0.10294799506664276, + "rewards/rejected": -0.050567626953125, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.7984496124031005e-08, + "logits/chosen": -1.9802299737930298, + "logits/rejected": -1.9733610153198242, + "logps/chosen": -247.24085998535156, + "logps/rejected": -335.4576416015625, + "loss": 0.7094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06888427585363388, + "rewards/margins": 0.008038334548473358, + "rewards/rejected": -0.07692261040210724, + "step": 196 + }, + { + "epoch": 0.01, + "learning_rate": 3.817829457364341e-08, + "logits/chosen": -2.2555482387542725, + "logits/rejected": -2.260286808013916, + "logps/chosen": -6.9061360359191895, + "logps/rejected": -80.7017822265625, + "loss": 0.7161, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.029891539365053177, + "rewards/margins": -0.06081271171569824, + "rewards/rejected": 0.030921174213290215, + "step": 197 + }, + { + "epoch": 0.01, + "learning_rate": 3.837209302325581e-08, + "logits/chosen": -2.22025465965271, + "logits/rejected": -2.174772262573242, + "logps/chosen": -292.071044921875, + "logps/rejected": -498.82659912109375, + "loss": 0.6317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09569396823644638, + "rewards/margins": 0.144439697265625, + "rewards/rejected": -0.04874572902917862, + "step": 198 + }, + { + "epoch": 0.01, + "learning_rate": 3.856589147286822e-08, + "logits/chosen": -2.1650123596191406, + "logits/rejected": -2.170677423477173, + "logps/chosen": -297.163818359375, + "logps/rejected": -350.9908142089844, + "loss": 0.6952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0031616210471838713, + "rewards/margins": 0.04031677171587944, + "rewards/rejected": -0.0371551513671875, + "step": 199 + }, + { + "epoch": 0.01, + "learning_rate": 3.875968992248062e-08, + "logits/chosen": -2.2092249393463135, + "logits/rejected": -2.1520304679870605, + "logps/chosen": -217.55703735351562, + "logps/rejected": -352.6907043457031, + "loss": 0.7278, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02696838416159153, + "rewards/margins": -0.07224731892347336, + "rewards/rejected": 0.04527893289923668, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 3.8953488372093025e-08, + "logits/chosen": -2.1581571102142334, + "logits/rejected": -2.1313791275024414, + "logps/chosen": -255.9960174560547, + "logps/rejected": -426.2005615234375, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01479949988424778, + "rewards/margins": 0.0005752565339207649, + "rewards/rejected": 0.014224243350327015, + "step": 201 + }, + { + "epoch": 0.01, + "learning_rate": 3.914728682170542e-08, + "logits/chosen": -1.8532805442810059, + "logits/rejected": -1.8474788665771484, + "logps/chosen": -7.204627513885498, + "logps/rejected": -160.81993103027344, + "loss": 0.6926, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.015460587106645107, + "rewards/margins": -0.011993027292191982, + "rewards/rejected": 0.02745361439883709, + "step": 202 + }, + { + "epoch": 0.01, + "learning_rate": 3.934108527131783e-08, + "logits/chosen": -2.0846846103668213, + "logits/rejected": -2.065925359725952, + "logps/chosen": -198.77313232421875, + "logps/rejected": -458.0824279785156, + "loss": 0.6843, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06702118366956711, + "rewards/margins": -0.014622494578361511, + "rewards/rejected": 0.08164367824792862, + "step": 203 + }, + { + "epoch": 0.01, + "learning_rate": 3.953488372093023e-08, + "logits/chosen": -2.28143310546875, + "logits/rejected": -2.262385606765747, + "logps/chosen": -67.58778381347656, + "logps/rejected": -188.25540161132812, + "loss": 0.7148, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014591979794204235, + "rewards/margins": -0.07169342041015625, + "rewards/rejected": 0.05710143968462944, + "step": 204 + }, + { + "epoch": 0.01, + "learning_rate": 3.9728682170542635e-08, + "logits/chosen": -2.195356607437134, + "logits/rejected": -2.178926467895508, + "logps/chosen": -13.229710578918457, + "logps/rejected": -79.67826843261719, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012631416320800781, + "rewards/margins": 0.0018159868195652962, + "rewards/rejected": 0.010815429501235485, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 3.992248062015503e-08, + "logits/chosen": -2.1388392448425293, + "logits/rejected": -2.0753564834594727, + "logps/chosen": -178.77285766601562, + "logps/rejected": -329.656494140625, + "loss": 0.6465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0705718994140625, + "rewards/margins": 0.08551330864429474, + "rewards/rejected": -0.014941406436264515, + "step": 206 + }, + { + "epoch": 0.01, + "learning_rate": 4.0116279069767437e-08, + "logits/chosen": -2.1888482570648193, + "logits/rejected": -2.218848705291748, + "logps/chosen": -263.5739440917969, + "logps/rejected": -199.06439208984375, + "loss": 0.7314, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08054504543542862, + "rewards/margins": -0.07555999606847763, + "rewards/rejected": -0.004985046572983265, + "step": 207 + }, + { + "epoch": 0.01, + "learning_rate": 4.031007751937984e-08, + "logits/chosen": -1.9640694856643677, + "logits/rejected": -1.9433809518814087, + "logps/chosen": -35.717899322509766, + "logps/rejected": -237.48226928710938, + "loss": 0.6811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05036773905158043, + "rewards/margins": 0.002310182899236679, + "rewards/rejected": 0.04805755615234375, + "step": 208 + }, + { + "epoch": 0.01, + "learning_rate": 4.0503875968992245e-08, + "logits/chosen": -2.29114031791687, + "logits/rejected": -2.282134532928467, + "logps/chosen": -4.7676568031311035, + "logps/rejected": -139.00180053710938, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6845931643038057e-05, + "rewards/margins": 0.005182504653930664, + "rewards/rejected": -0.0052093504928052425, + "step": 209 + }, + { + "epoch": 0.01, + "learning_rate": 4.069767441860465e-08, + "logits/chosen": -2.382249116897583, + "logits/rejected": -2.373162269592285, + "logps/chosen": -0.10301433503627777, + "logps/rejected": -146.5461883544922, + "loss": 0.6808, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.309680091627797e-08, + "rewards/margins": 0.05110475793480873, + "rewards/rejected": -0.05110473558306694, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.089147286821705e-08, + "logits/chosen": -2.2304816246032715, + "logits/rejected": -2.219238758087158, + "logps/chosen": -43.656333923339844, + "logps/rejected": -174.64028930664062, + "loss": 0.6836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0029430389404296875, + "rewards/margins": 0.04130058363080025, + "rewards/rejected": -0.03835754469037056, + "step": 211 + }, + { + "epoch": 0.01, + "learning_rate": 4.108527131782946e-08, + "logits/chosen": -2.117316484451294, + "logits/rejected": -2.1061830520629883, + "logps/chosen": -2.694105751288589e-05, + "logps/rejected": -151.07781982421875, + "loss": 0.6926, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3841858265427618e-08, + "rewards/margins": 0.002272057579830289, + "rewards/rejected": -0.002272033831104636, + "step": 212 + }, + { + "epoch": 0.01, + "learning_rate": 4.127906976744186e-08, + "logits/chosen": -2.2577731609344482, + "logits/rejected": -2.2445828914642334, + "logps/chosen": -10.398484230041504, + "logps/rejected": -190.7407684326172, + "loss": 0.7034, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01250085886567831, + "rewards/margins": -0.028365422040224075, + "rewards/rejected": 0.01586456410586834, + "step": 213 + }, + { + "epoch": 0.01, + "learning_rate": 4.1472868217054265e-08, + "logits/chosen": -2.182987928390503, + "logits/rejected": -2.1427741050720215, + "logps/chosen": -11.39189624786377, + "logps/rejected": -240.85888671875, + "loss": 0.6856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014110374264419079, + "rewards/margins": 0.014217185787856579, + "rewards/rejected": -0.0001068115234375, + "step": 214 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -2.187999725341797, + "logits/rejected": -2.154895782470703, + "logps/chosen": -119.60977172851562, + "logps/rejected": -236.75071716308594, + "loss": 0.7048, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02590331993997097, + "rewards/margins": -0.00699920579791069, + "rewards/rejected": -0.01890411414206028, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.186046511627907e-08, + "logits/chosen": -2.1219866275787354, + "logits/rejected": -2.1176366806030273, + "logps/chosen": -32.36342239379883, + "logps/rejected": -250.9115753173828, + "loss": 0.6833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00887298583984375, + "rewards/margins": 0.03098144568502903, + "rewards/rejected": -0.02210845984518528, + "step": 216 + }, + { + "epoch": 0.01, + "learning_rate": 4.205426356589147e-08, + "logits/chosen": -2.1423676013946533, + "logits/rejected": -2.139385938644409, + "logps/chosen": -228.29859924316406, + "logps/rejected": -366.6319885253906, + "loss": 0.6527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05657958984375, + "rewards/margins": 0.12075500935316086, + "rewards/rejected": -0.06417541950941086, + "step": 217 + }, + { + "epoch": 0.01, + "learning_rate": 4.224806201550387e-08, + "logits/chosen": -2.1056690216064453, + "logits/rejected": -2.1048996448516846, + "logps/chosen": -253.70077514648438, + "logps/rejected": -301.109375, + "loss": 0.6692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06740875542163849, + "rewards/margins": 0.04151764139533043, + "rewards/rejected": 0.02589111402630806, + "step": 218 + }, + { + "epoch": 0.01, + "learning_rate": 4.244186046511627e-08, + "logits/chosen": -2.000112771987915, + "logits/rejected": -2.0116302967071533, + "logps/chosen": -205.4047088623047, + "logps/rejected": -314.54388427734375, + "loss": 0.648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07153167575597763, + "rewards/margins": 0.10976409912109375, + "rewards/rejected": -0.03823242336511612, + "step": 219 + }, + { + "epoch": 0.01, + "learning_rate": 4.2635658914728676e-08, + "logits/chosen": -2.234222888946533, + "logits/rejected": -2.2184176445007324, + "logps/chosen": -58.565120697021484, + "logps/rejected": -253.41348266601562, + "loss": 0.6965, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.025618363171815872, + "rewards/margins": -0.03377494961023331, + "rewards/rejected": 0.05939331278204918, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.282945736434108e-08, + "logits/chosen": -2.279700517654419, + "logits/rejected": -2.259567975997925, + "logps/chosen": -0.0008940626285038888, + "logps/rejected": -244.10906982421875, + "loss": 0.6938, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7626967974138097e-06, + "rewards/margins": -0.0026502148248255253, + "rewards/rejected": 0.0026519775856286287, + "step": 221 + }, + { + "epoch": 0.01, + "learning_rate": 4.3023255813953484e-08, + "logits/chosen": -2.2062480449676514, + "logits/rejected": -2.095695972442627, + "logps/chosen": -206.32977294921875, + "logps/rejected": -427.5283203125, + "loss": 0.7184, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.007870483212172985, + "rewards/margins": -0.11905822902917862, + "rewards/rejected": 0.11118774861097336, + "step": 222 + }, + { + "epoch": 0.01, + "learning_rate": 4.321705426356589e-08, + "logits/chosen": -2.3222813606262207, + "logits/rejected": -2.320951223373413, + "logps/chosen": -16.00703239440918, + "logps/rejected": -83.23139953613281, + "loss": 0.6857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012276649475097656, + "rewards/margins": 0.02864246442914009, + "rewards/rejected": -0.016365814954042435, + "step": 223 + }, + { + "epoch": 0.01, + "learning_rate": 4.341085271317829e-08, + "logits/chosen": -2.2046408653259277, + "logits/rejected": -2.167442798614502, + "logps/chosen": -13.443965911865234, + "logps/rejected": -144.3642578125, + "loss": 0.6964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012463569641113281, + "rewards/margins": 0.002128410153090954, + "rewards/rejected": -0.014591979794204235, + "step": 224 + }, + { + "epoch": 0.01, + "learning_rate": 4.3604651162790697e-08, + "logits/chosen": -2.128308057785034, + "logits/rejected": -2.1463534832000732, + "logps/chosen": -150.74334716796875, + "logps/rejected": -217.24765014648438, + "loss": 0.6652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03493347391486168, + "rewards/margins": 0.07647705078125, + "rewards/rejected": -0.04154358059167862, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.37984496124031e-08, + "logits/chosen": -2.1713309288024902, + "logits/rejected": -2.123797655105591, + "logps/chosen": -256.8193054199219, + "logps/rejected": -296.6307678222656, + "loss": 0.667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05393066629767418, + "rewards/margins": 0.08268433064222336, + "rewards/rejected": -0.02875366248190403, + "step": 226 + }, + { + "epoch": 0.01, + "learning_rate": 4.3992248062015505e-08, + "logits/chosen": -2.251145124435425, + "logits/rejected": -2.2389721870422363, + "logps/chosen": -0.008889642544090748, + "logps/rejected": -79.8895034790039, + "loss": 0.7073, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.5137717355173663e-06, + "rewards/margins": -0.0557151660323143, + "rewards/rejected": 0.055713653564453125, + "step": 227 + }, + { + "epoch": 0.01, + "learning_rate": 4.418604651162791e-08, + "logits/chosen": -2.284722089767456, + "logits/rejected": -2.2869372367858887, + "logps/chosen": -0.0009146532393060625, + "logps/rejected": -92.96585845947266, + "loss": 0.6878, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.809109330177307e-07, + "rewards/margins": 0.028840254992246628, + "rewards/rejected": -0.028839875012636185, + "step": 228 + }, + { + "epoch": 0.01, + "learning_rate": 4.4379844961240306e-08, + "logits/chosen": -2.0565574169158936, + "logits/rejected": -2.0853912830352783, + "logps/chosen": -201.91021728515625, + "logps/rejected": -317.886962890625, + "loss": 0.6932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007766723865643144, + "rewards/margins": 0.05768890678882599, + "rewards/rejected": -0.05846557766199112, + "step": 229 + }, + { + "epoch": 0.01, + "learning_rate": 4.457364341085271e-08, + "logits/chosen": -2.315744400024414, + "logits/rejected": -2.302809238433838, + "logps/chosen": -60.44493865966797, + "logps/rejected": -218.67677307128906, + "loss": 0.7014, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03485870361328125, + "rewards/margins": -0.00508880615234375, + "rewards/rejected": -0.0297698974609375, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.4767441860465114e-08, + "logits/chosen": -2.0505378246307373, + "logits/rejected": -2.0507166385650635, + "logps/chosen": -2.0027091522933915e-05, + "logps/rejected": -111.0194091796875, + "loss": 0.6818, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3840948770725845e-08, + "rewards/margins": 0.04587705433368683, + "rewards/rejected": -0.04587707668542862, + "step": 231 + }, + { + "epoch": 0.01, + "learning_rate": 4.496124031007752e-08, + "logits/chosen": -1.8248518705368042, + "logits/rejected": -1.7978324890136719, + "logps/chosen": -278.2711181640625, + "logps/rejected": -501.277099609375, + "loss": 0.7293, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0050506591796875, + "rewards/margins": -0.10117492824792862, + "rewards/rejected": 0.09612426906824112, + "step": 232 + }, + { + "epoch": 0.01, + "learning_rate": 4.515503875968992e-08, + "logits/chosen": -2.233870267868042, + "logits/rejected": -2.23297119140625, + "logps/chosen": -15.91326904296875, + "logps/rejected": -183.08963012695312, + "loss": 0.6993, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.004020881839096546, + "rewards/margins": -0.01938037946820259, + "rewards/rejected": 0.015359497629106045, + "step": 233 + }, + { + "epoch": 0.01, + "learning_rate": 4.534883720930232e-08, + "logits/chosen": -2.084238052368164, + "logits/rejected": -2.086972236633301, + "logps/chosen": -4.831381797790527, + "logps/rejected": -53.84381103515625, + "loss": 0.6967, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.022065544500947, + "rewards/margins": -0.03346271812915802, + "rewards/rejected": 0.05552826076745987, + "step": 234 + }, + { + "epoch": 0.01, + "learning_rate": 4.5542635658914724e-08, + "logits/chosen": -1.755934238433838, + "logits/rejected": -1.56979501247406, + "logps/chosen": -357.5931396484375, + "logps/rejected": -662.9580078125, + "loss": 0.7204, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04822387918829918, + "rewards/margins": -0.04880981519818306, + "rewards/rejected": 0.0005859375232830644, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.573643410852713e-08, + "logits/chosen": -2.087862968444824, + "logits/rejected": -2.1272494792938232, + "logps/chosen": -203.74652099609375, + "logps/rejected": -239.9325714111328, + "loss": 0.6603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03539733961224556, + "rewards/margins": 0.04910583794116974, + "rewards/rejected": -0.01370849646627903, + "step": 236 + }, + { + "epoch": 0.01, + "learning_rate": 4.593023255813953e-08, + "logits/chosen": -2.241306781768799, + "logits/rejected": -2.238185167312622, + "logps/chosen": -33.14936065673828, + "logps/rejected": -167.10853576660156, + "loss": 0.6791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018895721063017845, + "rewards/margins": 0.036966703832149506, + "rewards/rejected": -0.01807098463177681, + "step": 237 + }, + { + "epoch": 0.01, + "learning_rate": 4.6124031007751936e-08, + "logits/chosen": -2.0926058292388916, + "logits/rejected": -2.0316946506500244, + "logps/chosen": -171.59042358398438, + "logps/rejected": -393.6380615234375, + "loss": 0.7039, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02300567738711834, + "rewards/margins": -0.03614349663257599, + "rewards/rejected": 0.05914917215704918, + "step": 238 + }, + { + "epoch": 0.01, + "learning_rate": 4.631782945736434e-08, + "logits/chosen": -2.272139549255371, + "logits/rejected": -2.2696781158447266, + "logps/chosen": -17.14728355407715, + "logps/rejected": -41.20075607299805, + "loss": 0.69, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029291154351085424, + "rewards/margins": 0.011520576663315296, + "rewards/rejected": -0.014449692331254482, + "step": 239 + }, + { + "epoch": 0.01, + "learning_rate": 4.6511627906976744e-08, + "logits/chosen": -2.1740267276763916, + "logits/rejected": -2.160553216934204, + "logps/chosen": -2.7060292268288322e-05, + "logps/rejected": -93.03926849365234, + "loss": 0.6791, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.15253918315284e-08, + "rewards/margins": 0.05744164064526558, + "rewards/rejected": -0.05744171142578125, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.670542635658914e-08, + "logits/chosen": -2.226602792739868, + "logits/rejected": -2.217453718185425, + "logps/chosen": -191.5166015625, + "logps/rejected": -327.0640563964844, + "loss": 0.7288, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04046325758099556, + "rewards/margins": -0.08094482123851776, + "rewards/rejected": 0.0404815673828125, + "step": 241 + }, + { + "epoch": 0.01, + "learning_rate": 4.6899224806201546e-08, + "logits/chosen": -1.8918474912643433, + "logits/rejected": -1.9004889726638794, + "logps/chosen": -78.2437973022461, + "logps/rejected": -243.43475341796875, + "loss": 0.7233, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.053975678980350494, + "rewards/margins": -0.060463715344667435, + "rewards/rejected": 0.006488037295639515, + "step": 242 + }, + { + "epoch": 0.01, + "learning_rate": 4.709302325581395e-08, + "logits/chosen": -2.1429595947265625, + "logits/rejected": -2.1406569480895996, + "logps/chosen": -39.52269744873047, + "logps/rejected": -80.82978820800781, + "loss": 0.6885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015179443173110485, + "rewards/margins": 0.00448760949075222, + "rewards/rejected": 0.010691833682358265, + "step": 243 + }, + { + "epoch": 0.01, + "learning_rate": 4.7286821705426354e-08, + "logits/chosen": -2.2268357276916504, + "logits/rejected": -2.2231531143188477, + "logps/chosen": -17.985063552856445, + "logps/rejected": -134.01589965820312, + "loss": 0.7081, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012071037665009499, + "rewards/margins": -0.04411754757165909, + "rewards/rejected": 0.03204650804400444, + "step": 244 + }, + { + "epoch": 0.01, + "learning_rate": 4.748062015503876e-08, + "logits/chosen": -2.248957872390747, + "logits/rejected": -2.244027853012085, + "logps/chosen": -9.071694512385875e-05, + "logps/rejected": -158.07656860351562, + "loss": 0.7009, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.6224589078083227e-07, + "rewards/margins": -0.0337170846760273, + "rewards/rejected": 0.03371734544634819, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.767441860465116e-08, + "logits/chosen": -2.1775994300842285, + "logits/rejected": -2.1736159324645996, + "logps/chosen": -52.59889221191406, + "logps/rejected": -123.21879577636719, + "loss": 0.6912, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.021607208997011185, + "rewards/margins": -0.028926849365234375, + "rewards/rejected": 0.05053405836224556, + "step": 246 + }, + { + "epoch": 0.01, + "learning_rate": 4.7868217054263566e-08, + "logits/chosen": -2.1957132816314697, + "logits/rejected": -2.185354471206665, + "logps/chosen": -86.44459533691406, + "logps/rejected": -249.28363037109375, + "loss": 0.7016, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04547271877527237, + "rewards/margins": -0.01022491604089737, + "rewards/rejected": -0.035247802734375, + "step": 247 + }, + { + "epoch": 0.01, + "learning_rate": 4.806201550387597e-08, + "logits/chosen": -2.049412250518799, + "logits/rejected": -2.033435344696045, + "logps/chosen": -52.02054977416992, + "logps/rejected": -175.7147216796875, + "loss": 0.6919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02346649207174778, + "rewards/margins": 0.02585907094180584, + "rewards/rejected": -0.04932556301355362, + "step": 248 + }, + { + "epoch": 0.01, + "learning_rate": 4.8255813953488374e-08, + "logits/chosen": -1.965771198272705, + "logits/rejected": -1.925958275794983, + "logps/chosen": -234.12188720703125, + "logps/rejected": -304.9527282714844, + "loss": 0.699, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.008084106259047985, + "rewards/margins": -0.05048828199505806, + "rewards/rejected": 0.05857238918542862, + "step": 249 + }, + { + "epoch": 0.01, + "learning_rate": 4.844961240310077e-08, + "logits/chosen": -1.9958579540252686, + "logits/rejected": -1.9706462621688843, + "logps/chosen": -147.15711975097656, + "logps/rejected": -202.0009765625, + "loss": 0.6544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012194824405014515, + "rewards/margins": 0.11688080430030823, + "rewards/rejected": -0.10468597710132599, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 4.8643410852713176e-08, + "logits/chosen": -2.189932107925415, + "logits/rejected": -2.0947024822235107, + "logps/chosen": -181.11251831054688, + "logps/rejected": -321.99169921875, + "loss": 0.692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004119873046875, + "rewards/margins": 0.013275146484375, + "rewards/rejected": -0.0091552734375, + "step": 251 + }, + { + "epoch": 0.01, + "learning_rate": 4.883720930232558e-08, + "logits/chosen": -2.251513719558716, + "logits/rejected": -2.24482798576355, + "logps/chosen": -17.970569610595703, + "logps/rejected": -121.92559051513672, + "loss": 0.684, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01206817664206028, + "rewards/margins": 0.0238037109375, + "rewards/rejected": -0.011735535226762295, + "step": 252 + }, + { + "epoch": 0.01, + "learning_rate": 4.903100775193798e-08, + "logits/chosen": -2.1779215335845947, + "logits/rejected": -2.177884578704834, + "logps/chosen": -34.35943603515625, + "logps/rejected": -113.9087905883789, + "loss": 0.6737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03166656568646431, + "rewards/margins": 0.06803436577320099, + "rewards/rejected": -0.03636780008673668, + "step": 253 + }, + { + "epoch": 0.01, + "learning_rate": 4.922480620155038e-08, + "logits/chosen": -1.8548245429992676, + "logits/rejected": -1.9278806447982788, + "logps/chosen": -351.43975830078125, + "logps/rejected": -511.37847900390625, + "loss": 0.7048, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03890381008386612, + "rewards/margins": -0.0036560073494911194, + "rewards/rejected": -0.035247802734375, + "step": 254 + }, + { + "epoch": 0.01, + "learning_rate": 4.9418604651162786e-08, + "logits/chosen": -1.9292372465133667, + "logits/rejected": -1.8636871576309204, + "logps/chosen": -221.51181030273438, + "logps/rejected": -434.7959289550781, + "loss": 0.6855, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02998046949505806, + "rewards/margins": -0.004373170435428619, + "rewards/rejected": 0.03435363993048668, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 4.961240310077519e-08, + "logits/chosen": -2.208717107772827, + "logits/rejected": -2.1225762367248535, + "logps/chosen": -228.27310180664062, + "logps/rejected": -458.6257019042969, + "loss": 0.6708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05090942606329918, + "rewards/margins": 0.01811828836798668, + "rewards/rejected": 0.0327911376953125, + "step": 256 + }, + { + "epoch": 0.01, + "learning_rate": 4.9806201550387594e-08, + "logits/chosen": -2.16115140914917, + "logits/rejected": -2.1554386615753174, + "logps/chosen": -27.937267303466797, + "logps/rejected": -110.01155090332031, + "loss": 0.684, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011210250668227673, + "rewards/margins": 0.016157913953065872, + "rewards/rejected": -0.004947662353515625, + "step": 257 + }, + { + "epoch": 0.02, + "learning_rate": 5e-08, + "logits/chosen": -2.337265729904175, + "logits/rejected": -2.3203580379486084, + "logps/chosen": -48.347476959228516, + "logps/rejected": -248.3994140625, + "loss": 0.6636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05640449747443199, + "rewards/margins": 0.07733650505542755, + "rewards/rejected": -0.02093200758099556, + "step": 258 + }, + { + "epoch": 0.02, + "learning_rate": 5.01937984496124e-08, + "logits/chosen": -2.084672451019287, + "logits/rejected": -2.0734341144561768, + "logps/chosen": -218.37966918945312, + "logps/rejected": -245.64303588867188, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01634826697409153, + "rewards/margins": 0.07004699856042862, + "rewards/rejected": -0.086395263671875, + "step": 259 + }, + { + "epoch": 0.02, + "learning_rate": 5.0387596899224806e-08, + "logits/chosen": -2.040587902069092, + "logits/rejected": -2.0463736057281494, + "logps/chosen": -120.2880859375, + "logps/rejected": -319.53826904296875, + "loss": 0.6902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023394012823700905, + "rewards/margins": 0.022730255499482155, + "rewards/rejected": -0.04612426832318306, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 5.058139534883721e-08, + "logits/chosen": -2.1041078567504883, + "logits/rejected": -2.078789710998535, + "logps/chosen": -189.3272247314453, + "logps/rejected": -414.79803466796875, + "loss": 0.6634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03013458289206028, + "rewards/margins": 0.05554351955652237, + "rewards/rejected": -0.02540893666446209, + "step": 261 + }, + { + "epoch": 0.02, + "learning_rate": 5.0775193798449614e-08, + "logits/chosen": -2.3887860774993896, + "logits/rejected": -2.3755524158477783, + "logps/chosen": -0.7079132795333862, + "logps/rejected": -143.40435791015625, + "loss": 0.7017, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7148257029475644e-05, + "rewards/margins": -0.03392602875828743, + "rewards/rejected": 0.03394317626953125, + "step": 262 + }, + { + "epoch": 0.02, + "learning_rate": 5.096899224806202e-08, + "logits/chosen": -2.0442023277282715, + "logits/rejected": -2.0216212272644043, + "logps/chosen": -202.87220764160156, + "logps/rejected": -344.2505798339844, + "loss": 0.7194, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01228179968893528, + "rewards/margins": -0.08146820217370987, + "rewards/rejected": 0.06918640434741974, + "step": 263 + }, + { + "epoch": 0.02, + "learning_rate": 5.1162790697674416e-08, + "logits/chosen": -2.2476940155029297, + "logits/rejected": -2.249983072280884, + "logps/chosen": -46.699676513671875, + "logps/rejected": -185.60980224609375, + "loss": 0.7039, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.015306854620575905, + "rewards/margins": -0.039720915257930756, + "rewards/rejected": 0.0244140625, + "step": 264 + }, + { + "epoch": 0.02, + "learning_rate": 5.135658914728682e-08, + "logits/chosen": -2.1639981269836426, + "logits/rejected": -2.1494100093841553, + "logps/chosen": -34.2414665222168, + "logps/rejected": -186.86558532714844, + "loss": 0.6855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00654258718714118, + "rewards/margins": 0.03830299526453018, + "rewards/rejected": -0.0448455810546875, + "step": 265 + }, + { + "epoch": 0.02, + "learning_rate": 5.1550387596899224e-08, + "logits/chosen": -2.195417881011963, + "logits/rejected": -2.1776139736175537, + "logps/chosen": -15.7887601852417, + "logps/rejected": -161.14894104003906, + "loss": 0.685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002943420549854636, + "rewards/margins": 0.02740173414349556, + "rewards/rejected": -0.03034515492618084, + "step": 266 + }, + { + "epoch": 0.02, + "learning_rate": 5.174418604651163e-08, + "logits/chosen": -2.0918121337890625, + "logits/rejected": -2.059793710708618, + "logps/chosen": -0.6329014301300049, + "logps/rejected": -159.4842987060547, + "loss": 0.705, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": -0.04341888427734375, + "rewards/rejected": 0.04341888427734375, + "step": 267 + }, + { + "epoch": 0.02, + "learning_rate": 5.193798449612403e-08, + "logits/chosen": -2.2733545303344727, + "logits/rejected": -2.269205331802368, + "logps/chosen": -45.000396728515625, + "logps/rejected": -209.34893798828125, + "loss": 0.7011, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": -0.03162536770105362, + "rewards/rejected": 0.03162536770105362, + "step": 268 + }, + { + "epoch": 0.02, + "learning_rate": 5.2131782945736436e-08, + "logits/chosen": -2.2174360752105713, + "logits/rejected": -2.2127463817596436, + "logps/chosen": -24.78417205810547, + "logps/rejected": -152.9147491455078, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008475303649902344, + "rewards/margins": 0.032972145825624466, + "rewards/rejected": -0.04144744947552681, + "step": 269 + }, + { + "epoch": 0.02, + "learning_rate": 5.232558139534884e-08, + "logits/chosen": -2.332792043685913, + "logits/rejected": -2.331085681915283, + "logps/chosen": -18.218965530395508, + "logps/rejected": -256.5147399902344, + "loss": 0.695, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.007378959562629461, + "rewards/margins": -0.002456474117934704, + "rewards/rejected": -0.0049224854446947575, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 5.2519379844961244e-08, + "logits/chosen": -2.0301876068115234, + "logits/rejected": -1.8897936344146729, + "logps/chosen": -218.69650268554688, + "logps/rejected": -484.9310607910156, + "loss": 0.7515, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06374054402112961, + "rewards/margins": -0.16674652695655823, + "rewards/rejected": 0.10300598293542862, + "step": 271 + }, + { + "epoch": 0.02, + "learning_rate": 5.271317829457365e-08, + "logits/chosen": -2.274329900741577, + "logits/rejected": -2.2677266597747803, + "logps/chosen": -36.964630126953125, + "logps/rejected": -145.18080139160156, + "loss": 0.6953, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.814697322468419e-07, + "rewards/margins": -0.019451523199677467, + "rewards/rejected": 0.01945190504193306, + "step": 272 + }, + { + "epoch": 0.02, + "learning_rate": 5.290697674418605e-08, + "logits/chosen": -2.1040940284729004, + "logits/rejected": -2.099095106124878, + "logps/chosen": -1.8123360872268677, + "logps/rejected": -185.38726806640625, + "loss": 0.6713, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0314922696561553e-05, + "rewards/margins": 0.08939534425735474, + "rewards/rejected": -0.08942566066980362, + "step": 273 + }, + { + "epoch": 0.02, + "learning_rate": 5.310077519379845e-08, + "logits/chosen": -2.3924756050109863, + "logits/rejected": -2.3866384029388428, + "logps/chosen": -25.731473922729492, + "logps/rejected": -184.6863555908203, + "loss": 0.6858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00866012554615736, + "rewards/margins": 0.027172088623046875, + "rewards/rejected": -0.01851196400821209, + "step": 274 + }, + { + "epoch": 0.02, + "learning_rate": 5.329457364341085e-08, + "logits/chosen": -2.2148661613464355, + "logits/rejected": -2.2228353023529053, + "logps/chosen": -21.79157257080078, + "logps/rejected": -191.4306182861328, + "loss": 0.71, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.010158157907426357, + "rewards/margins": -0.05175056681036949, + "rewards/rejected": 0.04159240797162056, + "step": 275 + }, + { + "epoch": 0.02, + "learning_rate": 5.348837209302325e-08, + "logits/chosen": -1.9540071487426758, + "logits/rejected": -1.926762580871582, + "logps/chosen": -374.2545471191406, + "logps/rejected": -609.978271484375, + "loss": 0.613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21457825601100922, + "rewards/margins": 0.12246399372816086, + "rewards/rejected": 0.09211426228284836, + "step": 276 + }, + { + "epoch": 0.02, + "learning_rate": 5.3682170542635655e-08, + "logits/chosen": -1.9274709224700928, + "logits/rejected": -1.8924431800842285, + "logps/chosen": -216.8302001953125, + "logps/rejected": -368.2818298339844, + "loss": 0.699, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0049072266556322575, + "rewards/margins": -0.00877990759909153, + "rewards/rejected": 0.0038726807106286287, + "step": 277 + }, + { + "epoch": 0.02, + "learning_rate": 5.387596899224805e-08, + "logits/chosen": -2.07633900642395, + "logits/rejected": -2.071009635925293, + "logps/chosen": -7.375905990600586, + "logps/rejected": -84.04999542236328, + "loss": 0.6966, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.012490558438003063, + "rewards/margins": -0.021717358380556107, + "rewards/rejected": 0.034207917749881744, + "step": 278 + }, + { + "epoch": 0.02, + "learning_rate": 5.4069767441860457e-08, + "logits/chosen": -2.10595965385437, + "logits/rejected": -2.1649203300476074, + "logps/chosen": -255.9019775390625, + "logps/rejected": -439.57232666015625, + "loss": 0.7397, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06186218187212944, + "rewards/margins": -0.09002991020679474, + "rewards/rejected": 0.028167724609375, + "step": 279 + }, + { + "epoch": 0.02, + "learning_rate": 5.426356589147286e-08, + "logits/chosen": -2.17287278175354, + "logits/rejected": -2.151055335998535, + "logps/chosen": -213.3360595703125, + "logps/rejected": -348.9080810546875, + "loss": 0.7221, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0527496337890625, + "rewards/margins": -0.10247497260570526, + "rewards/rejected": 0.04972534254193306, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 5.4457364341085265e-08, + "logits/chosen": -2.112159013748169, + "logits/rejected": -2.101902961730957, + "logps/chosen": -40.79621887207031, + "logps/rejected": -220.45864868164062, + "loss": 0.6681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009709167294204235, + "rewards/margins": 0.09422454982995987, + "rewards/rejected": -0.08451538532972336, + "step": 281 + }, + { + "epoch": 0.02, + "learning_rate": 5.465116279069767e-08, + "logits/chosen": -2.210691213607788, + "logits/rejected": -2.207160711288452, + "logps/chosen": -0.002717365277931094, + "logps/rejected": -127.06185913085938, + "loss": 0.6712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9264407455921173e-06, + "rewards/margins": 0.08969156444072723, + "rewards/rejected": -0.08968963474035263, + "step": 282 + }, + { + "epoch": 0.02, + "learning_rate": 5.484496124031007e-08, + "logits/chosen": -2.228079319000244, + "logits/rejected": -2.2220451831817627, + "logps/chosen": -0.00134780362714082, + "logps/rejected": -233.08749389648438, + "loss": 0.708, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.2971409634919837e-05, + "rewards/margins": -0.058783043175935745, + "rewards/rejected": 0.05876007303595543, + "step": 283 + }, + { + "epoch": 0.02, + "learning_rate": 5.503875968992248e-08, + "logits/chosen": -2.1736483573913574, + "logits/rejected": -2.1684041023254395, + "logps/chosen": -0.0005501421401277184, + "logps/rejected": -135.60061645507812, + "loss": 0.686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5496625849209522e-07, + "rewards/margins": 0.028903353959321976, + "rewards/rejected": -0.02890319935977459, + "step": 284 + }, + { + "epoch": 0.02, + "learning_rate": 5.523255813953488e-08, + "logits/chosen": -2.1380491256713867, + "logits/rejected": -2.104375123977661, + "logps/chosen": -240.43914794921875, + "logps/rejected": -293.00958251953125, + "loss": 0.7146, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01523437537252903, + "rewards/margins": -0.09065857529640198, + "rewards/rejected": 0.10589294880628586, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 5.5426356589147285e-08, + "logits/chosen": -1.9249770641326904, + "logits/rejected": -1.918298363685608, + "logps/chosen": -21.573619842529297, + "logps/rejected": -143.69476318359375, + "loss": 0.6941, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.008731651119887829, + "rewards/margins": -0.013186073862016201, + "rewards/rejected": 0.02191772498190403, + "step": 286 + }, + { + "epoch": 0.02, + "learning_rate": 5.562015503875969e-08, + "logits/chosen": -2.1980226039886475, + "logits/rejected": -2.074234962463379, + "logps/chosen": -196.541015625, + "logps/rejected": -418.4322509765625, + "loss": 0.73, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10615234822034836, + "rewards/margins": -0.023901373147964478, + "rewards/rejected": -0.08225097507238388, + "step": 287 + }, + { + "epoch": 0.02, + "learning_rate": 5.5813953488372087e-08, + "logits/chosen": -2.255908489227295, + "logits/rejected": -2.253823757171631, + "logps/chosen": -12.69018840789795, + "logps/rejected": -143.03431701660156, + "loss": 0.697, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.012130451388657093, + "rewards/margins": -0.027644634246826172, + "rewards/rejected": 0.03977508470416069, + "step": 288 + }, + { + "epoch": 0.02, + "learning_rate": 5.600775193798449e-08, + "logits/chosen": -2.0769236087799072, + "logits/rejected": -2.118872880935669, + "logps/chosen": -281.6646728515625, + "logps/rejected": -398.5815734863281, + "loss": 0.7075, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01940307579934597, + "rewards/margins": -0.03004150465130806, + "rewards/rejected": 0.010638427920639515, + "step": 289 + }, + { + "epoch": 0.02, + "learning_rate": 5.6201550387596895e-08, + "logits/chosen": -2.2624354362487793, + "logits/rejected": -2.2339084148406982, + "logps/chosen": -219.94522094726562, + "logps/rejected": -360.2943115234375, + "loss": 0.6692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01149597205221653, + "rewards/margins": 0.0759124755859375, + "rewards/rejected": -0.06441650539636612, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 5.63953488372093e-08, + "logits/chosen": -2.249567747116089, + "logits/rejected": -2.247371196746826, + "logps/chosen": -0.00012278177018743008, + "logps/rejected": -89.37818145751953, + "loss": 0.6964, + "rewards/accuracies": 0.0, + "rewards/chosen": 8.343849913217127e-07, + "rewards/margins": -0.01285622175782919, + "rewards/rejected": 0.012857056222856045, + "step": 291 + }, + { + "epoch": 0.02, + "learning_rate": 5.65891472868217e-08, + "logits/chosen": -2.2711021900177, + "logits/rejected": -2.2706215381622314, + "logps/chosen": -7.605758190155029, + "logps/rejected": -54.69108200073242, + "loss": 0.6917, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006282806280069053, + "rewards/margins": 0.0061130523681640625, + "rewards/rejected": -0.00548477191478014, + "step": 292 + }, + { + "epoch": 0.02, + "learning_rate": 5.678294573643411e-08, + "logits/chosen": -2.023601770401001, + "logits/rejected": -1.973443627357483, + "logps/chosen": -302.05792236328125, + "logps/rejected": -515.9136962890625, + "loss": 0.6981, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012316894717514515, + "rewards/margins": 0.02585449442267418, + "rewards/rejected": -0.03817138820886612, + "step": 293 + }, + { + "epoch": 0.02, + "learning_rate": 5.697674418604651e-08, + "logits/chosen": -2.231627941131592, + "logits/rejected": -2.2291171550750732, + "logps/chosen": -8.183011054992676, + "logps/rejected": -23.73727798461914, + "loss": 0.6903, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.016859054565429688, + "rewards/margins": -0.005861664190888405, + "rewards/rejected": 0.022720718756318092, + "step": 294 + }, + { + "epoch": 0.02, + "learning_rate": 5.7170542635658915e-08, + "logits/chosen": -2.1235649585723877, + "logits/rejected": -2.1104679107666016, + "logps/chosen": -31.80849266052246, + "logps/rejected": -244.45899963378906, + "loss": 0.7279, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01771240308880806, + "rewards/margins": -0.11627350747585297, + "rewards/rejected": 0.09856110066175461, + "step": 295 + }, + { + "epoch": 0.02, + "learning_rate": 5.736434108527132e-08, + "logits/chosen": -2.0519587993621826, + "logits/rejected": -2.0492424964904785, + "logps/chosen": -319.0391540527344, + "logps/rejected": -476.298095703125, + "loss": 0.6984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010250854305922985, + "rewards/margins": 0.03612976148724556, + "rewards/rejected": -0.04638061672449112, + "step": 296 + }, + { + "epoch": 0.02, + "learning_rate": 5.755813953488372e-08, + "logits/chosen": -2.3421947956085205, + "logits/rejected": -2.342184543609619, + "logps/chosen": -1.9115030765533447, + "logps/rejected": -155.15313720703125, + "loss": 0.677, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.191036042466294e-06, + "rewards/margins": 0.06573329865932465, + "rewards/rejected": -0.06574249267578125, + "step": 297 + }, + { + "epoch": 0.02, + "learning_rate": 5.775193798449613e-08, + "logits/chosen": -2.1530864238739014, + "logits/rejected": -2.1903131008148193, + "logps/chosen": -357.79119873046875, + "logps/rejected": -374.8064270019531, + "loss": 0.745, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09839782863855362, + "rewards/margins": -0.05520324781537056, + "rewards/rejected": -0.04319458082318306, + "step": 298 + }, + { + "epoch": 0.02, + "learning_rate": 5.7945736434108525e-08, + "logits/chosen": -2.122817277908325, + "logits/rejected": -2.0967395305633545, + "logps/chosen": -0.0001226634776685387, + "logps/rejected": -343.87530517578125, + "loss": 0.7109, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1929659926579461e-08, + "rewards/margins": -0.07003480941057205, + "rewards/rejected": 0.07003479450941086, + "step": 299 + }, + { + "epoch": 0.02, + "learning_rate": 5.813953488372093e-08, + "logits/chosen": -2.1743416786193848, + "logits/rejected": -2.1367976665496826, + "logps/chosen": -150.4690704345703, + "logps/rejected": -313.2742614746094, + "loss": 0.6958, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01891632191836834, + "rewards/margins": -0.0036056526005268097, + "rewards/rejected": -0.01531066931784153, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -2.2102506160736084, + "logits/rejected": -2.151292085647583, + "logps/chosen": -250.2626953125, + "logps/rejected": -342.1377868652344, + "loss": 0.7051, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0198516845703125, + "rewards/margins": -0.03489990159869194, + "rewards/rejected": 0.015048217959702015, + "step": 301 + }, + { + "epoch": 0.02, + "learning_rate": 5.852713178294574e-08, + "logits/chosen": -2.119671106338501, + "logits/rejected": -2.1145131587982178, + "logps/chosen": -48.76457977294922, + "logps/rejected": -93.41310119628906, + "loss": 0.7026, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.017915725708007812, + "rewards/margins": -0.010216902941465378, + "rewards/rejected": -0.007698822300881147, + "step": 302 + }, + { + "epoch": 0.02, + "learning_rate": 5.872093023255814e-08, + "logits/chosen": -2.0688369274139404, + "logits/rejected": -2.06500244140625, + "logps/chosen": -106.07579040527344, + "logps/rejected": -270.9486083984375, + "loss": 0.7022, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00598068255931139, + "rewards/margins": -0.056262969970703125, + "rewards/rejected": 0.06224365159869194, + "step": 303 + }, + { + "epoch": 0.02, + "learning_rate": 5.8914728682170545e-08, + "logits/chosen": -2.132258176803589, + "logits/rejected": -2.123440742492676, + "logps/chosen": -23.373626708984375, + "logps/rejected": -56.70205307006836, + "loss": 0.6945, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.009425354190170765, + "rewards/margins": -0.00604400597512722, + "rewards/rejected": 0.015469360165297985, + "step": 304 + }, + { + "epoch": 0.02, + "learning_rate": 5.910852713178295e-08, + "logits/chosen": -2.1030941009521484, + "logits/rejected": -2.0842154026031494, + "logps/chosen": -164.9296112060547, + "logps/rejected": -263.7117614746094, + "loss": 0.6872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013580322265625, + "rewards/margins": 0.02506103552877903, + "rewards/rejected": -0.02641906775534153, + "step": 305 + }, + { + "epoch": 0.02, + "learning_rate": 5.930232558139535e-08, + "logits/chosen": -2.297698736190796, + "logits/rejected": -2.290243148803711, + "logps/chosen": -29.055267333984375, + "logps/rejected": -267.197265625, + "loss": 0.6809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0083465576171875, + "rewards/margins": 0.04484253004193306, + "rewards/rejected": -0.05318908765912056, + "step": 306 + }, + { + "epoch": 0.02, + "learning_rate": 5.9496124031007744e-08, + "logits/chosen": -2.1148266792297363, + "logits/rejected": -2.1102263927459717, + "logps/chosen": -276.4412536621094, + "logps/rejected": -304.36651611328125, + "loss": 0.6772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0223846435546875, + "rewards/margins": 0.07094421982765198, + "rewards/rejected": -0.04855957254767418, + "step": 307 + }, + { + "epoch": 0.02, + "learning_rate": 5.968992248062015e-08, + "logits/chosen": -2.144810438156128, + "logits/rejected": -2.14595890045166, + "logps/chosen": -8.541197776794434, + "logps/rejected": -151.72630310058594, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001922607480082661, + "rewards/margins": 0.02032623253762722, + "rewards/rejected": -0.02013397216796875, + "step": 308 + }, + { + "epoch": 0.02, + "learning_rate": 5.988372093023255e-08, + "logits/chosen": -2.1919515132904053, + "logits/rejected": -2.182727098464966, + "logps/chosen": -45.60905838012695, + "logps/rejected": -212.30642700195312, + "loss": 0.7247, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.048509981483221054, + "rewards/margins": -0.07927780598402023, + "rewards/rejected": 0.03076782263815403, + "step": 309 + }, + { + "epoch": 0.02, + "learning_rate": 6.007751937984496e-08, + "logits/chosen": -2.0900590419769287, + "logits/rejected": -2.094043731689453, + "logps/chosen": -0.00012433165102265775, + "logps/rejected": -114.76058197021484, + "loss": 0.6801, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2170950160216307e-06, + "rewards/margins": 0.05293266847729683, + "rewards/rejected": -0.052930451929569244, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 6.027131782945735e-08, + "logits/chosen": -2.265336036682129, + "logits/rejected": -2.2582695484161377, + "logps/chosen": -89.40992736816406, + "logps/rejected": -306.75579833984375, + "loss": 0.7021, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.010905456729233265, + "rewards/margins": -0.03235626220703125, + "rewards/rejected": 0.04326171800494194, + "step": 311 + }, + { + "epoch": 0.02, + "learning_rate": 6.046511627906976e-08, + "logits/chosen": -2.083240270614624, + "logits/rejected": -2.0082767009735107, + "logps/chosen": -152.585205078125, + "logps/rejected": -245.32415771484375, + "loss": 0.6885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01517334021627903, + "rewards/margins": 0.04475250095129013, + "rewards/rejected": -0.05992584303021431, + "step": 312 + }, + { + "epoch": 0.02, + "learning_rate": 6.065891472868216e-08, + "logits/chosen": -2.0730628967285156, + "logits/rejected": -2.0630300045013428, + "logps/chosen": -0.01027081161737442, + "logps/rejected": -270.9876403808594, + "loss": 0.7025, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.340608474100009e-05, + "rewards/margins": -0.036521393805742264, + "rewards/rejected": 0.03643798828125, + "step": 313 + }, + { + "epoch": 0.02, + "learning_rate": 6.085271317829457e-08, + "logits/chosen": -2.1812851428985596, + "logits/rejected": -2.177889108657837, + "logps/chosen": -37.57791519165039, + "logps/rejected": -96.77790832519531, + "loss": 0.6681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04214782640337944, + "rewards/margins": 0.050902556627988815, + "rewards/rejected": -0.008754730224609375, + "step": 314 + }, + { + "epoch": 0.02, + "learning_rate": 6.104651162790697e-08, + "logits/chosen": -2.2612366676330566, + "logits/rejected": -2.244797468185425, + "logps/chosen": -231.54888916015625, + "logps/rejected": -338.2212219238281, + "loss": 0.6972, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04444275051355362, + "rewards/margins": -0.03580322116613388, + "rewards/rejected": 0.0802459716796875, + "step": 315 + }, + { + "epoch": 0.02, + "learning_rate": 6.124031007751938e-08, + "logits/chosen": -2.135446548461914, + "logits/rejected": -2.116748571395874, + "logps/chosen": -254.91372680664062, + "logps/rejected": -336.6503601074219, + "loss": 0.7081, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02870330773293972, + "rewards/margins": -0.01441497728228569, + "rewards/rejected": -0.01428833045065403, + "step": 316 + }, + { + "epoch": 0.02, + "learning_rate": 6.143410852713178e-08, + "logits/chosen": -2.0465729236602783, + "logits/rejected": -2.0469601154327393, + "logps/chosen": -38.45866394042969, + "logps/rejected": -86.55809020996094, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00682487478479743, + "rewards/margins": 0.024659348651766777, + "rewards/rejected": -0.031484223902225494, + "step": 317 + }, + { + "epoch": 0.02, + "learning_rate": 6.162790697674418e-08, + "logits/chosen": -2.131483793258667, + "logits/rejected": -2.1151938438415527, + "logps/chosen": -0.0011937019880861044, + "logps/rejected": -230.15597534179688, + "loss": 0.6929, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.5035773180425167e-06, + "rewards/margins": -0.001134802121669054, + "rewards/rejected": 0.0011383056407794356, + "step": 318 + }, + { + "epoch": 0.02, + "learning_rate": 6.182170542635659e-08, + "logits/chosen": -2.1755619049072266, + "logits/rejected": -2.1658804416656494, + "logps/chosen": -15.505409240722656, + "logps/rejected": -67.11897277832031, + "loss": 0.6999, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012560653500258923, + "rewards/margins": -0.014377212151885033, + "rewards/rejected": 0.0018165588844567537, + "step": 319 + }, + { + "epoch": 0.02, + "learning_rate": 6.201550387596898e-08, + "logits/chosen": -2.0983378887176514, + "logits/rejected": -2.1019949913024902, + "logps/chosen": -306.73016357421875, + "logps/rejected": -416.89154052734375, + "loss": 0.6679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03752441331744194, + "rewards/margins": 0.05774841457605362, + "rewards/rejected": -0.02022399939596653, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.22093023255814e-08, + "logits/chosen": -2.213655710220337, + "logits/rejected": -2.1964454650878906, + "logps/chosen": -146.68406677246094, + "logps/rejected": -239.81924438476562, + "loss": 0.6639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03903961181640625, + "rewards/margins": 0.04851226881146431, + "rewards/rejected": -0.009472656063735485, + "step": 321 + }, + { + "epoch": 0.02, + "learning_rate": 6.240310077519379e-08, + "logits/chosen": -2.3094708919525146, + "logits/rejected": -2.294790506362915, + "logps/chosen": -31.518463134765625, + "logps/rejected": -114.49606323242188, + "loss": 0.6874, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.027939414605498314, + "rewards/margins": -0.014057351276278496, + "rewards/rejected": 0.04199676588177681, + "step": 322 + }, + { + "epoch": 0.02, + "learning_rate": 6.25968992248062e-08, + "logits/chosen": -2.1726672649383545, + "logits/rejected": -2.1943180561065674, + "logps/chosen": -261.9291076660156, + "logps/rejected": -290.9603271484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03277587890625, + "rewards/margins": -0.02044982835650444, + "rewards/rejected": 0.05322570726275444, + "step": 323 + }, + { + "epoch": 0.02, + "learning_rate": 6.27906976744186e-08, + "logits/chosen": -1.8642641305923462, + "logits/rejected": -1.834833025932312, + "logps/chosen": -298.44232177734375, + "logps/rejected": -445.5045471191406, + "loss": 0.7011, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.016754150390625, + "rewards/margins": -0.06134338304400444, + "rewards/rejected": 0.04458923265337944, + "step": 324 + }, + { + "epoch": 0.02, + "learning_rate": 6.298449612403101e-08, + "logits/chosen": -2.3312878608703613, + "logits/rejected": -2.31958270072937, + "logps/chosen": -56.13802719116211, + "logps/rejected": -249.81019592285156, + "loss": 0.6748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014995575184002519, + "rewards/margins": 0.0724208801984787, + "rewards/rejected": -0.07092132419347763, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.317829457364341e-08, + "logits/chosen": -2.1563031673431396, + "logits/rejected": -2.1426587104797363, + "logps/chosen": -0.002537044230848551, + "logps/rejected": -173.55307006835938, + "loss": 0.6883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3113022134803032e-07, + "rewards/margins": 0.024527108296751976, + "rewards/rejected": -0.02452697791159153, + "step": 326 + }, + { + "epoch": 0.02, + "learning_rate": 6.337209302325582e-08, + "logits/chosen": -2.216362953186035, + "logits/rejected": -2.1988351345062256, + "logps/chosen": -158.2312469482422, + "logps/rejected": -319.05633544921875, + "loss": 0.6588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09854888916015625, + "rewards/margins": 0.04503936693072319, + "rewards/rejected": 0.05350952222943306, + "step": 327 + }, + { + "epoch": 0.02, + "learning_rate": 6.356589147286822e-08, + "logits/chosen": -2.336667776107788, + "logits/rejected": -2.339538812637329, + "logps/chosen": -1.6548874378204346, + "logps/rejected": -158.177978515625, + "loss": 0.6912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010189545340836048, + "rewards/margins": 0.016787994652986526, + "rewards/rejected": -0.0269775390625, + "step": 328 + }, + { + "epoch": 0.02, + "learning_rate": 6.375968992248061e-08, + "logits/chosen": -2.1192803382873535, + "logits/rejected": -2.0940887928009033, + "logps/chosen": -163.25294494628906, + "logps/rejected": -263.5999450683594, + "loss": 0.6677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02444305457174778, + "rewards/margins": 0.09640655666589737, + "rewards/rejected": -0.07196350395679474, + "step": 329 + }, + { + "epoch": 0.02, + "learning_rate": 6.395348837209302e-08, + "logits/chosen": -2.1946969032287598, + "logits/rejected": -2.1944732666015625, + "logps/chosen": -63.643917083740234, + "logps/rejected": -100.61026000976562, + "loss": 0.7143, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03881988674402237, + "rewards/margins": -0.04497070610523224, + "rewards/rejected": 0.0061508179642260075, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.414728682170542e-08, + "logits/chosen": -2.0644874572753906, + "logits/rejected": -2.0466995239257812, + "logps/chosen": -0.15613918006420135, + "logps/rejected": -179.65280151367188, + "loss": 0.7137, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.002471356187015772, + "rewards/margins": -0.07832279801368713, + "rewards/rejected": 0.0758514404296875, + "step": 331 + }, + { + "epoch": 0.02, + "learning_rate": 6.434108527131783e-08, + "logits/chosen": -2.1618802547454834, + "logits/rejected": -2.133479356765747, + "logps/chosen": -68.56826782226562, + "logps/rejected": -340.29302978515625, + "loss": 0.6982, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04340973123908043, + "rewards/margins": -0.001325991004705429, + "rewards/rejected": -0.042083740234375, + "step": 332 + }, + { + "epoch": 0.02, + "learning_rate": 6.453488372093023e-08, + "logits/chosen": -2.2806661128997803, + "logits/rejected": -2.2712254524230957, + "logps/chosen": -2.509420394897461, + "logps/rejected": -163.48211669921875, + "loss": 0.6924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011399961076676846, + "rewards/margins": 0.014070010744035244, + "rewards/rejected": -0.02546997182071209, + "step": 333 + }, + { + "epoch": 0.02, + "learning_rate": 6.472868217054264e-08, + "logits/chosen": -2.0857346057891846, + "logits/rejected": -2.0835647583007812, + "logps/chosen": -51.79553985595703, + "logps/rejected": -154.126220703125, + "loss": 0.6681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.062467195093631744, + "rewards/margins": 0.048587799072265625, + "rewards/rejected": 0.013879395090043545, + "step": 334 + }, + { + "epoch": 0.02, + "learning_rate": 6.492248062015504e-08, + "logits/chosen": -1.9700634479522705, + "logits/rejected": -1.9650461673736572, + "logps/chosen": -249.33767700195312, + "logps/rejected": -344.11444091796875, + "loss": 0.7229, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03807220607995987, + "rewards/margins": -0.09255829453468323, + "rewards/rejected": 0.05448608472943306, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.511627906976745e-08, + "logits/chosen": -2.162208080291748, + "logits/rejected": -2.1591315269470215, + "logps/chosen": -13.96469783782959, + "logps/rejected": -236.54293823242188, + "loss": 0.6905, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.008281040005385876, + "rewards/margins": -0.0004942901432514191, + "rewards/rejected": 0.008775330148637295, + "step": 336 + }, + { + "epoch": 0.02, + "learning_rate": 6.531007751937985e-08, + "logits/chosen": -2.1748805046081543, + "logits/rejected": -2.1227333545684814, + "logps/chosen": -214.96121215820312, + "logps/rejected": -362.64642333984375, + "loss": 0.7347, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04364929348230362, + "rewards/margins": -0.10798034816980362, + "rewards/rejected": 0.0643310546875, + "step": 337 + }, + { + "epoch": 0.02, + "learning_rate": 6.550387596899226e-08, + "logits/chosen": -2.0070719718933105, + "logits/rejected": -2.0041487216949463, + "logps/chosen": -94.87167358398438, + "logps/rejected": -179.65234375, + "loss": 0.6674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028032684698700905, + "rewards/margins": 0.07721175998449326, + "rewards/rejected": -0.0491790771484375, + "step": 338 + }, + { + "epoch": 0.02, + "learning_rate": 6.569767441860464e-08, + "logits/chosen": -2.0959556102752686, + "logits/rejected": -2.0893473625183105, + "logps/chosen": -0.28266197443008423, + "logps/rejected": -106.97174072265625, + "loss": 0.6885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029288739897310734, + "rewards/margins": 0.02240758389234543, + "rewards/rejected": -0.025336457416415215, + "step": 339 + }, + { + "epoch": 0.02, + "learning_rate": 6.589147286821705e-08, + "logits/chosen": -2.2013113498687744, + "logits/rejected": -2.203063488006592, + "logps/chosen": -274.25140380859375, + "logps/rejected": -349.5506286621094, + "loss": 0.6339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1345672607421875, + "rewards/margins": 0.11563720554113388, + "rewards/rejected": 0.01893005333840847, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.608527131782945e-08, + "logits/chosen": -2.1973865032196045, + "logits/rejected": -2.186753988265991, + "logps/chosen": -4.495835304260254, + "logps/rejected": -146.31475830078125, + "loss": 0.6968, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0004906654357910156, + "rewards/margins": -0.01261816080659628, + "rewards/rejected": 0.013108826242387295, + "step": 341 + }, + { + "epoch": 0.02, + "learning_rate": 6.627906976744185e-08, + "logits/chosen": -2.1393511295318604, + "logits/rejected": -2.093834638595581, + "logps/chosen": -166.60284423828125, + "logps/rejected": -364.43231201171875, + "loss": 0.6982, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0004135131894145161, + "rewards/margins": -0.03692474588751793, + "rewards/rejected": 0.03651123121380806, + "step": 342 + }, + { + "epoch": 0.02, + "learning_rate": 6.647286821705426e-08, + "logits/chosen": -2.168195962905884, + "logits/rejected": -2.1669905185699463, + "logps/chosen": -13.4580717086792, + "logps/rejected": -101.61332702636719, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01690664328634739, + "rewards/margins": -0.020247744396328926, + "rewards/rejected": 0.037154387682676315, + "step": 343 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -2.1221776008605957, + "logits/rejected": -2.092301368713379, + "logps/chosen": -191.0249786376953, + "logps/rejected": -407.32525634765625, + "loss": 0.6981, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03871612623333931, + "rewards/margins": -0.01993255689740181, + "rewards/rejected": 0.05864868313074112, + "step": 344 + }, + { + "epoch": 0.02, + "learning_rate": 6.686046511627907e-08, + "logits/chosen": -2.1538307666778564, + "logits/rejected": -2.0313608646392822, + "logps/chosen": -183.97271728515625, + "logps/rejected": -425.0308837890625, + "loss": 0.7243, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0010772704845294356, + "rewards/margins": -0.07854308933019638, + "rewards/rejected": 0.079620361328125, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.705426356589146e-08, + "logits/chosen": -2.1622724533081055, + "logits/rejected": -2.134025812149048, + "logps/chosen": -381.3361511230469, + "logps/rejected": -528.7567138671875, + "loss": 0.6776, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02365417592227459, + "rewards/margins": -0.02449035458266735, + "rewards/rejected": 0.04814453050494194, + "step": 346 + }, + { + "epoch": 0.02, + "learning_rate": 6.724806201550387e-08, + "logits/chosen": -2.1947975158691406, + "logits/rejected": -2.1684374809265137, + "logps/chosen": -247.59548950195312, + "logps/rejected": -360.05767822265625, + "loss": 0.6818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01005401648581028, + "rewards/margins": 0.04666900634765625, + "rewards/rejected": -0.05672302469611168, + "step": 347 + }, + { + "epoch": 0.02, + "learning_rate": 6.744186046511627e-08, + "logits/chosen": -2.0516836643218994, + "logits/rejected": -2.048198938369751, + "logps/chosen": -28.526649475097656, + "logps/rejected": -117.60494995117188, + "loss": 0.6749, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030313491821289062, + "rewards/margins": 0.03967399522662163, + "rewards/rejected": -0.00936050433665514, + "step": 348 + }, + { + "epoch": 0.02, + "learning_rate": 6.763565891472868e-08, + "logits/chosen": -1.9345203638076782, + "logits/rejected": -1.8475743532180786, + "logps/chosen": -366.61474609375, + "logps/rejected": -501.1683349609375, + "loss": 0.705, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.001055908272974193, + "rewards/margins": -0.0547332763671875, + "rewards/rejected": 0.05367736890912056, + "step": 349 + }, + { + "epoch": 0.02, + "learning_rate": 6.782945736434108e-08, + "logits/chosen": -2.0998129844665527, + "logits/rejected": -2.0960495471954346, + "logps/chosen": -47.129730224609375, + "logps/rejected": -232.9011993408203, + "loss": 0.7014, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.013741684146225452, + "rewards/margins": -0.00025901757180690765, + "rewards/rejected": -0.013482666574418545, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 6.802325581395349e-08, + "logits/chosen": -2.107468843460083, + "logits/rejected": -2.109246253967285, + "logps/chosen": -0.12533387541770935, + "logps/rejected": -209.5439453125, + "loss": 0.6964, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0006298780790530145, + "rewards/margins": -0.015929866582155228, + "rewards/rejected": 0.015299987979233265, + "step": 351 + }, + { + "epoch": 0.02, + "learning_rate": 6.821705426356589e-08, + "logits/chosen": -2.188291311264038, + "logits/rejected": -2.17958402633667, + "logps/chosen": -70.66287231445312, + "logps/rejected": -221.4285430908203, + "loss": 0.6844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00202178955078125, + "rewards/margins": 0.044525146484375, + "rewards/rejected": -0.04654693603515625, + "step": 352 + }, + { + "epoch": 0.02, + "learning_rate": 6.841085271317828e-08, + "logits/chosen": -2.1518445014953613, + "logits/rejected": -2.1092066764831543, + "logps/chosen": -176.819091796875, + "logps/rejected": -420.6894226074219, + "loss": 0.5979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15568695962429047, + "rewards/margins": 0.20661164820194244, + "rewards/rejected": -0.05092468485236168, + "step": 353 + }, + { + "epoch": 0.02, + "learning_rate": 6.86046511627907e-08, + "logits/chosen": -2.068021535873413, + "logits/rejected": -2.039618730545044, + "logps/chosen": -283.32916259765625, + "logps/rejected": -414.54730224609375, + "loss": 0.7285, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02802734449505806, + "rewards/margins": -0.12033692002296448, + "rewards/rejected": 0.09230957180261612, + "step": 354 + }, + { + "epoch": 0.02, + "learning_rate": 6.879844961240309e-08, + "logits/chosen": -2.357074737548828, + "logits/rejected": -2.3354198932647705, + "logps/chosen": -4.124598854104988e-05, + "logps/rejected": -120.9106674194336, + "loss": 0.6914, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.7681169235147536e-08, + "rewards/margins": 0.00706105213612318, + "rewards/rejected": -0.007061004638671875, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 6.89922480620155e-08, + "logits/chosen": -2.2152395248413086, + "logits/rejected": -2.1698617935180664, + "logps/chosen": -186.38665771484375, + "logps/rejected": -334.87457275390625, + "loss": 0.7435, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06963653862476349, + "rewards/margins": -0.08495026081800461, + "rewards/rejected": 0.015313721261918545, + "step": 356 + }, + { + "epoch": 0.02, + "learning_rate": 6.91860465116279e-08, + "logits/chosen": -2.2020533084869385, + "logits/rejected": -2.1738762855529785, + "logps/chosen": -167.30642700195312, + "logps/rejected": -447.693603515625, + "loss": 0.6554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04593963548541069, + "rewards/margins": 0.11170807480812073, + "rewards/rejected": -0.06576843559741974, + "step": 357 + }, + { + "epoch": 0.02, + "learning_rate": 6.937984496124031e-08, + "logits/chosen": -2.0909857749938965, + "logits/rejected": -2.0451483726501465, + "logps/chosen": -283.187255859375, + "logps/rejected": -423.0539245605469, + "loss": 0.6564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04739074781537056, + "rewards/margins": 0.09005431830883026, + "rewards/rejected": -0.04266357421875, + "step": 358 + }, + { + "epoch": 0.02, + "learning_rate": 6.957364341085271e-08, + "logits/chosen": -2.183720827102661, + "logits/rejected": -2.1668930053710938, + "logps/chosen": -0.8987483978271484, + "logps/rejected": -158.74783325195312, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2814998626708984e-06, + "rewards/margins": 0.04782232642173767, + "rewards/rejected": -0.047821044921875, + "step": 359 + }, + { + "epoch": 0.02, + "learning_rate": 6.976744186046512e-08, + "logits/chosen": -2.3479087352752686, + "logits/rejected": -2.3398337364196777, + "logps/chosen": -1.7194600105285645, + "logps/rejected": -99.78404235839844, + "loss": 0.6804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010336125269532204, + "rewards/margins": 0.04084302484989166, + "rewards/rejected": -0.03050689771771431, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 6.996124031007752e-08, + "logits/chosen": -2.4355642795562744, + "logits/rejected": -2.4457530975341797, + "logps/chosen": -57.893863677978516, + "logps/rejected": -304.1043395996094, + "loss": 0.6539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04120979458093643, + "rewards/margins": 0.12675972282886505, + "rewards/rejected": -0.08554992824792862, + "step": 361 + }, + { + "epoch": 0.02, + "learning_rate": 7.015503875968993e-08, + "logits/chosen": -2.1463963985443115, + "logits/rejected": -2.145475387573242, + "logps/chosen": -292.07281494140625, + "logps/rejected": -382.56878662109375, + "loss": 0.714, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.011730956844985485, + "rewards/margins": -0.08406371623277664, + "rewards/rejected": 0.07233276218175888, + "step": 362 + }, + { + "epoch": 0.02, + "learning_rate": 7.034883720930233e-08, + "logits/chosen": -2.091963529586792, + "logits/rejected": -2.070719003677368, + "logps/chosen": -81.19288635253906, + "logps/rejected": -236.78009033203125, + "loss": 0.7005, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0024116516578942537, + "rewards/margins": -0.036721039563417435, + "rewards/rejected": 0.03430938720703125, + "step": 363 + }, + { + "epoch": 0.02, + "learning_rate": 7.054263565891472e-08, + "logits/chosen": -2.047558546066284, + "logits/rejected": -2.0403263568878174, + "logps/chosen": -168.07237243652344, + "logps/rejected": -187.8345947265625, + "loss": 0.6867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04517669603228569, + "rewards/margins": 0.02441863901913166, + "rewards/rejected": 0.02075805701315403, + "step": 364 + }, + { + "epoch": 0.02, + "learning_rate": 7.073643410852713e-08, + "logits/chosen": -2.341046094894409, + "logits/rejected": -2.3410232067108154, + "logps/chosen": -9.165789604187012, + "logps/rejected": -347.99346923828125, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.890991138177924e-05, + "rewards/margins": 0.00765151996165514, + "rewards/rejected": -0.0076904296875, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.093023255813953e-08, + "logits/chosen": -2.1452438831329346, + "logits/rejected": -2.070052146911621, + "logps/chosen": -305.7044372558594, + "logps/rejected": -502.14483642578125, + "loss": 0.7063, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04581909254193306, + "rewards/margins": -0.07069091498851776, + "rewards/rejected": 0.11651001125574112, + "step": 366 + }, + { + "epoch": 0.02, + "learning_rate": 7.112403100775194e-08, + "logits/chosen": -2.32002854347229, + "logits/rejected": -2.2747597694396973, + "logps/chosen": -137.10098266601562, + "logps/rejected": -247.13885498046875, + "loss": 0.697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0287017822265625, + "rewards/margins": 0.007386781275272369, + "rewards/rejected": -0.03608856350183487, + "step": 367 + }, + { + "epoch": 0.02, + "learning_rate": 7.131782945736434e-08, + "logits/chosen": -2.0462589263916016, + "logits/rejected": -2.0508317947387695, + "logps/chosen": -104.60430908203125, + "logps/rejected": -309.7433776855469, + "loss": 0.6715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02275543287396431, + "rewards/margins": 0.06337127834558487, + "rewards/rejected": -0.04061584547162056, + "step": 368 + }, + { + "epoch": 0.02, + "learning_rate": 7.151162790697675e-08, + "logits/chosen": -2.1594526767730713, + "logits/rejected": -2.1423401832580566, + "logps/chosen": -266.5342102050781, + "logps/rejected": -353.89959716796875, + "loss": 0.6645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08657532185316086, + "rewards/margins": 0.0307312048971653, + "rewards/rejected": 0.05584411695599556, + "step": 369 + }, + { + "epoch": 0.02, + "learning_rate": 7.170542635658915e-08, + "logits/chosen": -2.1452982425689697, + "logits/rejected": -2.1341888904571533, + "logps/chosen": -22.492298126220703, + "logps/rejected": -172.4862060546875, + "loss": 0.6957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021450234577059746, + "rewards/margins": 0.016351891681551933, + "rewards/rejected": -0.03780212625861168, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.189922480620154e-08, + "logits/chosen": -2.279268980026245, + "logits/rejected": -2.259794235229492, + "logps/chosen": -28.674686431884766, + "logps/rejected": -192.48928833007812, + "loss": 0.7024, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02200164832174778, + "rewards/margins": -0.03023681789636612, + "rewards/rejected": 0.008235168643295765, + "step": 371 + }, + { + "epoch": 0.02, + "learning_rate": 7.209302325581394e-08, + "logits/chosen": -2.0049030780792236, + "logits/rejected": -1.9978946447372437, + "logps/chosen": -197.63717651367188, + "logps/rejected": -373.4859924316406, + "loss": 0.6993, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.000946044921875, + "rewards/margins": -0.0139312744140625, + "rewards/rejected": 0.0148773193359375, + "step": 372 + }, + { + "epoch": 0.02, + "learning_rate": 7.228682170542635e-08, + "logits/chosen": -2.1214773654937744, + "logits/rejected": -2.0791351795196533, + "logps/chosen": -232.79678344726562, + "logps/rejected": -358.76690673828125, + "loss": 0.6867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015176392160356045, + "rewards/margins": 0.05408630520105362, + "rewards/rejected": -0.038909912109375, + "step": 373 + }, + { + "epoch": 0.02, + "learning_rate": 7.248062015503875e-08, + "logits/chosen": -2.0971131324768066, + "logits/rejected": -2.086136817932129, + "logps/chosen": -172.48617553710938, + "logps/rejected": -235.1420440673828, + "loss": 0.6819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001017761300317943, + "rewards/margins": 0.06939392536878586, + "rewards/rejected": -0.06837616115808487, + "step": 374 + }, + { + "epoch": 0.02, + "learning_rate": 7.267441860465116e-08, + "logits/chosen": -2.1754114627838135, + "logits/rejected": -2.167997360229492, + "logps/chosen": -99.91471099853516, + "logps/rejected": -167.67913818359375, + "loss": 0.6728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04781036451458931, + "rewards/margins": 0.04718170315027237, + "rewards/rejected": 0.0006286621210165322, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.286821705426356e-08, + "logits/chosen": -2.007413387298584, + "logits/rejected": -2.067464590072632, + "logps/chosen": -340.9486083984375, + "logps/rejected": -283.3266296386719, + "loss": 0.636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09890136867761612, + "rewards/margins": 0.11443176120519638, + "rewards/rejected": -0.015530395321547985, + "step": 376 + }, + { + "epoch": 0.02, + "learning_rate": 7.306201550387596e-08, + "logits/chosen": -2.267688035964966, + "logits/rejected": -2.2682878971099854, + "logps/chosen": -24.109527587890625, + "logps/rejected": -64.85470581054688, + "loss": 0.6774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015583229251205921, + "rewards/margins": 0.04186725616455078, + "rewards/rejected": -0.026284027844667435, + "step": 377 + }, + { + "epoch": 0.02, + "learning_rate": 7.325581395348837e-08, + "logits/chosen": -2.115272283554077, + "logits/rejected": -2.068178653717041, + "logps/chosen": -259.2621154785156, + "logps/rejected": -420.1104736328125, + "loss": 0.6927, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02052917517721653, + "rewards/margins": -0.0062713623046875, + "rewards/rejected": 0.02680053748190403, + "step": 378 + }, + { + "epoch": 0.02, + "learning_rate": 7.344961240310076e-08, + "logits/chosen": -2.138180732727051, + "logits/rejected": -2.1306629180908203, + "logps/chosen": -247.97909545898438, + "logps/rejected": -337.15655517578125, + "loss": 0.6848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0183868408203125, + "rewards/margins": 0.02734069898724556, + "rewards/rejected": -0.008953857235610485, + "step": 379 + }, + { + "epoch": 0.02, + "learning_rate": 7.364341085271317e-08, + "logits/chosen": -2.0751049518585205, + "logits/rejected": -2.078847646713257, + "logps/chosen": -15.699078559875488, + "logps/rejected": -75.02037048339844, + "loss": 0.6898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017604828462935984, + "rewards/margins": 0.008250618353486061, + "rewards/rejected": -0.008426666259765625, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.383720930232557e-08, + "logits/chosen": -2.0849084854125977, + "logits/rejected": -2.068748950958252, + "logps/chosen": -300.0518493652344, + "logps/rejected": -391.996826171875, + "loss": 0.6216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08858337253332138, + "rewards/margins": 0.19884948432445526, + "rewards/rejected": -0.11026611179113388, + "step": 381 + }, + { + "epoch": 0.02, + "learning_rate": 7.403100775193798e-08, + "logits/chosen": -2.1555709838867188, + "logits/rejected": -2.153827667236328, + "logps/chosen": -3.7094037532806396, + "logps/rejected": -107.12830352783203, + "loss": 0.7014, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.862381163344253e-06, + "rewards/margins": -0.03248133510351181, + "rewards/rejected": 0.032485198229551315, + "step": 382 + }, + { + "epoch": 0.02, + "learning_rate": 7.422480620155038e-08, + "logits/chosen": -2.321939468383789, + "logits/rejected": -2.3148105144500732, + "logps/chosen": -28.700212478637695, + "logps/rejected": -142.51622009277344, + "loss": 0.7022, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.010037231259047985, + "rewards/margins": -0.03830413892865181, + "rewards/rejected": 0.04834137111902237, + "step": 383 + }, + { + "epoch": 0.02, + "learning_rate": 7.441860465116279e-08, + "logits/chosen": -2.157050609588623, + "logits/rejected": -2.1254284381866455, + "logps/chosen": -38.520057678222656, + "logps/rejected": -294.14068603515625, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010078049264848232, + "rewards/margins": 0.04282722249627113, + "rewards/rejected": -0.05290527269244194, + "step": 384 + }, + { + "epoch": 0.02, + "learning_rate": 7.461240310077519e-08, + "logits/chosen": -2.1075539588928223, + "logits/rejected": -2.0959434509277344, + "logps/chosen": -270.9920349121094, + "logps/rejected": -389.9361267089844, + "loss": 0.635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0304107666015625, + "rewards/margins": 0.15710754692554474, + "rewards/rejected": -0.12669678032398224, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.48062015503876e-08, + "logits/chosen": -2.229520320892334, + "logits/rejected": -2.208116054534912, + "logps/chosen": -133.8753204345703, + "logps/rejected": -307.4195556640625, + "loss": 0.7008, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02890777587890625, + "rewards/margins": -0.02267913892865181, + "rewards/rejected": 0.05158691480755806, + "step": 386 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-08, + "logits/chosen": -2.109849214553833, + "logits/rejected": -2.1053285598754883, + "logps/chosen": -12.394286155700684, + "logps/rejected": -70.7721939086914, + "loss": 0.6928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010692310519516468, + "rewards/margins": 0.013569927774369717, + "rewards/rejected": -0.024262238293886185, + "step": 387 + }, + { + "epoch": 0.02, + "learning_rate": 7.51937984496124e-08, + "logits/chosen": -2.111598491668701, + "logits/rejected": -2.0857436656951904, + "logps/chosen": -205.09371948242188, + "logps/rejected": -278.1204528808594, + "loss": 0.7156, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.006622314453125, + "rewards/margins": -0.06259460747241974, + "rewards/rejected": 0.06921692192554474, + "step": 388 + }, + { + "epoch": 0.02, + "learning_rate": 7.53875968992248e-08, + "logits/chosen": -2.17234206199646, + "logits/rejected": -2.151175022125244, + "logps/chosen": -81.75884246826172, + "logps/rejected": -231.51531982421875, + "loss": 0.6917, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.004906463902443647, + "rewards/margins": -0.027559660375118256, + "rewards/rejected": 0.03246612474322319, + "step": 389 + }, + { + "epoch": 0.02, + "learning_rate": 7.55813953488372e-08, + "logits/chosen": -2.0823974609375, + "logits/rejected": -2.072655439376831, + "logps/chosen": -2.825239243975375e-05, + "logps/rejected": -153.61170959472656, + "loss": 0.6899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.013302612118422985, + "rewards/rejected": -0.013302612118422985, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.577519379844961e-08, + "logits/chosen": -2.1085963249206543, + "logits/rejected": -2.088717460632324, + "logps/chosen": -117.57898712158203, + "logps/rejected": -235.96719360351562, + "loss": 0.6435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08046646416187286, + "rewards/margins": 0.09422226250171661, + "rewards/rejected": -0.01375579833984375, + "step": 391 + }, + { + "epoch": 0.02, + "learning_rate": 7.596899224806201e-08, + "logits/chosen": -2.106574296951294, + "logits/rejected": -2.0815365314483643, + "logps/chosen": -229.4424591064453, + "logps/rejected": -394.0614013671875, + "loss": 0.7089, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02679901197552681, + "rewards/margins": -0.07654266059398651, + "rewards/rejected": 0.04974365234375, + "step": 392 + }, + { + "epoch": 0.02, + "learning_rate": 7.616279069767442e-08, + "logits/chosen": -2.1387717723846436, + "logits/rejected": -2.112758159637451, + "logps/chosen": -286.43511962890625, + "logps/rejected": -450.4307861328125, + "loss": 0.6717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02989502064883709, + "rewards/margins": 0.08613280951976776, + "rewards/rejected": -0.11602783203125, + "step": 393 + }, + { + "epoch": 0.02, + "learning_rate": 7.635658914728682e-08, + "logits/chosen": -2.207310676574707, + "logits/rejected": -2.2075536251068115, + "logps/chosen": -8.549078941345215, + "logps/rejected": -124.87567901611328, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02179260365664959, + "rewards/margins": 0.01674041897058487, + "rewards/rejected": 0.0050521851517260075, + "step": 394 + }, + { + "epoch": 0.02, + "learning_rate": 7.655038759689923e-08, + "logits/chosen": -2.1561505794525146, + "logits/rejected": -2.138582706451416, + "logps/chosen": -32.85188293457031, + "logps/rejected": -285.72509765625, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004315185826271772, + "rewards/margins": 0.02218322828412056, + "rewards/rejected": -0.0178680419921875, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.674418604651163e-08, + "logits/chosen": -2.142080783843994, + "logits/rejected": -2.1467082500457764, + "logps/chosen": -42.043373107910156, + "logps/rejected": -96.99942016601562, + "loss": 0.6914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006896972772665322, + "rewards/margins": 0.00289154052734375, + "rewards/rejected": -0.0022018433082848787, + "step": 396 + }, + { + "epoch": 0.02, + "learning_rate": 7.693798449612404e-08, + "logits/chosen": -2.1410629749298096, + "logits/rejected": -2.0762431621551514, + "logps/chosen": -206.43260192871094, + "logps/rejected": -273.24566650390625, + "loss": 0.7036, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01764526404440403, + "rewards/margins": -0.02189331129193306, + "rewards/rejected": 0.0042480467818677425, + "step": 397 + }, + { + "epoch": 0.02, + "learning_rate": 7.713178294573643e-08, + "logits/chosen": -2.1832027435302734, + "logits/rejected": -2.1849000453948975, + "logps/chosen": -20.070852279663086, + "logps/rejected": -93.86637115478516, + "loss": 0.6871, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03240222856402397, + "rewards/margins": -0.016106989234685898, + "rewards/rejected": 0.04850921779870987, + "step": 398 + }, + { + "epoch": 0.02, + "learning_rate": 7.732558139534883e-08, + "logits/chosen": -2.218764305114746, + "logits/rejected": -2.2142531871795654, + "logps/chosen": -8.88336181640625, + "logps/rejected": -84.86013793945312, + "loss": 0.7104, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.021455859765410423, + "rewards/margins": -0.04889879375696182, + "rewards/rejected": 0.02744293212890625, + "step": 399 + }, + { + "epoch": 0.02, + "learning_rate": 7.751937984496124e-08, + "logits/chosen": -2.1484005451202393, + "logits/rejected": -2.112992286682129, + "logps/chosen": -11.638165473937988, + "logps/rejected": -264.33575439453125, + "loss": 0.6945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008851814083755016, + "rewards/margins": 0.03506603091955185, + "rewards/rejected": -0.04391784593462944, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 7.771317829457364e-08, + "logits/chosen": -2.2933971881866455, + "logits/rejected": -2.280357599258423, + "logps/chosen": -33.19816589355469, + "logps/rejected": -296.8531494140625, + "loss": 0.6964, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.027980804443359375, + "rewards/margins": -0.032447051256895065, + "rewards/rejected": 0.06042785570025444, + "step": 401 + }, + { + "epoch": 0.02, + "learning_rate": 7.790697674418605e-08, + "logits/chosen": -2.1931493282318115, + "logits/rejected": -2.1895604133605957, + "logps/chosen": -56.471797943115234, + "logps/rejected": -187.8743896484375, + "loss": 0.7017, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.020241929218173027, + "rewards/margins": -0.03347435221076012, + "rewards/rejected": 0.013232422061264515, + "step": 402 + }, + { + "epoch": 0.02, + "learning_rate": 7.810077519379845e-08, + "logits/chosen": -2.2963006496429443, + "logits/rejected": -2.3930177688598633, + "logps/chosen": -281.98919677734375, + "logps/rejected": -409.52716064453125, + "loss": 0.6577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04399413987994194, + "rewards/margins": 0.08151855319738388, + "rewards/rejected": -0.03752441331744194, + "step": 403 + }, + { + "epoch": 0.02, + "learning_rate": 7.829457364341085e-08, + "logits/chosen": -2.2197422981262207, + "logits/rejected": -2.2178237438201904, + "logps/chosen": -243.4558563232422, + "logps/rejected": -329.52191162109375, + "loss": 0.7229, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0070663453079760075, + "rewards/margins": -0.12453766167163849, + "rewards/rejected": 0.11747131496667862, + "step": 404 + }, + { + "epoch": 0.02, + "learning_rate": 7.848837209302324e-08, + "logits/chosen": -2.0908660888671875, + "logits/rejected": -2.0748965740203857, + "logps/chosen": -91.77893829345703, + "logps/rejected": -232.19505310058594, + "loss": 0.6884, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.008035278879106045, + "rewards/margins": -0.00660095177590847, + "rewards/rejected": 0.014636230655014515, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 7.868217054263565e-08, + "logits/chosen": -2.1469430923461914, + "logits/rejected": -2.122933864593506, + "logps/chosen": -212.244873046875, + "logps/rejected": -339.9306335449219, + "loss": 0.6525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06723632663488388, + "rewards/margins": 0.06833801418542862, + "rewards/rejected": -0.001101684640161693, + "step": 406 + }, + { + "epoch": 0.02, + "learning_rate": 7.887596899224805e-08, + "logits/chosen": -2.1512320041656494, + "logits/rejected": -2.1491599082946777, + "logps/chosen": -0.000151271146023646, + "logps/rejected": -117.67627716064453, + "loss": 0.6895, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.556205676133686e-07, + "rewards/margins": 0.014637649059295654, + "rewards/rejected": -0.014636993408203125, + "step": 407 + }, + { + "epoch": 0.02, + "learning_rate": 7.906976744186046e-08, + "logits/chosen": -2.145172595977783, + "logits/rejected": -2.1448628902435303, + "logps/chosen": -35.59928512573242, + "logps/rejected": -203.0641326904297, + "loss": 0.685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016377640888094902, + "rewards/margins": 0.01630592532455921, + "rewards/rejected": 7.171631295932457e-05, + "step": 408 + }, + { + "epoch": 0.02, + "learning_rate": 7.926356589147286e-08, + "logits/chosen": -2.310352087020874, + "logits/rejected": -2.288309097290039, + "logps/chosen": -185.4737091064453, + "logps/rejected": -314.19970703125, + "loss": 0.6489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07513274997472763, + "rewards/margins": 0.08721771091222763, + "rewards/rejected": -0.0120849609375, + "step": 409 + }, + { + "epoch": 0.02, + "learning_rate": 7.945736434108527e-08, + "logits/chosen": -2.179658889770508, + "logits/rejected": -2.151937484741211, + "logps/chosen": -258.47015380859375, + "logps/rejected": -379.32574462890625, + "loss": 0.7489, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06997986137866974, + "rewards/margins": -0.11686097085475922, + "rewards/rejected": 0.04688110575079918, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 7.965116279069767e-08, + "logits/chosen": -2.145458459854126, + "logits/rejected": -2.10414457321167, + "logps/chosen": -57.70912170410156, + "logps/rejected": -340.854248046875, + "loss": 0.6845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002355957170948386, + "rewards/margins": 0.03703613579273224, + "rewards/rejected": -0.03939209133386612, + "step": 411 + }, + { + "epoch": 0.02, + "learning_rate": 7.984496124031007e-08, + "logits/chosen": -2.255533218383789, + "logits/rejected": -2.235367774963379, + "logps/chosen": -149.39178466796875, + "logps/rejected": -300.29315185546875, + "loss": 0.6817, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07082214206457138, + "rewards/margins": -0.006945803761482239, + "rewards/rejected": 0.07776794582605362, + "step": 412 + }, + { + "epoch": 0.02, + "learning_rate": 8.003875968992248e-08, + "logits/chosen": -2.2426910400390625, + "logits/rejected": -2.233384847640991, + "logps/chosen": -53.73741912841797, + "logps/rejected": -185.1826171875, + "loss": 0.7076, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.018669892102479935, + "rewards/margins": -0.052559662610292435, + "rewards/rejected": 0.0338897705078125, + "step": 413 + }, + { + "epoch": 0.02, + "learning_rate": 8.023255813953487e-08, + "logits/chosen": -2.227720260620117, + "logits/rejected": -2.2079789638519287, + "logps/chosen": -208.30966186523438, + "logps/rejected": -357.6863098144531, + "loss": 0.7274, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03880462795495987, + "rewards/margins": -0.06958770751953125, + "rewards/rejected": 0.03078308142721653, + "step": 414 + }, + { + "epoch": 0.02, + "learning_rate": 8.042635658914728e-08, + "logits/chosen": -1.99932861328125, + "logits/rejected": -1.953860878944397, + "logps/chosen": -191.17213439941406, + "logps/rejected": -310.16552734375, + "loss": 0.6552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04709320142865181, + "rewards/margins": 0.08264008164405823, + "rewards/rejected": -0.03554687649011612, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.062015503875968e-08, + "logits/chosen": -2.3542542457580566, + "logits/rejected": -2.3589060306549072, + "logps/chosen": -59.34284591674805, + "logps/rejected": -170.93603515625, + "loss": 0.701, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.002216339111328125, + "rewards/margins": -0.030059050768613815, + "rewards/rejected": 0.03227538987994194, + "step": 416 + }, + { + "epoch": 0.02, + "learning_rate": 8.081395348837209e-08, + "logits/chosen": -2.3200886249542236, + "logits/rejected": -2.307786703109741, + "logps/chosen": -6.604077498195693e-05, + "logps/rejected": -180.56710815429688, + "loss": 0.6988, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.2184544807023485e-07, + "rewards/margins": -0.022607099264860153, + "rewards/rejected": 0.02260742150247097, + "step": 417 + }, + { + "epoch": 0.02, + "learning_rate": 8.100775193798449e-08, + "logits/chosen": -2.218045711517334, + "logits/rejected": -2.1978657245635986, + "logps/chosen": -105.95001220703125, + "logps/rejected": -217.77313232421875, + "loss": 0.71, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.028903961181640625, + "rewards/margins": -0.048273466527462006, + "rewards/rejected": 0.01936950720846653, + "step": 418 + }, + { + "epoch": 0.02, + "learning_rate": 8.12015503875969e-08, + "logits/chosen": -2.128408432006836, + "logits/rejected": -2.0618817806243896, + "logps/chosen": -147.10159301757812, + "logps/rejected": -335.8837890625, + "loss": 0.6668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05834350734949112, + "rewards/margins": 0.04313354566693306, + "rewards/rejected": 0.015209960751235485, + "step": 419 + }, + { + "epoch": 0.02, + "learning_rate": 8.13953488372093e-08, + "logits/chosen": -2.0388906002044678, + "logits/rejected": -2.036742687225342, + "logps/chosen": -299.79132080078125, + "logps/rejected": -427.40545654296875, + "loss": 0.6599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1044464111328125, + "rewards/margins": 0.009310908615589142, + "rewards/rejected": 0.09513550251722336, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.158914728682171e-08, + "logits/chosen": -2.1103169918060303, + "logits/rejected": -2.1084704399108887, + "logps/chosen": -2.7145729064941406, + "logps/rejected": -82.35269927978516, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021019363775849342, + "rewards/margins": 0.04207649081945419, + "rewards/rejected": -0.02105712890625, + "step": 421 + }, + { + "epoch": 0.02, + "learning_rate": 8.17829457364341e-08, + "logits/chosen": -1.907292366027832, + "logits/rejected": -1.8590333461761475, + "logps/chosen": -330.8575439453125, + "logps/rejected": -470.27410888671875, + "loss": 0.7536, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06121215969324112, + "rewards/margins": -0.17100220918655396, + "rewards/rejected": 0.10979004204273224, + "step": 422 + }, + { + "epoch": 0.02, + "learning_rate": 8.19767441860465e-08, + "logits/chosen": -2.426893711090088, + "logits/rejected": -2.4124934673309326, + "logps/chosen": -8.00045394897461, + "logps/rejected": -208.26126098632812, + "loss": 0.6865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012497234158217907, + "rewards/margins": 0.015745829790830612, + "rewards/rejected": -0.003248596331104636, + "step": 423 + }, + { + "epoch": 0.02, + "learning_rate": 8.217054263565891e-08, + "logits/chosen": -2.191546678543091, + "logits/rejected": -2.191833257675171, + "logps/chosen": -0.07841703295707703, + "logps/rejected": -77.72628784179688, + "loss": 0.6881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001060187816619873, + "rewards/margins": 0.019244849681854248, + "rewards/rejected": -0.018184661865234375, + "step": 424 + }, + { + "epoch": 0.02, + "learning_rate": 8.236434108527131e-08, + "logits/chosen": -2.4192957878112793, + "logits/rejected": -2.3972008228302, + "logps/chosen": -14.750045776367188, + "logps/rejected": -115.01090240478516, + "loss": 0.6975, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.8610230629055877e-07, + "rewards/margins": -0.01687745936214924, + "rewards/rejected": 0.01687774620950222, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.255813953488372e-08, + "logits/chosen": -2.0768396854400635, + "logits/rejected": -2.0570600032806396, + "logps/chosen": -268.5911865234375, + "logps/rejected": -430.541259765625, + "loss": 0.7212, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03352661058306694, + "rewards/margins": -0.11789855360984802, + "rewards/rejected": 0.08437194675207138, + "step": 426 + }, + { + "epoch": 0.02, + "learning_rate": 8.275193798449612e-08, + "logits/chosen": -2.2287471294403076, + "logits/rejected": -2.1737747192382812, + "logps/chosen": -282.3707275390625, + "logps/rejected": -525.8325805664062, + "loss": 0.6906, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02872924879193306, + "rewards/margins": -0.03133545070886612, + "rewards/rejected": 0.06006469950079918, + "step": 427 + }, + { + "epoch": 0.02, + "learning_rate": 8.294573643410853e-08, + "logits/chosen": -2.1260409355163574, + "logits/rejected": -2.1096103191375732, + "logps/chosen": -260.1485595703125, + "logps/rejected": -363.33258056640625, + "loss": 0.6728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04626159742474556, + "rewards/margins": 0.04010314866900444, + "rewards/rejected": 0.0061584473587572575, + "step": 428 + }, + { + "epoch": 0.02, + "learning_rate": 8.313953488372093e-08, + "logits/chosen": -2.232480049133301, + "logits/rejected": -2.238222599029541, + "logps/chosen": -47.46052932739258, + "logps/rejected": -168.2093505859375, + "loss": 0.6897, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03428840637207031, + "rewards/margins": -0.0013149268925189972, + "rewards/rejected": 0.03560333326458931, + "step": 429 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -2.1578166484832764, + "logits/rejected": -2.1471476554870605, + "logps/chosen": -42.92936706542969, + "logps/rejected": -286.076171875, + "loss": 0.6824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023191070184111595, + "rewards/margins": 0.010291289538145065, + "rewards/rejected": 0.01289978064596653, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 8.352713178294574e-08, + "logits/chosen": -2.2073020935058594, + "logits/rejected": -2.207080364227295, + "logps/chosen": -19.812564849853516, + "logps/rejected": -79.60694885253906, + "loss": 0.7065, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04362983629107475, + "rewards/margins": -0.016090773046016693, + "rewards/rejected": -0.02753906324505806, + "step": 431 + }, + { + "epoch": 0.03, + "learning_rate": 8.372093023255815e-08, + "logits/chosen": -2.2105116844177246, + "logits/rejected": -2.2141785621643066, + "logps/chosen": -4.332374572753906, + "logps/rejected": -86.65067291259766, + "loss": 0.7027, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.007309341337531805, + "rewards/margins": -0.030723953619599342, + "rewards/rejected": 0.02341461181640625, + "step": 432 + }, + { + "epoch": 0.03, + "learning_rate": 8.391472868217054e-08, + "logits/chosen": -2.1999502182006836, + "logits/rejected": -2.194326877593994, + "logps/chosen": -29.335689544677734, + "logps/rejected": -122.8352279663086, + "loss": 0.6772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015563583932816982, + "rewards/margins": 0.049554064869880676, + "rewards/rejected": -0.03399048000574112, + "step": 433 + }, + { + "epoch": 0.03, + "learning_rate": 8.410852713178294e-08, + "logits/chosen": -2.268101692199707, + "logits/rejected": -2.2568957805633545, + "logps/chosen": -21.41928482055664, + "logps/rejected": -141.39732360839844, + "loss": 0.6954, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.012302017770707607, + "rewards/margins": -0.019449997693300247, + "rewards/rejected": 0.03175201639533043, + "step": 434 + }, + { + "epoch": 0.03, + "learning_rate": 8.430232558139535e-08, + "logits/chosen": -2.267245054244995, + "logits/rejected": -2.2438852787017822, + "logps/chosen": -3.868485927581787, + "logps/rejected": -146.82379150390625, + "loss": 0.6845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014735698932781816, + "rewards/margins": 0.04352984204888344, + "rewards/rejected": -0.04205627366900444, + "step": 435 + }, + { + "epoch": 0.03, + "learning_rate": 8.449612403100774e-08, + "logits/chosen": -2.2405779361724854, + "logits/rejected": -2.2142815589904785, + "logps/chosen": -290.20806884765625, + "logps/rejected": -380.20806884765625, + "loss": 0.6728, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10851746052503586, + "rewards/margins": -0.04773253947496414, + "rewards/rejected": 0.15625, + "step": 436 + }, + { + "epoch": 0.03, + "learning_rate": 8.468992248062015e-08, + "logits/chosen": -2.204256057739258, + "logits/rejected": -2.190760374069214, + "logps/chosen": -219.38719177246094, + "logps/rejected": -315.77728271484375, + "loss": 0.6377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07917022705078125, + "rewards/margins": 0.10861663520336151, + "rewards/rejected": -0.02944641187787056, + "step": 437 + }, + { + "epoch": 0.03, + "learning_rate": 8.488372093023254e-08, + "logits/chosen": -2.1096768379211426, + "logits/rejected": -2.10483717918396, + "logps/chosen": -16.500112533569336, + "logps/rejected": -96.98185729980469, + "loss": 0.7032, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01249771099537611, + "rewards/margins": -0.02713623084127903, + "rewards/rejected": 0.01463851984590292, + "step": 438 + }, + { + "epoch": 0.03, + "learning_rate": 8.507751937984496e-08, + "logits/chosen": -2.108142137527466, + "logits/rejected": -2.100051164627075, + "logps/chosen": -54.44265365600586, + "logps/rejected": -174.69937133789062, + "loss": 0.6831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011757278814911842, + "rewards/margins": 0.04105720669031143, + "rewards/rejected": -0.02929992787539959, + "step": 439 + }, + { + "epoch": 0.03, + "learning_rate": 8.527131782945735e-08, + "logits/chosen": -2.296860694885254, + "logits/rejected": -2.291778564453125, + "logps/chosen": -20.5166072845459, + "logps/rejected": -161.7742156982422, + "loss": 0.692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024171830154955387, + "rewards/margins": 0.006941032130271196, + "rewards/rejected": -0.009358215145766735, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 8.546511627906976e-08, + "logits/chosen": -2.3068666458129883, + "logits/rejected": -2.299292802810669, + "logps/chosen": -46.213340759277344, + "logps/rejected": -175.2652130126953, + "loss": 0.7018, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00997772254049778, + "rewards/margins": -0.049468994140625, + "rewards/rejected": 0.05944671854376793, + "step": 441 + }, + { + "epoch": 0.03, + "learning_rate": 8.565891472868216e-08, + "logits/chosen": -2.120330333709717, + "logits/rejected": -2.111626148223877, + "logps/chosen": -223.32037353515625, + "logps/rejected": -265.61627197265625, + "loss": 0.6847, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.09393616020679474, + "rewards/margins": -0.0872344970703125, + "rewards/rejected": 0.18117065727710724, + "step": 442 + }, + { + "epoch": 0.03, + "learning_rate": 8.585271317829457e-08, + "logits/chosen": -2.2338027954101562, + "logits/rejected": -2.224400281906128, + "logps/chosen": -13.119873046875, + "logps/rejected": -278.5372314453125, + "loss": 0.7014, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00220489501953125, + "rewards/margins": -0.02829437330365181, + "rewards/rejected": 0.02608947828412056, + "step": 443 + }, + { + "epoch": 0.03, + "learning_rate": 8.604651162790697e-08, + "logits/chosen": -2.08857798576355, + "logits/rejected": -2.0694987773895264, + "logps/chosen": -13.81509017944336, + "logps/rejected": -202.56173706054688, + "loss": 0.6969, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00695724505931139, + "rewards/margins": -0.017603300511837006, + "rewards/rejected": 0.02456054650247097, + "step": 444 + }, + { + "epoch": 0.03, + "learning_rate": 8.624031007751938e-08, + "logits/chosen": -2.152397632598877, + "logits/rejected": -2.147484302520752, + "logps/chosen": -8.296808664454147e-05, + "logps/rejected": -99.7103271484375, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1920929132713809e-08, + "rewards/margins": 0.015092456713318825, + "rewards/rejected": -0.015092468820512295, + "step": 445 + }, + { + "epoch": 0.03, + "learning_rate": 8.643410852713178e-08, + "logits/chosen": -2.204406261444092, + "logits/rejected": -2.2281875610351562, + "logps/chosen": -254.19430541992188, + "logps/rejected": -333.0723876953125, + "loss": 0.6536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13080139458179474, + "rewards/margins": 0.014645390212535858, + "rewards/rejected": 0.11615600436925888, + "step": 446 + }, + { + "epoch": 0.03, + "learning_rate": 8.662790697674417e-08, + "logits/chosen": -2.174675703048706, + "logits/rejected": -2.181093692779541, + "logps/chosen": -0.4657137989997864, + "logps/rejected": -46.80376434326172, + "loss": 0.7004, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0003529846726451069, + "rewards/margins": -0.02792155183851719, + "rewards/rejected": 0.0282745361328125, + "step": 447 + }, + { + "epoch": 0.03, + "learning_rate": 8.682170542635659e-08, + "logits/chosen": -2.0856211185455322, + "logits/rejected": -2.0386011600494385, + "logps/chosen": -186.32244873046875, + "logps/rejected": -310.41705322265625, + "loss": 0.7262, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01946716383099556, + "rewards/margins": -0.10531921684741974, + "rewards/rejected": 0.08585204929113388, + "step": 448 + }, + { + "epoch": 0.03, + "learning_rate": 8.701550387596898e-08, + "logits/chosen": -2.135526418685913, + "logits/rejected": -2.1358821392059326, + "logps/chosen": -0.00013887396198697388, + "logps/rejected": -44.916404724121094, + "loss": 0.693, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1457090326748585e-07, + "rewards/margins": 0.0005738973850384355, + "rewards/rejected": -0.0005741119384765625, + "step": 449 + }, + { + "epoch": 0.03, + "learning_rate": 8.720930232558139e-08, + "logits/chosen": -2.256218671798706, + "logits/rejected": -2.2454705238342285, + "logps/chosen": -18.29295539855957, + "logps/rejected": -162.0178680419922, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004777336027473211, + "rewards/margins": 0.02754497528076172, + "rewards/rejected": -0.02276763878762722, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 8.740310077519379e-08, + "logits/chosen": -2.1288020610809326, + "logits/rejected": -2.071129322052002, + "logps/chosen": -167.78775024414062, + "logps/rejected": -153.24710083007812, + "loss": 0.6978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006427002139389515, + "rewards/margins": 0.009429931640625, + "rewards/rejected": -0.0030029297340661287, + "step": 451 + }, + { + "epoch": 0.03, + "learning_rate": 8.75968992248062e-08, + "logits/chosen": -2.060114860534668, + "logits/rejected": -2.050971031188965, + "logps/chosen": -231.72357177734375, + "logps/rejected": -334.0690612792969, + "loss": 0.6803, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01818084716796875, + "rewards/margins": -0.00643157958984375, + "rewards/rejected": 0.0246124267578125, + "step": 452 + }, + { + "epoch": 0.03, + "learning_rate": 8.77906976744186e-08, + "logits/chosen": -2.2147207260131836, + "logits/rejected": -2.1500463485717773, + "logps/chosen": -194.47616577148438, + "logps/rejected": -293.07745361328125, + "loss": 0.6486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03025360219180584, + "rewards/margins": 0.14974212646484375, + "rewards/rejected": -0.11948852986097336, + "step": 453 + }, + { + "epoch": 0.03, + "learning_rate": 8.798449612403101e-08, + "logits/chosen": -2.2979354858398438, + "logits/rejected": -2.292827606201172, + "logps/chosen": -8.110398292541504, + "logps/rejected": -64.02373504638672, + "loss": 0.6794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009830951690673828, + "rewards/margins": 0.04202738031744957, + "rewards/rejected": -0.03219642862677574, + "step": 454 + }, + { + "epoch": 0.03, + "learning_rate": 8.817829457364341e-08, + "logits/chosen": -2.235900640487671, + "logits/rejected": -2.205686569213867, + "logps/chosen": -247.41281127929688, + "logps/rejected": -356.97637939453125, + "loss": 0.6733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04127044603228569, + "rewards/margins": 0.01370696909725666, + "rewards/rejected": 0.02756347693502903, + "step": 455 + }, + { + "epoch": 0.03, + "learning_rate": 8.837209302325582e-08, + "logits/chosen": -2.2124907970428467, + "logits/rejected": -2.1515941619873047, + "logps/chosen": -107.791748046875, + "logps/rejected": -377.25872802734375, + "loss": 0.703, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.005230713170021772, + "rewards/margins": -0.03754577785730362, + "rewards/rejected": 0.04277649149298668, + "step": 456 + }, + { + "epoch": 0.03, + "learning_rate": 8.856589147286822e-08, + "logits/chosen": -2.2878990173339844, + "logits/rejected": -2.2765114307403564, + "logps/chosen": -151.81283569335938, + "logps/rejected": -301.12322998046875, + "loss": 0.7188, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0031921386253088713, + "rewards/margins": -0.13344116508960724, + "rewards/rejected": 0.13663330674171448, + "step": 457 + }, + { + "epoch": 0.03, + "learning_rate": 8.875968992248061e-08, + "logits/chosen": -2.149749279022217, + "logits/rejected": -2.139223098754883, + "logps/chosen": -18.84484100341797, + "logps/rejected": -175.79312133789062, + "loss": 0.6919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004638290498405695, + "rewards/margins": 0.018158340826630592, + "rewards/rejected": -0.022796630859375, + "step": 458 + }, + { + "epoch": 0.03, + "learning_rate": 8.895348837209302e-08, + "logits/chosen": -2.1351799964904785, + "logits/rejected": -2.0819005966186523, + "logps/chosen": -298.137451171875, + "logps/rejected": -396.9036560058594, + "loss": 0.617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21525879204273224, + "rewards/margins": 0.10921936482191086, + "rewards/rejected": 0.10603942722082138, + "step": 459 + }, + { + "epoch": 0.03, + "learning_rate": 8.914728682170542e-08, + "logits/chosen": -2.2354156970977783, + "logits/rejected": -2.232699155807495, + "logps/chosen": -111.31533813476562, + "logps/rejected": -189.51588439941406, + "loss": 0.7127, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.010980987921357155, + "rewards/margins": -0.056604765355587006, + "rewards/rejected": 0.045623779296875, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 8.934108527131783e-08, + "logits/chosen": -2.2008354663848877, + "logits/rejected": -2.2030527591705322, + "logps/chosen": -2.649517297744751, + "logps/rejected": -94.58871459960938, + "loss": 0.6911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019976377952843904, + "rewards/margins": 0.02572453022003174, + "rewards/rejected": -0.02772216871380806, + "step": 461 + }, + { + "epoch": 0.03, + "learning_rate": 8.953488372093023e-08, + "logits/chosen": -1.8874191045761108, + "logits/rejected": -1.8683210611343384, + "logps/chosen": -209.0155029296875, + "logps/rejected": -238.43450927734375, + "loss": 0.6585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07242126762866974, + "rewards/margins": 0.04452820122241974, + "rewards/rejected": 0.02789306640625, + "step": 462 + }, + { + "epoch": 0.03, + "learning_rate": 8.972868217054264e-08, + "logits/chosen": -2.264397144317627, + "logits/rejected": -2.260622024536133, + "logps/chosen": -0.00017248581571038812, + "logps/rejected": -143.49343872070312, + "loss": 0.6977, + "rewards/accuracies": 0.0, + "rewards/chosen": 7.628739808751561e-07, + "rewards/margins": -0.018154144287109375, + "rewards/rejected": 0.01815490797162056, + "step": 463 + }, + { + "epoch": 0.03, + "learning_rate": 8.992248062015504e-08, + "logits/chosen": -2.189257860183716, + "logits/rejected": -2.189627170562744, + "logps/chosen": -57.66779327392578, + "logps/rejected": -163.8137664794922, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016407012939453125, + "rewards/margins": 0.00141220074146986, + "rewards/rejected": 0.014994812197983265, + "step": 464 + }, + { + "epoch": 0.03, + "learning_rate": 9.011627906976745e-08, + "logits/chosen": -2.1777944564819336, + "logits/rejected": -2.1486427783966064, + "logps/chosen": -174.44485473632812, + "logps/rejected": -311.4056701660156, + "loss": 0.7455, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09111633151769638, + "rewards/margins": -0.10447082668542862, + "rewards/rejected": 0.013354492373764515, + "step": 465 + }, + { + "epoch": 0.03, + "learning_rate": 9.031007751937985e-08, + "logits/chosen": -2.1614763736724854, + "logits/rejected": -2.1685802936553955, + "logps/chosen": -0.006324421614408493, + "logps/rejected": -176.8970947265625, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8726644814014435e-06, + "rewards/margins": 0.017495548352599144, + "rewards/rejected": -0.01749267615377903, + "step": 466 + }, + { + "epoch": 0.03, + "learning_rate": 9.050387596899226e-08, + "logits/chosen": -2.182711362838745, + "logits/rejected": -2.1772282123565674, + "logps/chosen": -54.42204666137695, + "logps/rejected": -244.57415771484375, + "loss": 0.6969, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.018227387219667435, + "rewards/margins": -0.022643279284238815, + "rewards/rejected": 0.04087066650390625, + "step": 467 + }, + { + "epoch": 0.03, + "learning_rate": 9.069767441860464e-08, + "logits/chosen": -2.133460760116577, + "logits/rejected": -2.127763271331787, + "logps/chosen": -28.630088806152344, + "logps/rejected": -198.44276428222656, + "loss": 0.6813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012031937018036842, + "rewards/margins": 0.04270820692181587, + "rewards/rejected": -0.03067626990377903, + "step": 468 + }, + { + "epoch": 0.03, + "learning_rate": 9.089147286821705e-08, + "logits/chosen": -2.3086087703704834, + "logits/rejected": -2.293262004852295, + "logps/chosen": -8.336759567260742, + "logps/rejected": -94.1164779663086, + "loss": 0.682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02403697930276394, + "rewards/margins": 0.01515865232795477, + "rewards/rejected": 0.00887832697480917, + "step": 469 + }, + { + "epoch": 0.03, + "learning_rate": 9.108527131782945e-08, + "logits/chosen": -2.4004414081573486, + "logits/rejected": -2.3799827098846436, + "logps/chosen": -0.0016892103012651205, + "logps/rejected": -125.59923553466797, + "loss": 0.6836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7933105482370593e-05, + "rewards/margins": 0.03838082030415535, + "rewards/rejected": -0.038362886756658554, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 9.127906976744185e-08, + "logits/chosen": -1.882624864578247, + "logits/rejected": -1.8863258361816406, + "logps/chosen": -56.699119567871094, + "logps/rejected": -285.7549133300781, + "loss": 0.666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0026786804664880037, + "rewards/margins": 0.1124778762459755, + "rewards/rejected": -0.10979919880628586, + "step": 471 + }, + { + "epoch": 0.03, + "learning_rate": 9.147286821705426e-08, + "logits/chosen": -2.0310940742492676, + "logits/rejected": -1.9913275241851807, + "logps/chosen": -229.71592712402344, + "logps/rejected": -492.7398986816406, + "loss": 0.7046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03321533277630806, + "rewards/margins": 0.00509948655962944, + "rewards/rejected": -0.0383148193359375, + "step": 472 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -2.1697940826416016, + "logits/rejected": -2.1467649936676025, + "logps/chosen": -286.84027099609375, + "logps/rejected": -383.6162414550781, + "loss": 0.6297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17223511636257172, + "rewards/margins": 0.07544556260108948, + "rewards/rejected": 0.09678955376148224, + "step": 473 + }, + { + "epoch": 0.03, + "learning_rate": 9.186046511627906e-08, + "logits/chosen": -2.061882495880127, + "logits/rejected": -2.058745861053467, + "logps/chosen": -27.22347068786621, + "logps/rejected": -106.0670166015625, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042934418888762593, + "rewards/margins": 0.03619404137134552, + "rewards/rejected": -0.036623384803533554, + "step": 474 + }, + { + "epoch": 0.03, + "learning_rate": 9.205426356589146e-08, + "logits/chosen": -2.354513168334961, + "logits/rejected": -2.3165528774261475, + "logps/chosen": -43.208526611328125, + "logps/rejected": -293.5045166015625, + "loss": 0.6998, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.018610764294862747, + "rewards/margins": -0.008387375622987747, + "rewards/rejected": -0.010223388671875, + "step": 475 + }, + { + "epoch": 0.03, + "learning_rate": 9.224806201550387e-08, + "logits/chosen": -2.179487705230713, + "logits/rejected": -2.168727159500122, + "logps/chosen": -262.5264892578125, + "logps/rejected": -359.201904296875, + "loss": 0.6422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0972900390625, + "rewards/margins": 0.05985107272863388, + "rewards/rejected": 0.03743896633386612, + "step": 476 + }, + { + "epoch": 0.03, + "learning_rate": 9.244186046511627e-08, + "logits/chosen": -2.0834403038024902, + "logits/rejected": -2.0863611698150635, + "logps/chosen": -0.05098578706383705, + "logps/rejected": -28.116540908813477, + "loss": 0.6984, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00033658818574622273, + "rewards/margins": -0.018408525735139847, + "rewards/rejected": 0.018071938306093216, + "step": 477 + }, + { + "epoch": 0.03, + "learning_rate": 9.263565891472868e-08, + "logits/chosen": -2.1841821670532227, + "logits/rejected": -1.953322410583496, + "logps/chosen": -286.3874206542969, + "logps/rejected": -631.9330444335938, + "loss": 0.6509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07734069973230362, + "rewards/margins": 0.13663634657859802, + "rewards/rejected": -0.059295654296875, + "step": 478 + }, + { + "epoch": 0.03, + "learning_rate": 9.282945736434108e-08, + "logits/chosen": -2.1250061988830566, + "logits/rejected": -2.1245551109313965, + "logps/chosen": -269.79559326171875, + "logps/rejected": -269.95709228515625, + "loss": 0.658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07155761867761612, + "rewards/margins": 0.048553466796875, + "rewards/rejected": 0.02300415001809597, + "step": 479 + }, + { + "epoch": 0.03, + "learning_rate": 9.302325581395349e-08, + "logits/chosen": -2.123910427093506, + "logits/rejected": -2.110139846801758, + "logps/chosen": -0.23216921091079712, + "logps/rejected": -245.91915893554688, + "loss": 0.6859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0018970519304275513, + "rewards/margins": 0.02617683820426464, + "rewards/rejected": -0.02427978627383709, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 9.321705426356589e-08, + "logits/chosen": -2.2076845169067383, + "logits/rejected": -2.2026889324188232, + "logps/chosen": -16.031444549560547, + "logps/rejected": -43.13136291503906, + "loss": 0.6891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000408172607421875, + "rewards/margins": 0.025852203369140625, + "rewards/rejected": -0.02544403076171875, + "step": 481 + }, + { + "epoch": 0.03, + "learning_rate": 9.341085271317828e-08, + "logits/chosen": -2.0432965755462646, + "logits/rejected": -2.0445027351379395, + "logps/chosen": -32.54603576660156, + "logps/rejected": -202.1414031982422, + "loss": 0.6916, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00558128347620368, + "rewards/margins": -0.00577888498082757, + "rewards/rejected": 0.01136016845703125, + "step": 482 + }, + { + "epoch": 0.03, + "learning_rate": 9.36046511627907e-08, + "logits/chosen": -2.1473028659820557, + "logits/rejected": -2.145470380783081, + "logps/chosen": -10.027626991271973, + "logps/rejected": -100.07974243164062, + "loss": 0.7272, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.029868125915527344, + "rewards/margins": -0.10328274220228195, + "rewards/rejected": 0.07341461628675461, + "step": 483 + }, + { + "epoch": 0.03, + "learning_rate": 9.379844961240309e-08, + "logits/chosen": -2.233154535293579, + "logits/rejected": -2.23581600189209, + "logps/chosen": -0.5795636177062988, + "logps/rejected": -53.56245040893555, + "loss": 0.6945, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0024314343463629484, + "rewards/margins": -0.004048866219818592, + "rewards/rejected": 0.001617431640625, + "step": 484 + }, + { + "epoch": 0.03, + "learning_rate": 9.39922480620155e-08, + "logits/chosen": -2.1655378341674805, + "logits/rejected": -2.1806745529174805, + "logps/chosen": -289.4088134765625, + "logps/rejected": -344.6248779296875, + "loss": 0.6662, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.17094115912914276, + "rewards/margins": -0.038116455078125, + "rewards/rejected": 0.20905761420726776, + "step": 485 + }, + { + "epoch": 0.03, + "learning_rate": 9.41860465116279e-08, + "logits/chosen": -2.0354907512664795, + "logits/rejected": -2.0239417552948, + "logps/chosen": -212.35963439941406, + "logps/rejected": -391.2653503417969, + "loss": 0.6516, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1262863129377365, + "rewards/margins": -0.0010177642107009888, + "rewards/rejected": 0.1273040771484375, + "step": 486 + }, + { + "epoch": 0.03, + "learning_rate": 9.437984496124031e-08, + "logits/chosen": -2.116457462310791, + "logits/rejected": -2.110074520111084, + "logps/chosen": -8.399873733520508, + "logps/rejected": -103.97982025146484, + "loss": 0.7015, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.008311367593705654, + "rewards/margins": -0.024045467376708984, + "rewards/rejected": 0.015734100714325905, + "step": 487 + }, + { + "epoch": 0.03, + "learning_rate": 9.457364341085271e-08, + "logits/chosen": -2.1469457149505615, + "logits/rejected": -2.1439690589904785, + "logps/chosen": -0.8495959043502808, + "logps/rejected": -84.71514129638672, + "loss": 0.6911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014214515686035156, + "rewards/margins": 0.0018181800842285156, + "rewards/rejected": -0.000396728515625, + "step": 488 + }, + { + "epoch": 0.03, + "learning_rate": 9.476744186046512e-08, + "logits/chosen": -1.9630151987075806, + "logits/rejected": -1.9635674953460693, + "logps/chosen": -1.2624961137771606, + "logps/rejected": -61.49968338012695, + "loss": 0.6822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011489189229905605, + "rewards/margins": 0.018491066992282867, + "rewards/rejected": -0.0070018768310546875, + "step": 489 + }, + { + "epoch": 0.03, + "learning_rate": 9.496124031007752e-08, + "logits/chosen": -2.1394128799438477, + "logits/rejected": -2.1421236991882324, + "logps/chosen": -8.173164367675781, + "logps/rejected": -104.82762145996094, + "loss": 0.682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02276601828634739, + "rewards/margins": 0.02235708199441433, + "rewards/rejected": 0.0004089355643372983, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 9.515503875968993e-08, + "logits/chosen": -2.1930530071258545, + "logits/rejected": -2.1874539852142334, + "logps/chosen": -0.005670477170497179, + "logps/rejected": -161.02503967285156, + "loss": 0.7015, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.5513116270303726e-05, + "rewards/margins": -0.030643798410892487, + "rewards/rejected": 0.03061828576028347, + "step": 491 + }, + { + "epoch": 0.03, + "learning_rate": 9.534883720930232e-08, + "logits/chosen": -2.0226242542266846, + "logits/rejected": -2.0125045776367188, + "logps/chosen": -63.083885192871094, + "logps/rejected": -121.87528991699219, + "loss": 0.6982, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.280639793956652e-05, + "rewards/margins": -0.019844818860292435, + "rewards/rejected": 0.01987762562930584, + "step": 492 + }, + { + "epoch": 0.03, + "learning_rate": 9.554263565891472e-08, + "logits/chosen": -2.145477533340454, + "logits/rejected": -2.101341724395752, + "logps/chosen": -171.81312561035156, + "logps/rejected": -384.37628173828125, + "loss": 0.7155, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05400543287396431, + "rewards/margins": -0.11547394096851349, + "rewards/rejected": 0.1694793701171875, + "step": 493 + }, + { + "epoch": 0.03, + "learning_rate": 9.573643410852713e-08, + "logits/chosen": -2.2778186798095703, + "logits/rejected": -2.292144775390625, + "logps/chosen": -229.84542846679688, + "logps/rejected": -306.5752868652344, + "loss": 0.7119, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02055969275534153, + "rewards/margins": -0.10731201618909836, + "rewards/rejected": 0.12787170708179474, + "step": 494 + }, + { + "epoch": 0.03, + "learning_rate": 9.593023255813953e-08, + "logits/chosen": -2.2143518924713135, + "logits/rejected": -2.204993486404419, + "logps/chosen": -3.6128053665161133, + "logps/rejected": -117.40784454345703, + "loss": 0.6968, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0017486572032794356, + "rewards/margins": -0.018674470484256744, + "rewards/rejected": 0.020423127338290215, + "step": 495 + }, + { + "epoch": 0.03, + "learning_rate": 9.612403100775194e-08, + "logits/chosen": -1.953924536705017, + "logits/rejected": -1.8846465349197388, + "logps/chosen": -100.66818237304688, + "logps/rejected": -434.6741638183594, + "loss": 0.6854, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04824981838464737, + "rewards/margins": -0.04144744575023651, + "rewards/rejected": 0.08969726413488388, + "step": 496 + }, + { + "epoch": 0.03, + "learning_rate": 9.631782945736434e-08, + "logits/chosen": -2.1253890991210938, + "logits/rejected": -2.12375545501709, + "logps/chosen": -0.6146668791770935, + "logps/rejected": -103.42892456054688, + "loss": 0.6835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006148535292595625, + "rewards/margins": 0.04103774577379227, + "rewards/rejected": -0.04718628153204918, + "step": 497 + }, + { + "epoch": 0.03, + "learning_rate": 9.651162790697675e-08, + "logits/chosen": -1.9747288227081299, + "logits/rejected": -1.950326681137085, + "logps/chosen": -168.0972442626953, + "logps/rejected": -214.21328735351562, + "loss": 0.6929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006724548526108265, + "rewards/margins": 0.009783935733139515, + "rewards/rejected": -0.00305938720703125, + "step": 498 + }, + { + "epoch": 0.03, + "learning_rate": 9.670542635658915e-08, + "logits/chosen": -2.17069149017334, + "logits/rejected": -2.1023359298706055, + "logps/chosen": -264.2095642089844, + "logps/rejected": -481.2662658691406, + "loss": 0.6602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10480652004480362, + "rewards/margins": 0.03238830715417862, + "rewards/rejected": 0.072418212890625, + "step": 499 + }, + { + "epoch": 0.03, + "learning_rate": 9.689922480620154e-08, + "logits/chosen": -1.9859172105789185, + "logits/rejected": -1.8812320232391357, + "logps/chosen": -173.3461456298828, + "logps/rejected": -368.69134521484375, + "loss": 0.6801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02289886586368084, + "rewards/margins": 0.01804351806640625, + "rewards/rejected": 0.004855346865952015, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 9.709302325581394e-08, + "logits/chosen": -2.0899062156677246, + "logits/rejected": -2.0983853340148926, + "logps/chosen": -259.51531982421875, + "logps/rejected": -348.1174011230469, + "loss": 0.6145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19164124131202698, + "rewards/margins": 0.13457642495632172, + "rewards/rejected": 0.05706482008099556, + "step": 501 + }, + { + "epoch": 0.03, + "learning_rate": 9.728682170542635e-08, + "logits/chosen": -2.2224297523498535, + "logits/rejected": -2.2196950912475586, + "logps/chosen": -38.13951110839844, + "logps/rejected": -98.21392059326172, + "loss": 0.7057, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02204284630715847, + "rewards/margins": -0.025893401354551315, + "rewards/rejected": 0.0038505555130541325, + "step": 502 + }, + { + "epoch": 0.03, + "learning_rate": 9.748062015503875e-08, + "logits/chosen": -2.073981285095215, + "logits/rejected": -2.050121545791626, + "logps/chosen": -55.630096435546875, + "logps/rejected": -398.6771240234375, + "loss": 0.6984, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.018618011847138405, + "rewards/margins": -0.021065521985292435, + "rewards/rejected": 0.002447509905323386, + "step": 503 + }, + { + "epoch": 0.03, + "learning_rate": 9.767441860465116e-08, + "logits/chosen": -2.0392873287200928, + "logits/rejected": -2.0229382514953613, + "logps/chosen": -220.12359619140625, + "logps/rejected": -189.7462921142578, + "loss": 0.7152, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.015435791574418545, + "rewards/margins": -0.08835754543542862, + "rewards/rejected": 0.0729217529296875, + "step": 504 + }, + { + "epoch": 0.03, + "learning_rate": 9.786821705426356e-08, + "logits/chosen": -2.1395528316497803, + "logits/rejected": -2.1329331398010254, + "logps/chosen": -273.2276306152344, + "logps/rejected": -415.0590515136719, + "loss": 0.6656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07928772270679474, + "rewards/margins": 0.010394290089607239, + "rewards/rejected": 0.0688934326171875, + "step": 505 + }, + { + "epoch": 0.03, + "learning_rate": 9.806201550387595e-08, + "logits/chosen": -2.114321708679199, + "logits/rejected": -2.090705394744873, + "logps/chosen": -261.866943359375, + "logps/rejected": -519.8572998046875, + "loss": 0.6494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09048767387866974, + "rewards/margins": 0.07131043076515198, + "rewards/rejected": 0.01917724683880806, + "step": 506 + }, + { + "epoch": 0.03, + "learning_rate": 9.825581395348837e-08, + "logits/chosen": -2.2100889682769775, + "logits/rejected": -2.2038464546203613, + "logps/chosen": -0.00019359085126779974, + "logps/rejected": -130.8646240234375, + "loss": 0.6824, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5377183899545344e-06, + "rewards/margins": 0.044886793941259384, + "rewards/rejected": -0.04488525539636612, + "step": 507 + }, + { + "epoch": 0.03, + "learning_rate": 9.844961240310076e-08, + "logits/chosen": -2.270698070526123, + "logits/rejected": -2.2552740573883057, + "logps/chosen": -2.467665195465088, + "logps/rejected": -123.627685546875, + "loss": 0.6923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0004648923932109028, + "rewards/margins": 0.002167773200199008, + "rewards/rejected": -0.0017028808360919356, + "step": 508 + }, + { + "epoch": 0.03, + "learning_rate": 9.864341085271317e-08, + "logits/chosen": -2.005129814147949, + "logits/rejected": -1.9977210760116577, + "logps/chosen": -81.86216735839844, + "logps/rejected": -133.16017150878906, + "loss": 0.679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0285491943359375, + "rewards/margins": 0.03145294263958931, + "rewards/rejected": -0.0029037476051598787, + "step": 509 + }, + { + "epoch": 0.03, + "learning_rate": 9.883720930232557e-08, + "logits/chosen": -2.024778366088867, + "logits/rejected": -2.0082271099090576, + "logps/chosen": -198.28326416015625, + "logps/rejected": -284.3665466308594, + "loss": 0.6299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12925873696804047, + "rewards/margins": 0.1438491940498352, + "rewards/rejected": -0.014590454287827015, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 9.903100775193798e-08, + "logits/chosen": -2.2011027336120605, + "logits/rejected": -2.1865789890289307, + "logps/chosen": -151.97837829589844, + "logps/rejected": -282.48724365234375, + "loss": 0.7044, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02192230336368084, + "rewards/margins": -0.05871123820543289, + "rewards/rejected": 0.08063354343175888, + "step": 511 + }, + { + "epoch": 0.03, + "learning_rate": 9.922480620155038e-08, + "logits/chosen": -2.182311773300171, + "logits/rejected": -2.1574740409851074, + "logps/chosen": -241.31158447265625, + "logps/rejected": -499.9454040527344, + "loss": 0.7141, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0247955322265625, + "rewards/margins": -0.04851074516773224, + "rewards/rejected": 0.02371521107852459, + "step": 512 + }, + { + "epoch": 0.03, + "learning_rate": 9.941860465116279e-08, + "logits/chosen": -2.165386199951172, + "logits/rejected": -2.1648757457733154, + "logps/chosen": -2.3676021099090576, + "logps/rejected": -38.13824462890625, + "loss": 0.6961, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.009093808941543102, + "rewards/margins": -0.0032847877591848373, + "rewards/rejected": -0.005809021182358265, + "step": 513 + }, + { + "epoch": 0.03, + "learning_rate": 9.961240310077519e-08, + "logits/chosen": -2.3045756816864014, + "logits/rejected": -2.2914023399353027, + "logps/chosen": -1.3826137781143188, + "logps/rejected": -156.86241149902344, + "loss": 0.7035, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0014909267192706466, + "rewards/margins": -0.04930739477276802, + "rewards/rejected": 0.04781646654009819, + "step": 514 + }, + { + "epoch": 0.03, + "learning_rate": 9.98062015503876e-08, + "logits/chosen": -2.242105722427368, + "logits/rejected": -2.2042994499206543, + "logps/chosen": -183.11834716796875, + "logps/rejected": -340.95562744140625, + "loss": 0.6948, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.013339233584702015, + "rewards/margins": -0.06994324177503586, + "rewards/rejected": 0.05660400539636612, + "step": 515 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -2.035857915878296, + "logits/rejected": -2.0355029106140137, + "logps/chosen": -46.26608657836914, + "logps/rejected": -167.45948791503906, + "loss": 0.6721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020019913092255592, + "rewards/margins": 0.06703072041273117, + "rewards/rejected": -0.04701080545783043, + "step": 516 + }, + { + "epoch": 0.03, + "learning_rate": 9.99999991118777e-08, + "logits/chosen": -2.1730892658233643, + "logits/rejected": -2.1967928409576416, + "logps/chosen": -222.14678955078125, + "logps/rejected": -297.8724365234375, + "loss": 0.7026, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03209686279296875, + "rewards/margins": -0.07333221286535263, + "rewards/rejected": 0.10542907565832138, + "step": 517 + }, + { + "epoch": 0.03, + "learning_rate": 9.999999644751087e-08, + "logits/chosen": -2.1146340370178223, + "logits/rejected": -2.1105897426605225, + "logps/chosen": -47.31151580810547, + "logps/rejected": -188.47613525390625, + "loss": 0.6801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00635948171839118, + "rewards/margins": 0.03838004916906357, + "rewards/rejected": -0.03202056884765625, + "step": 518 + }, + { + "epoch": 0.03, + "learning_rate": 9.999999200689959e-08, + "logits/chosen": -2.1873762607574463, + "logits/rejected": -2.1860218048095703, + "logps/chosen": -10.256758689880371, + "logps/rejected": -26.66078758239746, + "loss": 0.698, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014043903909623623, + "rewards/margins": -0.005222225561738014, + "rewards/rejected": -0.008821678347885609, + "step": 519 + }, + { + "epoch": 0.03, + "learning_rate": 9.999998579004402e-08, + "logits/chosen": -2.147284507751465, + "logits/rejected": -2.132962465286255, + "logps/chosen": -231.11300659179688, + "logps/rejected": -322.9777526855469, + "loss": 0.6333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19554901123046875, + "rewards/margins": 0.03898772597312927, + "rewards/rejected": 0.15656128525733948, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 9.999997779694437e-08, + "logits/chosen": -2.2621986865997314, + "logits/rejected": -2.2587506771087646, + "logps/chosen": -0.20213502645492554, + "logps/rejected": -98.4502944946289, + "loss": 0.6874, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.74181360282455e-07, + "rewards/margins": 0.023212159052491188, + "rewards/rejected": -0.023212432861328125, + "step": 521 + }, + { + "epoch": 0.03, + "learning_rate": 9.999996802760094e-08, + "logits/chosen": -2.0755555629730225, + "logits/rejected": -2.152217149734497, + "logps/chosen": -265.1253356933594, + "logps/rejected": -220.4013214111328, + "loss": 0.6657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06377258151769638, + "rewards/margins": 0.07341919094324112, + "rewards/rejected": -0.009646606631577015, + "step": 522 + }, + { + "epoch": 0.03, + "learning_rate": 9.999995648201406e-08, + "logits/chosen": -2.1873772144317627, + "logits/rejected": -2.2106480598449707, + "logps/chosen": -357.02191162109375, + "logps/rejected": -385.9412841796875, + "loss": 0.5941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18479004502296448, + "rewards/margins": 0.2589050531387329, + "rewards/rejected": -0.07411499321460724, + "step": 523 + }, + { + "epoch": 0.03, + "learning_rate": 9.999994316018415e-08, + "logits/chosen": -2.1890199184417725, + "logits/rejected": -2.186702013015747, + "logps/chosen": -1.1370832920074463, + "logps/rejected": -183.24246215820312, + "loss": 0.6796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017640698701143265, + "rewards/margins": 0.037106335163116455, + "rewards/rejected": -0.01946563832461834, + "step": 524 + }, + { + "epoch": 0.03, + "learning_rate": 9.999992806211168e-08, + "logits/chosen": -2.115328788757324, + "logits/rejected": -2.050745725631714, + "logps/chosen": -265.2793884277344, + "logps/rejected": -317.67987060546875, + "loss": 0.6019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15261535346508026, + "rewards/margins": 0.20020446181297302, + "rewards/rejected": -0.04758911207318306, + "step": 525 + }, + { + "epoch": 0.03, + "learning_rate": 9.99999111877972e-08, + "logits/chosen": -2.3519561290740967, + "logits/rejected": -2.344578266143799, + "logps/chosen": -0.057500313967466354, + "logps/rejected": -224.45860290527344, + "loss": 0.6851, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.395974772502086e-06, + "rewards/margins": 0.03244594484567642, + "rewards/rejected": -0.03244934231042862, + "step": 526 + }, + { + "epoch": 0.03, + "learning_rate": 9.99998925372413e-08, + "logits/chosen": -2.211272954940796, + "logits/rejected": -2.0614283084869385, + "logps/chosen": -279.4825439453125, + "logps/rejected": -598.870849609375, + "loss": 0.6961, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.003948974888771772, + "rewards/margins": -0.0024414064828306437, + "rewards/rejected": -0.0015075684059411287, + "step": 527 + }, + { + "epoch": 0.03, + "learning_rate": 9.999987211044463e-08, + "logits/chosen": -2.126556158065796, + "logits/rejected": -2.0969314575195312, + "logps/chosen": -167.53819274902344, + "logps/rejected": -283.106689453125, + "loss": 0.6739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05180816724896431, + "rewards/margins": 0.00458526611328125, + "rewards/rejected": 0.04722290113568306, + "step": 528 + }, + { + "epoch": 0.03, + "learning_rate": 9.999984990740792e-08, + "logits/chosen": -2.2421936988830566, + "logits/rejected": -2.228985071182251, + "logps/chosen": -179.49014282226562, + "logps/rejected": -242.5892333984375, + "loss": 0.6668, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07830657809972763, + "rewards/margins": -0.01905059814453125, + "rewards/rejected": 0.09735717624425888, + "step": 529 + }, + { + "epoch": 0.03, + "learning_rate": 9.999982592813198e-08, + "logits/chosen": -2.1111738681793213, + "logits/rejected": -2.0989506244659424, + "logps/chosen": -89.57424926757812, + "logps/rejected": -227.0380859375, + "loss": 0.6994, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02439269982278347, + "rewards/margins": -0.04212188720703125, + "rewards/rejected": 0.06651458889245987, + "step": 530 + }, + { + "epoch": 0.03, + "learning_rate": 9.999980017261766e-08, + "logits/chosen": -2.2424254417419434, + "logits/rejected": -2.2267847061157227, + "logps/chosen": -225.74891662597656, + "logps/rejected": -291.0597839355469, + "loss": 0.6252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11339111626148224, + "rewards/margins": 0.13814392685890198, + "rewards/rejected": -0.02475280873477459, + "step": 531 + }, + { + "epoch": 0.03, + "learning_rate": 9.999977264086586e-08, + "logits/chosen": -2.1961722373962402, + "logits/rejected": -1.9878854751586914, + "logps/chosen": -236.4697723388672, + "logps/rejected": -572.9034423828125, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01932830922305584, + "rewards/margins": 0.03797149658203125, + "rewards/rejected": -0.05729980394244194, + "step": 532 + }, + { + "epoch": 0.03, + "learning_rate": 9.999974333287753e-08, + "logits/chosen": -2.207111120223999, + "logits/rejected": -2.116856336593628, + "logps/chosen": -143.7783203125, + "logps/rejected": -285.7412109375, + "loss": 0.6693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04045715555548668, + "rewards/margins": 0.03547058254480362, + "rewards/rejected": 0.004986572545021772, + "step": 533 + }, + { + "epoch": 0.03, + "learning_rate": 9.999971224865376e-08, + "logits/chosen": -2.3152692317962646, + "logits/rejected": -2.2736034393310547, + "logps/chosen": -254.37704467773438, + "logps/rejected": -409.9259033203125, + "loss": 0.7092, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0262451171875, + "rewards/margins": -0.06943054497241974, + "rewards/rejected": 0.09567566215991974, + "step": 534 + }, + { + "epoch": 0.03, + "learning_rate": 9.999967938819565e-08, + "logits/chosen": -2.2288119792938232, + "logits/rejected": -2.2245090007781982, + "logps/chosen": -26.267681121826172, + "logps/rejected": -132.8842315673828, + "loss": 0.6978, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00054931640625, + "rewards/margins": -0.014506530947983265, + "rewards/rejected": 0.013957214541733265, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 9.999964475150433e-08, + "logits/chosen": -2.192153215408325, + "logits/rejected": -2.183248519897461, + "logps/chosen": -7.4790215492248535, + "logps/rejected": -72.81645202636719, + "loss": 0.7005, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.14984139954322e-06, + "rewards/margins": -0.029183007776737213, + "rewards/rejected": 0.02917785756289959, + "step": 536 + }, + { + "epoch": 0.03, + "learning_rate": 9.999960833858106e-08, + "logits/chosen": -2.060845375061035, + "logits/rejected": -1.9654170274734497, + "logps/chosen": -286.15625, + "logps/rejected": -554.129638671875, + "loss": 0.7719, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.13339538872241974, + "rewards/margins": -0.15028992295265198, + "rewards/rejected": 0.01689453236758709, + "step": 537 + }, + { + "epoch": 0.03, + "learning_rate": 9.999957014942713e-08, + "logits/chosen": -2.221926212310791, + "logits/rejected": -2.2077085971832275, + "logps/chosen": -95.96088409423828, + "logps/rejected": -148.12188720703125, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011997222900390625, + "rewards/margins": 0.003505706787109375, + "rewards/rejected": 0.00849151611328125, + "step": 538 + }, + { + "epoch": 0.03, + "learning_rate": 9.99995301840439e-08, + "logits/chosen": -2.1798274517059326, + "logits/rejected": -2.1601107120513916, + "logps/chosen": -196.29193115234375, + "logps/rejected": -457.5833740234375, + "loss": 0.6445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14244996011257172, + "rewards/margins": 0.039721690118312836, + "rewards/rejected": 0.10272826999425888, + "step": 539 + }, + { + "epoch": 0.03, + "learning_rate": 9.999948844243278e-08, + "logits/chosen": -2.134307622909546, + "logits/rejected": -2.137554883956909, + "logps/chosen": -51.22346878051758, + "logps/rejected": -118.98551940917969, + "loss": 0.6926, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.003337860107421875, + "rewards/margins": -0.00732421875, + "rewards/rejected": 0.010662078857421875, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 9.999944492459524e-08, + "logits/chosen": -2.0851283073425293, + "logits/rejected": -2.0593760013580322, + "logps/chosen": -151.2192840576172, + "logps/rejected": -228.49136352539062, + "loss": 0.6902, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03548126295208931, + "rewards/margins": -0.02126007154583931, + "rewards/rejected": 0.05674133449792862, + "step": 541 + }, + { + "epoch": 0.03, + "learning_rate": 9.999939963053286e-08, + "logits/chosen": -2.07848858833313, + "logits/rejected": -2.054523468017578, + "logps/chosen": -183.70840454101562, + "logps/rejected": -243.63690185546875, + "loss": 0.6861, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04189453274011612, + "rewards/margins": -0.02633972465991974, + "rewards/rejected": 0.06823425740003586, + "step": 542 + }, + { + "epoch": 0.03, + "learning_rate": 9.999935256024723e-08, + "logits/chosen": -2.3633458614349365, + "logits/rejected": -2.3906757831573486, + "logps/chosen": -211.33773803710938, + "logps/rejected": -231.7032470703125, + "loss": 0.6424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09162598103284836, + "rewards/margins": 0.07036743313074112, + "rewards/rejected": 0.02125854603946209, + "step": 543 + }, + { + "epoch": 0.03, + "learning_rate": 9.999930371374003e-08, + "logits/chosen": -2.1154065132141113, + "logits/rejected": -2.0437207221984863, + "logps/chosen": -350.52227783203125, + "logps/rejected": -387.6717529296875, + "loss": 0.6629, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10254821926355362, + "rewards/margins": -0.0027862563729286194, + "rewards/rejected": 0.10533447563648224, + "step": 544 + }, + { + "epoch": 0.03, + "learning_rate": 9.999925309101297e-08, + "logits/chosen": -2.215064764022827, + "logits/rejected": -2.2061939239501953, + "logps/chosen": -5.100200653076172, + "logps/rejected": -128.46685791015625, + "loss": 0.6912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4305115314527939e-07, + "rewards/margins": 0.006067037582397461, + "rewards/rejected": -0.0060668946243822575, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 9.999920069206789e-08, + "logits/chosen": -2.1202609539031982, + "logits/rejected": -2.106386661529541, + "logps/chosen": -418.3406982421875, + "logps/rejected": -481.1691589355469, + "loss": 0.6104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16109924018383026, + "rewards/margins": 0.12186278402805328, + "rewards/rejected": 0.03923645243048668, + "step": 546 + }, + { + "epoch": 0.03, + "learning_rate": 9.999914651690662e-08, + "logits/chosen": -2.075239419937134, + "logits/rejected": -2.0522758960723877, + "logps/chosen": -307.4853820800781, + "logps/rejected": -442.8782958984375, + "loss": 0.6217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13198243081569672, + "rewards/margins": 0.17887574434280396, + "rewards/rejected": -0.04689330980181694, + "step": 547 + }, + { + "epoch": 0.03, + "learning_rate": 9.999909056553108e-08, + "logits/chosen": -2.189087152481079, + "logits/rejected": -2.1751415729522705, + "logps/chosen": -1.5922898054122925, + "logps/rejected": -165.20687866210938, + "loss": 0.6889, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6954881466044753e-07, + "rewards/margins": 0.017026914283633232, + "rewards/rejected": -0.01702728308737278, + "step": 548 + }, + { + "epoch": 0.03, + "learning_rate": 9.999903283794328e-08, + "logits/chosen": -2.2773773670196533, + "logits/rejected": -2.21955943107605, + "logps/chosen": -185.36961364746094, + "logps/rejected": -384.60687255859375, + "loss": 0.651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11995697021484375, + "rewards/margins": 0.03813934326171875, + "rewards/rejected": 0.081817626953125, + "step": 549 + }, + { + "epoch": 0.03, + "learning_rate": 9.999897333414526e-08, + "logits/chosen": -2.1225228309631348, + "logits/rejected": -2.124877452850342, + "logps/chosen": -207.28704833984375, + "logps/rejected": -374.82537841796875, + "loss": 0.6138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18947143852710724, + "rewards/margins": 0.09013672173023224, + "rewards/rejected": 0.099334716796875, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 9.999891205413915e-08, + "logits/chosen": -2.2724571228027344, + "logits/rejected": -2.2680842876434326, + "logps/chosen": -0.7567451000213623, + "logps/rejected": -46.07432556152344, + "loss": 0.6949, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0016280472045764327, + "rewards/margins": -0.00625464366748929, + "rewards/rejected": 0.007882690988481045, + "step": 551 + }, + { + "epoch": 0.03, + "learning_rate": 9.99988489979271e-08, + "logits/chosen": -2.19398832321167, + "logits/rejected": -2.182830572128296, + "logps/chosen": -214.037109375, + "logps/rejected": -535.6268920898438, + "loss": 0.6015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10892181843519211, + "rewards/margins": 0.27800750732421875, + "rewards/rejected": -0.16908569633960724, + "step": 552 + }, + { + "epoch": 0.03, + "learning_rate": 9.999878416551137e-08, + "logits/chosen": -2.144989252090454, + "logits/rejected": -2.063650131225586, + "logps/chosen": -269.3663024902344, + "logps/rejected": -427.985107421875, + "loss": 0.7083, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014758300967514515, + "rewards/margins": -0.07950440049171448, + "rewards/rejected": 0.06474609673023224, + "step": 553 + }, + { + "epoch": 0.03, + "learning_rate": 9.999871755689425e-08, + "logits/chosen": -2.2589473724365234, + "logits/rejected": -2.2607264518737793, + "logps/chosen": -26.162540435791016, + "logps/rejected": -97.25292205810547, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.015526771545410156, + "rewards/margins": -0.02882213518023491, + "rewards/rejected": 0.044348906725645065, + "step": 554 + }, + { + "epoch": 0.03, + "learning_rate": 9.999864917207812e-08, + "logits/chosen": -2.2662065029144287, + "logits/rejected": -2.2533907890319824, + "logps/chosen": -0.00017213061801157892, + "logps/rejected": -195.31204223632812, + "loss": 0.6849, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3111639418639243e-06, + "rewards/margins": 0.033366609364748, + "rewards/rejected": -0.03336792066693306, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 9.999857901106539e-08, + "logits/chosen": -2.185746431350708, + "logits/rejected": -2.1808536052703857, + "logps/chosen": -63.135032653808594, + "logps/rejected": -235.48236083984375, + "loss": 0.6822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0353546142578125, + "rewards/margins": 0.02247924730181694, + "rewards/rejected": 0.012875366024672985, + "step": 556 + }, + { + "epoch": 0.03, + "learning_rate": 9.999850707385858e-08, + "logits/chosen": -2.1264584064483643, + "logits/rejected": -2.1135151386260986, + "logps/chosen": -72.99054718017578, + "logps/rejected": -343.9966125488281, + "loss": 0.679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01009445171803236, + "rewards/margins": 0.00636520329862833, + "rewards/rejected": 0.003729248186573386, + "step": 557 + }, + { + "epoch": 0.03, + "learning_rate": 9.99984333604602e-08, + "logits/chosen": -2.089367151260376, + "logits/rejected": -2.021480083465576, + "logps/chosen": -203.57705688476562, + "logps/rejected": -315.5541687011719, + "loss": 0.6664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08424072712659836, + "rewards/margins": 0.0477447547018528, + "rewards/rejected": 0.03649597242474556, + "step": 558 + }, + { + "epoch": 0.03, + "learning_rate": 9.999835787087292e-08, + "logits/chosen": -2.226591110229492, + "logits/rejected": -2.1866366863250732, + "logps/chosen": -251.49913024902344, + "logps/rejected": -462.33990478515625, + "loss": 0.6608, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1132354736328125, + "rewards/margins": -0.004782103002071381, + "rewards/rejected": 0.11801757663488388, + "step": 559 + }, + { + "epoch": 0.03, + "learning_rate": 9.99982806050994e-08, + "logits/chosen": -2.068627119064331, + "logits/rejected": -2.06842041015625, + "logps/chosen": -27.381494522094727, + "logps/rejected": -125.41220092773438, + "loss": 0.6795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01810932159423828, + "rewards/margins": 0.0366731658577919, + "rewards/rejected": -0.01856384240090847, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 9.999820156314238e-08, + "logits/chosen": -2.0534567832946777, + "logits/rejected": -2.0531609058380127, + "logps/chosen": -68.31282806396484, + "logps/rejected": -191.2197265625, + "loss": 0.6942, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05971679836511612, + "rewards/margins": -0.06114349514245987, + "rewards/rejected": 0.12086029350757599, + "step": 561 + }, + { + "epoch": 0.03, + "learning_rate": 9.999812074500467e-08, + "logits/chosen": -2.064020872116089, + "logits/rejected": -2.0508811473846436, + "logps/chosen": -272.39459228515625, + "logps/rejected": -340.186767578125, + "loss": 0.6624, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07833557575941086, + "rewards/margins": -0.01070251315832138, + "rewards/rejected": 0.08903808891773224, + "step": 562 + }, + { + "epoch": 0.03, + "learning_rate": 9.999803815068916e-08, + "logits/chosen": -2.326960325241089, + "logits/rejected": -2.324467897415161, + "logps/chosen": -0.03365316241979599, + "logps/rejected": -208.01092529296875, + "loss": 0.6684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000389559572795406, + "rewards/margins": 0.10215560346841812, + "rewards/rejected": -0.10254516452550888, + "step": 563 + }, + { + "epoch": 0.03, + "learning_rate": 9.999795378019875e-08, + "logits/chosen": -2.1650187969207764, + "logits/rejected": -2.1491894721984863, + "logps/chosen": -98.52950286865234, + "logps/rejected": -204.9955596923828, + "loss": 0.6889, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.035596467554569244, + "rewards/margins": -0.006597135215997696, + "rewards/rejected": 0.04219360277056694, + "step": 564 + }, + { + "epoch": 0.03, + "learning_rate": 9.999786763353645e-08, + "logits/chosen": -2.063812255859375, + "logits/rejected": -2.0420706272125244, + "logps/chosen": -243.85569763183594, + "logps/rejected": -338.6640319824219, + "loss": 0.6895, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03433075174689293, + "rewards/margins": -0.03916473314166069, + "rewards/rejected": 0.07349548488855362, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 9.999777971070535e-08, + "logits/chosen": -2.2399709224700928, + "logits/rejected": -2.22517728805542, + "logps/chosen": -119.0340347290039, + "logps/rejected": -316.3210754394531, + "loss": 0.6764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0627647414803505, + "rewards/margins": 0.026375580579042435, + "rewards/rejected": 0.03638916090130806, + "step": 566 + }, + { + "epoch": 0.03, + "learning_rate": 9.999769001170854e-08, + "logits/chosen": -2.2331128120422363, + "logits/rejected": -2.236633062362671, + "logps/chosen": -41.652587890625, + "logps/rejected": -182.90931701660156, + "loss": 0.6887, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00501174945384264, + "rewards/margins": -0.00363235455006361, + "rewards/rejected": 0.00864410400390625, + "step": 567 + }, + { + "epoch": 0.03, + "learning_rate": 9.999759853654921e-08, + "logits/chosen": -2.279334306716919, + "logits/rejected": -2.2812998294830322, + "logps/chosen": -36.298561096191406, + "logps/rejected": -113.45150756835938, + "loss": 0.6782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0255889892578125, + "rewards/margins": 0.02363738976418972, + "rewards/rejected": 0.0019515991443768144, + "step": 568 + }, + { + "epoch": 0.03, + "learning_rate": 9.999750528523063e-08, + "logits/chosen": -2.2054355144500732, + "logits/rejected": -2.1785449981689453, + "logps/chosen": -259.47918701171875, + "logps/rejected": -397.6019287109375, + "loss": 0.6758, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06967773288488388, + "rewards/margins": -0.0196990966796875, + "rewards/rejected": 0.08937682956457138, + "step": 569 + }, + { + "epoch": 0.03, + "learning_rate": 9.999741025775608e-08, + "logits/chosen": -2.189143180847168, + "logits/rejected": -2.1093204021453857, + "logps/chosen": -314.61309814453125, + "logps/rejected": -557.9378051757812, + "loss": 0.693, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01557006873190403, + "rewards/margins": -0.07835693657398224, + "rewards/rejected": 0.09392700344324112, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 9.999731345412896e-08, + "logits/chosen": -2.1825437545776367, + "logits/rejected": -2.1190123558044434, + "logps/chosen": -207.89486694335938, + "logps/rejected": -314.7565002441406, + "loss": 0.606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1984207183122635, + "rewards/margins": 0.161112979054451, + "rewards/rejected": 0.0373077392578125, + "step": 571 + }, + { + "epoch": 0.03, + "learning_rate": 9.99972148743527e-08, + "logits/chosen": -2.1358675956726074, + "logits/rejected": -2.100306749343872, + "logps/chosen": -189.4857177734375, + "logps/rejected": -251.392333984375, + "loss": 0.6799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0390625, + "rewards/margins": 0.004006955772638321, + "rewards/rejected": 0.03505554422736168, + "step": 572 + }, + { + "epoch": 0.03, + "learning_rate": 9.99971145184308e-08, + "logits/chosen": -2.1336417198181152, + "logits/rejected": -2.0983834266662598, + "logps/chosen": -255.10987854003906, + "logps/rejected": -425.25006103515625, + "loss": 0.5885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2574264705181122, + "rewards/margins": 0.14000703394412994, + "rewards/rejected": 0.11741943657398224, + "step": 573 + }, + { + "epoch": 0.03, + "learning_rate": 9.999701238636684e-08, + "logits/chosen": -2.2365968227386475, + "logits/rejected": -2.2310829162597656, + "logps/chosen": -0.043139487504959106, + "logps/rejected": -95.31526947021484, + "loss": 0.7007, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0013267099857330322, + "rewards/margins": -0.028817707672715187, + "rewards/rejected": 0.027490997686982155, + "step": 574 + }, + { + "epoch": 0.03, + "learning_rate": 9.999690847816445e-08, + "logits/chosen": -2.232119560241699, + "logits/rejected": -2.219587564468384, + "logps/chosen": -175.41549682617188, + "logps/rejected": -390.5945739746094, + "loss": 0.7026, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01460876502096653, + "rewards/margins": -0.06600341945886612, + "rewards/rejected": 0.0806121826171875, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 9.999680279382729e-08, + "logits/chosen": -2.0520389080047607, + "logits/rejected": -2.0388729572296143, + "logps/chosen": -39.97011947631836, + "logps/rejected": -172.12205505371094, + "loss": 0.7048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06879883259534836, + "rewards/margins": 0.019471734762191772, + "rewards/rejected": -0.08827056735754013, + "step": 576 + }, + { + "epoch": 0.03, + "learning_rate": 9.999669533335913e-08, + "logits/chosen": -1.9041827917099, + "logits/rejected": -1.997115969657898, + "logps/chosen": -320.05426025390625, + "logps/rejected": -351.86749267578125, + "loss": 0.6699, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12129516899585724, + "rewards/margins": -0.02351073920726776, + "rewards/rejected": 0.144805908203125, + "step": 577 + }, + { + "epoch": 0.03, + "learning_rate": 9.99965860967638e-08, + "logits/chosen": -2.075965404510498, + "logits/rejected": -2.073042392730713, + "logps/chosen": -16.150161743164062, + "logps/rejected": -146.66098022460938, + "loss": 0.6987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009560775943100452, + "rewards/margins": 0.006363295949995518, + "rewards/rejected": -0.01592407189309597, + "step": 578 + }, + { + "epoch": 0.03, + "learning_rate": 9.999647508404517e-08, + "logits/chosen": -2.2327921390533447, + "logits/rejected": -2.2308456897735596, + "logps/chosen": -50.72284698486328, + "logps/rejected": -139.45884704589844, + "loss": 0.708, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03361091762781143, + "rewards/margins": -0.020068742334842682, + "rewards/rejected": -0.01354217529296875, + "step": 579 + }, + { + "epoch": 0.03, + "learning_rate": 9.999636229520719e-08, + "logits/chosen": -2.30790376663208, + "logits/rejected": -2.3053195476531982, + "logps/chosen": -7.010436534881592, + "logps/rejected": -141.0398712158203, + "loss": 0.7024, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012316226959228516, + "rewards/margins": -0.040812015533447266, + "rewards/rejected": 0.02849578857421875, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 9.999624773025385e-08, + "logits/chosen": -2.0685513019561768, + "logits/rejected": -2.118303060531616, + "logps/chosen": -276.531005859375, + "logps/rejected": -408.76318359375, + "loss": 0.7179, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014007568359375, + "rewards/margins": -0.08876343071460724, + "rewards/rejected": 0.07475586235523224, + "step": 581 + }, + { + "epoch": 0.03, + "learning_rate": 9.999613138918925e-08, + "logits/chosen": -2.2795090675354004, + "logits/rejected": -2.255836248397827, + "logps/chosen": -37.634910583496094, + "logps/rejected": -210.89541625976562, + "loss": 0.6614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040546417236328125, + "rewards/margins": 0.08864212036132812, + "rewards/rejected": -0.048095703125, + "step": 582 + }, + { + "epoch": 0.03, + "learning_rate": 9.999601327201751e-08, + "logits/chosen": -2.143925905227661, + "logits/rejected": -2.1173856258392334, + "logps/chosen": -412.39715576171875, + "logps/rejected": -416.9367980957031, + "loss": 0.6837, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.11413269490003586, + "rewards/margins": -0.05321044474840164, + "rewards/rejected": 0.1673431396484375, + "step": 583 + }, + { + "epoch": 0.03, + "learning_rate": 9.999589337874281e-08, + "logits/chosen": -2.3100650310516357, + "logits/rejected": -2.299619674682617, + "logps/chosen": -12.066309928894043, + "logps/rejected": -139.08734130859375, + "loss": 0.6783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012500762939453125, + "rewards/margins": 0.049312591552734375, + "rewards/rejected": -0.03681182861328125, + "step": 584 + }, + { + "epoch": 0.03, + "learning_rate": 9.999577170936941e-08, + "logits/chosen": -2.1272401809692383, + "logits/rejected": -2.0876858234405518, + "logps/chosen": -231.37330627441406, + "logps/rejected": -497.91424560546875, + "loss": 0.6169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20277710258960724, + "rewards/margins": 0.04707030951976776, + "rewards/rejected": 0.15570679306983948, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 9.999564826390166e-08, + "logits/chosen": -2.154430389404297, + "logits/rejected": -2.144420623779297, + "logps/chosen": -1.4842520952224731, + "logps/rejected": -156.5112762451172, + "loss": 0.6707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023312604054808617, + "rewards/margins": 0.061703719198703766, + "rewards/rejected": -0.03839111328125, + "step": 586 + }, + { + "epoch": 0.03, + "learning_rate": 9.999552304234393e-08, + "logits/chosen": -2.203406810760498, + "logits/rejected": -2.1822867393493652, + "logps/chosen": -222.42181396484375, + "logps/rejected": -409.0373840332031, + "loss": 0.7047, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02893218956887722, + "rewards/margins": -0.1315567046403885, + "rewards/rejected": 0.16048888862133026, + "step": 587 + }, + { + "epoch": 0.03, + "learning_rate": 9.999539604470067e-08, + "logits/chosen": -2.3667221069335938, + "logits/rejected": -2.3586771488189697, + "logps/chosen": -36.300872802734375, + "logps/rejected": -153.14453125, + "loss": 0.6772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004227829165756702, + "rewards/margins": 0.0631183609366417, + "rewards/rejected": -0.06734619289636612, + "step": 588 + }, + { + "epoch": 0.03, + "learning_rate": 9.999526727097637e-08, + "logits/chosen": -2.1392996311187744, + "logits/rejected": -2.079075336456299, + "logps/chosen": -312.5065002441406, + "logps/rejected": -478.43603515625, + "loss": 0.5891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2730255126953125, + "rewards/margins": 0.13671875, + "rewards/rejected": 0.1363067626953125, + "step": 589 + }, + { + "epoch": 0.03, + "learning_rate": 9.999513672117564e-08, + "logits/chosen": -1.9482383728027344, + "logits/rejected": -1.9023786783218384, + "logps/chosen": -176.06283569335938, + "logps/rejected": -423.6776123046875, + "loss": 0.685, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07806396484375, + "rewards/margins": -0.0345001220703125, + "rewards/rejected": 0.1125640869140625, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 9.999500439530308e-08, + "logits/chosen": -2.2697370052337646, + "logits/rejected": -2.2769417762756348, + "logps/chosen": -23.020959854125977, + "logps/rejected": -116.16925048828125, + "loss": 0.6937, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.006432342808693647, + "rewards/margins": -0.0006629941053688526, + "rewards/rejected": 0.0070953369140625, + "step": 591 + }, + { + "epoch": 0.03, + "learning_rate": 9.999487029336342e-08, + "logits/chosen": -2.090707540512085, + "logits/rejected": -2.1072592735290527, + "logps/chosen": -189.24314880371094, + "logps/rejected": -241.71697998046875, + "loss": 0.6663, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08444061130285263, + "rewards/margins": -0.01773224025964737, + "rewards/rejected": 0.1021728515625, + "step": 592 + }, + { + "epoch": 0.03, + "learning_rate": 9.999473441536143e-08, + "logits/chosen": -2.114062786102295, + "logits/rejected": -2.0272538661956787, + "logps/chosen": -275.0045471191406, + "logps/rejected": -412.30560302734375, + "loss": 0.6233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15569153428077698, + "rewards/margins": 0.12442933022975922, + "rewards/rejected": 0.03126220777630806, + "step": 593 + }, + { + "epoch": 0.03, + "learning_rate": 9.99945967613019e-08, + "logits/chosen": -2.1536293029785156, + "logits/rejected": -2.1451144218444824, + "logps/chosen": -184.5746307373047, + "logps/rejected": -240.89892578125, + "loss": 0.6424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06914215534925461, + "rewards/margins": 0.1486557126045227, + "rewards/rejected": -0.0795135498046875, + "step": 594 + }, + { + "epoch": 0.03, + "learning_rate": 9.999445733118977e-08, + "logits/chosen": -1.9873803853988647, + "logits/rejected": -1.9619450569152832, + "logps/chosen": -246.28936767578125, + "logps/rejected": -422.8121337890625, + "loss": 0.6047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15189209580421448, + "rewards/margins": 0.17636413872241974, + "rewards/rejected": -0.02447204664349556, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 9.999431612502995e-08, + "logits/chosen": -2.0015199184417725, + "logits/rejected": -1.9784342050552368, + "logps/chosen": -129.006591796875, + "logps/rejected": -267.7740478515625, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03343048319220543, + "rewards/margins": -0.09840239584445953, + "rewards/rejected": 0.13183288276195526, + "step": 596 + }, + { + "epoch": 0.03, + "learning_rate": 9.999417314282747e-08, + "logits/chosen": -2.151182174682617, + "logits/rejected": -2.1062753200531006, + "logps/chosen": -257.2481689453125, + "logps/rejected": -379.89324951171875, + "loss": 0.7237, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04184875637292862, + "rewards/margins": -0.1760406494140625, + "rewards/rejected": 0.21788941323757172, + "step": 597 + }, + { + "epoch": 0.03, + "learning_rate": 9.999402838458742e-08, + "logits/chosen": -2.2968454360961914, + "logits/rejected": -2.285320997238159, + "logps/chosen": -90.35859680175781, + "logps/rejected": -217.8493194580078, + "loss": 0.6642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048919677734375, + "rewards/margins": 0.07483978569507599, + "rewards/rejected": -0.02592010609805584, + "step": 598 + }, + { + "epoch": 0.03, + "learning_rate": 9.999388185031494e-08, + "logits/chosen": -2.119070529937744, + "logits/rejected": -2.099968194961548, + "logps/chosen": -228.02157592773438, + "logps/rejected": -352.6471252441406, + "loss": 0.6369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15032958984375, + "rewards/margins": 0.03149108588695526, + "rewards/rejected": 0.11883850395679474, + "step": 599 + }, + { + "epoch": 0.03, + "learning_rate": 9.999373354001521e-08, + "logits/chosen": -2.2620151042938232, + "logits/rejected": -2.218731164932251, + "logps/chosen": -109.84679412841797, + "logps/rejected": -273.56536865234375, + "loss": 0.7009, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.032860565930604935, + "rewards/margins": -0.07045364379882812, + "rewards/rejected": 0.10331421345472336, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 9.999358345369354e-08, + "logits/chosen": -2.196013927459717, + "logits/rejected": -2.1921446323394775, + "logps/chosen": -40.126861572265625, + "logps/rejected": -282.8340148925781, + "loss": 0.651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03575287014245987, + "rewards/margins": 0.13775482773780823, + "rewards/rejected": -0.10200195759534836, + "step": 601 + }, + { + "epoch": 0.04, + "learning_rate": 9.999343159135522e-08, + "logits/chosen": -2.1966443061828613, + "logits/rejected": -2.1968467235565186, + "logps/chosen": -284.6410217285156, + "logps/rejected": -275.6844482421875, + "loss": 0.7047, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01869812048971653, + "rewards/margins": -0.05495911091566086, + "rewards/rejected": 0.03626098856329918, + "step": 602 + }, + { + "epoch": 0.04, + "learning_rate": 9.999327795300569e-08, + "logits/chosen": -1.905167579650879, + "logits/rejected": -1.8259748220443726, + "logps/chosen": -237.9642333984375, + "logps/rejected": -414.5568542480469, + "loss": 0.6991, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02250060997903347, + "rewards/margins": -0.0027221664786338806, + "rewards/rejected": -0.01977844350039959, + "step": 603 + }, + { + "epoch": 0.04, + "learning_rate": 9.999312253865037e-08, + "logits/chosen": -2.16910457611084, + "logits/rejected": -2.1654791831970215, + "logps/chosen": -14.759051322937012, + "logps/rejected": -147.2722625732422, + "loss": 0.6823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008018112741410732, + "rewards/margins": 0.03414116054773331, + "rewards/rejected": -0.026123046875, + "step": 604 + }, + { + "epoch": 0.04, + "learning_rate": 9.99929653482948e-08, + "logits/chosen": -2.169313907623291, + "logits/rejected": -2.164740562438965, + "logps/chosen": -22.913150787353516, + "logps/rejected": -63.237937927246094, + "loss": 0.683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007344245910644531, + "rewards/margins": 0.0321718230843544, + "rewards/rejected": -0.02482757531106472, + "step": 605 + }, + { + "epoch": 0.04, + "learning_rate": 9.999280638194455e-08, + "logits/chosen": -2.316722869873047, + "logits/rejected": -2.2875301837921143, + "logps/chosen": -34.55801010131836, + "logps/rejected": -294.24407958984375, + "loss": 0.6678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.045343782752752304, + "rewards/margins": 0.05498123541474342, + "rewards/rejected": -0.009637451730668545, + "step": 606 + }, + { + "epoch": 0.04, + "learning_rate": 9.999264563960528e-08, + "logits/chosen": -2.2567780017852783, + "logits/rejected": -2.2486250400543213, + "logps/chosen": -285.0356140136719, + "logps/rejected": -357.19970703125, + "loss": 0.6654, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10903014987707138, + "rewards/margins": -0.00033264607191085815, + "rewards/rejected": 0.10936279594898224, + "step": 607 + }, + { + "epoch": 0.04, + "learning_rate": 9.99924831212827e-08, + "logits/chosen": -2.1566543579101562, + "logits/rejected": -2.160151720046997, + "logps/chosen": -75.53682708740234, + "logps/rejected": -377.0599365234375, + "loss": 0.6995, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01448669470846653, + "rewards/margins": -0.02485656924545765, + "rewards/rejected": 0.03934326395392418, + "step": 608 + }, + { + "epoch": 0.04, + "learning_rate": 9.999231882698258e-08, + "logits/chosen": -2.0799622535705566, + "logits/rejected": -2.0359139442443848, + "logps/chosen": -62.971099853515625, + "logps/rejected": -429.7173767089844, + "loss": 0.6947, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.017034148797392845, + "rewards/margins": -0.028968049213290215, + "rewards/rejected": 0.04600219801068306, + "step": 609 + }, + { + "epoch": 0.04, + "learning_rate": 9.999215275671076e-08, + "logits/chosen": -2.2938954830169678, + "logits/rejected": -2.286562919616699, + "logps/chosen": -0.00018047165940515697, + "logps/rejected": -105.59001922607422, + "loss": 0.7042, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.4101327034695714e-07, + "rewards/margins": -0.046798381954431534, + "rewards/rejected": 0.046797942370176315, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 9.999198491047312e-08, + "logits/chosen": -2.023355484008789, + "logits/rejected": -2.0169618129730225, + "logps/chosen": -25.997295379638672, + "logps/rejected": -93.23515319824219, + "loss": 0.6933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0029176713433116674, + "rewards/margins": 0.01876697689294815, + "rewards/rejected": -0.01584930531680584, + "step": 611 + }, + { + "epoch": 0.04, + "learning_rate": 9.999181528827565e-08, + "logits/chosen": -2.1344475746154785, + "logits/rejected": -2.0694119930267334, + "logps/chosen": -233.41864013671875, + "logps/rejected": -418.2952880859375, + "loss": 0.6416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12660522758960724, + "rewards/margins": 0.06425170600414276, + "rewards/rejected": 0.06235351786017418, + "step": 612 + }, + { + "epoch": 0.04, + "learning_rate": 9.999164389012436e-08, + "logits/chosen": -2.3814380168914795, + "logits/rejected": -2.3769242763519287, + "logps/chosen": -81.20899200439453, + "logps/rejected": -164.3525390625, + "loss": 0.7085, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0036895752418786287, + "rewards/margins": -0.01629486121237278, + "rewards/rejected": 0.012605286203324795, + "step": 613 + }, + { + "epoch": 0.04, + "learning_rate": 9.999147071602534e-08, + "logits/chosen": -2.1568243503570557, + "logits/rejected": -2.1455259323120117, + "logps/chosen": -85.84502410888672, + "logps/rejected": -185.46128845214844, + "loss": 0.6854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024996185675263405, + "rewards/margins": 0.012454986572265625, + "rewards/rejected": 0.01254119910299778, + "step": 614 + }, + { + "epoch": 0.04, + "learning_rate": 9.999129576598476e-08, + "logits/chosen": -2.2423486709594727, + "logits/rejected": -2.2585864067077637, + "logps/chosen": -131.74905395507812, + "logps/rejected": -151.57943725585938, + "loss": 0.6712, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06007690355181694, + "rewards/margins": -0.02879181131720543, + "rewards/rejected": 0.08886871486902237, + "step": 615 + }, + { + "epoch": 0.04, + "learning_rate": 9.999111904000881e-08, + "logits/chosen": -2.2868072986602783, + "logits/rejected": -2.2504355907440186, + "logps/chosen": -72.66334533691406, + "logps/rejected": -309.9551696777344, + "loss": 0.6892, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.09652099758386612, + "rewards/margins": -0.04938048869371414, + "rewards/rejected": 0.14590148627758026, + "step": 616 + }, + { + "epoch": 0.04, + "learning_rate": 9.999094053810377e-08, + "logits/chosen": -2.2519187927246094, + "logits/rejected": -2.236403465270996, + "logps/chosen": -162.06173706054688, + "logps/rejected": -299.4141540527344, + "loss": 0.694, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02071228064596653, + "rewards/margins": -0.02764892764389515, + "rewards/rejected": 0.04836120828986168, + "step": 617 + }, + { + "epoch": 0.04, + "learning_rate": 9.999076026027601e-08, + "logits/chosen": -2.337761402130127, + "logits/rejected": -2.3380956649780273, + "logps/chosen": -1.5031673908233643, + "logps/rejected": -25.328886032104492, + "loss": 0.6956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00958875473588705, + "rewards/margins": 0.0019055008888244629, + "rewards/rejected": -0.011494255624711514, + "step": 618 + }, + { + "epoch": 0.04, + "learning_rate": 9.99905782065319e-08, + "logits/chosen": -2.194697141647339, + "logits/rejected": -2.171539068222046, + "logps/chosen": -186.0430145263672, + "logps/rejected": -363.05792236328125, + "loss": 0.6765, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02947235107421875, + "rewards/margins": -0.02854767069220543, + "rewards/rejected": 0.05802002176642418, + "step": 619 + }, + { + "epoch": 0.04, + "learning_rate": 9.999039437687792e-08, + "logits/chosen": -2.1746606826782227, + "logits/rejected": -2.1750404834747314, + "logps/chosen": -0.016987914219498634, + "logps/rejected": -176.38217163085938, + "loss": 0.6934, + "rewards/accuracies": 0.0, + "rewards/chosen": 5.601160410151351e-06, + "rewards/margins": -0.00207264581695199, + "rewards/rejected": 0.0020782470237463713, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 9.999020877132063e-08, + "logits/chosen": -1.9622300863265991, + "logits/rejected": -1.9492145776748657, + "logps/chosen": -203.07162475585938, + "logps/rejected": -422.8570556640625, + "loss": 0.6367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13823547959327698, + "rewards/margins": 0.06227417290210724, + "rewards/rejected": 0.07596130669116974, + "step": 621 + }, + { + "epoch": 0.04, + "learning_rate": 9.999002138986656e-08, + "logits/chosen": -2.0437846183776855, + "logits/rejected": -2.0191357135772705, + "logps/chosen": -336.15887451171875, + "logps/rejected": -501.5343017578125, + "loss": 0.6502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05498352274298668, + "rewards/margins": 0.14076538383960724, + "rewards/rejected": -0.08578186482191086, + "step": 622 + }, + { + "epoch": 0.04, + "learning_rate": 9.998983223252243e-08, + "logits/chosen": -2.1433358192443848, + "logits/rejected": -2.1444339752197266, + "logps/chosen": -16.605575561523438, + "logps/rejected": -122.30093383789062, + "loss": 0.6849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0038896561600267887, + "rewards/margins": 0.01781024970114231, + "rewards/rejected": -0.013920593075454235, + "step": 623 + }, + { + "epoch": 0.04, + "learning_rate": 9.998964129929492e-08, + "logits/chosen": -2.1117115020751953, + "logits/rejected": -2.0230627059936523, + "logps/chosen": -189.4799346923828, + "logps/rejected": -398.3115234375, + "loss": 0.6552, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.17496947944164276, + "rewards/margins": -0.06365357339382172, + "rewards/rejected": 0.23862305283546448, + "step": 624 + }, + { + "epoch": 0.04, + "learning_rate": 9.998944859019082e-08, + "logits/chosen": -2.0515894889831543, + "logits/rejected": -2.047271251678467, + "logps/chosen": -19.872467041015625, + "logps/rejected": -65.62986755371094, + "loss": 0.6713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03843288496136665, + "rewards/margins": 0.04186001047492027, + "rewards/rejected": -0.0034271241165697575, + "step": 625 + }, + { + "epoch": 0.04, + "learning_rate": 9.9989254105217e-08, + "logits/chosen": -2.368896722793579, + "logits/rejected": -2.3616504669189453, + "logps/chosen": -30.279285430908203, + "logps/rejected": -121.02738189697266, + "loss": 0.6824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024935340508818626, + "rewards/margins": 0.017878912389278412, + "rewards/rejected": 0.00705642718821764, + "step": 626 + }, + { + "epoch": 0.04, + "learning_rate": 9.998905784438032e-08, + "logits/chosen": -2.0140247344970703, + "logits/rejected": -1.999308466911316, + "logps/chosen": -32.19331359863281, + "logps/rejected": -228.82131958007812, + "loss": 0.685, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.029393387958407402, + "rewards/margins": -0.0018657688051462173, + "rewards/rejected": 0.03125915676355362, + "step": 627 + }, + { + "epoch": 0.04, + "learning_rate": 9.998885980768779e-08, + "logits/chosen": -2.135049343109131, + "logits/rejected": -2.084059715270996, + "logps/chosen": -158.66400146484375, + "logps/rejected": -357.9656982421875, + "loss": 0.7107, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03766326978802681, + "rewards/margins": -0.03928680345416069, + "rewards/rejected": 0.0016235351795330644, + "step": 628 + }, + { + "epoch": 0.04, + "learning_rate": 9.998865999514646e-08, + "logits/chosen": -2.1606228351593018, + "logits/rejected": -2.1472997665405273, + "logps/chosen": -150.20614624023438, + "logps/rejected": -321.9228820800781, + "loss": 0.6909, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.09494781494140625, + "rewards/margins": -0.10508576035499573, + "rewards/rejected": 0.20003357529640198, + "step": 629 + }, + { + "epoch": 0.04, + "learning_rate": 9.998845840676338e-08, + "logits/chosen": -2.3184797763824463, + "logits/rejected": -2.3161747455596924, + "logps/chosen": -0.0014711173716932535, + "logps/rejected": -105.36204528808594, + "loss": 0.6955, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.095975767384516e-05, + "rewards/margins": -0.009219639003276825, + "rewards/rejected": 0.00920867919921875, + "step": 630 + }, + { + "epoch": 0.04, + "learning_rate": 9.998825504254576e-08, + "logits/chosen": -2.2933993339538574, + "logits/rejected": -2.2932403087615967, + "logps/chosen": -2.1681370735168457, + "logps/rejected": -132.05348205566406, + "loss": 0.6774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009825992397964, + "rewards/margins": 0.053360845893621445, + "rewards/rejected": -0.04353485256433487, + "step": 631 + }, + { + "epoch": 0.04, + "learning_rate": 9.99880499025008e-08, + "logits/chosen": -2.1797757148742676, + "logits/rejected": -2.1872479915618896, + "logps/chosen": -279.7243347167969, + "logps/rejected": -488.9655456542969, + "loss": 0.61, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22845153510570526, + "rewards/margins": 0.14441528916358948, + "rewards/rejected": 0.08403625339269638, + "step": 632 + }, + { + "epoch": 0.04, + "learning_rate": 9.998784298663577e-08, + "logits/chosen": -1.990846037864685, + "logits/rejected": -2.0050737857818604, + "logps/chosen": -292.6423034667969, + "logps/rejected": -318.3355407714844, + "loss": 0.6423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07692565768957138, + "rewards/margins": 0.13762816786766052, + "rewards/rejected": -0.06070251390337944, + "step": 633 + }, + { + "epoch": 0.04, + "learning_rate": 9.998763429495805e-08, + "logits/chosen": -2.248140573501587, + "logits/rejected": -2.232818126678467, + "logps/chosen": -29.119674682617188, + "logps/rejected": -146.10348510742188, + "loss": 0.6651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04582557827234268, + "rewards/margins": 0.0539371520280838, + "rewards/rejected": -0.008111572824418545, + "step": 634 + }, + { + "epoch": 0.04, + "learning_rate": 9.998742382747504e-08, + "logits/chosen": -2.1236624717712402, + "logits/rejected": -2.1074647903442383, + "logps/chosen": -193.3886260986328, + "logps/rejected": -305.0848693847656, + "loss": 0.6427, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1831100434064865, + "rewards/margins": -0.059559643268585205, + "rewards/rejected": 0.24266968667507172, + "step": 635 + }, + { + "epoch": 0.04, + "learning_rate": 9.998721158419423e-08, + "logits/chosen": -2.0032706260681152, + "logits/rejected": -2.0079712867736816, + "logps/chosen": -0.09523214399814606, + "logps/rejected": -271.89947509765625, + "loss": 0.7241, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.681548481741629e-07, + "rewards/margins": -0.11979447305202484, + "rewards/rejected": 0.11979370564222336, + "step": 636 + }, + { + "epoch": 0.04, + "learning_rate": 9.998699756512316e-08, + "logits/chosen": -2.174184560775757, + "logits/rejected": -2.1359987258911133, + "logps/chosen": -258.7586669921875, + "logps/rejected": -449.3382263183594, + "loss": 0.6126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20893554389476776, + "rewards/margins": 0.08272399008274078, + "rewards/rejected": 0.12621155381202698, + "step": 637 + }, + { + "epoch": 0.04, + "learning_rate": 9.99867817702694e-08, + "logits/chosen": -2.2143354415893555, + "logits/rejected": -2.1791629791259766, + "logps/chosen": -45.575504302978516, + "logps/rejected": -274.5490417480469, + "loss": 0.7103, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.016565322875976562, + "rewards/margins": -0.06687889248132706, + "rewards/rejected": 0.08344421535730362, + "step": 638 + }, + { + "epoch": 0.04, + "learning_rate": 9.998656419964065e-08, + "logits/chosen": -2.1982192993164062, + "logits/rejected": -2.1917226314544678, + "logps/chosen": -85.7393798828125, + "logps/rejected": -237.64834594726562, + "loss": 0.6282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08632049709558487, + "rewards/margins": 0.16877594590187073, + "rewards/rejected": -0.08245544880628586, + "step": 639 + }, + { + "epoch": 0.04, + "learning_rate": 9.998634485324465e-08, + "logits/chosen": -2.211176872253418, + "logits/rejected": -2.2035458087921143, + "logps/chosen": -7.915907859802246, + "logps/rejected": -103.69627380371094, + "loss": 0.6919, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.012424183078110218, + "rewards/margins": -0.008439159952104092, + "rewards/rejected": 0.02086334303021431, + "step": 640 + }, + { + "epoch": 0.04, + "learning_rate": 9.998612373108915e-08, + "logits/chosen": -2.146088123321533, + "logits/rejected": -2.100619077682495, + "logps/chosen": -53.880699157714844, + "logps/rejected": -237.7591552734375, + "loss": 0.6613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036620330065488815, + "rewards/margins": 0.0885826125741005, + "rewards/rejected": -0.05196228250861168, + "step": 641 + }, + { + "epoch": 0.04, + "learning_rate": 9.998590083318201e-08, + "logits/chosen": -2.166128396987915, + "logits/rejected": -2.166233539581299, + "logps/chosen": -3.6019647121429443, + "logps/rejected": -28.87198829650879, + "loss": 0.6936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005144596099853516, + "rewards/margins": 0.007388400845229626, + "rewards/rejected": -0.012532996945083141, + "step": 642 + }, + { + "epoch": 0.04, + "learning_rate": 9.998567615953119e-08, + "logits/chosen": -2.076202630996704, + "logits/rejected": -2.0697708129882812, + "logps/chosen": -2.5236587524414062, + "logps/rejected": -66.28048706054688, + "loss": 0.6876, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1290779841365293e-05, + "rewards/margins": 0.006483531091362238, + "rewards/rejected": -0.006504822056740522, + "step": 643 + }, + { + "epoch": 0.04, + "learning_rate": 9.998544971014465e-08, + "logits/chosen": -2.1516528129577637, + "logits/rejected": -2.1507046222686768, + "logps/chosen": -115.8287582397461, + "logps/rejected": -202.64556884765625, + "loss": 0.6751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040879059582948685, + "rewards/margins": 0.029686737805604935, + "rewards/rejected": 0.01119232177734375, + "step": 644 + }, + { + "epoch": 0.04, + "learning_rate": 9.99852214850304e-08, + "logits/chosen": -2.014915943145752, + "logits/rejected": -2.006079912185669, + "logps/chosen": -185.3004150390625, + "logps/rejected": -309.1614990234375, + "loss": 0.7005, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08955536037683487, + "rewards/margins": -0.13876494765281677, + "rewards/rejected": 0.22832031548023224, + "step": 645 + }, + { + "epoch": 0.04, + "learning_rate": 9.99849914841966e-08, + "logits/chosen": -2.304208993911743, + "logits/rejected": -2.282334089279175, + "logps/chosen": -87.5494155883789, + "logps/rejected": -262.35101318359375, + "loss": 0.677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027495576068758965, + "rewards/margins": 0.054949190467596054, + "rewards/rejected": -0.02745361439883709, + "step": 646 + }, + { + "epoch": 0.04, + "learning_rate": 9.99847597076514e-08, + "logits/chosen": -2.2741000652313232, + "logits/rejected": -2.2731215953826904, + "logps/chosen": -0.08136694133281708, + "logps/rejected": -97.74552917480469, + "loss": 0.6814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008561522117815912, + "rewards/margins": 0.044865552335977554, + "rewards/rejected": -0.04400939866900444, + "step": 647 + }, + { + "epoch": 0.04, + "learning_rate": 9.998452615540302e-08, + "logits/chosen": -2.249777317047119, + "logits/rejected": -2.2447643280029297, + "logps/chosen": -25.46393585205078, + "logps/rejected": -205.00442504882812, + "loss": 0.692, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0060897828079760075, + "rewards/margins": -0.01880493201315403, + "rewards/rejected": 0.02489471435546875, + "step": 648 + }, + { + "epoch": 0.04, + "learning_rate": 9.99842908274598e-08, + "logits/chosen": -2.2321882247924805, + "logits/rejected": -2.217038631439209, + "logps/chosen": -237.1140899658203, + "logps/rejected": -371.50396728515625, + "loss": 0.6064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2737320065498352, + "rewards/margins": 0.0628250241279602, + "rewards/rejected": 0.210906982421875, + "step": 649 + }, + { + "epoch": 0.04, + "learning_rate": 9.998405372383004e-08, + "logits/chosen": -2.0885283946990967, + "logits/rejected": -2.082707405090332, + "logps/chosen": -20.970882415771484, + "logps/rejected": -174.02345275878906, + "loss": 0.6906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012437820434570312, + "rewards/margins": 0.015799332410097122, + "rewards/rejected": -0.0033615112770348787, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 9.998381484452221e-08, + "logits/chosen": -2.3431496620178223, + "logits/rejected": -2.3348560333251953, + "logps/chosen": -37.94263458251953, + "logps/rejected": -129.60250854492188, + "loss": 0.7112, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04275360330939293, + "rewards/margins": -0.03546600416302681, + "rewards/rejected": -0.0072875977493822575, + "step": 651 + }, + { + "epoch": 0.04, + "learning_rate": 9.998357418954478e-08, + "logits/chosen": -2.1640806198120117, + "logits/rejected": -2.097966194152832, + "logps/chosen": -270.09857177734375, + "logps/rejected": -486.0503234863281, + "loss": 0.6331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15079346299171448, + "rewards/margins": 0.02662048488855362, + "rewards/rejected": 0.12417297810316086, + "step": 652 + }, + { + "epoch": 0.04, + "learning_rate": 9.998333175890629e-08, + "logits/chosen": -2.067466974258423, + "logits/rejected": -2.046748638153076, + "logps/chosen": -154.58843994140625, + "logps/rejected": -363.55316162109375, + "loss": 0.6527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10194855183362961, + "rewards/margins": 0.03979645296931267, + "rewards/rejected": 0.06215209886431694, + "step": 653 + }, + { + "epoch": 0.04, + "learning_rate": 9.998308755261535e-08, + "logits/chosen": -2.2574334144592285, + "logits/rejected": -2.187596559524536, + "logps/chosen": -154.24545288085938, + "logps/rejected": -328.1901550292969, + "loss": 0.6486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13337402045726776, + "rewards/margins": 0.07031554728746414, + "rewards/rejected": 0.06305847316980362, + "step": 654 + }, + { + "epoch": 0.04, + "learning_rate": 9.998284157068066e-08, + "logits/chosen": -2.0278806686401367, + "logits/rejected": -2.0185041427612305, + "logps/chosen": -11.500090599060059, + "logps/rejected": -278.932373046875, + "loss": 0.6886, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.583069188716763e-07, + "rewards/margins": 0.028612423688173294, + "rewards/rejected": -0.02861328236758709, + "step": 655 + }, + { + "epoch": 0.04, + "learning_rate": 9.998259381311095e-08, + "logits/chosen": -2.095855236053467, + "logits/rejected": -2.093059778213501, + "logps/chosen": -280.78900146484375, + "logps/rejected": -303.72467041015625, + "loss": 0.6552, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.16552428901195526, + "rewards/margins": -0.026571661233901978, + "rewards/rejected": 0.19209595024585724, + "step": 656 + }, + { + "epoch": 0.04, + "learning_rate": 9.998234427991501e-08, + "logits/chosen": -2.153578281402588, + "logits/rejected": -2.139799118041992, + "logps/chosen": -5.717684745788574, + "logps/rejected": -144.96734619140625, + "loss": 0.7065, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01767602004110813, + "rewards/margins": -0.03429131582379341, + "rewards/rejected": 0.01661529578268528, + "step": 657 + }, + { + "epoch": 0.04, + "learning_rate": 9.998209297110172e-08, + "logits/chosen": -1.9889408349990845, + "logits/rejected": -1.955308437347412, + "logps/chosen": -292.12701416015625, + "logps/rejected": -324.9978332519531, + "loss": 0.6502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17297974228858948, + "rewards/margins": 0.0056304931640625, + "rewards/rejected": 0.16734924912452698, + "step": 658 + }, + { + "epoch": 0.04, + "learning_rate": 9.998183988667999e-08, + "logits/chosen": -2.0790951251983643, + "logits/rejected": -2.0737829208374023, + "logps/chosen": -12.07519245147705, + "logps/rejected": -199.00062561035156, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01248998660594225, + "rewards/margins": 0.048705197870731354, + "rewards/rejected": -0.03621521219611168, + "step": 659 + }, + { + "epoch": 0.04, + "learning_rate": 9.998158502665882e-08, + "logits/chosen": -2.0991220474243164, + "logits/rejected": -2.0943093299865723, + "logps/chosen": -57.400611877441406, + "logps/rejected": -116.08927154541016, + "loss": 0.679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03543739393353462, + "rewards/margins": 0.014771653339266777, + "rewards/rejected": 0.020665740594267845, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 9.998132839104726e-08, + "logits/chosen": -2.162121295928955, + "logits/rejected": -2.1226205825805664, + "logps/chosen": -36.62901306152344, + "logps/rejected": -422.82427978515625, + "loss": 0.6715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03537140041589737, + "rewards/margins": 0.06057891994714737, + "rewards/rejected": -0.02520751953125, + "step": 661 + }, + { + "epoch": 0.04, + "learning_rate": 9.998106997985445e-08, + "logits/chosen": -2.2725024223327637, + "logits/rejected": -2.2546322345733643, + "logps/chosen": -45.66019058227539, + "logps/rejected": -129.44989013671875, + "loss": 0.6905, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02445068396627903, + "rewards/margins": -0.02336120791733265, + "rewards/rejected": 0.04781189188361168, + "step": 662 + }, + { + "epoch": 0.04, + "learning_rate": 9.998080979308953e-08, + "logits/chosen": -2.1795759201049805, + "logits/rejected": -2.146543025970459, + "logps/chosen": -88.67682647705078, + "logps/rejected": -327.1226501464844, + "loss": 0.6611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02308044396340847, + "rewards/margins": 0.10369873046875, + "rewards/rejected": -0.08061828464269638, + "step": 663 + }, + { + "epoch": 0.04, + "learning_rate": 9.998054783076177e-08, + "logits/chosen": -2.0946593284606934, + "logits/rejected": -2.0751006603240967, + "logps/chosen": -290.0233154296875, + "logps/rejected": -467.97802734375, + "loss": 0.6479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05733032152056694, + "rewards/margins": 0.07643432915210724, + "rewards/rejected": -0.01910400390625, + "step": 664 + }, + { + "epoch": 0.04, + "learning_rate": 9.998028409288047e-08, + "logits/chosen": -2.1788723468780518, + "logits/rejected": -2.178126573562622, + "logps/chosen": -139.0380401611328, + "logps/rejected": -376.3556823730469, + "loss": 0.7103, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07710724323987961, + "rewards/margins": -0.15062102675437927, + "rewards/rejected": 0.22772827744483948, + "step": 665 + }, + { + "epoch": 0.04, + "learning_rate": 9.9980018579455e-08, + "logits/chosen": -2.097996473312378, + "logits/rejected": -2.0822556018829346, + "logps/chosen": -327.08392333984375, + "logps/rejected": -392.99810791015625, + "loss": 0.6655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04514770582318306, + "rewards/margins": 0.0001220703125, + "rewards/rejected": 0.04502563551068306, + "step": 666 + }, + { + "epoch": 0.04, + "learning_rate": 9.997975129049479e-08, + "logits/chosen": -2.2806310653686523, + "logits/rejected": -2.2759594917297363, + "logps/chosen": -0.0002553252852521837, + "logps/rejected": -208.76620483398438, + "loss": 0.6805, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1810701582580805e-06, + "rewards/margins": 0.05138615146279335, + "rewards/rejected": -0.05138397216796875, + "step": 667 + }, + { + "epoch": 0.04, + "learning_rate": 9.997948222600934e-08, + "logits/chosen": -2.03340744972229, + "logits/rejected": -1.8982744216918945, + "logps/chosen": -222.81777954101562, + "logps/rejected": -416.26708984375, + "loss": 0.6704, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08077087253332138, + "rewards/margins": -0.008816532790660858, + "rewards/rejected": 0.08958740532398224, + "step": 668 + }, + { + "epoch": 0.04, + "learning_rate": 9.997921138600818e-08, + "logits/chosen": -2.196896553039551, + "logits/rejected": -2.182008743286133, + "logps/chosen": -39.53450393676758, + "logps/rejected": -106.93467712402344, + "loss": 0.6837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009231949225068092, + "rewards/margins": 0.027531815692782402, + "rewards/rejected": -0.01829986646771431, + "step": 669 + }, + { + "epoch": 0.04, + "learning_rate": 9.997893877050099e-08, + "logits/chosen": -2.067094326019287, + "logits/rejected": -2.0726029872894287, + "logps/chosen": -7.990123271942139, + "logps/rejected": -245.51364135742188, + "loss": 0.672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015708303079009056, + "rewards/margins": 0.06937041133642197, + "rewards/rejected": -0.05366211012005806, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 9.99786643794974e-08, + "logits/chosen": -2.089200019836426, + "logits/rejected": -2.0953922271728516, + "logps/chosen": -201.86436462402344, + "logps/rejected": -268.47344970703125, + "loss": 0.6078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1958053559064865, + "rewards/margins": 0.10431365668773651, + "rewards/rejected": 0.09149169921875, + "step": 671 + }, + { + "epoch": 0.04, + "learning_rate": 9.997838821300719e-08, + "logits/chosen": -1.9922231435775757, + "logits/rejected": -1.9932388067245483, + "logps/chosen": -254.9901580810547, + "logps/rejected": -353.1272277832031, + "loss": 0.6918, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04118805006146431, + "rewards/margins": -0.05893707647919655, + "rewards/rejected": 0.10012512654066086, + "step": 672 + }, + { + "epoch": 0.04, + "learning_rate": 9.997811027104017e-08, + "logits/chosen": -2.047851800918579, + "logits/rejected": -2.046602725982666, + "logps/chosen": -12.785909652709961, + "logps/rejected": -95.86436462402344, + "loss": 0.6846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010143280029296875, + "rewards/margins": 0.02059631422162056, + "rewards/rejected": -0.01045303326100111, + "step": 673 + }, + { + "epoch": 0.04, + "learning_rate": 9.997783055360619e-08, + "logits/chosen": -1.882811427116394, + "logits/rejected": -1.8788180351257324, + "logps/chosen": -55.286468505859375, + "logps/rejected": -183.20486450195312, + "loss": 0.7081, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.010290145874023438, + "rewards/margins": -0.07161369919776917, + "rewards/rejected": 0.06132354959845543, + "step": 674 + }, + { + "epoch": 0.04, + "learning_rate": 9.99775490607152e-08, + "logits/chosen": -2.1875555515289307, + "logits/rejected": -2.1771183013916016, + "logps/chosen": -56.82716751098633, + "logps/rejected": -277.6126403808594, + "loss": 0.6842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019399642944335938, + "rewards/margins": 0.003136824816465378, + "rewards/rejected": 0.01626281812787056, + "step": 675 + }, + { + "epoch": 0.04, + "learning_rate": 9.997726579237721e-08, + "logits/chosen": -2.281925678253174, + "logits/rejected": -2.2528576850891113, + "logps/chosen": -58.06262969970703, + "logps/rejected": -318.04559326171875, + "loss": 0.6629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0077156065963208675, + "rewards/margins": 0.120366670191288, + "rewards/rejected": -0.128082275390625, + "step": 676 + }, + { + "epoch": 0.04, + "learning_rate": 9.997698074860226e-08, + "logits/chosen": -1.9951947927474976, + "logits/rejected": -1.9970017671585083, + "logps/chosen": -206.48208618164062, + "logps/rejected": -263.5121154785156, + "loss": 0.7061, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0239105224609375, + "rewards/margins": -0.02362365648150444, + "rewards/rejected": -0.0002868652518372983, + "step": 677 + }, + { + "epoch": 0.04, + "learning_rate": 9.99766939294005e-08, + "logits/chosen": -2.1237244606018066, + "logits/rejected": -2.089381456375122, + "logps/chosen": -213.12289428710938, + "logps/rejected": -378.0621337890625, + "loss": 0.6226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16941070556640625, + "rewards/margins": 0.10029449313879013, + "rewards/rejected": 0.06911621242761612, + "step": 678 + }, + { + "epoch": 0.04, + "learning_rate": 9.99764053347821e-08, + "logits/chosen": -2.263582468032837, + "logits/rejected": -2.2582924365997314, + "logps/chosen": -34.117767333984375, + "logps/rejected": -117.07952117919922, + "loss": 0.6767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02851410023868084, + "rewards/margins": 0.0014869701117277145, + "rewards/rejected": 0.027027130126953125, + "step": 679 + }, + { + "epoch": 0.04, + "learning_rate": 9.997611496475733e-08, + "logits/chosen": -2.050753593444824, + "logits/rejected": -2.0452191829681396, + "logps/chosen": -9.136883735656738, + "logps/rejected": -127.69207763671875, + "loss": 0.6587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033075332175940275, + "rewards/margins": 0.1411321610212326, + "rewards/rejected": -0.144439697265625, + "step": 680 + }, + { + "epoch": 0.04, + "learning_rate": 9.99758228193365e-08, + "logits/chosen": -2.2367730140686035, + "logits/rejected": -2.239577293395996, + "logps/chosen": -20.45488739013672, + "logps/rejected": -139.07435607910156, + "loss": 0.6902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04509792476892471, + "rewards/margins": 0.055019572377204895, + "rewards/rejected": -0.10011749714612961, + "step": 681 + }, + { + "epoch": 0.04, + "learning_rate": 9.997552889852999e-08, + "logits/chosen": -2.1623969078063965, + "logits/rejected": -2.0721094608306885, + "logps/chosen": -198.80426025390625, + "logps/rejected": -323.24542236328125, + "loss": 0.6137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09222412109375, + "rewards/margins": 0.21285399794578552, + "rewards/rejected": -0.12062988430261612, + "step": 682 + }, + { + "epoch": 0.04, + "learning_rate": 9.997523320234822e-08, + "logits/chosen": -2.2594616413116455, + "logits/rejected": -2.2587428092956543, + "logps/chosen": -73.62258911132812, + "logps/rejected": -176.27220153808594, + "loss": 0.7046, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03726806864142418, + "rewards/margins": -0.01843719743192196, + "rewards/rejected": -0.01883087120950222, + "step": 683 + }, + { + "epoch": 0.04, + "learning_rate": 9.997493573080172e-08, + "logits/chosen": -2.086841106414795, + "logits/rejected": -1.9987868070602417, + "logps/chosen": -193.5856475830078, + "logps/rejected": -430.52130126953125, + "loss": 0.6292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12684784829616547, + "rewards/margins": 0.07469940930604935, + "rewards/rejected": 0.05214843899011612, + "step": 684 + }, + { + "epoch": 0.04, + "learning_rate": 9.997463648390105e-08, + "logits/chosen": -2.2310118675231934, + "logits/rejected": -2.2256388664245605, + "logps/chosen": -30.3663330078125, + "logps/rejected": -154.21603393554688, + "loss": 0.6429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09771690517663956, + "rewards/margins": 0.1068492904305458, + "rewards/rejected": -0.00913238525390625, + "step": 685 + }, + { + "epoch": 0.04, + "learning_rate": 9.997433546165684e-08, + "logits/chosen": -2.108018636703491, + "logits/rejected": -2.1076395511627197, + "logps/chosen": -0.03814917430281639, + "logps/rejected": -161.2244873046875, + "loss": 0.6788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044048167183063924, + "rewards/margins": 0.058760568499565125, + "rewards/rejected": -0.05920105054974556, + "step": 686 + }, + { + "epoch": 0.04, + "learning_rate": 9.997403266407978e-08, + "logits/chosen": -1.9939247369766235, + "logits/rejected": -1.9894535541534424, + "logps/chosen": -0.06823423504829407, + "logps/rejected": -186.14849853515625, + "loss": 0.6928, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.075467586517334e-06, + "rewards/margins": -0.006406663451343775, + "rewards/rejected": 0.0064025879837572575, + "step": 687 + }, + { + "epoch": 0.04, + "learning_rate": 9.997372809118063e-08, + "logits/chosen": -2.109802484512329, + "logits/rejected": -2.091444492340088, + "logps/chosen": -211.66848754882812, + "logps/rejected": -334.91754150390625, + "loss": 0.7057, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05348816141486168, + "rewards/margins": -0.13229064643383026, + "rewards/rejected": 0.18577881157398224, + "step": 688 + }, + { + "epoch": 0.04, + "learning_rate": 9.997342174297021e-08, + "logits/chosen": -2.0037081241607666, + "logits/rejected": -1.9881491661071777, + "logps/chosen": -0.0022463405039161444, + "logps/rejected": -207.75279235839844, + "loss": 0.685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9899756807717495e-05, + "rewards/margins": 0.03342278674244881, + "rewards/rejected": -0.03344268724322319, + "step": 689 + }, + { + "epoch": 0.04, + "learning_rate": 9.99731136194594e-08, + "logits/chosen": -2.163198947906494, + "logits/rejected": -2.1575331687927246, + "logps/chosen": -22.63981056213379, + "logps/rejected": -90.20718383789062, + "loss": 0.7241, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05724067613482475, + "rewards/margins": -0.06434822082519531, + "rewards/rejected": 0.007107544224709272, + "step": 690 + }, + { + "epoch": 0.04, + "learning_rate": 9.997280372065916e-08, + "logits/chosen": -2.2584428787231445, + "logits/rejected": -2.257314920425415, + "logps/chosen": -34.797149658203125, + "logps/rejected": -132.93820190429688, + "loss": 0.6735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03766784816980362, + "rewards/margins": 0.03549804911017418, + "rewards/rejected": 0.0021697997581213713, + "step": 691 + }, + { + "epoch": 0.04, + "learning_rate": 9.997249204658049e-08, + "logits/chosen": -2.1823995113372803, + "logits/rejected": -2.1083295345306396, + "logps/chosen": -186.36082458496094, + "logps/rejected": -432.3083801269531, + "loss": 0.6854, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05207061767578125, + "rewards/margins": -0.05270538479089737, + "rewards/rejected": 0.10477600246667862, + "step": 692 + }, + { + "epoch": 0.04, + "learning_rate": 9.997217859723445e-08, + "logits/chosen": -2.149726390838623, + "logits/rejected": -2.1452550888061523, + "logps/chosen": -60.798805236816406, + "logps/rejected": -174.21258544921875, + "loss": 0.6555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06973381340503693, + "rewards/margins": 0.07772941887378693, + "rewards/rejected": -0.00799560546875, + "step": 693 + }, + { + "epoch": 0.04, + "learning_rate": 9.99718633726322e-08, + "logits/chosen": -2.183542251586914, + "logits/rejected": -2.1179749965667725, + "logps/chosen": -208.08531188964844, + "logps/rejected": -324.01434326171875, + "loss": 0.6633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06649017333984375, + "rewards/margins": 0.06053008884191513, + "rewards/rejected": 0.0059600831009447575, + "step": 694 + }, + { + "epoch": 0.04, + "learning_rate": 9.99715463727849e-08, + "logits/chosen": -2.1897597312927246, + "logits/rejected": -2.176307439804077, + "logps/chosen": -3.578622579574585, + "logps/rejected": -201.25564575195312, + "loss": 0.6711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0032587051391601562, + "rewards/margins": 0.0697900801897049, + "rewards/rejected": -0.06653137505054474, + "step": 695 + }, + { + "epoch": 0.04, + "learning_rate": 9.997122759770384e-08, + "logits/chosen": -2.1681249141693115, + "logits/rejected": -2.167978286743164, + "logps/chosen": -0.05410850793123245, + "logps/rejected": -28.99595069885254, + "loss": 0.6849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003945123462472111, + "rewards/margins": 0.028423236683011055, + "rewards/rejected": -0.02881774865090847, + "step": 696 + }, + { + "epoch": 0.04, + "learning_rate": 9.997090704740036e-08, + "logits/chosen": -2.1946640014648438, + "logits/rejected": -2.1938912868499756, + "logps/chosen": -20.38190460205078, + "logps/rejected": -113.27388000488281, + "loss": 0.6896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006233024876564741, + "rewards/margins": 0.011481285095214844, + "rewards/rejected": -0.00524826068431139, + "step": 697 + }, + { + "epoch": 0.04, + "learning_rate": 9.997058472188582e-08, + "logits/chosen": -1.9465563297271729, + "logits/rejected": -1.9467271566390991, + "logps/chosen": -69.29912567138672, + "logps/rejected": -254.6446075439453, + "loss": 0.7063, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.027649689465761185, + "rewards/margins": -0.015732575207948685, + "rewards/rejected": -0.0119171142578125, + "step": 698 + }, + { + "epoch": 0.04, + "learning_rate": 9.997026062117167e-08, + "logits/chosen": -2.079538583755493, + "logits/rejected": -2.045947551727295, + "logps/chosen": -316.10162353515625, + "logps/rejected": -382.5281066894531, + "loss": 0.6495, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19779358804225922, + "rewards/margins": -0.03800658881664276, + "rewards/rejected": 0.23580017685890198, + "step": 699 + }, + { + "epoch": 0.04, + "learning_rate": 9.996993474526943e-08, + "logits/chosen": -1.9222979545593262, + "logits/rejected": -1.8717330694198608, + "logps/chosen": -350.62896728515625, + "logps/rejected": -525.4059448242188, + "loss": 0.6718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03281250223517418, + "rewards/margins": 0.06666870415210724, + "rewards/rejected": -0.03385620191693306, + "step": 700 + }, + { + "epoch": 0.04, + "learning_rate": 9.996960709419069e-08, + "logits/chosen": -2.2966418266296387, + "logits/rejected": -2.286574363708496, + "logps/chosen": -12.15412712097168, + "logps/rejected": -196.32749938964844, + "loss": 0.676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048059653490781784, + "rewards/margins": 0.027037618681788445, + "rewards/rejected": 0.02102203480899334, + "step": 701 + }, + { + "epoch": 0.04, + "learning_rate": 9.996927766794707e-08, + "logits/chosen": -2.1277706623077393, + "logits/rejected": -2.12611985206604, + "logps/chosen": -8.596321105957031, + "logps/rejected": -39.35306167602539, + "loss": 0.6946, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.009418487548828125, + "rewards/margins": -0.002144241239875555, + "rewards/rejected": -0.00727424630895257, + "step": 702 + }, + { + "epoch": 0.04, + "learning_rate": 9.996894646655029e-08, + "logits/chosen": -2.094529867172241, + "logits/rejected": -1.923903465270996, + "logps/chosen": -207.51380920410156, + "logps/rejected": -515.8192138671875, + "loss": 0.6591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08867035061120987, + "rewards/margins": 0.01736907660961151, + "rewards/rejected": 0.07130127400159836, + "step": 703 + }, + { + "epoch": 0.04, + "learning_rate": 9.996861349001209e-08, + "logits/chosen": -2.2647037506103516, + "logits/rejected": -2.2675344944000244, + "logps/chosen": -0.0008746407111175358, + "logps/rejected": -201.32177734375, + "loss": 0.6932, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.2643943964339996e-07, + "rewards/margins": -0.0019380926387384534, + "rewards/rejected": 0.0019378662109375, + "step": 704 + }, + { + "epoch": 0.04, + "learning_rate": 9.996827873834434e-08, + "logits/chosen": -2.199862480163574, + "logits/rejected": -2.1936545372009277, + "logps/chosen": -40.693851470947266, + "logps/rejected": -163.0546112060547, + "loss": 0.6642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021010970696806908, + "rewards/margins": 0.08665581047534943, + "rewards/rejected": -0.06564483791589737, + "step": 705 + }, + { + "epoch": 0.04, + "learning_rate": 9.996794221155889e-08, + "logits/chosen": -2.0440027713775635, + "logits/rejected": -2.0169429779052734, + "logps/chosen": -199.63470458984375, + "logps/rejected": -370.7615051269531, + "loss": 0.6798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03907928615808487, + "rewards/margins": 0.02072601392865181, + "rewards/rejected": 0.01835327222943306, + "step": 706 + }, + { + "epoch": 0.04, + "learning_rate": 9.996760390966771e-08, + "logits/chosen": -2.0800750255584717, + "logits/rejected": -2.0423834323883057, + "logps/chosen": -310.186767578125, + "logps/rejected": -431.8497619628906, + "loss": 0.6377, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21234741806983948, + "rewards/margins": -0.014425650238990784, + "rewards/rejected": 0.22677306830883026, + "step": 707 + }, + { + "epoch": 0.04, + "learning_rate": 9.996726383268283e-08, + "logits/chosen": -2.099721670150757, + "logits/rejected": -2.0892817974090576, + "logps/chosen": -227.3404541015625, + "logps/rejected": -344.565673828125, + "loss": 0.6551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06280212849378586, + "rewards/margins": 0.06271667778491974, + "rewards/rejected": 8.544921729480848e-05, + "step": 708 + }, + { + "epoch": 0.04, + "learning_rate": 9.996692198061633e-08, + "logits/chosen": -1.9838165044784546, + "logits/rejected": -1.9764087200164795, + "logps/chosen": -210.09149169921875, + "logps/rejected": -259.4533386230469, + "loss": 0.6878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009263611398637295, + "rewards/margins": 0.00569610670208931, + "rewards/rejected": 0.0035675049293786287, + "step": 709 + }, + { + "epoch": 0.04, + "learning_rate": 9.996657835348034e-08, + "logits/chosen": -2.3765780925750732, + "logits/rejected": -2.3329672813415527, + "logps/chosen": -0.008650841191411018, + "logps/rejected": -353.410400390625, + "loss": 0.6538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003382180293556303, + "rewards/margins": 0.17323356866836548, + "rewards/rejected": -0.17357178032398224, + "step": 710 + }, + { + "epoch": 0.04, + "learning_rate": 9.996623295128708e-08, + "logits/chosen": -2.020148515701294, + "logits/rejected": -1.9787875413894653, + "logps/chosen": -163.40869140625, + "logps/rejected": -249.22097778320312, + "loss": 0.6268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18608704209327698, + "rewards/margins": 0.06268768757581711, + "rewards/rejected": 0.12339935451745987, + "step": 711 + }, + { + "epoch": 0.04, + "learning_rate": 9.996588577404879e-08, + "logits/chosen": -2.0327203273773193, + "logits/rejected": -2.046044111251831, + "logps/chosen": -374.520263671875, + "logps/rejected": -349.11749267578125, + "loss": 0.5838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22828979790210724, + "rewards/margins": 0.16071167588233948, + "rewards/rejected": 0.06757812947034836, + "step": 712 + }, + { + "epoch": 0.04, + "learning_rate": 9.996553682177785e-08, + "logits/chosen": -2.2031733989715576, + "logits/rejected": -2.199789047241211, + "logps/chosen": -46.95106506347656, + "logps/rejected": -121.30638122558594, + "loss": 0.6697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05675201490521431, + "rewards/margins": 0.02698059193789959, + "rewards/rejected": 0.02977142296731472, + "step": 713 + }, + { + "epoch": 0.04, + "learning_rate": 9.996518609448663e-08, + "logits/chosen": -2.406724452972412, + "logits/rejected": -2.3839449882507324, + "logps/chosen": -14.507068634033203, + "logps/rejected": -270.86041259765625, + "loss": 0.6292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00863637961447239, + "rewards/margins": 0.2690489888191223, + "rewards/rejected": -0.2604126036167145, + "step": 714 + }, + { + "epoch": 0.04, + "learning_rate": 9.996483359218759e-08, + "logits/chosen": -2.1436736583709717, + "logits/rejected": -2.0934133529663086, + "logps/chosen": -345.12628173828125, + "logps/rejected": -522.933837890625, + "loss": 0.5934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33430176973342896, + "rewards/margins": 0.12363892793655396, + "rewards/rejected": 0.210662841796875, + "step": 715 + }, + { + "epoch": 0.04, + "learning_rate": 9.996447931489326e-08, + "logits/chosen": -1.9688267707824707, + "logits/rejected": -1.9696500301361084, + "logps/chosen": -32.518150329589844, + "logps/rejected": -80.0935287475586, + "loss": 0.6923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018515778705477715, + "rewards/margins": 0.028487397357821465, + "rewards/rejected": -0.04700317606329918, + "step": 716 + }, + { + "epoch": 0.04, + "learning_rate": 9.996412326261623e-08, + "logits/chosen": -2.2441515922546387, + "logits/rejected": -2.2077324390411377, + "logps/chosen": -182.08660888671875, + "logps/rejected": -320.689697265625, + "loss": 0.642, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.15466919541358948, + "rewards/margins": -0.009588614106178284, + "rewards/rejected": 0.16425780951976776, + "step": 717 + }, + { + "epoch": 0.04, + "learning_rate": 9.996376543536913e-08, + "logits/chosen": -2.2655045986175537, + "logits/rejected": -2.2625598907470703, + "logps/chosen": -17.42482566833496, + "logps/rejected": -89.85813903808594, + "loss": 0.6976, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.337860107421875e-05, + "rewards/margins": -0.009487343020737171, + "rewards/rejected": 0.00952072162181139, + "step": 718 + }, + { + "epoch": 0.04, + "learning_rate": 9.996340583316467e-08, + "logits/chosen": -1.945674180984497, + "logits/rejected": -1.898525595664978, + "logps/chosen": -258.153076171875, + "logps/rejected": -452.5976867675781, + "loss": 0.6107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2549072206020355, + "rewards/margins": 0.07170714437961578, + "rewards/rejected": 0.18320007622241974, + "step": 719 + }, + { + "epoch": 0.04, + "learning_rate": 9.996304445601567e-08, + "logits/chosen": -2.184483528137207, + "logits/rejected": -2.179685592651367, + "logps/chosen": -11.364975929260254, + "logps/rejected": -72.45513916015625, + "loss": 0.717, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.024220561608672142, + "rewards/margins": -0.06436185538768768, + "rewards/rejected": 0.04014129564166069, + "step": 720 + }, + { + "epoch": 0.04, + "learning_rate": 9.99626813039349e-08, + "logits/chosen": -2.1463561058044434, + "logits/rejected": -2.126326560974121, + "logps/chosen": -51.3404655456543, + "logps/rejected": -144.060302734375, + "loss": 0.671, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0775783583521843, + "rewards/margins": -0.00031165778636932373, + "rewards/rejected": 0.07789001613855362, + "step": 721 + }, + { + "epoch": 0.04, + "learning_rate": 9.996231637693531e-08, + "logits/chosen": -2.053318500518799, + "logits/rejected": -2.044546365737915, + "logps/chosen": -0.01002228632569313, + "logps/rejected": -221.86676025390625, + "loss": 0.6737, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.617947814087529e-07, + "rewards/margins": 0.08446986973285675, + "rewards/rejected": -0.08446960896253586, + "step": 722 + }, + { + "epoch": 0.04, + "learning_rate": 9.996194967502986e-08, + "logits/chosen": -2.4085543155670166, + "logits/rejected": -2.396113157272339, + "logps/chosen": -26.814289093017578, + "logps/rejected": -162.414794921875, + "loss": 0.6778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0017210006481036544, + "rewards/margins": 0.07220593094825745, + "rewards/rejected": -0.07048492878675461, + "step": 723 + }, + { + "epoch": 0.04, + "learning_rate": 9.996158119823156e-08, + "logits/chosen": -2.364809513092041, + "logits/rejected": -2.295132875442505, + "logps/chosen": -308.1702880859375, + "logps/rejected": -404.80157470703125, + "loss": 0.5689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.272918701171875, + "rewards/margins": 0.22622069716453552, + "rewards/rejected": 0.04669800028204918, + "step": 724 + }, + { + "epoch": 0.04, + "learning_rate": 9.996121094655351e-08, + "logits/chosen": -2.1087677478790283, + "logits/rejected": -2.107938766479492, + "logps/chosen": -254.32394409179688, + "logps/rejected": -387.9209899902344, + "loss": 0.6432, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.26063233613967896, + "rewards/margins": -0.052276611328125, + "rewards/rejected": 0.31290894746780396, + "step": 725 + }, + { + "epoch": 0.04, + "learning_rate": 9.996083892000886e-08, + "logits/chosen": -2.126073122024536, + "logits/rejected": -2.122680902481079, + "logps/chosen": -265.96490478515625, + "logps/rejected": -373.99407958984375, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1464691162109375, + "rewards/margins": -0.12978821992874146, + "rewards/rejected": 0.27625733613967896, + "step": 726 + }, + { + "epoch": 0.04, + "learning_rate": 9.996046511861083e-08, + "logits/chosen": -2.1738409996032715, + "logits/rejected": -2.1504828929901123, + "logps/chosen": -9.715346336364746, + "logps/rejected": -151.3599853515625, + "loss": 0.6798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01714611053466797, + "rewards/margins": 0.0443357452750206, + "rewards/rejected": -0.02718963660299778, + "step": 727 + }, + { + "epoch": 0.04, + "learning_rate": 9.996008954237269e-08, + "logits/chosen": -2.215789556503296, + "logits/rejected": -2.2129268646240234, + "logps/chosen": -222.11404418945312, + "logps/rejected": -351.6031494140625, + "loss": 0.673, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.17448730766773224, + "rewards/margins": -0.13957519829273224, + "rewards/rejected": 0.3140625059604645, + "step": 728 + }, + { + "epoch": 0.04, + "learning_rate": 9.995971219130778e-08, + "logits/chosen": -2.2704617977142334, + "logits/rejected": -2.2740588188171387, + "logps/chosen": -36.90284729003906, + "logps/rejected": -120.32255554199219, + "loss": 0.693, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0052433013916015625, + "rewards/margins": -0.0067081451416015625, + "rewards/rejected": 0.00146484375, + "step": 729 + }, + { + "epoch": 0.04, + "learning_rate": 9.995933306542952e-08, + "logits/chosen": -2.2522010803222656, + "logits/rejected": -2.1488139629364014, + "logps/chosen": -273.73455810546875, + "logps/rejected": -458.85321044921875, + "loss": 0.7055, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03337707743048668, + "rewards/margins": -0.07348021864891052, + "rewards/rejected": 0.1068572998046875, + "step": 730 + }, + { + "epoch": 0.04, + "learning_rate": 9.995895216475138e-08, + "logits/chosen": -2.218273878097534, + "logits/rejected": -2.2112057209014893, + "logps/chosen": -20.454303741455078, + "logps/rejected": -142.93089294433594, + "loss": 0.677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033523559104651213, + "rewards/margins": 0.07007903605699539, + "rewards/rejected": -0.07343139499425888, + "step": 731 + }, + { + "epoch": 0.04, + "learning_rate": 9.995856948928687e-08, + "logits/chosen": -2.293410301208496, + "logits/rejected": -2.2779762744903564, + "logps/chosen": -3.4377458095550537, + "logps/rejected": -97.98910522460938, + "loss": 0.6636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012119293212890625, + "rewards/margins": 0.10704346001148224, + "rewards/rejected": -0.09492416679859161, + "step": 732 + }, + { + "epoch": 0.04, + "learning_rate": 9.995818503904962e-08, + "logits/chosen": -2.2148754596710205, + "logits/rejected": -2.221670389175415, + "logps/chosen": -75.79876708984375, + "logps/rejected": -242.3225555419922, + "loss": 0.6689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03692779690027237, + "rewards/margins": 0.06614227592945099, + "rewards/rejected": -0.02921447716653347, + "step": 733 + }, + { + "epoch": 0.04, + "learning_rate": 9.995779881405324e-08, + "logits/chosen": -2.331754207611084, + "logits/rejected": -2.308138132095337, + "logps/chosen": -74.74127197265625, + "logps/rejected": -273.0235900878906, + "loss": 0.6491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09538421779870987, + "rewards/margins": 0.07579497992992401, + "rewards/rejected": 0.01958923414349556, + "step": 734 + }, + { + "epoch": 0.04, + "learning_rate": 9.995741081431149e-08, + "logits/chosen": -2.1778743267059326, + "logits/rejected": -2.1761436462402344, + "logps/chosen": -214.36431884765625, + "logps/rejected": -281.55084228515625, + "loss": 0.6195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2720657289028168, + "rewards/margins": 0.00266873836517334, + "rewards/rejected": 0.26939699053764343, + "step": 735 + }, + { + "epoch": 0.04, + "learning_rate": 9.995702103983814e-08, + "logits/chosen": -2.0799543857574463, + "logits/rejected": -2.0709388256073, + "logps/chosen": -266.5823059082031, + "logps/rejected": -374.85821533203125, + "loss": 0.6328, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21630249917507172, + "rewards/margins": -0.01945190131664276, + "rewards/rejected": 0.23575440049171448, + "step": 736 + }, + { + "epoch": 0.04, + "learning_rate": 9.995662949064703e-08, + "logits/chosen": -2.204862117767334, + "logits/rejected": -2.2745473384857178, + "logps/chosen": -225.6720428466797, + "logps/rejected": -213.16659545898438, + "loss": 0.6736, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.15575408935546875, + "rewards/margins": -0.09039460122585297, + "rewards/rejected": 0.24614869058132172, + "step": 737 + }, + { + "epoch": 0.04, + "learning_rate": 9.995623616675209e-08, + "logits/chosen": -2.072075128555298, + "logits/rejected": -2.0696845054626465, + "logps/chosen": -280.98577880859375, + "logps/rejected": -406.4005432128906, + "loss": 0.6743, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08214416354894638, + "rewards/margins": -0.07327880710363388, + "rewards/rejected": 0.15542297065258026, + "step": 738 + }, + { + "epoch": 0.04, + "learning_rate": 9.995584106816726e-08, + "logits/chosen": -2.236772060394287, + "logits/rejected": -2.2298061847686768, + "logps/chosen": -3.0685887336730957, + "logps/rejected": -142.84146118164062, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4836272056491e-07, + "rewards/margins": 0.015215516090393066, + "rewards/rejected": -0.015216064639389515, + "step": 739 + }, + { + "epoch": 0.04, + "learning_rate": 9.99554441949066e-08, + "logits/chosen": -2.085270881652832, + "logits/rejected": -2.0802972316741943, + "logps/chosen": -51.424800872802734, + "logps/rejected": -225.67620849609375, + "loss": 0.6739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026536941528320312, + "rewards/margins": 0.05437660217285156, + "rewards/rejected": -0.02783966064453125, + "step": 740 + }, + { + "epoch": 0.04, + "learning_rate": 9.995504554698422e-08, + "logits/chosen": -2.3575336933135986, + "logits/rejected": -2.311277389526367, + "logps/chosen": -60.397911071777344, + "logps/rejected": -194.14007568359375, + "loss": 0.6907, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02027740515768528, + "rewards/margins": -0.0005218498408794403, + "rewards/rejected": 0.02079925499856472, + "step": 741 + }, + { + "epoch": 0.04, + "learning_rate": 9.995464512441424e-08, + "logits/chosen": -2.186023712158203, + "logits/rejected": -2.134256362915039, + "logps/chosen": -227.40771484375, + "logps/rejected": -341.31671142578125, + "loss": 0.6219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19841308891773224, + "rewards/margins": 0.04254761338233948, + "rewards/rejected": 0.15586547553539276, + "step": 742 + }, + { + "epoch": 0.04, + "learning_rate": 9.995424292721093e-08, + "logits/chosen": -2.103748083114624, + "logits/rejected": -2.1080269813537598, + "logps/chosen": -2.3364933440461755e-05, + "logps/rejected": -153.3582763671875, + "loss": 0.6804, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.384149411227554e-08, + "rewards/margins": 0.05141756311058998, + "rewards/rejected": -0.05141754075884819, + "step": 743 + }, + { + "epoch": 0.04, + "learning_rate": 9.995383895538856e-08, + "logits/chosen": -1.9550554752349854, + "logits/rejected": -1.9564682245254517, + "logps/chosen": -17.002687454223633, + "logps/rejected": -189.47076416015625, + "loss": 0.6956, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02328033559024334, + "rewards/margins": -0.0285568218678236, + "rewards/rejected": 0.05183715745806694, + "step": 744 + }, + { + "epoch": 0.04, + "learning_rate": 9.995343320896146e-08, + "logits/chosen": -2.247765302658081, + "logits/rejected": -2.246042013168335, + "logps/chosen": -6.295546054840088, + "logps/rejected": -156.568115234375, + "loss": 0.6819, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.307891918462701e-05, + "rewards/margins": 0.05545177683234215, + "rewards/rejected": -0.05547485500574112, + "step": 745 + }, + { + "epoch": 0.04, + "learning_rate": 9.995302568794409e-08, + "logits/chosen": -2.2771050930023193, + "logits/rejected": -2.2572760581970215, + "logps/chosen": -185.03631591796875, + "logps/rejected": -299.50347900390625, + "loss": 0.626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15578308701515198, + "rewards/margins": 0.09980469197034836, + "rewards/rejected": 0.05597839504480362, + "step": 746 + }, + { + "epoch": 0.04, + "learning_rate": 9.995261639235088e-08, + "logits/chosen": -2.065889835357666, + "logits/rejected": -2.0688881874084473, + "logps/chosen": -11.250786781311035, + "logps/rejected": -71.66415405273438, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.536743306171047e-08, + "rewards/margins": 0.014913083054125309, + "rewards/rejected": -0.01491317804902792, + "step": 747 + }, + { + "epoch": 0.04, + "learning_rate": 9.995220532219639e-08, + "logits/chosen": -2.249934434890747, + "logits/rejected": -2.238785982131958, + "logps/chosen": -59.26569747924805, + "logps/rejected": -203.81678771972656, + "loss": 0.6782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0070556639693677425, + "rewards/margins": 0.02883148193359375, + "rewards/rejected": -0.02177581749856472, + "step": 748 + }, + { + "epoch": 0.04, + "learning_rate": 9.995179247749523e-08, + "logits/chosen": -2.1072497367858887, + "logits/rejected": -2.077712059020996, + "logps/chosen": -166.66932678222656, + "logps/rejected": -268.67724609375, + "loss": 0.6305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11185455322265625, + "rewards/margins": 0.13630524277687073, + "rewards/rejected": -0.02445068396627903, + "step": 749 + }, + { + "epoch": 0.04, + "learning_rate": 9.995137785826206e-08, + "logits/chosen": -2.2965521812438965, + "logits/rejected": -2.3262016773223877, + "logps/chosen": -169.35548400878906, + "logps/rejected": -259.14385986328125, + "loss": 0.6635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04483490064740181, + "rewards/margins": 0.05258026346564293, + "rewards/rejected": -0.0077453614212572575, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 9.995096146451163e-08, + "logits/chosen": -2.16532564163208, + "logits/rejected": -2.1891591548919678, + "logps/chosen": -243.03248596191406, + "logps/rejected": -592.44580078125, + "loss": 0.738, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.13334350287914276, + "rewards/margins": -0.34605103731155396, + "rewards/rejected": 0.4793945252895355, + "step": 751 + }, + { + "epoch": 0.04, + "learning_rate": 9.99505432962587e-08, + "logits/chosen": -2.134695053100586, + "logits/rejected": -2.1096692085266113, + "logps/chosen": -106.06581115722656, + "logps/rejected": -309.73876953125, + "loss": 0.6536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048851776868104935, + "rewards/margins": 0.10522690415382385, + "rewards/rejected": -0.05637512356042862, + "step": 752 + }, + { + "epoch": 0.04, + "learning_rate": 9.995012335351811e-08, + "logits/chosen": -2.0946457386016846, + "logits/rejected": -2.0905892848968506, + "logps/chosen": -0.0011985893361270428, + "logps/rejected": -154.51756286621094, + "loss": 0.6862, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5650422938051634e-05, + "rewards/margins": 0.02750357612967491, + "rewards/rejected": -0.02751922607421875, + "step": 753 + }, + { + "epoch": 0.04, + "learning_rate": 9.994970163630483e-08, + "logits/chosen": -2.2241976261138916, + "logits/rejected": -2.181316614151001, + "logps/chosen": -0.0036513369996100664, + "logps/rejected": -231.8045654296875, + "loss": 0.6854, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0072103982092813e-05, + "rewards/margins": 0.031404562294483185, + "rewards/rejected": -0.03143463283777237, + "step": 754 + }, + { + "epoch": 0.04, + "learning_rate": 9.994927814463383e-08, + "logits/chosen": -2.434981107711792, + "logits/rejected": -2.4316391944885254, + "logps/chosen": -1.4066084623336792, + "logps/rejected": -185.89984130859375, + "loss": 0.673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019014669582247734, + "rewards/margins": 0.10949179530143738, + "rewards/rejected": -0.12850646674633026, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 9.994885287852012e-08, + "logits/chosen": -2.0219430923461914, + "logits/rejected": -2.0285069942474365, + "logps/chosen": -372.00994873046875, + "logps/rejected": -435.7940368652344, + "loss": 0.6961, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12388000637292862, + "rewards/margins": -0.16473999619483948, + "rewards/rejected": 0.2886199951171875, + "step": 756 + }, + { + "epoch": 0.04, + "learning_rate": 9.994842583797885e-08, + "logits/chosen": -2.0205485820770264, + "logits/rejected": -2.002185821533203, + "logps/chosen": -73.16007232666016, + "logps/rejected": -239.5003662109375, + "loss": 0.6864, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02746124379336834, + "rewards/margins": -0.00382843054831028, + "rewards/rejected": 0.03128967434167862, + "step": 757 + }, + { + "epoch": 0.04, + "learning_rate": 9.994799702302514e-08, + "logits/chosen": -2.3008670806884766, + "logits/rejected": -2.295971632003784, + "logps/chosen": -26.48572540283203, + "logps/rejected": -260.0434265136719, + "loss": 0.6822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006834983825683594, + "rewards/margins": 0.061734966933727264, + "rewards/rejected": -0.06856995075941086, + "step": 758 + }, + { + "epoch": 0.04, + "learning_rate": 9.994756643367428e-08, + "logits/chosen": -2.189741611480713, + "logits/rejected": -2.127664089202881, + "logps/chosen": -212.05752563476562, + "logps/rejected": -308.757080078125, + "loss": 0.754, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09197693318128586, + "rewards/margins": -0.12286987900733948, + "rewards/rejected": 0.03089294396340847, + "step": 759 + }, + { + "epoch": 0.04, + "learning_rate": 9.994713406994153e-08, + "logits/chosen": -2.281486749649048, + "logits/rejected": -2.276289463043213, + "logps/chosen": -20.911014556884766, + "logps/rejected": -181.841064453125, + "loss": 0.6754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01287612970918417, + "rewards/margins": 0.049491122364997864, + "rewards/rejected": -0.03661499172449112, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 9.994669993184226e-08, + "logits/chosen": -2.2242655754089355, + "logits/rejected": -2.2178542613983154, + "logps/chosen": -58.12529754638672, + "logps/rejected": -303.9329833984375, + "loss": 0.6883, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01343307550996542, + "rewards/margins": 0.01401901338249445, + "rewards/rejected": -0.0005859375232830644, + "step": 761 + }, + { + "epoch": 0.04, + "learning_rate": 9.994626401939188e-08, + "logits/chosen": -2.061156988143921, + "logits/rejected": -2.069568634033203, + "logps/chosen": -165.11932373046875, + "logps/rejected": -140.2161865234375, + "loss": 0.6193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19627074897289276, + "rewards/margins": 0.059805288910865784, + "rewards/rejected": 0.13646546006202698, + "step": 762 + }, + { + "epoch": 0.04, + "learning_rate": 9.994582633260591e-08, + "logits/chosen": -2.294564723968506, + "logits/rejected": -2.280557632446289, + "logps/chosen": -6.512838363647461, + "logps/rejected": -141.52207946777344, + "loss": 0.7068, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.023972798138856888, + "rewards/margins": -0.026403523981571198, + "rewards/rejected": 0.0024307251442223787, + "step": 763 + }, + { + "epoch": 0.04, + "learning_rate": 9.994538687149987e-08, + "logits/chosen": -2.159587860107422, + "logits/rejected": -2.2085330486297607, + "logps/chosen": -283.82574462890625, + "logps/rejected": -240.6002197265625, + "loss": 0.6114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16163329780101776, + "rewards/margins": 0.10759276896715164, + "rewards/rejected": 0.05404052883386612, + "step": 764 + }, + { + "epoch": 0.04, + "learning_rate": 9.994494563608938e-08, + "logits/chosen": -2.080455780029297, + "logits/rejected": -2.051335573196411, + "logps/chosen": -0.37959739565849304, + "logps/rejected": -500.1689453125, + "loss": 0.6776, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1718273526639678e-05, + "rewards/margins": 0.0555415041744709, + "rewards/rejected": -0.05552978441119194, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 9.994450262639011e-08, + "logits/chosen": -1.9644801616668701, + "logits/rejected": -1.8474886417388916, + "logps/chosen": -239.27593994140625, + "logps/rejected": -384.614501953125, + "loss": 0.6216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23070983588695526, + "rewards/margins": 0.023428335785865784, + "rewards/rejected": 0.20728150010108948, + "step": 766 + }, + { + "epoch": 0.04, + "learning_rate": 9.994405784241782e-08, + "logits/chosen": -1.7867428064346313, + "logits/rejected": -1.7848093509674072, + "logps/chosen": -27.189908981323242, + "logps/rejected": -88.65144348144531, + "loss": 0.6747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024849509820342064, + "rewards/margins": 0.05143718793988228, + "rewards/rejected": -0.026587678119540215, + "step": 767 + }, + { + "epoch": 0.04, + "learning_rate": 9.994361128418827e-08, + "logits/chosen": -2.26184344291687, + "logits/rejected": -2.22892689704895, + "logps/chosen": -284.1076354980469, + "logps/rejected": -393.6418762207031, + "loss": 0.6307, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.25867006182670593, + "rewards/margins": -0.0053253173828125, + "rewards/rejected": 0.26399537920951843, + "step": 768 + }, + { + "epoch": 0.04, + "learning_rate": 9.994316295171735e-08, + "logits/chosen": -2.3046600818634033, + "logits/rejected": -2.299252986907959, + "logps/chosen": -34.87898254394531, + "logps/rejected": -170.58761596679688, + "loss": 0.6668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055953215807676315, + "rewards/margins": 0.043224334716796875, + "rewards/rejected": 0.012728882022202015, + "step": 769 + }, + { + "epoch": 0.04, + "learning_rate": 9.994271284502098e-08, + "logits/chosen": -2.1916191577911377, + "logits/rejected": -2.196763038635254, + "logps/chosen": -11.136449813842773, + "logps/rejected": -140.70361328125, + "loss": 0.6888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0026255608536303043, + "rewards/margins": 0.027022838592529297, + "rewards/rejected": -0.02439727820456028, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 9.994226096411517e-08, + "logits/chosen": -2.1878044605255127, + "logits/rejected": -2.1857476234436035, + "logps/chosen": -19.142906188964844, + "logps/rejected": -203.66879272460938, + "loss": 0.6924, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.023349953815340996, + "rewards/margins": -0.012833213433623314, + "rewards/rejected": 0.03618316724896431, + "step": 771 + }, + { + "epoch": 0.04, + "learning_rate": 9.994180730901595e-08, + "logits/chosen": -2.2619926929473877, + "logits/rejected": -2.2452688217163086, + "logps/chosen": -69.27102661132812, + "logps/rejected": -217.58119201660156, + "loss": 0.6602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049112703651189804, + "rewards/margins": 0.07042007893323898, + "rewards/rejected": -0.02130737341940403, + "step": 772 + }, + { + "epoch": 0.04, + "learning_rate": 9.994135187973944e-08, + "logits/chosen": -2.208162546157837, + "logits/rejected": -2.206388235092163, + "logps/chosen": -5.085288047790527, + "logps/rejected": -144.13954162597656, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.012403822503983974, + "rewards/margins": -0.008331346325576305, + "rewards/rejected": 0.02073516882956028, + "step": 773 + }, + { + "epoch": 0.05, + "learning_rate": 9.994089467630182e-08, + "logits/chosen": -2.1671533584594727, + "logits/rejected": -2.134037494659424, + "logps/chosen": -106.76937866210938, + "logps/rejected": -275.22900390625, + "loss": 0.702, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00789489783346653, + "rewards/margins": -0.04284057766199112, + "rewards/rejected": 0.0507354736328125, + "step": 774 + }, + { + "epoch": 0.05, + "learning_rate": 9.994043569871933e-08, + "logits/chosen": -2.3533082008361816, + "logits/rejected": -2.3425166606903076, + "logps/chosen": -8.513423919677734, + "logps/rejected": -121.16876220703125, + "loss": 0.6838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012390995398163795, + "rewards/margins": 0.024027347564697266, + "rewards/rejected": -0.011636353097856045, + "step": 775 + }, + { + "epoch": 0.05, + "learning_rate": 9.993997494700829e-08, + "logits/chosen": -2.171358108520508, + "logits/rejected": -2.160027027130127, + "logps/chosen": -205.09921264648438, + "logps/rejected": -238.76116943359375, + "loss": 0.6642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06971435993909836, + "rewards/margins": 0.023258976638317108, + "rewards/rejected": 0.04645538330078125, + "step": 776 + }, + { + "epoch": 0.05, + "learning_rate": 9.993951242118505e-08, + "logits/chosen": -2.2569456100463867, + "logits/rejected": -2.210491418838501, + "logps/chosen": -268.43890380859375, + "logps/rejected": -466.997314453125, + "loss": 0.6271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16970215737819672, + "rewards/margins": 0.04001465439796448, + "rewards/rejected": 0.12968750298023224, + "step": 777 + }, + { + "epoch": 0.05, + "learning_rate": 9.993904812126606e-08, + "logits/chosen": -2.194925308227539, + "logits/rejected": -2.1961493492126465, + "logps/chosen": -1.4172345399856567, + "logps/rejected": -228.36196899414062, + "loss": 0.7038, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0029683231841772795, + "rewards/margins": -0.04649554565548897, + "rewards/rejected": 0.04352722316980362, + "step": 778 + }, + { + "epoch": 0.05, + "learning_rate": 9.993858204726779e-08, + "logits/chosen": -2.010399580001831, + "logits/rejected": -2.0076353549957275, + "logps/chosen": -52.074119567871094, + "logps/rejected": -181.840576171875, + "loss": 0.6749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013554763980209827, + "rewards/margins": 0.0817440003156662, + "rewards/rejected": -0.09529876708984375, + "step": 779 + }, + { + "epoch": 0.05, + "learning_rate": 9.99381141992068e-08, + "logits/chosen": -2.038071393966675, + "logits/rejected": -2.0351483821868896, + "logps/chosen": -69.27838897705078, + "logps/rejected": -207.39952087402344, + "loss": 0.677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0040840147994458675, + "rewards/margins": 0.03312302008271217, + "rewards/rejected": -0.03720703348517418, + "step": 780 + }, + { + "epoch": 0.05, + "learning_rate": 9.993764457709973e-08, + "logits/chosen": -2.0746994018554688, + "logits/rejected": -2.0652406215667725, + "logps/chosen": -0.010963651351630688, + "logps/rejected": -132.193115234375, + "loss": 0.6974, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.156155748409219e-05, + "rewards/margins": -0.0168087650090456, + "rewards/rejected": 0.01675720326602459, + "step": 781 + }, + { + "epoch": 0.05, + "learning_rate": 9.993717318096326e-08, + "logits/chosen": -2.1966371536254883, + "logits/rejected": -2.1954452991485596, + "logps/chosen": -0.08054545521736145, + "logps/rejected": -73.37864685058594, + "loss": 0.6884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008942224085330963, + "rewards/margins": 0.020915279164910316, + "rewards/rejected": -0.02002105675637722, + "step": 782 + }, + { + "epoch": 0.05, + "learning_rate": 9.993670001081412e-08, + "logits/chosen": -2.1330909729003906, + "logits/rejected": -2.119361400604248, + "logps/chosen": -14.250097274780273, + "logps/rejected": -195.656982421875, + "loss": 0.6738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025000859051942825, + "rewards/margins": 0.053420353680849075, + "rewards/rejected": -0.02841949462890625, + "step": 783 + }, + { + "epoch": 0.05, + "learning_rate": 9.993622506666914e-08, + "logits/chosen": -2.223569393157959, + "logits/rejected": -2.2146501541137695, + "logps/chosen": -3.4093623980879784e-05, + "logps/rejected": -92.6138916015625, + "loss": 0.6886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0728763299994171e-07, + "rewards/margins": 0.018098341301083565, + "rewards/rejected": -0.01809844933450222, + "step": 784 + }, + { + "epoch": 0.05, + "learning_rate": 9.993574834854517e-08, + "logits/chosen": -2.061134099960327, + "logits/rejected": -1.9895570278167725, + "logps/chosen": -242.19866943359375, + "logps/rejected": -488.1403503417969, + "loss": 0.6706, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.146514892578125, + "rewards/margins": -0.06413574516773224, + "rewards/rejected": 0.21065063774585724, + "step": 785 + }, + { + "epoch": 0.05, + "learning_rate": 9.993526985645916e-08, + "logits/chosen": -2.0737037658691406, + "logits/rejected": -2.0928399562835693, + "logps/chosen": -174.74583435058594, + "logps/rejected": -238.033447265625, + "loss": 0.6558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08305817097425461, + "rewards/margins": 0.04847259819507599, + "rewards/rejected": 0.03458557277917862, + "step": 786 + }, + { + "epoch": 0.05, + "learning_rate": 9.993478959042812e-08, + "logits/chosen": -2.1967966556549072, + "logits/rejected": -2.1872458457946777, + "logps/chosen": -41.81818389892578, + "logps/rejected": -111.84742736816406, + "loss": 0.684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021520614624023438, + "rewards/margins": 0.04988441616296768, + "rewards/rejected": -0.07140503078699112, + "step": 787 + }, + { + "epoch": 0.05, + "learning_rate": 9.993430755046907e-08, + "logits/chosen": -1.9753390550613403, + "logits/rejected": -2.0166711807250977, + "logps/chosen": -284.41925048828125, + "logps/rejected": -387.07562255859375, + "loss": 0.6029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2742981016635895, + "rewards/margins": 0.023352056741714478, + "rewards/rejected": 0.250946044921875, + "step": 788 + }, + { + "epoch": 0.05, + "learning_rate": 9.993382373659918e-08, + "logits/chosen": -2.0906903743743896, + "logits/rejected": -2.099130868911743, + "logps/chosen": -22.379013061523438, + "logps/rejected": -69.8652114868164, + "loss": 0.6879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00899276789277792, + "rewards/margins": 0.01442565955221653, + "rewards/rejected": -0.005432892125099897, + "step": 789 + }, + { + "epoch": 0.05, + "learning_rate": 9.99333381488356e-08, + "logits/chosen": -2.1335737705230713, + "logits/rejected": -2.1267430782318115, + "logps/chosen": -75.91804504394531, + "logps/rejected": -175.68460083007812, + "loss": 0.6226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09888305515050888, + "rewards/margins": 0.21065369248390198, + "rewards/rejected": -0.1117706298828125, + "step": 790 + }, + { + "epoch": 0.05, + "learning_rate": 9.993285078719561e-08, + "logits/chosen": -2.0234668254852295, + "logits/rejected": -2.0254087448120117, + "logps/chosen": -161.01637268066406, + "logps/rejected": -406.73284912109375, + "loss": 0.7195, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04960174486041069, + "rewards/margins": -0.17801666259765625, + "rewards/rejected": 0.22761841118335724, + "step": 791 + }, + { + "epoch": 0.05, + "learning_rate": 9.993236165169652e-08, + "logits/chosen": -2.1595218181610107, + "logits/rejected": -2.160700559616089, + "logps/chosen": -0.012316208332777023, + "logps/rejected": -83.39251708984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0001278278505196795, + "rewards/margins": -0.0036235456354916096, + "rewards/rejected": 0.0037513733841478825, + "step": 792 + }, + { + "epoch": 0.05, + "learning_rate": 9.993187074235568e-08, + "logits/chosen": -2.2932844161987305, + "logits/rejected": -2.2759604454040527, + "logps/chosen": -76.56060028076172, + "logps/rejected": -294.4287109375, + "loss": 0.6491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06463547050952911, + "rewards/margins": 0.11598129570484161, + "rewards/rejected": -0.0513458251953125, + "step": 793 + }, + { + "epoch": 0.05, + "learning_rate": 9.993137805919056e-08, + "logits/chosen": -2.19854474067688, + "logits/rejected": -2.149632692337036, + "logps/chosen": -226.87391662597656, + "logps/rejected": -487.309814453125, + "loss": 0.6188, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2705124020576477, + "rewards/margins": -0.011286914348602295, + "rewards/rejected": 0.28179931640625, + "step": 794 + }, + { + "epoch": 0.05, + "learning_rate": 9.993088360221865e-08, + "logits/chosen": -2.0557920932769775, + "logits/rejected": -2.0238757133483887, + "logps/chosen": -446.7569580078125, + "logps/rejected": -501.6933898925781, + "loss": 0.6075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.260153204202652, + "rewards/margins": 0.06946411728858948, + "rewards/rejected": 0.1906890869140625, + "step": 795 + }, + { + "epoch": 0.05, + "learning_rate": 9.993038737145751e-08, + "logits/chosen": -2.008120059967041, + "logits/rejected": -1.9913772344589233, + "logps/chosen": -32.19930648803711, + "logps/rejected": -207.64114379882812, + "loss": 0.6871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012989044189453125, + "rewards/margins": 0.030434036627411842, + "rewards/rejected": -0.02913513220846653, + "step": 796 + }, + { + "epoch": 0.05, + "learning_rate": 9.992988936692479e-08, + "logits/chosen": -2.2377257347106934, + "logits/rejected": -2.2448925971984863, + "logps/chosen": -0.0005281136836856604, + "logps/rejected": -185.90838623046875, + "loss": 0.6879, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.9593002532665196e-08, + "rewards/margins": 0.020990049466490746, + "rewards/rejected": -0.02098998986184597, + "step": 797 + }, + { + "epoch": 0.05, + "learning_rate": 9.992938958863815e-08, + "logits/chosen": -2.171213388442993, + "logits/rejected": -2.1308069229125977, + "logps/chosen": -50.09516143798828, + "logps/rejected": -283.3621520996094, + "loss": 0.6211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07027168571949005, + "rewards/margins": 0.22994881868362427, + "rewards/rejected": -0.15967713296413422, + "step": 798 + }, + { + "epoch": 0.05, + "learning_rate": 9.992888803661537e-08, + "logits/chosen": -2.2038071155548096, + "logits/rejected": -2.199881076812744, + "logps/chosen": -0.0006101926555857062, + "logps/rejected": -108.77732849121094, + "loss": 0.6899, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960464477539063e-08, + "rewards/margins": 0.01323083695024252, + "rewards/rejected": -0.013230896554887295, + "step": 799 + }, + { + "epoch": 0.05, + "learning_rate": 9.992838471087425e-08, + "logits/chosen": -2.080219268798828, + "logits/rejected": -2.05303955078125, + "logps/chosen": -271.8619689941406, + "logps/rejected": -383.157470703125, + "loss": 0.6287, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2577270567417145, + "rewards/margins": 0.001452624797821045, + "rewards/rejected": 0.25627443194389343, + "step": 800 + }, + { + "epoch": 0.05, + "learning_rate": 9.992787961143267e-08, + "logits/chosen": -2.230950117111206, + "logits/rejected": -2.2332804203033447, + "logps/chosen": -67.43898010253906, + "logps/rejected": -145.84495544433594, + "loss": 0.6992, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02724762074649334, + "rewards/margins": -0.04865875095129013, + "rewards/rejected": 0.07590637356042862, + "step": 801 + }, + { + "epoch": 0.05, + "learning_rate": 9.99273727383086e-08, + "logits/chosen": -2.2103583812713623, + "logits/rejected": -2.2097465991973877, + "logps/chosen": -27.987850189208984, + "logps/rejected": -73.08028411865234, + "loss": 0.7131, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.021384621039032936, + "rewards/margins": -0.04349537193775177, + "rewards/rejected": 0.022110749036073685, + "step": 802 + }, + { + "epoch": 0.05, + "learning_rate": 9.992686409152003e-08, + "logits/chosen": -2.2870728969573975, + "logits/rejected": -2.2768115997314453, + "logps/chosen": -14.756255149841309, + "logps/rejected": -177.4575958251953, + "loss": 0.6746, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0422210329561494e-05, + "rewards/margins": 0.06952829658985138, + "rewards/rejected": -0.06955871731042862, + "step": 803 + }, + { + "epoch": 0.05, + "learning_rate": 9.992635367108503e-08, + "logits/chosen": -1.9659620523452759, + "logits/rejected": -1.9625797271728516, + "logps/chosen": -11.111698150634766, + "logps/rejected": -91.66773986816406, + "loss": 0.6731, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012144947424530983, + "rewards/margins": 0.0563054084777832, + "rewards/rejected": -0.04416046291589737, + "step": 804 + }, + { + "epoch": 0.05, + "learning_rate": 9.992584147702173e-08, + "logits/chosen": -2.1622300148010254, + "logits/rejected": -2.1338400840759277, + "logps/chosen": -50.91654586791992, + "logps/rejected": -203.15640258789062, + "loss": 0.6946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011471176519989967, + "rewards/margins": 0.012852858752012253, + "rewards/rejected": -0.02432403527200222, + "step": 805 + }, + { + "epoch": 0.05, + "learning_rate": 9.992532750934832e-08, + "logits/chosen": -2.275174140930176, + "logits/rejected": -2.248901605606079, + "logps/chosen": -183.3502197265625, + "logps/rejected": -422.403564453125, + "loss": 0.7137, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.15108337998390198, + "rewards/margins": -0.23598632216453552, + "rewards/rejected": 0.3870697021484375, + "step": 806 + }, + { + "epoch": 0.05, + "learning_rate": 9.992481176808308e-08, + "logits/chosen": -2.0933854579925537, + "logits/rejected": -2.066967725753784, + "logps/chosen": -347.5791320800781, + "logps/rejected": -584.7486572265625, + "loss": 0.5731, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3815460205078125, + "rewards/margins": 0.07854309678077698, + "rewards/rejected": 0.3030029237270355, + "step": 807 + }, + { + "epoch": 0.05, + "learning_rate": 9.992429425324429e-08, + "logits/chosen": -2.1598289012908936, + "logits/rejected": -2.167930841445923, + "logps/chosen": -192.45448303222656, + "logps/rejected": -326.8538818359375, + "loss": 0.7092, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0035446167457848787, + "rewards/margins": -0.1461746245622635, + "rewards/rejected": 0.14971923828125, + "step": 808 + }, + { + "epoch": 0.05, + "learning_rate": 9.992377496485039e-08, + "logits/chosen": -2.2440929412841797, + "logits/rejected": -2.235337495803833, + "logps/chosen": -48.45396423339844, + "logps/rejected": -179.304931640625, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05963249132037163, + "rewards/margins": -0.06176643446087837, + "rewards/rejected": 0.12139892578125, + "step": 809 + }, + { + "epoch": 0.05, + "learning_rate": 9.992325390291978e-08, + "logits/chosen": -2.1083202362060547, + "logits/rejected": -2.087026596069336, + "logps/chosen": -233.25283813476562, + "logps/rejected": -323.78179931640625, + "loss": 0.7313, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03634338453412056, + "rewards/margins": -0.2033843994140625, + "rewards/rejected": 0.23972778022289276, + "step": 810 + }, + { + "epoch": 0.05, + "learning_rate": 9.9922731067471e-08, + "logits/chosen": -1.9560238122940063, + "logits/rejected": -1.9257241487503052, + "logps/chosen": -279.479248046875, + "logps/rejected": -442.0901794433594, + "loss": 0.5808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2472991943359375, + "rewards/margins": 0.19248656928539276, + "rewards/rejected": 0.05481262132525444, + "step": 811 + }, + { + "epoch": 0.05, + "learning_rate": 9.99222064585226e-08, + "logits/chosen": -2.1922225952148438, + "logits/rejected": -2.1805012226104736, + "logps/chosen": -0.0014407768612727523, + "logps/rejected": -133.58848571777344, + "loss": 0.6841, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.988035577000119e-06, + "rewards/margins": 0.03650243207812309, + "rewards/rejected": -0.03649444505572319, + "step": 812 + }, + { + "epoch": 0.05, + "learning_rate": 9.992168007609325e-08, + "logits/chosen": -2.214804172515869, + "logits/rejected": -2.1838607788085938, + "logps/chosen": -194.55975341796875, + "logps/rejected": -401.06903076171875, + "loss": 0.6737, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19098205864429474, + "rewards/margins": -0.17713318765163422, + "rewards/rejected": 0.36811524629592896, + "step": 813 + }, + { + "epoch": 0.05, + "learning_rate": 9.992115192020162e-08, + "logits/chosen": -2.3214452266693115, + "logits/rejected": -2.314486265182495, + "logps/chosen": -15.45715618133545, + "logps/rejected": -119.72970581054688, + "loss": 0.6589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012020778842270374, + "rewards/margins": 0.12467489391565323, + "rewards/rejected": -0.11265411227941513, + "step": 814 + }, + { + "epoch": 0.05, + "learning_rate": 9.992062199086649e-08, + "logits/chosen": -1.997545838356018, + "logits/rejected": -1.9844163656234741, + "logps/chosen": -218.21482849121094, + "logps/rejected": -381.382568359375, + "loss": 0.6846, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12972259521484375, + "rewards/margins": -0.1638382077217102, + "rewards/rejected": 0.29356080293655396, + "step": 815 + }, + { + "epoch": 0.05, + "learning_rate": 9.992009028810666e-08, + "logits/chosen": -2.1098663806915283, + "logits/rejected": -2.064556360244751, + "logps/chosen": -165.93893432617188, + "logps/rejected": -292.8531494140625, + "loss": 0.6769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02497100830078125, + "rewards/margins": 0.05862579494714737, + "rewards/rejected": -0.03365478664636612, + "step": 816 + }, + { + "epoch": 0.05, + "learning_rate": 9.991955681194106e-08, + "logits/chosen": -2.2933545112609863, + "logits/rejected": -2.2727367877960205, + "logps/chosen": -31.04891586303711, + "logps/rejected": -225.59677124023438, + "loss": 0.6552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01792278327047825, + "rewards/margins": 0.16952228546142578, + "rewards/rejected": -0.18744507431983948, + "step": 817 + }, + { + "epoch": 0.05, + "learning_rate": 9.99190215623886e-08, + "logits/chosen": -1.9592921733856201, + "logits/rejected": -1.8565479516983032, + "logps/chosen": -424.0066833496094, + "logps/rejected": -817.9241943359375, + "loss": 0.6641, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14855042099952698, + "rewards/margins": -0.0897064208984375, + "rewards/rejected": 0.23825684189796448, + "step": 818 + }, + { + "epoch": 0.05, + "learning_rate": 9.991848453946834e-08, + "logits/chosen": -2.1104795932769775, + "logits/rejected": -2.0289664268493652, + "logps/chosen": -258.43145751953125, + "logps/rejected": -445.7212219238281, + "loss": 0.6038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28983765840530396, + "rewards/margins": 0.015591442584991455, + "rewards/rejected": 0.2742462158203125, + "step": 819 + }, + { + "epoch": 0.05, + "learning_rate": 9.991794574319931e-08, + "logits/chosen": -2.095592737197876, + "logits/rejected": -2.0939459800720215, + "logps/chosen": -28.75048065185547, + "logps/rejected": -107.31340789794922, + "loss": 0.6939, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024249648675322533, + "rewards/margins": 0.01877479813992977, + "rewards/rejected": -0.043024446815252304, + "step": 820 + }, + { + "epoch": 0.05, + "learning_rate": 9.991740517360068e-08, + "logits/chosen": -2.166175127029419, + "logits/rejected": -2.16475248336792, + "logps/chosen": -26.271852493286133, + "logps/rejected": -113.75167083740234, + "loss": 0.6786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02454853057861328, + "rewards/margins": 0.03308811038732529, + "rewards/rejected": -0.008539581671357155, + "step": 821 + }, + { + "epoch": 0.05, + "learning_rate": 9.991686283069165e-08, + "logits/chosen": -2.10863995552063, + "logits/rejected": -2.0974745750427246, + "logps/chosen": -235.1146240234375, + "logps/rejected": -318.976318359375, + "loss": 0.5977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27500611543655396, + "rewards/margins": 0.08217163383960724, + "rewards/rejected": 0.19283448159694672, + "step": 822 + }, + { + "epoch": 0.05, + "learning_rate": 9.991631871449146e-08, + "logits/chosen": -2.111321449279785, + "logits/rejected": -2.094815969467163, + "logps/chosen": -199.3238525390625, + "logps/rejected": -323.41253662109375, + "loss": 0.6735, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14314880967140198, + "rewards/margins": -0.06524047255516052, + "rewards/rejected": 0.2083892822265625, + "step": 823 + }, + { + "epoch": 0.05, + "learning_rate": 9.991577282501949e-08, + "logits/chosen": -2.093012571334839, + "logits/rejected": -2.103616237640381, + "logps/chosen": -262.43487548828125, + "logps/rejected": -434.6497802734375, + "loss": 0.6332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14290772378444672, + "rewards/margins": 0.031924448907375336, + "rewards/rejected": 0.11098327487707138, + "step": 824 + }, + { + "epoch": 0.05, + "learning_rate": 9.991522516229509e-08, + "logits/chosen": -2.347606658935547, + "logits/rejected": -2.3701999187469482, + "logps/chosen": -238.19143676757812, + "logps/rejected": -340.92156982421875, + "loss": 0.6847, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12356109917163849, + "rewards/margins": -0.12224578857421875, + "rewards/rejected": 0.24580688774585724, + "step": 825 + }, + { + "epoch": 0.05, + "learning_rate": 9.991467572633774e-08, + "logits/chosen": -2.0789077281951904, + "logits/rejected": -2.067533016204834, + "logps/chosen": -29.483076095581055, + "logps/rejected": -159.111083984375, + "loss": 0.6526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06217193603515625, + "rewards/margins": 0.11104736477136612, + "rewards/rejected": -0.04887542873620987, + "step": 826 + }, + { + "epoch": 0.05, + "learning_rate": 9.991412451716694e-08, + "logits/chosen": -2.2670183181762695, + "logits/rejected": -2.2672929763793945, + "logps/chosen": -10.59535026550293, + "logps/rejected": -61.265174865722656, + "loss": 0.6858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0071843150071799755, + "rewards/margins": 0.03328504413366318, + "rewards/rejected": -0.04046935960650444, + "step": 827 + }, + { + "epoch": 0.05, + "learning_rate": 9.991357153480229e-08, + "logits/chosen": -2.071645736694336, + "logits/rejected": -2.0624587535858154, + "logps/chosen": -277.96746826171875, + "logps/rejected": -399.0975341796875, + "loss": 0.6442, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.20317688584327698, + "rewards/margins": -0.026132196187973022, + "rewards/rejected": 0.22930908203125, + "step": 828 + }, + { + "epoch": 0.05, + "learning_rate": 9.991301677926341e-08, + "logits/chosen": -2.1877200603485107, + "logits/rejected": -2.13236927986145, + "logps/chosen": -204.08013916015625, + "logps/rejected": -453.2266540527344, + "loss": 0.617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26349639892578125, + "rewards/margins": 0.027159109711647034, + "rewards/rejected": 0.23633728921413422, + "step": 829 + }, + { + "epoch": 0.05, + "learning_rate": 9.991246025057003e-08, + "logits/chosen": -1.9506351947784424, + "logits/rejected": -1.943285346031189, + "logps/chosen": -34.57368087768555, + "logps/rejected": -139.8800048828125, + "loss": 0.665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07115745544433594, + "rewards/margins": 0.0657070130109787, + "rewards/rejected": 0.005450439639389515, + "step": 830 + }, + { + "epoch": 0.05, + "learning_rate": 9.991190194874192e-08, + "logits/chosen": -2.096858024597168, + "logits/rejected": -2.07734751701355, + "logps/chosen": -39.44477844238281, + "logps/rejected": -372.1474914550781, + "loss": 0.6828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009531021118164062, + "rewards/margins": 0.014746475964784622, + "rewards/rejected": -0.005215454380959272, + "step": 831 + }, + { + "epoch": 0.05, + "learning_rate": 9.99113418737989e-08, + "logits/chosen": -2.335662364959717, + "logits/rejected": -2.334516763687134, + "logps/chosen": -80.37635040283203, + "logps/rejected": -280.88616943359375, + "loss": 0.6476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09653931111097336, + "rewards/margins": 0.07128296047449112, + "rewards/rejected": 0.02525634877383709, + "step": 832 + }, + { + "epoch": 0.05, + "learning_rate": 9.991078002576088e-08, + "logits/chosen": -2.203606128692627, + "logits/rejected": -2.1824464797973633, + "logps/chosen": -291.7062072753906, + "logps/rejected": -417.199462890625, + "loss": 0.6479, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.24700318276882172, + "rewards/margins": -0.023718252778053284, + "rewards/rejected": 0.270721435546875, + "step": 833 + }, + { + "epoch": 0.05, + "learning_rate": 9.991021640464781e-08, + "logits/chosen": -1.9478005170822144, + "logits/rejected": -1.8951115608215332, + "logps/chosen": -200.83914184570312, + "logps/rejected": -421.24627685546875, + "loss": 0.6385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.103668212890625, + "rewards/margins": 0.10914306342601776, + "rewards/rejected": -0.005474853795021772, + "step": 834 + }, + { + "epoch": 0.05, + "learning_rate": 9.990965101047972e-08, + "logits/chosen": -2.250231981277466, + "logits/rejected": -2.230879068374634, + "logps/chosen": -38.83504867553711, + "logps/rejected": -311.5331726074219, + "loss": 0.6573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0330963134765625, + "rewards/margins": 0.10964050143957138, + "rewards/rejected": -0.07654418796300888, + "step": 835 + }, + { + "epoch": 0.05, + "learning_rate": 9.990908384327668e-08, + "logits/chosen": -2.184828758239746, + "logits/rejected": -2.17077898979187, + "logps/chosen": -243.91357421875, + "logps/rejected": -313.5666809082031, + "loss": 0.6905, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1292572021484375, + "rewards/margins": -0.13953247666358948, + "rewards/rejected": 0.268789678812027, + "step": 836 + }, + { + "epoch": 0.05, + "learning_rate": 9.990851490305886e-08, + "logits/chosen": -2.1685738563537598, + "logits/rejected": -2.164431571960449, + "logps/chosen": -44.06586837768555, + "logps/rejected": -172.3068389892578, + "loss": 0.672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007171630859375, + "rewards/margins": 0.06968536227941513, + "rewards/rejected": -0.07040252536535263, + "step": 837 + }, + { + "epoch": 0.05, + "learning_rate": 9.990794418984647e-08, + "logits/chosen": -2.1590898036956787, + "logits/rejected": -2.1481595039367676, + "logps/chosen": -203.06814575195312, + "logps/rejected": -343.6080017089844, + "loss": 0.6206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.229248046875, + "rewards/margins": 0.04293212294578552, + "rewards/rejected": 0.18631592392921448, + "step": 838 + }, + { + "epoch": 0.05, + "learning_rate": 9.990737170365977e-08, + "logits/chosen": -2.0654587745666504, + "logits/rejected": -2.0553905963897705, + "logps/chosen": -35.93885040283203, + "logps/rejected": -234.4369354248047, + "loss": 0.6562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07422981411218643, + "rewards/margins": 0.07192420959472656, + "rewards/rejected": 0.0023056031204760075, + "step": 839 + }, + { + "epoch": 0.05, + "learning_rate": 9.990679744451909e-08, + "logits/chosen": -2.1211752891540527, + "logits/rejected": -2.1007018089294434, + "logps/chosen": -208.5374755859375, + "logps/rejected": -344.84375, + "loss": 0.6184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17785950005054474, + "rewards/margins": 0.07520141452550888, + "rewards/rejected": 0.10265808552503586, + "step": 840 + }, + { + "epoch": 0.05, + "learning_rate": 9.990622141244486e-08, + "logits/chosen": -2.218695878982544, + "logits/rejected": -2.197997808456421, + "logps/chosen": -9.488831710768864e-05, + "logps/rejected": -133.3719482421875, + "loss": 0.6971, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.4569893614389e-07, + "rewards/margins": -0.015906108543276787, + "rewards/rejected": 0.01590576209127903, + "step": 841 + }, + { + "epoch": 0.05, + "learning_rate": 9.990564360745752e-08, + "logits/chosen": -2.249833345413208, + "logits/rejected": -2.240288496017456, + "logps/chosen": -132.7083282470703, + "logps/rejected": -203.44012451171875, + "loss": 0.6529, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.18291626870632172, + "rewards/margins": -0.05527648329734802, + "rewards/rejected": 0.23819275200366974, + "step": 842 + }, + { + "epoch": 0.05, + "learning_rate": 9.990506402957761e-08, + "logits/chosen": -2.305859088897705, + "logits/rejected": -2.300204277038574, + "logps/chosen": -1.0880804061889648, + "logps/rejected": -98.05728912353516, + "loss": 0.6947, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.005702078342437744, + "rewards/margins": -0.034914530813694, + "rewards/rejected": 0.040616609156131744, + "step": 843 + }, + { + "epoch": 0.05, + "learning_rate": 9.990448267882571e-08, + "logits/chosen": -2.173013210296631, + "logits/rejected": -2.1350443363189697, + "logps/chosen": -210.573486328125, + "logps/rejected": -395.066162109375, + "loss": 0.6043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27247315645217896, + "rewards/margins": 0.054467782378196716, + "rewards/rejected": 0.21800537407398224, + "step": 844 + }, + { + "epoch": 0.05, + "learning_rate": 9.990389955522247e-08, + "logits/chosen": -2.200716972351074, + "logits/rejected": -2.2025179862976074, + "logps/chosen": -3.135404109954834, + "logps/rejected": -70.90498352050781, + "loss": 0.6974, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.002122640609741211, + "rewards/margins": -0.006976461503654718, + "rewards/rejected": 0.0048538208939135075, + "step": 845 + }, + { + "epoch": 0.05, + "learning_rate": 9.990331465878863e-08, + "logits/chosen": -2.335669994354248, + "logits/rejected": -2.3253800868988037, + "logps/chosen": -153.1482391357422, + "logps/rejected": -248.08543395996094, + "loss": 0.6458, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.222789004445076, + "rewards/margins": -0.037736520171165466, + "rewards/rejected": 0.26052552461624146, + "step": 846 + }, + { + "epoch": 0.05, + "learning_rate": 9.990272798954494e-08, + "logits/chosen": -1.9925119876861572, + "logits/rejected": -1.9638713598251343, + "logps/chosen": -230.99710083007812, + "logps/rejected": -477.3699035644531, + "loss": 0.6059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2864929139614105, + "rewards/margins": 0.013513177633285522, + "rewards/rejected": 0.272979736328125, + "step": 847 + }, + { + "epoch": 0.05, + "learning_rate": 9.990213954751223e-08, + "logits/chosen": -2.1640288829803467, + "logits/rejected": -2.1483330726623535, + "logps/chosen": -84.20401000976562, + "logps/rejected": -264.3505859375, + "loss": 0.6464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00496673583984375, + "rewards/margins": 0.18614959716796875, + "rewards/rejected": -0.181182861328125, + "step": 848 + }, + { + "epoch": 0.05, + "learning_rate": 9.990154933271144e-08, + "logits/chosen": -2.112670660018921, + "logits/rejected": -2.0711464881896973, + "logps/chosen": -91.09005737304688, + "logps/rejected": -173.00393676757812, + "loss": 0.7077, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0521240234375, + "rewards/margins": -0.009622190147638321, + "rewards/rejected": -0.04250183328986168, + "step": 849 + }, + { + "epoch": 0.05, + "learning_rate": 9.990095734516354e-08, + "logits/chosen": -2.217984914779663, + "logits/rejected": -2.2015221118927, + "logps/chosen": -10.892728805541992, + "logps/rejected": -183.3075408935547, + "loss": 0.6888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011280060280114412, + "rewards/margins": 0.009582138620316982, + "rewards/rejected": -0.01071014441549778, + "step": 850 + }, + { + "epoch": 0.05, + "learning_rate": 9.990036358488952e-08, + "logits/chosen": -2.357414722442627, + "logits/rejected": -2.353127956390381, + "logps/chosen": -2.598957061767578, + "logps/rejected": -51.99664306640625, + "loss": 0.6949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021466707810759544, + "rewards/margins": 0.0008252393454313278, + "rewards/rejected": -0.022291947156190872, + "step": 851 + }, + { + "epoch": 0.05, + "learning_rate": 9.989976805191051e-08, + "logits/chosen": -2.1247971057891846, + "logits/rejected": -2.0993635654449463, + "logps/chosen": -130.16796875, + "logps/rejected": -216.69354248046875, + "loss": 0.6615, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.13447265326976776, + "rewards/margins": -0.031369030475616455, + "rewards/rejected": 0.16584168374538422, + "step": 852 + }, + { + "epoch": 0.05, + "learning_rate": 9.989917074624765e-08, + "logits/chosen": -2.2418324947357178, + "logits/rejected": -2.2312264442443848, + "logps/chosen": -4.839840767090209e-05, + "logps/rejected": -263.6466369628906, + "loss": 0.6544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.549706212244928e-07, + "rewards/margins": 0.1618833988904953, + "rewards/rejected": -0.16188354790210724, + "step": 853 + }, + { + "epoch": 0.05, + "learning_rate": 9.989857166792217e-08, + "logits/chosen": -2.1671817302703857, + "logits/rejected": -2.176786184310913, + "logps/chosen": -164.50579833984375, + "logps/rejected": -297.502197265625, + "loss": 0.6153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22235718369483948, + "rewards/margins": 0.029272466897964478, + "rewards/rejected": 0.193084716796875, + "step": 854 + }, + { + "epoch": 0.05, + "learning_rate": 9.989797081695532e-08, + "logits/chosen": -2.059319257736206, + "logits/rejected": -2.0511856079101562, + "logps/chosen": -17.61809539794922, + "logps/rejected": -54.79050827026367, + "loss": 0.7041, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00848388671875, + "rewards/margins": -0.029534531757235527, + "rewards/rejected": 0.021050645038485527, + "step": 855 + }, + { + "epoch": 0.05, + "learning_rate": 9.989736819336849e-08, + "logits/chosen": -2.1115920543670654, + "logits/rejected": -2.0861575603485107, + "logps/chosen": -230.34092712402344, + "logps/rejected": -323.0599060058594, + "loss": 0.5967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.300485223531723, + "rewards/margins": 0.0699004977941513, + "rewards/rejected": 0.23058472573757172, + "step": 856 + }, + { + "epoch": 0.05, + "learning_rate": 9.989676379718306e-08, + "logits/chosen": -2.16705322265625, + "logits/rejected": -2.1753926277160645, + "logps/chosen": -33.19173812866211, + "logps/rejected": -112.31645965576172, + "loss": 0.6663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015970993787050247, + "rewards/margins": 0.06985588371753693, + "rewards/rejected": -0.05388488993048668, + "step": 857 + }, + { + "epoch": 0.05, + "learning_rate": 9.989615762842052e-08, + "logits/chosen": -2.3474504947662354, + "logits/rejected": -2.3278558254241943, + "logps/chosen": -21.600536346435547, + "logps/rejected": -246.2458953857422, + "loss": 0.6422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022890090942382812, + "rewards/margins": 0.18138466775417328, + "rewards/rejected": -0.15849457681179047, + "step": 858 + }, + { + "epoch": 0.05, + "learning_rate": 9.989554968710238e-08, + "logits/chosen": -2.086843729019165, + "logits/rejected": -2.0786006450653076, + "logps/chosen": -8.530242919921875, + "logps/rejected": -206.02481079101562, + "loss": 0.6756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01429824810475111, + "rewards/margins": 0.042795561254024506, + "rewards/rejected": -0.02849731408059597, + "step": 859 + }, + { + "epoch": 0.05, + "learning_rate": 9.989493997325025e-08, + "logits/chosen": -2.1709342002868652, + "logits/rejected": -2.179504632949829, + "logps/chosen": -123.0538101196289, + "logps/rejected": -233.78565979003906, + "loss": 0.7255, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.024872589856386185, + "rewards/margins": -0.0879463255405426, + "rewards/rejected": 0.06307373195886612, + "step": 860 + }, + { + "epoch": 0.05, + "learning_rate": 9.98943284868858e-08, + "logits/chosen": -2.170619249343872, + "logits/rejected": -2.1568291187286377, + "logps/chosen": -0.0006440613651648164, + "logps/rejected": -181.7113037109375, + "loss": 0.6751, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.315420705504948e-06, + "rewards/margins": 0.07469155639410019, + "rewards/rejected": -0.07469787448644638, + "step": 861 + }, + { + "epoch": 0.05, + "learning_rate": 9.989371522803071e-08, + "logits/chosen": -2.061551094055176, + "logits/rejected": -2.0356457233428955, + "logps/chosen": -245.67437744140625, + "logps/rejected": -371.0364990234375, + "loss": 0.6384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07197418063879013, + "rewards/margins": 0.09867095947265625, + "rewards/rejected": -0.02669677697122097, + "step": 862 + }, + { + "epoch": 0.05, + "learning_rate": 9.989310019670683e-08, + "logits/chosen": -2.0703303813934326, + "logits/rejected": -2.0786304473876953, + "logps/chosen": -45.584712982177734, + "logps/rejected": -140.7064208984375, + "loss": 0.7091, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04235382005572319, + "rewards/margins": -0.02255401574075222, + "rewards/rejected": -0.01979980431497097, + "step": 863 + }, + { + "epoch": 0.05, + "learning_rate": 9.989248339293597e-08, + "logits/chosen": -2.236035108566284, + "logits/rejected": -2.23504376411438, + "logps/chosen": -5.648614883422852, + "logps/rejected": -179.29708862304688, + "loss": 0.6837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009912967681884766, + "rewards/margins": 0.044344235211610794, + "rewards/rejected": -0.05425720289349556, + "step": 864 + }, + { + "epoch": 0.05, + "learning_rate": 9.989186481674006e-08, + "logits/chosen": -2.281745672225952, + "logits/rejected": -2.271366596221924, + "logps/chosen": -0.004699655808508396, + "logps/rejected": -203.89697265625, + "loss": 0.6838, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.397394023835659e-05, + "rewards/margins": 0.04261079803109169, + "rewards/rejected": -0.04270477220416069, + "step": 865 + }, + { + "epoch": 0.05, + "learning_rate": 9.989124446814106e-08, + "logits/chosen": -2.021368980407715, + "logits/rejected": -1.9696173667907715, + "logps/chosen": -203.6884765625, + "logps/rejected": -363.73114013671875, + "loss": 0.6097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.303079217672348, + "rewards/margins": 0.01250302791595459, + "rewards/rejected": 0.29057618975639343, + "step": 866 + }, + { + "epoch": 0.05, + "learning_rate": 9.989062234716099e-08, + "logits/chosen": -2.3968758583068848, + "logits/rejected": -2.3964014053344727, + "logps/chosen": -119.59880065917969, + "logps/rejected": -233.00001525878906, + "loss": 0.6551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0052589415572583675, + "rewards/margins": 0.15346451103687286, + "rewards/rejected": -0.15872345864772797, + "step": 867 + }, + { + "epoch": 0.05, + "learning_rate": 9.9889998453822e-08, + "logits/chosen": -2.0223162174224854, + "logits/rejected": -1.914370059967041, + "logps/chosen": -221.139404296875, + "logps/rejected": -447.7686767578125, + "loss": 0.6923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03536377102136612, + "rewards/margins": 0.006170652806758881, + "rewards/rejected": -0.041534423828125, + "step": 868 + }, + { + "epoch": 0.05, + "learning_rate": 9.988937278814621e-08, + "logits/chosen": -2.007587432861328, + "logits/rejected": -1.9882169961929321, + "logps/chosen": -190.28829956054688, + "logps/rejected": -259.83843994140625, + "loss": 0.6007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23204194009304047, + "rewards/margins": 0.1261032223701477, + "rewards/rejected": 0.10593872517347336, + "step": 869 + }, + { + "epoch": 0.05, + "learning_rate": 9.988874535015587e-08, + "logits/chosen": -2.4283361434936523, + "logits/rejected": -2.392627477645874, + "logps/chosen": -76.68596649169922, + "logps/rejected": -228.92800903320312, + "loss": 0.6478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06360550224781036, + "rewards/margins": 0.13727493584156036, + "rewards/rejected": -0.07366943359375, + "step": 870 + }, + { + "epoch": 0.05, + "learning_rate": 9.988811613987328e-08, + "logits/chosen": -1.9561846256256104, + "logits/rejected": -1.9566102027893066, + "logps/chosen": -4.916692733764648, + "logps/rejected": -184.13381958007812, + "loss": 0.6759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012669229879975319, + "rewards/margins": 0.07564711570739746, + "rewards/rejected": -0.08831634372472763, + "step": 871 + }, + { + "epoch": 0.05, + "learning_rate": 9.988748515732074e-08, + "logits/chosen": -2.244657039642334, + "logits/rejected": -2.2179009914398193, + "logps/chosen": -234.7981414794922, + "logps/rejected": -363.8214416503906, + "loss": 0.6437, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1990615874528885, + "rewards/margins": -0.03519439697265625, + "rewards/rejected": 0.23425598442554474, + "step": 872 + }, + { + "epoch": 0.05, + "learning_rate": 9.988685240252072e-08, + "logits/chosen": -1.9933497905731201, + "logits/rejected": -1.9919371604919434, + "logps/chosen": -17.47725486755371, + "logps/rejected": -85.13616943359375, + "loss": 0.6877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006674385163933039, + "rewards/margins": 0.003956794738769531, + "rewards/rejected": 0.0027175904251635075, + "step": 873 + }, + { + "epoch": 0.05, + "learning_rate": 9.988621787549567e-08, + "logits/chosen": -2.1141111850738525, + "logits/rejected": -2.120980739593506, + "logps/chosen": -78.77886199951172, + "logps/rejected": -254.59584045410156, + "loss": 0.6581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06393814086914062, + "rewards/margins": 0.07438888400793076, + "rewards/rejected": -0.01045074500143528, + "step": 874 + }, + { + "epoch": 0.05, + "learning_rate": 9.988558157626815e-08, + "logits/chosen": -2.3144547939300537, + "logits/rejected": -2.2996010780334473, + "logps/chosen": -60.379310607910156, + "logps/rejected": -163.45477294921875, + "loss": 0.6496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06828766316175461, + "rewards/margins": 0.11043854057788849, + "rewards/rejected": -0.04215088114142418, + "step": 875 + }, + { + "epoch": 0.05, + "learning_rate": 9.988494350486077e-08, + "logits/chosen": -2.2217490673065186, + "logits/rejected": -2.2576801776885986, + "logps/chosen": -256.689453125, + "logps/rejected": -253.75259399414062, + "loss": 0.5572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36529541015625, + "rewards/margins": 0.14975281059741974, + "rewards/rejected": 0.21554259955883026, + "step": 876 + }, + { + "epoch": 0.05, + "learning_rate": 9.988430366129616e-08, + "logits/chosen": -2.241039752960205, + "logits/rejected": -2.175647020339966, + "logps/chosen": -272.69122314453125, + "logps/rejected": -573.9549560546875, + "loss": 0.5461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4246276915073395, + "rewards/margins": 0.14456787705421448, + "rewards/rejected": 0.280059814453125, + "step": 877 + }, + { + "epoch": 0.05, + "learning_rate": 9.988366204559708e-08, + "logits/chosen": -2.11360502243042, + "logits/rejected": -2.058527708053589, + "logps/chosen": -61.89519500732422, + "logps/rejected": -320.7720031738281, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09066696465015411, + "rewards/margins": 0.2128807008266449, + "rewards/rejected": -0.12221374362707138, + "step": 878 + }, + { + "epoch": 0.05, + "learning_rate": 9.988301865778633e-08, + "logits/chosen": -2.3563060760498047, + "logits/rejected": -2.3404054641723633, + "logps/chosen": -41.25889205932617, + "logps/rejected": -222.47128295898438, + "loss": 0.6517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07273445278406143, + "rewards/margins": 0.0803150162100792, + "rewards/rejected": -0.007580566685646772, + "step": 879 + }, + { + "epoch": 0.05, + "learning_rate": 9.988237349788673e-08, + "logits/chosen": -2.148391008377075, + "logits/rejected": -2.1391754150390625, + "logps/chosen": -33.680904388427734, + "logps/rejected": -111.69233703613281, + "loss": 0.681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017696380615234375, + "rewards/margins": 0.01079635601490736, + "rewards/rejected": 0.006900024600327015, + "step": 880 + }, + { + "epoch": 0.05, + "learning_rate": 9.988172656592123e-08, + "logits/chosen": -2.217135429382324, + "logits/rejected": -2.206587076187134, + "logps/chosen": -178.27633666992188, + "logps/rejected": -295.025634765625, + "loss": 0.6593, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.20929871499538422, + "rewards/margins": -0.12096862494945526, + "rewards/rejected": 0.3302673399448395, + "step": 881 + }, + { + "epoch": 0.05, + "learning_rate": 9.988107786191281e-08, + "logits/chosen": -2.21526837348938, + "logits/rejected": -2.1847290992736816, + "logps/chosen": -20.415443420410156, + "logps/rejected": -247.4884033203125, + "loss": 0.6578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012489700689911842, + "rewards/margins": 0.13308601081371307, + "rewards/rejected": -0.12059631198644638, + "step": 882 + }, + { + "epoch": 0.05, + "learning_rate": 9.988042738588451e-08, + "logits/chosen": -2.3253448009490967, + "logits/rejected": -2.287412405014038, + "logps/chosen": -227.44277954101562, + "logps/rejected": -392.62994384765625, + "loss": 0.6482, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1948089599609375, + "rewards/margins": -0.05343933403491974, + "rewards/rejected": 0.24824829399585724, + "step": 883 + }, + { + "epoch": 0.05, + "learning_rate": 9.987977513785944e-08, + "logits/chosen": -2.1947710514068604, + "logits/rejected": -2.1902151107788086, + "logps/chosen": -45.0771598815918, + "logps/rejected": -115.60381317138672, + "loss": 0.6691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02889709547162056, + "rewards/margins": 0.06772461533546448, + "rewards/rejected": -0.03882751613855362, + "step": 884 + }, + { + "epoch": 0.05, + "learning_rate": 9.987912111786075e-08, + "logits/chosen": -2.2365810871124268, + "logits/rejected": -2.2206175327301025, + "logps/chosen": -165.2591552734375, + "logps/rejected": -284.41497802734375, + "loss": 0.6228, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.266732782125473, + "rewards/margins": -0.0126190185546875, + "rewards/rejected": 0.2793518006801605, + "step": 885 + }, + { + "epoch": 0.05, + "learning_rate": 9.987846532591171e-08, + "logits/chosen": -2.1543169021606445, + "logits/rejected": -2.1312167644500732, + "logps/chosen": -22.529088973999023, + "logps/rejected": -156.92294311523438, + "loss": 0.6837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045813944190740585, + "rewards/margins": 0.0920141190290451, + "rewards/rejected": -0.137828066945076, + "step": 886 + }, + { + "epoch": 0.05, + "learning_rate": 9.98778077620356e-08, + "logits/chosen": -2.12595796585083, + "logits/rejected": -2.0488624572753906, + "logps/chosen": -19.4981746673584, + "logps/rejected": -243.34019470214844, + "loss": 0.6624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03359222412109375, + "rewards/margins": 0.09483185410499573, + "rewards/rejected": -0.06123962625861168, + "step": 887 + }, + { + "epoch": 0.05, + "learning_rate": 9.987714842625577e-08, + "logits/chosen": -2.070181131362915, + "logits/rejected": -2.073676347732544, + "logps/chosen": -18.282073974609375, + "logps/rejected": -118.23910522460938, + "loss": 0.6591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02811603620648384, + "rewards/margins": 0.11978626251220703, + "rewards/rejected": -0.09167023003101349, + "step": 888 + }, + { + "epoch": 0.05, + "learning_rate": 9.987648731859566e-08, + "logits/chosen": -2.3712778091430664, + "logits/rejected": -2.3239150047302246, + "logps/chosen": -21.286788940429688, + "logps/rejected": -426.81854248046875, + "loss": 0.6351, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012273216620087624, + "rewards/margins": 0.2681131362915039, + "rewards/rejected": -0.280386358499527, + "step": 889 + }, + { + "epoch": 0.05, + "learning_rate": 9.987582443907874e-08, + "logits/chosen": -2.1258769035339355, + "logits/rejected": -2.123067855834961, + "logps/chosen": -21.219974517822266, + "logps/rejected": -117.21833801269531, + "loss": 0.6938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0579771064221859, + "rewards/margins": 0.04627856984734535, + "rewards/rejected": -0.10425567626953125, + "step": 890 + }, + { + "epoch": 0.05, + "learning_rate": 9.987515978772858e-08, + "logits/chosen": -2.196739435195923, + "logits/rejected": -2.167161226272583, + "logps/chosen": -310.4583435058594, + "logps/rejected": -402.23779296875, + "loss": 0.6436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02222595177590847, + "rewards/margins": 0.21188659965991974, + "rewards/rejected": -0.18966065347194672, + "step": 891 + }, + { + "epoch": 0.05, + "learning_rate": 9.987449336456876e-08, + "logits/chosen": -2.1001815795898438, + "logits/rejected": -2.0808603763580322, + "logps/chosen": -157.6852264404297, + "logps/rejected": -281.7725830078125, + "loss": 0.6376, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19567108154296875, + "rewards/margins": -0.00023956596851348877, + "rewards/rejected": 0.19591064751148224, + "step": 892 + }, + { + "epoch": 0.05, + "learning_rate": 9.987382516962299e-08, + "logits/chosen": -2.038684129714966, + "logits/rejected": -2.0159146785736084, + "logps/chosen": -229.7150115966797, + "logps/rejected": -355.2840881347656, + "loss": 0.6596, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2603042721748352, + "rewards/margins": -0.1352950930595398, + "rewards/rejected": 0.395599365234375, + "step": 893 + }, + { + "epoch": 0.05, + "learning_rate": 9.987315520291499e-08, + "logits/chosen": -2.0567538738250732, + "logits/rejected": -2.0568249225616455, + "logps/chosen": -12.651351928710938, + "logps/rejected": -128.7899932861328, + "loss": 0.6665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012036419473588467, + "rewards/margins": 0.10435209423303604, + "rewards/rejected": -0.092315673828125, + "step": 894 + }, + { + "epoch": 0.05, + "learning_rate": 9.987248346446856e-08, + "logits/chosen": -2.1634390354156494, + "logits/rejected": -2.14792799949646, + "logps/chosen": -59.81672668457031, + "logps/rejected": -186.25650024414062, + "loss": 0.7063, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.022568894550204277, + "rewards/margins": -0.08729896694421768, + "rewards/rejected": 0.10986786335706711, + "step": 895 + }, + { + "epoch": 0.05, + "learning_rate": 9.987180995430756e-08, + "logits/chosen": -2.334878921508789, + "logits/rejected": -2.3184642791748047, + "logps/chosen": -73.5494155883789, + "logps/rejected": -199.3334197998047, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.012186432257294655, + "rewards/margins": -0.036936186254024506, + "rewards/rejected": 0.04912262037396431, + "step": 896 + }, + { + "epoch": 0.05, + "learning_rate": 9.987113467245593e-08, + "logits/chosen": -2.288766622543335, + "logits/rejected": -2.2911829948425293, + "logps/chosen": -0.5610864162445068, + "logps/rejected": -157.90853881835938, + "loss": 0.6905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002327561378479004, + "rewards/margins": 0.010122084990143776, + "rewards/rejected": -0.01244964636862278, + "step": 897 + }, + { + "epoch": 0.05, + "learning_rate": 9.987045761893764e-08, + "logits/chosen": -2.0818541049957275, + "logits/rejected": -2.0798468589782715, + "logps/chosen": -1.9123342037200928, + "logps/rejected": -145.66148376464844, + "loss": 0.6925, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.170635277958354e-05, + "rewards/margins": 0.002438855357468128, + "rewards/rejected": -0.0024505616165697575, + "step": 898 + }, + { + "epoch": 0.05, + "learning_rate": 9.986977879377677e-08, + "logits/chosen": -2.216470718383789, + "logits/rejected": -2.1755449771881104, + "logps/chosen": -58.552162170410156, + "logps/rejected": -269.3577880859375, + "loss": 0.6439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011381149291992188, + "rewards/margins": 0.14888115227222443, + "rewards/rejected": -0.13750000298023224, + "step": 899 + }, + { + "epoch": 0.05, + "learning_rate": 9.98690981969974e-08, + "logits/chosen": -2.2525880336761475, + "logits/rejected": -2.2336347103118896, + "logps/chosen": -155.71136474609375, + "logps/rejected": -309.986572265625, + "loss": 0.6834, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.16855469346046448, + "rewards/margins": -0.12969666719436646, + "rewards/rejected": 0.29825136065483093, + "step": 900 + }, + { + "epoch": 0.05, + "learning_rate": 9.986841582862372e-08, + "logits/chosen": -2.287370443344116, + "logits/rejected": -2.2866594791412354, + "logps/chosen": -6.341425895690918, + "logps/rejected": -110.15071868896484, + "loss": 0.7386, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.011563110165297985, + "rewards/margins": -0.16454239189624786, + "rewards/rejected": 0.1529792845249176, + "step": 901 + }, + { + "epoch": 0.05, + "learning_rate": 9.986773168867999e-08, + "logits/chosen": -2.196059465408325, + "logits/rejected": -2.169548988342285, + "logps/chosen": -233.19036865234375, + "logps/rejected": -311.2897033691406, + "loss": 0.634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18317566812038422, + "rewards/margins": 0.017095953226089478, + "rewards/rejected": 0.16607971489429474, + "step": 902 + }, + { + "epoch": 0.05, + "learning_rate": 9.98670457771905e-08, + "logits/chosen": -2.0557703971862793, + "logits/rejected": -2.0138723850250244, + "logps/chosen": -286.9165344238281, + "logps/rejected": -474.6448059082031, + "loss": 0.6713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0192108154296875, + "rewards/margins": 0.08400268852710724, + "rewards/rejected": -0.06479187309741974, + "step": 903 + }, + { + "epoch": 0.05, + "learning_rate": 9.98663580941796e-08, + "logits/chosen": -2.3238017559051514, + "logits/rejected": -2.315932512283325, + "logps/chosen": -15.081013679504395, + "logps/rejected": -87.57876586914062, + "loss": 0.7059, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.027739524841308594, + "rewards/margins": -0.027905846014618874, + "rewards/rejected": 0.00016632080951239914, + "step": 904 + }, + { + "epoch": 0.05, + "learning_rate": 9.986566863967177e-08, + "logits/chosen": -2.2643487453460693, + "logits/rejected": -2.265841484069824, + "logps/chosen": -2.728997230529785, + "logps/rejected": -131.49264526367188, + "loss": 0.6934, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0005275965086184442, + "rewards/margins": -0.001597237540408969, + "rewards/rejected": 0.0010696410899981856, + "step": 905 + }, + { + "epoch": 0.05, + "learning_rate": 9.986497741369144e-08, + "logits/chosen": -2.079332113265991, + "logits/rejected": -2.0812082290649414, + "logps/chosen": -6.007314682006836, + "logps/rejected": -79.2313232421875, + "loss": 0.7035, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012406873516738415, + "rewards/margins": -0.03090205043554306, + "rewards/rejected": 0.01849517785012722, + "step": 906 + }, + { + "epoch": 0.05, + "learning_rate": 9.986428441626322e-08, + "logits/chosen": -2.219663619995117, + "logits/rejected": -2.2157328128814697, + "logps/chosen": -26.479969024658203, + "logps/rejected": -179.6642608642578, + "loss": 0.6961, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.004217720124870539, + "rewards/margins": -0.02703075483441353, + "rewards/rejected": 0.03124847449362278, + "step": 907 + }, + { + "epoch": 0.05, + "learning_rate": 9.986358964741168e-08, + "logits/chosen": -2.181626081466675, + "logits/rejected": -2.1305739879608154, + "logps/chosen": -189.9378204345703, + "logps/rejected": -364.262939453125, + "loss": 0.6899, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14214935898780823, + "rewards/margins": -0.14754486083984375, + "rewards/rejected": 0.289694219827652, + "step": 908 + }, + { + "epoch": 0.05, + "learning_rate": 9.986289310716155e-08, + "logits/chosen": -2.0562469959259033, + "logits/rejected": -2.0619068145751953, + "logps/chosen": -0.52336585521698, + "logps/rejected": -206.1982421875, + "loss": 0.6925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00944396574050188, + "rewards/margins": 0.008817753754556179, + "rewards/rejected": -0.01826171949505806, + "step": 909 + }, + { + "epoch": 0.05, + "learning_rate": 9.986219479553755e-08, + "logits/chosen": -2.167337656021118, + "logits/rejected": -2.1548659801483154, + "logps/chosen": -22.600614547729492, + "logps/rejected": -193.455810546875, + "loss": 0.6976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06254406273365021, + "rewards/margins": 0.06592273712158203, + "rewards/rejected": -0.12846679985523224, + "step": 910 + }, + { + "epoch": 0.05, + "learning_rate": 9.986149471256447e-08, + "logits/chosen": -2.2335689067840576, + "logits/rejected": -2.257093906402588, + "logps/chosen": -157.11557006835938, + "logps/rejected": -222.40219116210938, + "loss": 0.5731, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33549806475639343, + "rewards/margins": 0.11466524004936218, + "rewards/rejected": 0.22083282470703125, + "step": 911 + }, + { + "epoch": 0.05, + "learning_rate": 9.986079285826721e-08, + "logits/chosen": -2.1039490699768066, + "logits/rejected": -2.0729639530181885, + "logps/chosen": -336.0179748535156, + "logps/rejected": -487.7339782714844, + "loss": 0.6647, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.23140870034694672, + "rewards/margins": -0.10534058511257172, + "rewards/rejected": 0.33674928545951843, + "step": 912 + }, + { + "epoch": 0.05, + "learning_rate": 9.98600892326707e-08, + "logits/chosen": -2.216416835784912, + "logits/rejected": -2.1779236793518066, + "logps/chosen": -229.8750457763672, + "logps/rejected": -346.3406982421875, + "loss": 0.5992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3795303404331207, + "rewards/margins": 0.027845770120620728, + "rewards/rejected": 0.3516845703125, + "step": 913 + }, + { + "epoch": 0.05, + "learning_rate": 9.985938383579992e-08, + "logits/chosen": -2.0951550006866455, + "logits/rejected": -2.0443713665008545, + "logps/chosen": -136.9894256591797, + "logps/rejected": -389.2442321777344, + "loss": 0.676, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12087097018957138, + "rewards/margins": -0.11479493230581284, + "rewards/rejected": 0.23566590249538422, + "step": 914 + }, + { + "epoch": 0.05, + "learning_rate": 9.985867666767992e-08, + "logits/chosen": -1.880342960357666, + "logits/rejected": -1.850152611732483, + "logps/chosen": -295.0856628417969, + "logps/rejected": -310.4014892578125, + "loss": 0.7037, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00933837890625, + "rewards/margins": -0.04301147535443306, + "rewards/rejected": 0.03367309644818306, + "step": 915 + }, + { + "epoch": 0.05, + "learning_rate": 9.985796772833588e-08, + "logits/chosen": -1.9495278596878052, + "logits/rejected": -1.8671704530715942, + "logps/chosen": -238.6990966796875, + "logps/rejected": -339.8876037597656, + "loss": 0.6811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04505005106329918, + "rewards/margins": 0.0393219031393528, + "rewards/rejected": 0.0057281493209302425, + "step": 916 + }, + { + "epoch": 0.05, + "learning_rate": 9.985725701779292e-08, + "logits/chosen": -2.182905435562134, + "logits/rejected": -2.1769778728485107, + "logps/chosen": -80.11175537109375, + "logps/rejected": -289.46649169921875, + "loss": 0.6388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04111633449792862, + "rewards/margins": 0.18995362520217896, + "rewards/rejected": -0.14883728325366974, + "step": 917 + }, + { + "epoch": 0.05, + "learning_rate": 9.98565445360763e-08, + "logits/chosen": -2.128885507583618, + "logits/rejected": -2.1085925102233887, + "logps/chosen": -225.71115112304688, + "logps/rejected": -455.53631591796875, + "loss": 0.5907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28256532549858093, + "rewards/margins": 0.08887635171413422, + "rewards/rejected": 0.19368897378444672, + "step": 918 + }, + { + "epoch": 0.05, + "learning_rate": 9.985583028321137e-08, + "logits/chosen": -2.232253313064575, + "logits/rejected": -2.213226079940796, + "logps/chosen": -198.12954711914062, + "logps/rejected": -376.34320068359375, + "loss": 0.6871, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.13807983696460724, + "rewards/margins": -0.14806212484836578, + "rewards/rejected": 0.286141961812973, + "step": 919 + }, + { + "epoch": 0.05, + "learning_rate": 9.985511425922345e-08, + "logits/chosen": -2.131239891052246, + "logits/rejected": -2.1306958198547363, + "logps/chosen": -6.954013824462891, + "logps/rejected": -223.9484100341797, + "loss": 0.7031, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.006127548404037952, + "rewards/margins": -0.033572006970644, + "rewards/rejected": 0.02744445763528347, + "step": 920 + }, + { + "epoch": 0.05, + "learning_rate": 9.985439646413803e-08, + "logits/chosen": -2.0787079334259033, + "logits/rejected": -2.068619966506958, + "logps/chosen": -17.15118408203125, + "logps/rejected": -172.59848022460938, + "loss": 0.6841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015210342593491077, + "rewards/margins": 0.046273425221443176, + "rewards/rejected": -0.06148376688361168, + "step": 921 + }, + { + "epoch": 0.05, + "learning_rate": 9.985367689798058e-08, + "logits/chosen": -2.2382030487060547, + "logits/rejected": -2.235713243484497, + "logps/chosen": -97.73344421386719, + "logps/rejected": -249.13963317871094, + "loss": 0.6736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07599335163831711, + "rewards/margins": 0.01615753397345543, + "rewards/rejected": 0.05983581766486168, + "step": 922 + }, + { + "epoch": 0.05, + "learning_rate": 9.985295556077665e-08, + "logits/chosen": -2.116670608520508, + "logits/rejected": -2.11702561378479, + "logps/chosen": -41.65321350097656, + "logps/rejected": -272.36865234375, + "loss": 0.6634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025104904547333717, + "rewards/margins": 0.10888481140136719, + "rewards/rejected": -0.08377990871667862, + "step": 923 + }, + { + "epoch": 0.05, + "learning_rate": 9.98522324525519e-08, + "logits/chosen": -2.2557172775268555, + "logits/rejected": -2.241720676422119, + "logps/chosen": -3.147098686895333e-05, + "logps/rejected": -133.76942443847656, + "loss": 0.6557, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9073085866239126e-07, + "rewards/margins": 0.15590496361255646, + "rewards/rejected": -0.15590515732765198, + "step": 924 + }, + { + "epoch": 0.05, + "learning_rate": 9.985150757333198e-08, + "logits/chosen": -2.2009267807006836, + "logits/rejected": -2.197674512863159, + "logps/chosen": -0.09268847852945328, + "logps/rejected": -65.8955078125, + "loss": 0.6949, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0007377550355158746, + "rewards/margins": -0.0067344591952860355, + "rewards/rejected": 0.0059967041015625, + "step": 925 + }, + { + "epoch": 0.05, + "learning_rate": 9.985078092314268e-08, + "logits/chosen": -2.240428924560547, + "logits/rejected": -2.242384910583496, + "logps/chosen": -61.67864990234375, + "logps/rejected": -130.84608459472656, + "loss": 0.6445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08215179294347763, + "rewards/margins": 0.10350342094898224, + "rewards/rejected": -0.02135162428021431, + "step": 926 + }, + { + "epoch": 0.05, + "learning_rate": 9.985005250200977e-08, + "logits/chosen": -2.215850353240967, + "logits/rejected": -2.2024803161621094, + "logps/chosen": -13.90652847290039, + "logps/rejected": -207.3154754638672, + "loss": 0.668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04254493862390518, + "rewards/margins": 0.05019722133874893, + "rewards/rejected": -0.00765228271484375, + "step": 927 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932230995917e-08, + "logits/chosen": -2.0995442867279053, + "logits/rejected": -2.1341309547424316, + "logps/chosen": -223.8270263671875, + "logps/rejected": -564.431884765625, + "loss": 0.585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.218597412109375, + "rewards/margins": 0.12847900390625, + "rewards/rejected": 0.090118408203125, + "step": 928 + }, + { + "epoch": 0.05, + "learning_rate": 9.98485903470168e-08, + "logits/chosen": -2.2020890712738037, + "logits/rejected": -2.1986231803894043, + "logps/chosen": -30.24620819091797, + "logps/rejected": -166.666259765625, + "loss": 0.6227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10226211696863174, + "rewards/margins": 0.16651535034179688, + "rewards/rejected": -0.06425323337316513, + "step": 929 + }, + { + "epoch": 0.05, + "learning_rate": 9.984785661320867e-08, + "logits/chosen": -1.9826349020004272, + "logits/rejected": -1.9468544721603394, + "logps/chosen": -301.222412109375, + "logps/rejected": -376.8949890136719, + "loss": 0.6856, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08099365234375, + "rewards/margins": -0.08561401069164276, + "rewards/rejected": 0.16660766303539276, + "step": 930 + }, + { + "epoch": 0.05, + "learning_rate": 9.984712110856083e-08, + "logits/chosen": -2.106923818588257, + "logits/rejected": -2.1137912273406982, + "logps/chosen": -224.6361083984375, + "logps/rejected": -307.83795166015625, + "loss": 0.6251, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.29774782061576843, + "rewards/margins": -0.03375852108001709, + "rewards/rejected": 0.3315063416957855, + "step": 931 + }, + { + "epoch": 0.05, + "learning_rate": 9.984638383309942e-08, + "logits/chosen": -2.1471500396728516, + "logits/rejected": -2.1470704078674316, + "logps/chosen": -82.78369140625, + "logps/rejected": -175.65061950683594, + "loss": 0.6947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01087188720703125, + "rewards/margins": 0.0149993896484375, + "rewards/rejected": -0.02587127685546875, + "step": 932 + }, + { + "epoch": 0.05, + "learning_rate": 9.984564478685065e-08, + "logits/chosen": -2.155304193496704, + "logits/rejected": -2.1463375091552734, + "logps/chosen": -230.8909149169922, + "logps/rejected": -311.19580078125, + "loss": 0.6351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03963165357708931, + "rewards/margins": 0.209278866648674, + "rewards/rejected": -0.169647216796875, + "step": 933 + }, + { + "epoch": 0.05, + "learning_rate": 9.984490396984073e-08, + "logits/chosen": -2.291064977645874, + "logits/rejected": -2.2834465503692627, + "logps/chosen": -248.1922607421875, + "logps/rejected": -345.4547424316406, + "loss": 0.6491, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.24956969916820526, + "rewards/margins": -0.13659362494945526, + "rewards/rejected": 0.3861633241176605, + "step": 934 + }, + { + "epoch": 0.05, + "learning_rate": 9.984416138209601e-08, + "logits/chosen": -2.2381556034088135, + "logits/rejected": -2.2346112728118896, + "logps/chosen": -38.41911315917969, + "logps/rejected": -136.0205535888672, + "loss": 0.6795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019116593524813652, + "rewards/margins": 0.0335514098405838, + "rewards/rejected": -0.014434814453125, + "step": 935 + }, + { + "epoch": 0.05, + "learning_rate": 9.984341702364288e-08, + "logits/chosen": -2.3332791328430176, + "logits/rejected": -2.329460382461548, + "logps/chosen": -45.010337829589844, + "logps/rejected": -100.83407592773438, + "loss": 0.6953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01693878136575222, + "rewards/margins": 0.007297515869140625, + "rewards/rejected": -0.024236297234892845, + "step": 936 + }, + { + "epoch": 0.05, + "learning_rate": 9.984267089450775e-08, + "logits/chosen": -2.0796897411346436, + "logits/rejected": -2.064500570297241, + "logps/chosen": -228.16990661621094, + "logps/rejected": -467.7839050292969, + "loss": 0.6662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10687714070081711, + "rewards/margins": 0.026036076247692108, + "rewards/rejected": 0.080841064453125, + "step": 937 + }, + { + "epoch": 0.05, + "learning_rate": 9.984192299471715e-08, + "logits/chosen": -2.079725742340088, + "logits/rejected": -2.0747923851013184, + "logps/chosen": -260.12908935546875, + "logps/rejected": -347.5635681152344, + "loss": 0.5994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2631286680698395, + "rewards/margins": 0.08812561631202698, + "rewards/rejected": 0.1750030517578125, + "step": 938 + }, + { + "epoch": 0.05, + "learning_rate": 9.984117332429765e-08, + "logits/chosen": -2.145500421524048, + "logits/rejected": -2.1380577087402344, + "logps/chosen": -89.26313781738281, + "logps/rejected": -248.9073486328125, + "loss": 0.6953, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.010667419992387295, + "rewards/margins": -0.00338897667825222, + "rewards/rejected": 0.014056396670639515, + "step": 939 + }, + { + "epoch": 0.05, + "learning_rate": 9.984042188327586e-08, + "logits/chosen": -2.1431124210357666, + "logits/rejected": -2.1281020641326904, + "logps/chosen": -169.90005493164062, + "logps/rejected": -281.90301513671875, + "loss": 0.6415, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.23469848930835724, + "rewards/margins": -0.060781851410865784, + "rewards/rejected": 0.295480340719223, + "step": 940 + }, + { + "epoch": 0.05, + "learning_rate": 9.98396686716785e-08, + "logits/chosen": -2.3215765953063965, + "logits/rejected": -2.2993361949920654, + "logps/chosen": -139.1683349609375, + "logps/rejected": -252.08424377441406, + "loss": 0.6331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0852203369140625, + "rewards/margins": 0.185455322265625, + "rewards/rejected": -0.1002349853515625, + "step": 941 + }, + { + "epoch": 0.05, + "learning_rate": 9.98389136895323e-08, + "logits/chosen": -2.1316256523132324, + "logits/rejected": -2.117906332015991, + "logps/chosen": -232.70166015625, + "logps/rejected": -271.64788818359375, + "loss": 0.5892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33663636445999146, + "rewards/margins": 0.08858337998390198, + "rewards/rejected": 0.24805298447608948, + "step": 942 + }, + { + "epoch": 0.05, + "learning_rate": 9.983815693686411e-08, + "logits/chosen": -2.0966949462890625, + "logits/rejected": -2.0946426391601562, + "logps/chosen": -3.54146671295166, + "logps/rejected": -158.3259735107422, + "loss": 0.6846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024248624220490456, + "rewards/margins": 0.05950229614973068, + "rewards/rejected": -0.08375091850757599, + "step": 943 + }, + { + "epoch": 0.05, + "learning_rate": 9.98373984137008e-08, + "logits/chosen": -2.1987969875335693, + "logits/rejected": -2.18991756439209, + "logps/chosen": -0.7811022400856018, + "logps/rejected": -153.90354919433594, + "loss": 0.6842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006834983825683594, + "rewards/margins": 0.043184854090213776, + "rewards/rejected": -0.05001983791589737, + "step": 944 + }, + { + "epoch": 0.05, + "learning_rate": 9.983663812006932e-08, + "logits/chosen": -2.135768175125122, + "logits/rejected": -2.139505386352539, + "logps/chosen": -0.00021300926164258271, + "logps/rejected": -55.25325393676758, + "loss": 0.686, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.535870049148798e-08, + "rewards/margins": 0.02879323996603489, + "rewards/rejected": -0.0287933349609375, + "step": 945 + }, + { + "epoch": 0.06, + "learning_rate": 9.983587605599667e-08, + "logits/chosen": -2.0609774589538574, + "logits/rejected": -2.0049874782562256, + "logps/chosen": -334.0958251953125, + "logps/rejected": -530.6181030273438, + "loss": 0.5323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4953857362270355, + "rewards/margins": 0.20668333768844604, + "rewards/rejected": 0.2887023985385895, + "step": 946 + }, + { + "epoch": 0.06, + "learning_rate": 9.983511222150993e-08, + "logits/chosen": -2.2777302265167236, + "logits/rejected": -2.250363826751709, + "logps/chosen": -6.273530960083008, + "logps/rejected": -130.87696838378906, + "loss": 0.6977, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0018369675381109118, + "rewards/margins": -0.02521495707333088, + "rewards/rejected": 0.02337799035012722, + "step": 947 + }, + { + "epoch": 0.06, + "learning_rate": 9.983434661663623e-08, + "logits/chosen": -2.226034164428711, + "logits/rejected": -2.237475872039795, + "logps/chosen": -13.878458976745605, + "logps/rejected": -153.02821350097656, + "loss": 0.7073, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.017420673742890358, + "rewards/margins": -0.03319673612713814, + "rewards/rejected": 0.01577606238424778, + "step": 948 + }, + { + "epoch": 0.06, + "learning_rate": 9.983357924140279e-08, + "logits/chosen": -2.334979295730591, + "logits/rejected": -2.313629388809204, + "logps/chosen": -0.15865430235862732, + "logps/rejected": -124.47045135498047, + "loss": 0.6768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007938891649246216, + "rewards/margins": 0.06388354301452637, + "rewards/rejected": -0.06467743217945099, + "step": 949 + }, + { + "epoch": 0.06, + "learning_rate": 9.983281009583684e-08, + "logits/chosen": -2.083857774734497, + "logits/rejected": -2.0922060012817383, + "logps/chosen": -176.92417907714844, + "logps/rejected": -263.2839660644531, + "loss": 0.6522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04534912109375, + "rewards/margins": 0.1497650146484375, + "rewards/rejected": -0.1044158935546875, + "step": 950 + }, + { + "epoch": 0.06, + "learning_rate": 9.983203917996571e-08, + "logits/chosen": -2.214850425720215, + "logits/rejected": -2.167541027069092, + "logps/chosen": -237.68716430664062, + "logps/rejected": -316.7392578125, + "loss": 0.6242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19263306260108948, + "rewards/margins": 0.041772469878196716, + "rewards/rejected": 0.15086059272289276, + "step": 951 + }, + { + "epoch": 0.06, + "learning_rate": 9.98312664938168e-08, + "logits/chosen": -2.0842695236206055, + "logits/rejected": -2.070330858230591, + "logps/chosen": -302.6020202636719, + "logps/rejected": -449.0066223144531, + "loss": 0.5587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.435202032327652, + "rewards/margins": 0.12457275390625, + "rewards/rejected": 0.310629278421402, + "step": 952 + }, + { + "epoch": 0.06, + "learning_rate": 9.983049203741756e-08, + "logits/chosen": -2.3212363719940186, + "logits/rejected": -2.315305233001709, + "logps/chosen": -98.14421844482422, + "logps/rejected": -195.94227600097656, + "loss": 0.7022, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01706390455365181, + "rewards/margins": -0.02521819993853569, + "rewards/rejected": 0.0422821044921875, + "step": 953 + }, + { + "epoch": 0.06, + "learning_rate": 9.982971581079547e-08, + "logits/chosen": -2.0893120765686035, + "logits/rejected": -2.084047555923462, + "logps/chosen": -90.8642807006836, + "logps/rejected": -234.15499877929688, + "loss": 0.6536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08386001735925674, + "rewards/margins": 0.07846450805664062, + "rewards/rejected": 0.0053955079056322575, + "step": 954 + }, + { + "epoch": 0.06, + "learning_rate": 9.982893781397816e-08, + "logits/chosen": -1.9379782676696777, + "logits/rejected": -1.9229390621185303, + "logps/chosen": -14.49740982055664, + "logps/rejected": -244.0851593017578, + "loss": 0.6648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024683093652129173, + "rewards/margins": 0.08690538257360458, + "rewards/rejected": -0.06222229078412056, + "step": 955 + }, + { + "epoch": 0.06, + "learning_rate": 9.982815804699323e-08, + "logits/chosen": -2.0277161598205566, + "logits/rejected": -1.991132378578186, + "logps/chosen": -220.59310913085938, + "logps/rejected": -329.7135314941406, + "loss": 0.594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3535049557685852, + "rewards/margins": 0.03730317950248718, + "rewards/rejected": 0.316201776266098, + "step": 956 + }, + { + "epoch": 0.06, + "learning_rate": 9.982737650986838e-08, + "logits/chosen": -2.252816915512085, + "logits/rejected": -2.2490341663360596, + "logps/chosen": -0.038189586251974106, + "logps/rejected": -124.75424194335938, + "loss": 0.6647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002392477123066783, + "rewards/margins": 0.11449651420116425, + "rewards/rejected": -0.11210403591394424, + "step": 957 + }, + { + "epoch": 0.06, + "learning_rate": 9.982659320263141e-08, + "logits/chosen": -1.9386109113693237, + "logits/rejected": -1.9448174238204956, + "logps/chosen": -172.51602172851562, + "logps/rejected": -264.4296875, + "loss": 0.7052, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00469970703125, + "rewards/margins": -0.06291809678077698, + "rewards/rejected": 0.05821838602423668, + "step": 958 + }, + { + "epoch": 0.06, + "learning_rate": 9.98258081253101e-08, + "logits/chosen": -2.0977773666381836, + "logits/rejected": -2.0469748973846436, + "logps/chosen": -191.0421905517578, + "logps/rejected": -276.6862487792969, + "loss": 0.6365, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19900360703468323, + "rewards/margins": -0.0020736604928970337, + "rewards/rejected": 0.20107726752758026, + "step": 959 + }, + { + "epoch": 0.06, + "learning_rate": 9.982502127793236e-08, + "logits/chosen": -2.0933144092559814, + "logits/rejected": -2.081325054168701, + "logps/chosen": -0.015640148892998695, + "logps/rejected": -136.68148803710938, + "loss": 0.6804, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696897228015587e-05, + "rewards/margins": 0.05157098174095154, + "rewards/rejected": -0.05164794996380806, + "step": 960 + }, + { + "epoch": 0.06, + "learning_rate": 9.982423266052615e-08, + "logits/chosen": -2.2452235221862793, + "logits/rejected": -2.2301642894744873, + "logps/chosen": -32.31285858154297, + "logps/rejected": -121.94998168945312, + "loss": 0.6722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030715180560946465, + "rewards/margins": 0.10911941528320312, + "rewards/rejected": -0.13983459770679474, + "step": 961 + }, + { + "epoch": 0.06, + "learning_rate": 9.982344227311947e-08, + "logits/chosen": -2.03269362449646, + "logits/rejected": -2.005688190460205, + "logps/chosen": -220.96055603027344, + "logps/rejected": -236.40904235839844, + "loss": 0.6853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015528869815170765, + "rewards/margins": 0.03113403357565403, + "rewards/rejected": -0.015605163760483265, + "step": 962 + }, + { + "epoch": 0.06, + "learning_rate": 9.982265011574043e-08, + "logits/chosen": -2.394021511077881, + "logits/rejected": -2.4024386405944824, + "logps/chosen": -56.66563415527344, + "logps/rejected": -208.566650390625, + "loss": 0.6773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035610198974609375, + "rewards/margins": 0.020626068115234375, + "rewards/rejected": 0.014984130859375, + "step": 963 + }, + { + "epoch": 0.06, + "learning_rate": 9.982185618841713e-08, + "logits/chosen": -2.1265320777893066, + "logits/rejected": -2.101968765258789, + "logps/chosen": -245.19662475585938, + "logps/rejected": -325.61785888671875, + "loss": 0.6363, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.24508515000343323, + "rewards/margins": -0.010020434856414795, + "rewards/rejected": 0.255105584859848, + "step": 964 + }, + { + "epoch": 0.06, + "learning_rate": 9.982106049117779e-08, + "logits/chosen": -2.130901575088501, + "logits/rejected": -2.107503652572632, + "logps/chosen": -149.70913696289062, + "logps/rejected": -267.8372497558594, + "loss": 0.6764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0175628662109375, + "rewards/margins": 0.09195556491613388, + "rewards/rejected": -0.10951843112707138, + "step": 965 + }, + { + "epoch": 0.06, + "learning_rate": 9.98202630240507e-08, + "logits/chosen": -2.1632063388824463, + "logits/rejected": -2.108917236328125, + "logps/chosen": -156.33889770507812, + "logps/rejected": -425.1044921875, + "loss": 0.6559, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.17985229194164276, + "rewards/margins": -0.04378357529640198, + "rewards/rejected": 0.22363586723804474, + "step": 966 + }, + { + "epoch": 0.06, + "learning_rate": 9.981946378706417e-08, + "logits/chosen": -2.1843392848968506, + "logits/rejected": -2.1815669536590576, + "logps/chosen": -260.37457275390625, + "logps/rejected": -343.131591796875, + "loss": 0.6335, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.22722168266773224, + "rewards/margins": -0.026672378182411194, + "rewards/rejected": 0.25389406085014343, + "step": 967 + }, + { + "epoch": 0.06, + "learning_rate": 9.981866278024658e-08, + "logits/chosen": -2.238769054412842, + "logits/rejected": -2.2269155979156494, + "logps/chosen": -14.842820167541504, + "logps/rejected": -230.69229125976562, + "loss": 0.6736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010946178808808327, + "rewards/margins": 0.05758466571569443, + "rewards/rejected": -0.04663848876953125, + "step": 968 + }, + { + "epoch": 0.06, + "learning_rate": 9.98178600036264e-08, + "logits/chosen": -2.118053913116455, + "logits/rejected": -2.085313558578491, + "logps/chosen": -258.758056640625, + "logps/rejected": -438.1180419921875, + "loss": 0.5772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33247682452201843, + "rewards/margins": 0.0921020656824112, + "rewards/rejected": 0.24037475883960724, + "step": 969 + }, + { + "epoch": 0.06, + "learning_rate": 9.981705545723213e-08, + "logits/chosen": -2.3295681476593018, + "logits/rejected": -2.331237554550171, + "logps/chosen": -0.00417147483676672, + "logps/rejected": -107.65354919433594, + "loss": 0.6722, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8972794098081067e-05, + "rewards/margins": 0.08287202566862106, + "rewards/rejected": -0.0829010009765625, + "step": 970 + }, + { + "epoch": 0.06, + "learning_rate": 9.981624914109238e-08, + "logits/chosen": -2.274259328842163, + "logits/rejected": -2.2809760570526123, + "logps/chosen": -0.010032259859144688, + "logps/rejected": -264.4068603515625, + "loss": 0.6602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012770993635058403, + "rewards/margins": 0.133292093873024, + "rewards/rejected": -0.13341979682445526, + "step": 971 + }, + { + "epoch": 0.06, + "learning_rate": 9.981544105523581e-08, + "logits/chosen": -1.8428187370300293, + "logits/rejected": -1.8914791345596313, + "logps/chosen": -246.774169921875, + "logps/rejected": -227.57843017578125, + "loss": 0.6524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0930023193359375, + "rewards/margins": 0.05716400220990181, + "rewards/rejected": 0.03583831712603569, + "step": 972 + }, + { + "epoch": 0.06, + "learning_rate": 9.981463119969107e-08, + "logits/chosen": -2.079371452331543, + "logits/rejected": -2.076540946960449, + "logps/chosen": -217.57228088378906, + "logps/rejected": -333.02606201171875, + "loss": 0.6997, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.18119965493679047, + "rewards/margins": -0.22298429906368256, + "rewards/rejected": 0.404183954000473, + "step": 973 + }, + { + "epoch": 0.06, + "learning_rate": 9.981381957448698e-08, + "logits/chosen": -2.1798818111419678, + "logits/rejected": -2.1631834506988525, + "logps/chosen": -120.57638549804688, + "logps/rejected": -237.0269012451172, + "loss": 0.6734, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1579490751028061, + "rewards/margins": -0.08840255439281464, + "rewards/rejected": 0.24635162949562073, + "step": 974 + }, + { + "epoch": 0.06, + "learning_rate": 9.981300617965234e-08, + "logits/chosen": -1.973236083984375, + "logits/rejected": -1.9736770391464233, + "logps/chosen": -21.487348556518555, + "logps/rejected": -72.70931243896484, + "loss": 0.7061, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.040198709815740585, + "rewards/margins": -0.01318378560245037, + "rewards/rejected": -0.027014924213290215, + "step": 975 + }, + { + "epoch": 0.06, + "learning_rate": 9.981219101521605e-08, + "logits/chosen": -2.261462688446045, + "logits/rejected": -2.2646539211273193, + "logps/chosen": -37.92622375488281, + "logps/rejected": -97.04471588134766, + "loss": 0.6663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040247347205877304, + "rewards/margins": 0.05412445217370987, + "rewards/rejected": -0.01387710589915514, + "step": 976 + }, + { + "epoch": 0.06, + "learning_rate": 9.98113740812071e-08, + "logits/chosen": -2.2651994228363037, + "logits/rejected": -2.257188558578491, + "logps/chosen": -88.04766082763672, + "logps/rejected": -244.03697204589844, + "loss": 0.6295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09366302937269211, + "rewards/margins": 0.16509400308132172, + "rewards/rejected": -0.07143097370862961, + "step": 977 + }, + { + "epoch": 0.06, + "learning_rate": 9.981055537765448e-08, + "logits/chosen": -2.178854465484619, + "logits/rejected": -2.183556318283081, + "logps/chosen": -0.00010669046605471522, + "logps/rejected": -180.87820434570312, + "loss": 0.6604, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.390568725895719e-07, + "rewards/margins": 0.13549421727657318, + "rewards/rejected": -0.1354934722185135, + "step": 978 + }, + { + "epoch": 0.06, + "learning_rate": 9.980973490458727e-08, + "logits/chosen": -2.2425599098205566, + "logits/rejected": -2.208874225616455, + "logps/chosen": -5.697483539581299, + "logps/rejected": -244.72097778320312, + "loss": 0.6362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04383368417620659, + "rewards/margins": 0.19157691299915314, + "rewards/rejected": -0.14774322509765625, + "step": 979 + }, + { + "epoch": 0.06, + "learning_rate": 9.980891266203464e-08, + "logits/chosen": -2.0985395908355713, + "logits/rejected": -2.097162961959839, + "logps/chosen": -37.25054931640625, + "logps/rejected": -70.87322998046875, + "loss": 0.6787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03653449937701225, + "rewards/margins": 0.004687879234552383, + "rewards/rejected": 0.03184662014245987, + "step": 980 + }, + { + "epoch": 0.06, + "learning_rate": 9.98080886500258e-08, + "logits/chosen": -2.1955089569091797, + "logits/rejected": -2.171302556991577, + "logps/chosen": -191.98995971679688, + "logps/rejected": -371.884033203125, + "loss": 0.6243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.247222900390625, + "rewards/margins": 0.028540030121803284, + "rewards/rejected": 0.21868287026882172, + "step": 981 + }, + { + "epoch": 0.06, + "learning_rate": 9.980726286859e-08, + "logits/chosen": -2.0844452381134033, + "logits/rejected": -2.0802736282348633, + "logps/chosen": -75.00345611572266, + "logps/rejected": -299.36846923828125, + "loss": 0.6823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02122345007956028, + "rewards/margins": 0.00395050086081028, + "rewards/rejected": 0.01727294921875, + "step": 982 + }, + { + "epoch": 0.06, + "learning_rate": 9.980643531775659e-08, + "logits/chosen": -2.237611770629883, + "logits/rejected": -2.2290990352630615, + "logps/chosen": -0.23486857116222382, + "logps/rejected": -176.9033966064453, + "loss": 0.683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002203752053901553, + "rewards/margins": 0.04898948594927788, + "rewards/rejected": -0.0511932373046875, + "step": 983 + }, + { + "epoch": 0.06, + "learning_rate": 9.980560599755497e-08, + "logits/chosen": -1.9946129322052002, + "logits/rejected": -1.9929202795028687, + "logps/chosen": -29.85826301574707, + "logps/rejected": -79.14686584472656, + "loss": 0.6972, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.009532165713608265, + "rewards/margins": -0.025434114038944244, + "rewards/rejected": 0.034966278821229935, + "step": 984 + }, + { + "epoch": 0.06, + "learning_rate": 9.98047749080146e-08, + "logits/chosen": -2.050983190536499, + "logits/rejected": -1.99014413356781, + "logps/chosen": -190.22784423828125, + "logps/rejected": -257.82208251953125, + "loss": 0.6502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07799530029296875, + "rewards/margins": 0.10806121677160263, + "rewards/rejected": -0.03006591834127903, + "step": 985 + }, + { + "epoch": 0.06, + "learning_rate": 9.980394204916501e-08, + "logits/chosen": -2.1717045307159424, + "logits/rejected": -2.141444206237793, + "logps/chosen": -185.10220336914062, + "logps/rejected": -207.5469970703125, + "loss": 0.6874, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.015103149227797985, + "rewards/margins": -0.00544128380715847, + "rewards/rejected": -0.009661865420639515, + "step": 986 + }, + { + "epoch": 0.06, + "learning_rate": 9.980310742103577e-08, + "logits/chosen": -2.096925973892212, + "logits/rejected": -2.1031038761138916, + "logps/chosen": -15.805232048034668, + "logps/rejected": -88.44336700439453, + "loss": 0.6837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024998951703310013, + "rewards/margins": 0.012981128878891468, + "rewards/rejected": 0.012017822824418545, + "step": 987 + }, + { + "epoch": 0.06, + "learning_rate": 9.980227102365654e-08, + "logits/chosen": -2.0464088916778564, + "logits/rejected": -2.049597978591919, + "logps/chosen": -0.003693578066304326, + "logps/rejected": -111.21471405029297, + "loss": 0.681, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.789058584719896e-05, + "rewards/margins": 0.04861585423350334, + "rewards/rejected": -0.048567961901426315, + "step": 988 + }, + { + "epoch": 0.06, + "learning_rate": 9.980143285705705e-08, + "logits/chosen": -2.3812131881713867, + "logits/rejected": -2.3844573497772217, + "logps/chosen": -24.8369140625, + "logps/rejected": -128.49078369140625, + "loss": 0.6623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07379455864429474, + "rewards/margins": 0.0601196326315403, + "rewards/rejected": 0.013674926944077015, + "step": 989 + }, + { + "epoch": 0.06, + "learning_rate": 9.980059292126706e-08, + "logits/chosen": -1.9101850986480713, + "logits/rejected": -1.8974941968917847, + "logps/chosen": -49.14039611816406, + "logps/rejected": -188.8473663330078, + "loss": 0.6329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07893715053796768, + "rewards/margins": 0.16599006950855255, + "rewards/rejected": -0.08705291897058487, + "step": 990 + }, + { + "epoch": 0.06, + "learning_rate": 9.97997512163164e-08, + "logits/chosen": -2.1100196838378906, + "logits/rejected": -2.032585620880127, + "logps/chosen": -303.10546875, + "logps/rejected": -472.77569580078125, + "loss": 0.6181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24037475883960724, + "rewards/margins": 0.013143926858901978, + "rewards/rejected": 0.22723083198070526, + "step": 991 + }, + { + "epoch": 0.06, + "learning_rate": 9.979890774223499e-08, + "logits/chosen": -2.357356548309326, + "logits/rejected": -2.2999179363250732, + "logps/chosen": -79.39813232421875, + "logps/rejected": -467.3158264160156, + "loss": 0.6126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06538086384534836, + "rewards/margins": 0.2641448974609375, + "rewards/rejected": -0.19876404106616974, + "step": 992 + }, + { + "epoch": 0.06, + "learning_rate": 9.979806249905276e-08, + "logits/chosen": -2.2328670024871826, + "logits/rejected": -2.227217435836792, + "logps/chosen": -2.347430467605591, + "logps/rejected": -122.1884765625, + "loss": 0.6599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010754442773759365, + "rewards/margins": 0.11013340950012207, + "rewards/rejected": -0.09937896579504013, + "step": 993 + }, + { + "epoch": 0.06, + "learning_rate": 9.979721548679978e-08, + "logits/chosen": -1.9599467515945435, + "logits/rejected": -2.0555660724639893, + "logps/chosen": -247.21463012695312, + "logps/rejected": -423.140625, + "loss": 0.6176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15644989907741547, + "rewards/margins": 0.03163910657167435, + "rewards/rejected": 0.12481079250574112, + "step": 994 + }, + { + "epoch": 0.06, + "learning_rate": 9.979636670550611e-08, + "logits/chosen": -2.182974338531494, + "logits/rejected": -2.1496317386627197, + "logps/chosen": -231.2081298828125, + "logps/rejected": -326.36688232421875, + "loss": 0.5838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3019165098667145, + "rewards/margins": 0.0877685546875, + "rewards/rejected": 0.21414795517921448, + "step": 995 + }, + { + "epoch": 0.06, + "learning_rate": 9.979551615520192e-08, + "logits/chosen": -2.053478956222534, + "logits/rejected": -2.034817934036255, + "logps/chosen": -168.57432556152344, + "logps/rejected": -371.52789306640625, + "loss": 0.5897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3089584410190582, + "rewards/margins": 0.05198821425437927, + "rewards/rejected": 0.25697022676467896, + "step": 996 + }, + { + "epoch": 0.06, + "learning_rate": 9.979466383591742e-08, + "logits/chosen": -2.137392520904541, + "logits/rejected": -2.136215925216675, + "logps/chosen": -7.625661373138428, + "logps/rejected": -145.12643432617188, + "loss": 0.6676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024989604949951172, + "rewards/margins": 0.07678404450416565, + "rewards/rejected": -0.05179443582892418, + "step": 997 + }, + { + "epoch": 0.06, + "learning_rate": 9.97938097476829e-08, + "logits/chosen": -2.000048875808716, + "logits/rejected": -2.009943962097168, + "logps/chosen": -253.03536987304688, + "logps/rejected": -266.0021667480469, + "loss": 0.5903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3668579161167145, + "rewards/margins": 0.040966808795928955, + "rewards/rejected": 0.3258911073207855, + "step": 998 + }, + { + "epoch": 0.06, + "learning_rate": 9.979295389052866e-08, + "logits/chosen": -2.1846060752868652, + "logits/rejected": -2.1816632747650146, + "logps/chosen": -44.3603515625, + "logps/rejected": -102.58892822265625, + "loss": 0.6915, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.030471419915556908, + "rewards/margins": -0.011035537347197533, + "rewards/rejected": 0.04150695726275444, + "step": 999 + }, + { + "epoch": 0.06, + "learning_rate": 9.979209626448517e-08, + "logits/chosen": -2.218801498413086, + "logits/rejected": -2.219714403152466, + "logps/chosen": -21.566984176635742, + "logps/rejected": -151.00576782226562, + "loss": 0.7093, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0015090942615643144, + "rewards/margins": -0.06627197563648224, + "rewards/rejected": 0.06476288288831711, + "step": 1000 + }, + { + "epoch": 0.06, + "learning_rate": 9.979123686958284e-08, + "logits/chosen": -2.168231964111328, + "logits/rejected": -2.154254674911499, + "logps/chosen": -260.5521545410156, + "logps/rejected": -354.1014404296875, + "loss": 0.5998, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3475799560546875, + "rewards/margins": -0.036541759967803955, + "rewards/rejected": 0.38412171602249146, + "step": 1001 + }, + { + "epoch": 0.06, + "learning_rate": 9.979037570585223e-08, + "logits/chosen": -2.4578146934509277, + "logits/rejected": -2.4583921432495117, + "logps/chosen": -0.4958310127258301, + "logps/rejected": -86.65328216552734, + "loss": 0.7062, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.004769447725266218, + "rewards/margins": -0.04917176067829132, + "rewards/rejected": 0.044402312487363815, + "step": 1002 + }, + { + "epoch": 0.06, + "learning_rate": 9.978951277332391e-08, + "logits/chosen": -2.1506435871124268, + "logits/rejected": -2.105529546737671, + "logps/chosen": -181.0504913330078, + "logps/rejected": -516.018798828125, + "loss": 0.6056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21414338052272797, + "rewards/margins": 0.08399811387062073, + "rewards/rejected": 0.13014526665210724, + "step": 1003 + }, + { + "epoch": 0.06, + "learning_rate": 9.978864807202856e-08, + "logits/chosen": -2.2178337574005127, + "logits/rejected": -2.202214241027832, + "logps/chosen": -193.2448272705078, + "logps/rejected": -287.031005859375, + "loss": 0.5515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29816436767578125, + "rewards/margins": 0.2645919919013977, + "rewards/rejected": 0.03357238695025444, + "step": 1004 + }, + { + "epoch": 0.06, + "learning_rate": 9.978778160199688e-08, + "logits/chosen": -2.177624225616455, + "logits/rejected": -2.1737473011016846, + "logps/chosen": -22.866891860961914, + "logps/rejected": -174.58413696289062, + "loss": 0.6941, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.024624062702059746, + "rewards/margins": -0.033084675669670105, + "rewards/rejected": 0.057708740234375, + "step": 1005 + }, + { + "epoch": 0.06, + "learning_rate": 9.978691336325967e-08, + "logits/chosen": -2.192918300628662, + "logits/rejected": -2.1933207511901855, + "logps/chosen": -46.10372543334961, + "logps/rejected": -211.01715087890625, + "loss": 0.6668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04290657117962837, + "rewards/margins": 0.0656406432390213, + "rewards/rejected": -0.02273407019674778, + "step": 1006 + }, + { + "epoch": 0.06, + "learning_rate": 9.978604335584777e-08, + "logits/chosen": -1.9767839908599854, + "logits/rejected": -1.9666213989257812, + "logps/chosen": -10.071468353271484, + "logps/rejected": -207.76385498046875, + "loss": 0.6732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008262253366410732, + "rewards/margins": 0.08773688971996307, + "rewards/rejected": -0.09599914401769638, + "step": 1007 + }, + { + "epoch": 0.06, + "learning_rate": 9.978517157979206e-08, + "logits/chosen": -2.2977020740509033, + "logits/rejected": -2.2757108211517334, + "logps/chosen": -29.20661735534668, + "logps/rejected": -268.46185302734375, + "loss": 0.6417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006227684207260609, + "rewards/margins": 0.18480435013771057, + "rewards/rejected": -0.17857666313648224, + "step": 1008 + }, + { + "epoch": 0.06, + "learning_rate": 9.978429803512354e-08, + "logits/chosen": -2.1026272773742676, + "logits/rejected": -2.093886613845825, + "logps/chosen": -22.087749481201172, + "logps/rejected": -156.75991821289062, + "loss": 0.6707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02628192864358425, + "rewards/margins": 0.06683216243982315, + "rewards/rejected": -0.04055023193359375, + "step": 1009 + }, + { + "epoch": 0.06, + "learning_rate": 9.978342272187324e-08, + "logits/chosen": -2.128955125808716, + "logits/rejected": -2.1332030296325684, + "logps/chosen": -407.09039306640625, + "logps/rejected": -483.21417236328125, + "loss": 0.4771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5845794677734375, + "rewards/margins": 0.3763885498046875, + "rewards/rejected": 0.20819091796875, + "step": 1010 + }, + { + "epoch": 0.06, + "learning_rate": 9.978254564007223e-08, + "logits/chosen": -2.2861196994781494, + "logits/rejected": -2.2674295902252197, + "logps/chosen": -10.959186553955078, + "logps/rejected": -198.83935546875, + "loss": 0.6935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018444061279296875, + "rewards/margins": 0.013742830604314804, + "rewards/rejected": -0.03218689188361168, + "step": 1011 + }, + { + "epoch": 0.06, + "learning_rate": 9.97816667897517e-08, + "logits/chosen": -2.117528200149536, + "logits/rejected": -2.1128554344177246, + "logps/chosen": -77.86659240722656, + "logps/rejected": -142.87156677246094, + "loss": 0.6733, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.089380644261837, + "rewards/margins": -0.022243499755859375, + "rewards/rejected": 0.11162414401769638, + "step": 1012 + }, + { + "epoch": 0.06, + "learning_rate": 9.978078617094286e-08, + "logits/chosen": -2.1915929317474365, + "logits/rejected": -2.1811161041259766, + "logps/chosen": -52.84918975830078, + "logps/rejected": -287.6614990234375, + "loss": 0.6753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004379653837531805, + "rewards/margins": 0.052621081471443176, + "rewards/rejected": -0.05700073391199112, + "step": 1013 + }, + { + "epoch": 0.06, + "learning_rate": 9.977990378367698e-08, + "logits/chosen": -2.2772560119628906, + "logits/rejected": -2.2667171955108643, + "logps/chosen": -274.06634521484375, + "logps/rejected": -385.9646911621094, + "loss": 0.5893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3738464415073395, + "rewards/margins": 0.07485657930374146, + "rewards/rejected": 0.298989862203598, + "step": 1014 + }, + { + "epoch": 0.06, + "learning_rate": 9.977901962798542e-08, + "logits/chosen": -2.1734395027160645, + "logits/rejected": -2.1423656940460205, + "logps/chosen": -199.51919555664062, + "logps/rejected": -311.15106201171875, + "loss": 0.569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3171752989292145, + "rewards/margins": 0.1583251953125, + "rewards/rejected": 0.15885010361671448, + "step": 1015 + }, + { + "epoch": 0.06, + "learning_rate": 9.977813370389959e-08, + "logits/chosen": -2.2805421352386475, + "logits/rejected": -2.2805163860321045, + "logps/chosen": -2.2389919757843018, + "logps/rejected": -23.813650131225586, + "loss": 0.6859, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010105753317475319, + "rewards/margins": 0.020192289724946022, + "rewards/rejected": -0.03029804304242134, + "step": 1016 + }, + { + "epoch": 0.06, + "learning_rate": 9.977724601145096e-08, + "logits/chosen": -2.147855281829834, + "logits/rejected": -2.1390206813812256, + "logps/chosen": -10.439682960510254, + "logps/rejected": -157.42498779296875, + "loss": 0.669, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.961822691664565e-06, + "rewards/margins": 0.09920721501111984, + "rewards/rejected": -0.09921417385339737, + "step": 1017 + }, + { + "epoch": 0.06, + "learning_rate": 9.977635655067106e-08, + "logits/chosen": -2.247206926345825, + "logits/rejected": -2.2363388538360596, + "logps/chosen": -11.488384246826172, + "logps/rejected": -75.4867172241211, + "loss": 0.7059, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.011133193969726562, + "rewards/margins": -0.06186638027429581, + "rewards/rejected": 0.07299957424402237, + "step": 1018 + }, + { + "epoch": 0.06, + "learning_rate": 9.977546532159149e-08, + "logits/chosen": -2.3348214626312256, + "logits/rejected": -2.325279951095581, + "logps/chosen": -25.559520721435547, + "logps/rejected": -114.0550537109375, + "loss": 0.6766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009259223937988281, + "rewards/margins": 0.07772350311279297, + "rewards/rejected": -0.08698272705078125, + "step": 1019 + }, + { + "epoch": 0.06, + "learning_rate": 9.977457232424392e-08, + "logits/chosen": -2.134687662124634, + "logits/rejected": -2.1191184520721436, + "logps/chosen": -225.855712890625, + "logps/rejected": -312.3915710449219, + "loss": 0.5654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38193511962890625, + "rewards/margins": 0.1019698977470398, + "rewards/rejected": 0.27996522188186646, + "step": 1020 + }, + { + "epoch": 0.06, + "learning_rate": 9.977367755866006e-08, + "logits/chosen": -2.2205381393432617, + "logits/rejected": -2.0370233058929443, + "logps/chosen": -275.6081848144531, + "logps/rejected": -573.6961059570312, + "loss": 0.5975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.278054803609848, + "rewards/margins": 0.10729674994945526, + "rewards/rejected": 0.17075805366039276, + "step": 1021 + }, + { + "epoch": 0.06, + "learning_rate": 9.977278102487172e-08, + "logits/chosen": -2.2164907455444336, + "logits/rejected": -2.1743252277374268, + "logps/chosen": -40.156734466552734, + "logps/rejected": -332.25091552734375, + "loss": 0.6308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018637467175722122, + "rewards/margins": 0.23295935988426208, + "rewards/rejected": -0.21432189643383026, + "step": 1022 + }, + { + "epoch": 0.06, + "learning_rate": 9.977188272291072e-08, + "logits/chosen": -2.0219063758850098, + "logits/rejected": -1.9707159996032715, + "logps/chosen": -270.01422119140625, + "logps/rejected": -568.0294799804688, + "loss": 0.5899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2750183045864105, + "rewards/margins": 0.08820800483226776, + "rewards/rejected": 0.18681029975414276, + "step": 1023 + }, + { + "epoch": 0.06, + "learning_rate": 9.977098265280898e-08, + "logits/chosen": -2.0686960220336914, + "logits/rejected": -2.068666458129883, + "logps/chosen": -10.748099327087402, + "logps/rejected": -61.803955078125, + "loss": 0.7039, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00272712716832757, + "rewards/margins": -0.04083824157714844, + "rewards/rejected": 0.04356537014245987, + "step": 1024 + }, + { + "epoch": 0.06, + "learning_rate": 9.97700808145985e-08, + "logits/chosen": -1.9308112859725952, + "logits/rejected": -1.923830270767212, + "logps/chosen": -294.137451171875, + "logps/rejected": -340.8663330078125, + "loss": 0.5955, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3773742616176605, + "rewards/margins": -0.020251482725143433, + "rewards/rejected": 0.39762574434280396, + "step": 1025 + }, + { + "epoch": 0.06, + "learning_rate": 9.976917720831128e-08, + "logits/chosen": -2.126279830932617, + "logits/rejected": -2.1216228008270264, + "logps/chosen": -29.051267623901367, + "logps/rejected": -121.3353500366211, + "loss": 0.6604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012365341186523438, + "rewards/margins": 0.1503887176513672, + "rewards/rejected": -0.16275405883789062, + "step": 1026 + }, + { + "epoch": 0.06, + "learning_rate": 9.976827183397946e-08, + "logits/chosen": -2.2199602127075195, + "logits/rejected": -2.2130706310272217, + "logps/chosen": -15.83633041381836, + "logps/rejected": -281.8475646972656, + "loss": 0.6613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018868064507842064, + "rewards/margins": 0.09587307274341583, + "rewards/rejected": -0.07700500637292862, + "step": 1027 + }, + { + "epoch": 0.06, + "learning_rate": 9.976736469163516e-08, + "logits/chosen": -2.303524971008301, + "logits/rejected": -2.3036489486694336, + "logps/chosen": -36.94062423706055, + "logps/rejected": -138.6099853515625, + "loss": 0.6591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.057897187769412994, + "rewards/margins": 0.0778907835483551, + "rewards/rejected": -0.01999359205365181, + "step": 1028 + }, + { + "epoch": 0.06, + "learning_rate": 9.976645578131065e-08, + "logits/chosen": -2.1106724739074707, + "logits/rejected": -2.075073480606079, + "logps/chosen": -212.57948303222656, + "logps/rejected": -454.35858154296875, + "loss": 0.5445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4572708308696747, + "rewards/margins": 0.10088959336280823, + "rewards/rejected": 0.35638123750686646, + "step": 1029 + }, + { + "epoch": 0.06, + "learning_rate": 9.976554510303817e-08, + "logits/chosen": -2.2349276542663574, + "logits/rejected": -2.2074406147003174, + "logps/chosen": -171.6524658203125, + "logps/rejected": -340.76190185546875, + "loss": 0.6054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25163576006889343, + "rewards/margins": 0.03572389483451843, + "rewards/rejected": 0.215911865234375, + "step": 1030 + }, + { + "epoch": 0.06, + "learning_rate": 9.976463265685012e-08, + "logits/chosen": -2.2710437774658203, + "logits/rejected": -2.269782543182373, + "logps/chosen": -20.577781677246094, + "logps/rejected": -185.3443603515625, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026270676404237747, + "rewards/margins": 0.04732856526970863, + "rewards/rejected": -0.07359924167394638, + "step": 1031 + }, + { + "epoch": 0.06, + "learning_rate": 9.976371844277887e-08, + "logits/chosen": -2.020190954208374, + "logits/rejected": -2.1012485027313232, + "logps/chosen": -242.6000518798828, + "logps/rejected": -196.0469970703125, + "loss": 0.7093, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05380401760339737, + "rewards/margins": -0.03143463283777237, + "rewards/rejected": -0.022369384765625, + "step": 1032 + }, + { + "epoch": 0.06, + "learning_rate": 9.976280246085693e-08, + "logits/chosen": -2.2695822715759277, + "logits/rejected": -2.264038562774658, + "logps/chosen": -79.97596740722656, + "logps/rejected": -221.52102661132812, + "loss": 0.6374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12506866455078125, + "rewards/margins": 0.09293975681066513, + "rewards/rejected": 0.03212890774011612, + "step": 1033 + }, + { + "epoch": 0.06, + "learning_rate": 9.976188471111684e-08, + "logits/chosen": -2.1748430728912354, + "logits/rejected": -2.180870532989502, + "logps/chosen": -165.11990356445312, + "logps/rejected": -379.9453125, + "loss": 0.6635, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21055451035499573, + "rewards/margins": -0.15104827284812927, + "rewards/rejected": 0.361602783203125, + "step": 1034 + }, + { + "epoch": 0.06, + "learning_rate": 9.976096519359119e-08, + "logits/chosen": -2.1662466526031494, + "logits/rejected": -2.151134729385376, + "logps/chosen": -144.76173400878906, + "logps/rejected": -241.341796875, + "loss": 0.679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00933074951171875, + "rewards/margins": 0.08365020900964737, + "rewards/rejected": -0.09298095852136612, + "step": 1035 + }, + { + "epoch": 0.06, + "learning_rate": 9.976004390831263e-08, + "logits/chosen": -2.20400071144104, + "logits/rejected": -2.1930205821990967, + "logps/chosen": -126.33663177490234, + "logps/rejected": -254.98623657226562, + "loss": 0.6568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034479524940252304, + "rewards/margins": 0.11774216592311859, + "rewards/rejected": -0.08326263725757599, + "step": 1036 + }, + { + "epoch": 0.06, + "learning_rate": 9.975912085531392e-08, + "logits/chosen": -2.144054412841797, + "logits/rejected": -2.1338183879852295, + "logps/chosen": -0.0003082416660618037, + "logps/rejected": -146.08116149902344, + "loss": 0.658, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2405292838811874e-06, + "rewards/margins": 0.1456977277994156, + "rewards/rejected": -0.1456954926252365, + "step": 1037 + }, + { + "epoch": 0.06, + "learning_rate": 9.975819603462785e-08, + "logits/chosen": -2.0895931720733643, + "logits/rejected": -2.094440221786499, + "logps/chosen": -42.419342041015625, + "logps/rejected": -137.13565063476562, + "loss": 0.7013, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012263107113540173, + "rewards/margins": -0.013465499505400658, + "rewards/rejected": 0.0012023926246911287, + "step": 1038 + }, + { + "epoch": 0.06, + "learning_rate": 9.975726944628725e-08, + "logits/chosen": -1.9822262525558472, + "logits/rejected": -1.9712121486663818, + "logps/chosen": -67.99507904052734, + "logps/rejected": -146.56918334960938, + "loss": 0.6508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07843780517578125, + "rewards/margins": 0.06305541843175888, + "rewards/rejected": 0.015382385812699795, + "step": 1039 + }, + { + "epoch": 0.06, + "learning_rate": 9.975634109032506e-08, + "logits/chosen": -2.2047648429870605, + "logits/rejected": -2.181434154510498, + "logps/chosen": -0.16206824779510498, + "logps/rejected": -278.2314453125, + "loss": 0.6357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017555594677105546, + "rewards/margins": 0.2445884346961975, + "rewards/rejected": -0.24634400010108948, + "step": 1040 + }, + { + "epoch": 0.06, + "learning_rate": 9.975541096677425e-08, + "logits/chosen": -2.030349016189575, + "logits/rejected": -2.016069173812866, + "logps/chosen": -124.08628845214844, + "logps/rejected": -282.89349365234375, + "loss": 0.6452, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19917984306812286, + "rewards/margins": -0.043852999806404114, + "rewards/rejected": 0.24303284287452698, + "step": 1041 + }, + { + "epoch": 0.06, + "learning_rate": 9.975447907566785e-08, + "logits/chosen": -2.2023160457611084, + "logits/rejected": -2.1865181922912598, + "logps/chosen": -37.09564971923828, + "logps/rejected": -215.91036987304688, + "loss": 0.6265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056917574256658554, + "rewards/margins": 0.23088303208351135, + "rewards/rejected": -0.1739654541015625, + "step": 1042 + }, + { + "epoch": 0.06, + "learning_rate": 9.975354541703899e-08, + "logits/chosen": -2.2716968059539795, + "logits/rejected": -2.2573025226593018, + "logps/chosen": -7.524044990539551, + "logps/rejected": -157.22354125976562, + "loss": 0.6465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021387577056884766, + "rewards/margins": 0.15984125435352325, + "rewards/rejected": -0.1384536772966385, + "step": 1043 + }, + { + "epoch": 0.06, + "learning_rate": 9.975260999092081e-08, + "logits/chosen": -2.0803020000457764, + "logits/rejected": -2.099578619003296, + "logps/chosen": -276.37481689453125, + "logps/rejected": -373.939208984375, + "loss": 0.5876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39454957842826843, + "rewards/margins": 0.005523681640625, + "rewards/rejected": 0.38902589678764343, + "step": 1044 + }, + { + "epoch": 0.06, + "learning_rate": 9.975167279734657e-08, + "logits/chosen": -2.174669027328491, + "logits/rejected": -2.12957501411438, + "logps/chosen": -291.6651306152344, + "logps/rejected": -342.3453369140625, + "loss": 0.5381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.372171014547348, + "rewards/margins": 0.2286224216222763, + "rewards/rejected": 0.14354859292507172, + "step": 1045 + }, + { + "epoch": 0.06, + "learning_rate": 9.975073383634956e-08, + "logits/chosen": -1.9555962085723877, + "logits/rejected": -1.9603543281555176, + "logps/chosen": -187.61578369140625, + "logps/rejected": -240.9761199951172, + "loss": 0.715, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0018157958984375, + "rewards/margins": -0.11678924411535263, + "rewards/rejected": 0.11860504001379013, + "step": 1046 + }, + { + "epoch": 0.06, + "learning_rate": 9.97497931079631e-08, + "logits/chosen": -2.121340751647949, + "logits/rejected": -2.113797187805176, + "logps/chosen": -35.07862091064453, + "logps/rejected": -236.65574645996094, + "loss": 0.6369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0344112403690815, + "rewards/margins": 0.20225486159324646, + "rewards/rejected": -0.16784362494945526, + "step": 1047 + }, + { + "epoch": 0.06, + "learning_rate": 9.974885061222066e-08, + "logits/chosen": -2.191216230392456, + "logits/rejected": -2.196795701980591, + "logps/chosen": -0.01160342339426279, + "logps/rejected": -39.81637191772461, + "loss": 0.6898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021234258019831032, + "rewards/margins": 0.0015641619684174657, + "rewards/rejected": -0.0017765045631676912, + "step": 1048 + }, + { + "epoch": 0.06, + "learning_rate": 9.974790634915568e-08, + "logits/chosen": -2.3247387409210205, + "logits/rejected": -2.288944959640503, + "logps/chosen": -79.60772705078125, + "logps/rejected": -176.21054077148438, + "loss": 0.6634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03016052208840847, + "rewards/margins": 0.09372711181640625, + "rewards/rejected": -0.06356658786535263, + "step": 1049 + }, + { + "epoch": 0.06, + "learning_rate": 9.974696031880175e-08, + "logits/chosen": -2.3603997230529785, + "logits/rejected": -2.348752737045288, + "logps/chosen": -0.27321019768714905, + "logps/rejected": -189.43710327148438, + "loss": 0.6765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054716975428164005, + "rewards/margins": 0.06185618042945862, + "rewards/rejected": -0.06732787936925888, + "step": 1050 + }, + { + "epoch": 0.06, + "learning_rate": 9.974601252119244e-08, + "logits/chosen": -2.189335823059082, + "logits/rejected": -2.1869559288024902, + "logps/chosen": -29.80968475341797, + "logps/rejected": -95.63804626464844, + "loss": 0.6582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09704170376062393, + "rewards/margins": 0.022159196436405182, + "rewards/rejected": 0.07488250732421875, + "step": 1051 + }, + { + "epoch": 0.06, + "learning_rate": 9.974506295636142e-08, + "logits/chosen": -2.0264294147491455, + "logits/rejected": -2.010059356689453, + "logps/chosen": -29.398340225219727, + "logps/rejected": -195.11041259765625, + "loss": 0.6687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026823807507753372, + "rewards/margins": 0.06658820807933807, + "rewards/rejected": -0.039764404296875, + "step": 1052 + }, + { + "epoch": 0.06, + "learning_rate": 9.974411162434243e-08, + "logits/chosen": -2.1472010612487793, + "logits/rejected": -2.1395955085754395, + "logps/chosen": -269.93011474609375, + "logps/rejected": -378.45880126953125, + "loss": 0.5924, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3589630126953125, + "rewards/margins": -0.0052093565464019775, + "rewards/rejected": 0.3641723692417145, + "step": 1053 + }, + { + "epoch": 0.06, + "learning_rate": 9.974315852516928e-08, + "logits/chosen": -2.074634313583374, + "logits/rejected": -2.070098876953125, + "logps/chosen": -17.202335357666016, + "logps/rejected": -168.766845703125, + "loss": 0.6793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029909705743193626, + "rewards/margins": 0.027909278869628906, + "rewards/rejected": 0.0020004273392260075, + "step": 1054 + }, + { + "epoch": 0.06, + "learning_rate": 9.974220365887582e-08, + "logits/chosen": -2.181898593902588, + "logits/rejected": -2.1884593963623047, + "logps/chosen": -47.660614013671875, + "logps/rejected": -151.57359313964844, + "loss": 0.6797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02178649976849556, + "rewards/margins": 0.03400726616382599, + "rewards/rejected": -0.01222076453268528, + "step": 1055 + }, + { + "epoch": 0.06, + "learning_rate": 9.974124702549597e-08, + "logits/chosen": -2.153310775756836, + "logits/rejected": -2.115222454071045, + "logps/chosen": -234.55441284179688, + "logps/rejected": -373.4958801269531, + "loss": 0.6118, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.28156739473342896, + "rewards/margins": -0.04597166180610657, + "rewards/rejected": 0.3275390565395355, + "step": 1056 + }, + { + "epoch": 0.06, + "learning_rate": 9.974028862506372e-08, + "logits/chosen": -2.12199330329895, + "logits/rejected": -2.0834274291992188, + "logps/chosen": -237.11598205566406, + "logps/rejected": -320.3537292480469, + "loss": 0.6822, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03424530103802681, + "rewards/margins": -0.03849029913544655, + "rewards/rejected": 0.07273560017347336, + "step": 1057 + }, + { + "epoch": 0.06, + "learning_rate": 9.97393284576131e-08, + "logits/chosen": -2.398937940597534, + "logits/rejected": -2.384159803390503, + "logps/chosen": -27.75498390197754, + "logps/rejected": -204.65640258789062, + "loss": 0.6491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012490463443100452, + "rewards/margins": 0.1709774136543274, + "rewards/rejected": -0.15848694741725922, + "step": 1058 + }, + { + "epoch": 0.06, + "learning_rate": 9.973836652317824e-08, + "logits/chosen": -2.1503005027770996, + "logits/rejected": -2.1401960849761963, + "logps/chosen": -191.26040649414062, + "logps/rejected": -208.30526733398438, + "loss": 0.6146, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.27367860078811646, + "rewards/margins": -0.029740899801254272, + "rewards/rejected": 0.3034195005893707, + "step": 1059 + }, + { + "epoch": 0.06, + "learning_rate": 9.97374028217933e-08, + "logits/chosen": -2.171647310256958, + "logits/rejected": -2.154000759124756, + "logps/chosen": -116.5204086303711, + "logps/rejected": -281.33282470703125, + "loss": 0.6641, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.17371444404125214, + "rewards/margins": -0.15367813408374786, + "rewards/rejected": 0.327392578125, + "step": 1060 + }, + { + "epoch": 0.06, + "learning_rate": 9.973643735349254e-08, + "logits/chosen": -2.2295119762420654, + "logits/rejected": -2.1999082565307617, + "logps/chosen": -166.7403564453125, + "logps/rejected": -258.2965087890625, + "loss": 0.6574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008731079287827015, + "rewards/margins": 0.14075317978858948, + "rewards/rejected": -0.13202209770679474, + "step": 1061 + }, + { + "epoch": 0.06, + "learning_rate": 9.973547011831021e-08, + "logits/chosen": -2.0740036964416504, + "logits/rejected": -2.0973422527313232, + "logps/chosen": -279.26007080078125, + "logps/rejected": -349.6708984375, + "loss": 0.6732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05487060546875, + "rewards/margins": 0.06328125298023224, + "rewards/rejected": -0.008410644717514515, + "step": 1062 + }, + { + "epoch": 0.06, + "learning_rate": 9.97345011162807e-08, + "logits/chosen": -2.1995410919189453, + "logits/rejected": -2.190842866897583, + "logps/chosen": -0.048066239804029465, + "logps/rejected": -171.55368041992188, + "loss": 0.685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033386386348865926, + "rewards/margins": 0.038791198283433914, + "rewards/rejected": -0.03912506252527237, + "step": 1063 + }, + { + "epoch": 0.06, + "learning_rate": 9.973353034743845e-08, + "logits/chosen": -2.230257987976074, + "logits/rejected": -2.253939628601074, + "logps/chosen": -143.24020385742188, + "logps/rejected": -252.00559997558594, + "loss": 0.655, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2022354155778885, + "rewards/margins": -0.08162842690944672, + "rewards/rejected": 0.2838638424873352, + "step": 1064 + }, + { + "epoch": 0.06, + "learning_rate": 9.973255781181791e-08, + "logits/chosen": -1.9678863286972046, + "logits/rejected": -1.962775468826294, + "logps/chosen": -35.13723373413086, + "logps/rejected": -235.11419677734375, + "loss": 0.6488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.046506501734256744, + "rewards/margins": 0.13914260268211365, + "rewards/rejected": -0.0926361083984375, + "step": 1065 + }, + { + "epoch": 0.06, + "learning_rate": 9.973158350945368e-08, + "logits/chosen": -2.2407658100128174, + "logits/rejected": -2.227780818939209, + "logps/chosen": -205.0646514892578, + "logps/rejected": -259.8892822265625, + "loss": 0.6388, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.221577450633049, + "rewards/margins": -0.061616525053977966, + "rewards/rejected": 0.283193975687027, + "step": 1066 + }, + { + "epoch": 0.06, + "learning_rate": 9.973060744038032e-08, + "logits/chosen": -2.07474422454834, + "logits/rejected": -2.0730199813842773, + "logps/chosen": -40.208702087402344, + "logps/rejected": -97.83966064453125, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04480476304888725, + "rewards/margins": 0.047513198107481, + "rewards/rejected": -0.09231796115636826, + "step": 1067 + }, + { + "epoch": 0.06, + "learning_rate": 9.972962960463251e-08, + "logits/chosen": -2.208348274230957, + "logits/rejected": -2.2015960216522217, + "logps/chosen": -53.68348693847656, + "logps/rejected": -210.62521362304688, + "loss": 0.7053, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02563171461224556, + "rewards/margins": -0.01942138746380806, + "rewards/rejected": -0.0062103271484375, + "step": 1068 + }, + { + "epoch": 0.06, + "learning_rate": 9.972865000224501e-08, + "logits/chosen": -2.2220356464385986, + "logits/rejected": -2.2053420543670654, + "logps/chosen": -11.355356216430664, + "logps/rejected": -195.09286499023438, + "loss": 0.6631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0034953118301928043, + "rewards/margins": 0.12569379806518555, + "rewards/rejected": -0.12219848483800888, + "step": 1069 + }, + { + "epoch": 0.06, + "learning_rate": 9.972766863325264e-08, + "logits/chosen": -2.1583521366119385, + "logits/rejected": -2.1091978549957275, + "logps/chosen": -164.24908447265625, + "logps/rejected": -488.5585021972656, + "loss": 0.5933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.301910400390625, + "rewards/margins": 0.07844848930835724, + "rewards/rejected": 0.22346191108226776, + "step": 1070 + }, + { + "epoch": 0.06, + "learning_rate": 9.972668549769021e-08, + "logits/chosen": -2.1812870502471924, + "logits/rejected": -2.1267075538635254, + "logps/chosen": -296.2112731933594, + "logps/rejected": -388.1065368652344, + "loss": 0.6536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03408508375287056, + "rewards/margins": 0.12373657524585724, + "rewards/rejected": -0.08965148776769638, + "step": 1071 + }, + { + "epoch": 0.06, + "learning_rate": 9.972570059559267e-08, + "logits/chosen": -1.9540876150131226, + "logits/rejected": -1.9546408653259277, + "logps/chosen": -270.3890686035156, + "logps/rejected": -458.8383483886719, + "loss": 0.6083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.192047119140625, + "rewards/margins": 0.09626159816980362, + "rewards/rejected": 0.09578552097082138, + "step": 1072 + }, + { + "epoch": 0.06, + "learning_rate": 9.972471392699502e-08, + "logits/chosen": -2.1839847564697266, + "logits/rejected": -2.1567788124084473, + "logps/chosen": -193.41604614257812, + "logps/rejected": -396.0865478515625, + "loss": 0.635, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.320169061422348, + "rewards/margins": -0.13295289874076843, + "rewards/rejected": 0.45312196016311646, + "step": 1073 + }, + { + "epoch": 0.06, + "learning_rate": 9.972372549193231e-08, + "logits/chosen": -2.046111822128296, + "logits/rejected": -2.043421506881714, + "logps/chosen": -0.5484140515327454, + "logps/rejected": -56.58563995361328, + "loss": 0.7071, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.010055017657577991, + "rewards/margins": -0.04750542715191841, + "rewards/rejected": 0.037450410425662994, + "step": 1074 + }, + { + "epoch": 0.06, + "learning_rate": 9.972273529043962e-08, + "logits/chosen": -2.0078632831573486, + "logits/rejected": -2.0097708702087402, + "logps/chosen": -13.320112228393555, + "logps/rejected": -75.49482727050781, + "loss": 0.7266, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.07087745517492294, + "rewards/margins": -0.049391552805900574, + "rewards/rejected": -0.02148590050637722, + "step": 1075 + }, + { + "epoch": 0.06, + "learning_rate": 9.972174332255216e-08, + "logits/chosen": -1.8708679676055908, + "logits/rejected": -1.8680115938186646, + "logps/chosen": -0.033867839723825455, + "logps/rejected": -84.55314636230469, + "loss": 0.6855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001038293936289847, + "rewards/margins": 0.03176352381706238, + "rewards/rejected": -0.03280181810259819, + "step": 1076 + }, + { + "epoch": 0.06, + "learning_rate": 9.972074958830518e-08, + "logits/chosen": -2.2082231044769287, + "logits/rejected": -2.203685760498047, + "logps/chosen": -0.00012516466085799038, + "logps/rejected": -102.15406799316406, + "loss": 0.6914, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.741740843248408e-07, + "rewards/margins": 0.007487213704735041, + "rewards/rejected": -0.007487487979233265, + "step": 1077 + }, + { + "epoch": 0.06, + "learning_rate": 9.971975408773396e-08, + "logits/chosen": -2.1966447830200195, + "logits/rejected": -2.1847712993621826, + "logps/chosen": -0.00013934989692643285, + "logps/rejected": -152.66094970703125, + "loss": 0.6342, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.032649591565132e-07, + "rewards/margins": 0.2515968978404999, + "rewards/rejected": -0.2515976130962372, + "step": 1078 + }, + { + "epoch": 0.06, + "learning_rate": 9.971875682087385e-08, + "logits/chosen": -2.2746145725250244, + "logits/rejected": -2.2650301456451416, + "logps/chosen": -0.0001341050083283335, + "logps/rejected": -123.85420227050781, + "loss": 0.647, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4551915445207286e-12, + "rewards/margins": 0.1939949095249176, + "rewards/rejected": -0.1939949095249176, + "step": 1079 + }, + { + "epoch": 0.06, + "learning_rate": 9.971775778776032e-08, + "logits/chosen": -1.9914863109588623, + "logits/rejected": -1.9494178295135498, + "logps/chosen": -269.1910400390625, + "logps/rejected": -338.1719665527344, + "loss": 0.6302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11341247707605362, + "rewards/margins": 0.09670715034008026, + "rewards/rejected": 0.01670532301068306, + "step": 1080 + }, + { + "epoch": 0.06, + "learning_rate": 9.971675698842883e-08, + "logits/chosen": -2.0531558990478516, + "logits/rejected": -2.0278921127319336, + "logps/chosen": -0.14073799550533295, + "logps/rejected": -251.35415649414062, + "loss": 0.6372, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2851101423148066e-05, + "rewards/margins": 0.23718485236167908, + "rewards/rejected": -0.2372177094221115, + "step": 1081 + }, + { + "epoch": 0.06, + "learning_rate": 9.971575442291493e-08, + "logits/chosen": -2.0770070552825928, + "logits/rejected": -2.0727548599243164, + "logps/chosen": -25.379276275634766, + "logps/rejected": -43.63740921020508, + "loss": 0.7232, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.11190014332532883, + "rewards/margins": -0.02173633873462677, + "rewards/rejected": -0.09016380459070206, + "step": 1082 + }, + { + "epoch": 0.06, + "learning_rate": 9.971475009125426e-08, + "logits/chosen": -2.2542035579681396, + "logits/rejected": -2.253472328186035, + "logps/chosen": -0.0010409895330667496, + "logps/rejected": -83.9364013671875, + "loss": 0.703, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.6700088255893206e-06, + "rewards/margins": -0.03922004997730255, + "rewards/rejected": 0.039217378944158554, + "step": 1083 + }, + { + "epoch": 0.06, + "learning_rate": 9.971374399348247e-08, + "logits/chosen": -2.2453057765960693, + "logits/rejected": -2.2189548015594482, + "logps/chosen": -192.06341552734375, + "logps/rejected": -385.3634948730469, + "loss": 0.6441, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21074829995632172, + "rewards/margins": -0.07189331948757172, + "rewards/rejected": 0.28264161944389343, + "step": 1084 + }, + { + "epoch": 0.06, + "learning_rate": 9.971273612963533e-08, + "logits/chosen": -2.222764730453491, + "logits/rejected": -2.2273073196411133, + "logps/chosen": -1.8834340572357178, + "logps/rejected": -155.74624633789062, + "loss": 0.6873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010479211807250977, + "rewards/margins": 0.036215733736753464, + "rewards/rejected": -0.04669494554400444, + "step": 1085 + }, + { + "epoch": 0.06, + "learning_rate": 9.971172649974864e-08, + "logits/chosen": -2.2165896892547607, + "logits/rejected": -2.212859630584717, + "logps/chosen": -15.250638961791992, + "logps/rejected": -151.1134796142578, + "loss": 0.6145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024985982105135918, + "rewards/margins": 0.3145260810852051, + "rewards/rejected": -0.2895401120185852, + "step": 1086 + }, + { + "epoch": 0.06, + "learning_rate": 9.971071510385824e-08, + "logits/chosen": -2.1144862174987793, + "logits/rejected": -2.018580436706543, + "logps/chosen": -325.90447998046875, + "logps/rejected": -436.1191711425781, + "loss": 0.4841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.51654052734375, + "rewards/margins": 0.33561402559280396, + "rewards/rejected": 0.18092651665210724, + "step": 1087 + }, + { + "epoch": 0.06, + "learning_rate": 9.970970194200008e-08, + "logits/chosen": -2.1218175888061523, + "logits/rejected": -2.143571615219116, + "logps/chosen": -68.34848022460938, + "logps/rejected": -139.08151245117188, + "loss": 0.6631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10453949123620987, + "rewards/margins": 0.02227325737476349, + "rewards/rejected": 0.08226623386144638, + "step": 1088 + }, + { + "epoch": 0.06, + "learning_rate": 9.970868701421015e-08, + "logits/chosen": -2.062490940093994, + "logits/rejected": -2.05155348777771, + "logps/chosen": -83.96438598632812, + "logps/rejected": -225.18907165527344, + "loss": 0.6423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06698761135339737, + "rewards/margins": 0.12609557807445526, + "rewards/rejected": -0.05910797044634819, + "step": 1089 + }, + { + "epoch": 0.06, + "learning_rate": 9.970767032052452e-08, + "logits/chosen": -2.1090924739837646, + "logits/rejected": -2.1010050773620605, + "logps/chosen": -223.0196533203125, + "logps/rejected": -275.29815673828125, + "loss": 0.6075, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3578124940395355, + "rewards/margins": -0.002255260944366455, + "rewards/rejected": 0.360067754983902, + "step": 1090 + }, + { + "epoch": 0.06, + "learning_rate": 9.970665186097929e-08, + "logits/chosen": -2.095935821533203, + "logits/rejected": -2.0905075073242188, + "logps/chosen": -10.269621849060059, + "logps/rejected": -266.8041687011719, + "loss": 0.6525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009599686018191278, + "rewards/margins": 0.16951733827590942, + "rewards/rejected": -0.17047730088233948, + "step": 1091 + }, + { + "epoch": 0.06, + "learning_rate": 9.970563163561063e-08, + "logits/chosen": -2.2445926666259766, + "logits/rejected": -2.2139992713928223, + "logps/chosen": -52.87908172607422, + "logps/rejected": -279.906494140625, + "loss": 0.6236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0631893202662468, + "rewards/margins": 0.2576473355293274, + "rewards/rejected": -0.1944580078125, + "step": 1092 + }, + { + "epoch": 0.06, + "learning_rate": 9.970460964445482e-08, + "logits/chosen": -2.130732774734497, + "logits/rejected": -2.0982866287231445, + "logps/chosen": -192.16224670410156, + "logps/rejected": -378.07476806640625, + "loss": 0.5565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2743881344795227, + "rewards/margins": 0.2622879147529602, + "rewards/rejected": 0.0121002197265625, + "step": 1093 + }, + { + "epoch": 0.06, + "learning_rate": 9.970358588754813e-08, + "logits/chosen": -2.085806131362915, + "logits/rejected": -2.091845750808716, + "logps/chosen": -223.22982788085938, + "logps/rejected": -249.8406982421875, + "loss": 0.672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008096314035356045, + "rewards/margins": 0.06294403225183487, + "rewards/rejected": -0.07104034721851349, + "step": 1094 + }, + { + "epoch": 0.06, + "learning_rate": 9.970256036492693e-08, + "logits/chosen": -2.2435405254364014, + "logits/rejected": -2.2526612281799316, + "logps/chosen": -191.99667358398438, + "logps/rejected": -301.4980773925781, + "loss": 0.6123, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.25893402099609375, + "rewards/margins": -0.023295611143112183, + "rewards/rejected": 0.28222963213920593, + "step": 1095 + }, + { + "epoch": 0.06, + "learning_rate": 9.970153307662768e-08, + "logits/chosen": -2.143355369567871, + "logits/rejected": -2.149456024169922, + "logps/chosen": -26.781980514526367, + "logps/rejected": -170.8479766845703, + "loss": 0.6573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040526773780584335, + "rewards/margins": 0.0661737471818924, + "rewards/rejected": -0.02564697340130806, + "step": 1096 + }, + { + "epoch": 0.06, + "learning_rate": 9.970050402268687e-08, + "logits/chosen": -2.2432761192321777, + "logits/rejected": -2.2327094078063965, + "logps/chosen": -24.64987564086914, + "logps/rejected": -185.2046356201172, + "loss": 0.7062, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0021791458129882812, + "rewards/margins": -0.05185375362634659, + "rewards/rejected": 0.05403289943933487, + "step": 1097 + }, + { + "epoch": 0.06, + "learning_rate": 9.969947320314103e-08, + "logits/chosen": -1.8601531982421875, + "logits/rejected": -1.8330187797546387, + "logps/chosen": -255.28030395507812, + "logps/rejected": -287.1399230957031, + "loss": 0.5657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.358468621969223, + "rewards/margins": 0.08702695369720459, + "rewards/rejected": 0.27144166827201843, + "step": 1098 + }, + { + "epoch": 0.06, + "learning_rate": 9.96984406180268e-08, + "logits/chosen": -2.1343517303466797, + "logits/rejected": -2.093775749206543, + "logps/chosen": -255.45858764648438, + "logps/rejected": -332.5845031738281, + "loss": 0.6226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2654174864292145, + "rewards/margins": 0.0143280029296875, + "rewards/rejected": 0.251089483499527, + "step": 1099 + }, + { + "epoch": 0.06, + "learning_rate": 9.969740626738085e-08, + "logits/chosen": -2.273367166519165, + "logits/rejected": -2.2643020153045654, + "logps/chosen": -7.539280414581299, + "logps/rejected": -125.62225341796875, + "loss": 0.698, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0060389041900634766, + "rewards/margins": -0.00014443369582295418, + "rewards/rejected": -0.005894470494240522, + "step": 1100 + }, + { + "epoch": 0.06, + "learning_rate": 9.969637015123994e-08, + "logits/chosen": -2.0546622276306152, + "logits/rejected": -1.9958385229110718, + "logps/chosen": -197.9920654296875, + "logps/rejected": -418.1184997558594, + "loss": 0.5913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3226455748081207, + "rewards/margins": 0.033702075481414795, + "rewards/rejected": 0.28894349932670593, + "step": 1101 + }, + { + "epoch": 0.06, + "learning_rate": 9.969533226964086e-08, + "logits/chosen": -2.2366819381713867, + "logits/rejected": -2.219733953475952, + "logps/chosen": -275.44073486328125, + "logps/rejected": -409.3357238769531, + "loss": 0.6449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05015258863568306, + "rewards/margins": 0.09416809678077698, + "rewards/rejected": -0.04401550441980362, + "step": 1102 + }, + { + "epoch": 0.06, + "learning_rate": 9.969429262262051e-08, + "logits/chosen": -1.9454033374786377, + "logits/rejected": -1.7888928651809692, + "logps/chosen": -195.28005981445312, + "logps/rejected": -579.30859375, + "loss": 0.5806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06809387356042862, + "rewards/margins": 0.439627081155777, + "rewards/rejected": -0.37153321504592896, + "step": 1103 + }, + { + "epoch": 0.06, + "learning_rate": 9.969325121021579e-08, + "logits/chosen": -2.2137956619262695, + "logits/rejected": -2.208078384399414, + "logps/chosen": -293.52239990234375, + "logps/rejected": -367.4374694824219, + "loss": 0.5492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5153228640556335, + "rewards/margins": 0.06432494521141052, + "rewards/rejected": 0.450997918844223, + "step": 1104 + }, + { + "epoch": 0.06, + "learning_rate": 9.969220803246372e-08, + "logits/chosen": -2.309650421142578, + "logits/rejected": -2.306344747543335, + "logps/chosen": -34.20676040649414, + "logps/rejected": -157.8553009033203, + "loss": 0.6523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0806068405508995, + "rewards/margins": 0.053211212158203125, + "rewards/rejected": 0.02739563025534153, + "step": 1105 + }, + { + "epoch": 0.06, + "learning_rate": 9.969116308940135e-08, + "logits/chosen": -2.048323154449463, + "logits/rejected": -2.029489040374756, + "logps/chosen": -183.01141357421875, + "logps/rejected": -294.64935302734375, + "loss": 0.597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.321533203125, + "rewards/margins": 0.04067382216453552, + "rewards/rejected": 0.2808593809604645, + "step": 1106 + }, + { + "epoch": 0.06, + "learning_rate": 9.969011638106579e-08, + "logits/chosen": -2.1952133178710938, + "logits/rejected": -2.225062847137451, + "logps/chosen": -352.2319641113281, + "logps/rejected": -379.2149963378906, + "loss": 0.5766, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.45993348956108093, + "rewards/margins": -0.014657586812973022, + "rewards/rejected": 0.47459107637405396, + "step": 1107 + }, + { + "epoch": 0.06, + "learning_rate": 9.968906790749426e-08, + "logits/chosen": -2.0832693576812744, + "logits/rejected": -2.0908849239349365, + "logps/chosen": -223.5059814453125, + "logps/rejected": -260.8692932128906, + "loss": 0.6736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03763427957892418, + "rewards/margins": 0.0020080581307411194, + "rewards/rejected": 0.03562622144818306, + "step": 1108 + }, + { + "epoch": 0.06, + "learning_rate": 9.968801766872397e-08, + "logits/chosen": -2.163914918899536, + "logits/rejected": -2.1600492000579834, + "logps/chosen": -16.125286102294922, + "logps/rejected": -99.2717056274414, + "loss": 0.6783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01249465998262167, + "rewards/margins": 0.04737396538257599, + "rewards/rejected": -0.034879304468631744, + "step": 1109 + }, + { + "epoch": 0.06, + "learning_rate": 9.968696566479222e-08, + "logits/chosen": -2.0032429695129395, + "logits/rejected": -2.006197452545166, + "logps/chosen": -302.06536865234375, + "logps/rejected": -350.0234680175781, + "loss": 0.6652, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.25306397676467896, + "rewards/margins": -0.18459472060203552, + "rewards/rejected": 0.4376586973667145, + "step": 1110 + }, + { + "epoch": 0.06, + "learning_rate": 9.968591189573643e-08, + "logits/chosen": -2.1980128288269043, + "logits/rejected": -2.1558070182800293, + "logps/chosen": -39.16349792480469, + "logps/rejected": -267.385498046875, + "loss": 0.6011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03601112589240074, + "rewards/margins": 0.3601261377334595, + "rewards/rejected": -0.32411500811576843, + "step": 1111 + }, + { + "epoch": 0.06, + "learning_rate": 9.9684856361594e-08, + "logits/chosen": -2.240460157394409, + "logits/rejected": -2.2262651920318604, + "logps/chosen": -104.73616027832031, + "logps/rejected": -390.6835021972656, + "loss": 0.6154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09530258178710938, + "rewards/margins": 0.2395225614309311, + "rewards/rejected": -0.14421997964382172, + "step": 1112 + }, + { + "epoch": 0.06, + "learning_rate": 9.968379906240245e-08, + "logits/chosen": -2.0189294815063477, + "logits/rejected": -2.0171566009521484, + "logps/chosen": -27.051862716674805, + "logps/rejected": -117.83320617675781, + "loss": 0.7299, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09292621910572052, + "rewards/margins": -0.06579914689064026, + "rewards/rejected": -0.02712707594037056, + "step": 1113 + }, + { + "epoch": 0.06, + "learning_rate": 9.968273999819932e-08, + "logits/chosen": -1.8705551624298096, + "logits/rejected": -1.813167691230774, + "logps/chosen": -245.89859008789062, + "logps/rejected": -415.3180236816406, + "loss": 0.6193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13579407334327698, + "rewards/margins": 0.15385742485523224, + "rewards/rejected": -0.01806335523724556, + "step": 1114 + }, + { + "epoch": 0.06, + "learning_rate": 9.968167916902224e-08, + "logits/chosen": -2.191352367401123, + "logits/rejected": -2.1892101764678955, + "logps/chosen": -190.077880859375, + "logps/rejected": -252.64334106445312, + "loss": 0.5413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4075759947299957, + "rewards/margins": 0.176097109913826, + "rewards/rejected": 0.23147888481616974, + "step": 1115 + }, + { + "epoch": 0.06, + "learning_rate": 9.96806165749089e-08, + "logits/chosen": -2.3267197608947754, + "logits/rejected": -2.3150904178619385, + "logps/chosen": -47.10566711425781, + "logps/rejected": -199.50384521484375, + "loss": 0.6555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04800758510828018, + "rewards/margins": 0.09914588928222656, + "rewards/rejected": -0.05113830789923668, + "step": 1116 + }, + { + "epoch": 0.07, + "learning_rate": 9.967955221589702e-08, + "logits/chosen": -2.1130614280700684, + "logits/rejected": -2.1060800552368164, + "logps/chosen": -40.39671325683594, + "logps/rejected": -85.35832214355469, + "loss": 0.6582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049585726112127304, + "rewards/margins": 0.055677033960819244, + "rewards/rejected": -0.006091308780014515, + "step": 1117 + }, + { + "epoch": 0.07, + "learning_rate": 9.967848609202446e-08, + "logits/chosen": -2.2697644233703613, + "logits/rejected": -2.267195224761963, + "logps/chosen": -43.17144012451172, + "logps/rejected": -223.1296844482422, + "loss": 0.6832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025301743298768997, + "rewards/margins": 0.06480903923511505, + "rewards/rejected": -0.09011077880859375, + "step": 1118 + }, + { + "epoch": 0.07, + "learning_rate": 9.967741820332906e-08, + "logits/chosen": -2.042616367340088, + "logits/rejected": -2.068568229675293, + "logps/chosen": -165.18780517578125, + "logps/rejected": -394.96002197265625, + "loss": 0.5782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3095535337924957, + "rewards/margins": 0.11111603677272797, + "rewards/rejected": 0.19843749701976776, + "step": 1119 + }, + { + "epoch": 0.07, + "learning_rate": 9.967634854984878e-08, + "logits/chosen": -2.005535364151001, + "logits/rejected": -2.01789927482605, + "logps/chosen": -259.4851379394531, + "logps/rejected": -449.2872009277344, + "loss": 0.6477, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2784271240234375, + "rewards/margins": -0.1688995361328125, + "rewards/rejected": 0.44732666015625, + "step": 1120 + }, + { + "epoch": 0.07, + "learning_rate": 9.967527713162161e-08, + "logits/chosen": -2.111161708831787, + "logits/rejected": -2.105731725692749, + "logps/chosen": -17.041492462158203, + "logps/rejected": -131.10707092285156, + "loss": 0.6771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030773544684052467, + "rewards/margins": 0.09167823940515518, + "rewards/rejected": -0.1224517822265625, + "step": 1121 + }, + { + "epoch": 0.07, + "learning_rate": 9.967420394868559e-08, + "logits/chosen": -2.083794355392456, + "logits/rejected": -2.0073797702789307, + "logps/chosen": -186.49407958984375, + "logps/rejected": -350.5834045410156, + "loss": 0.621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0885467529296875, + "rewards/margins": 0.195709228515625, + "rewards/rejected": -0.1071624755859375, + "step": 1122 + }, + { + "epoch": 0.07, + "learning_rate": 9.967312900107887e-08, + "logits/chosen": -1.9864115715026855, + "logits/rejected": -1.970211148262024, + "logps/chosen": -209.98207092285156, + "logps/rejected": -440.22991943359375, + "loss": 0.6498, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4434463679790497, + "rewards/margins": -0.24892422556877136, + "rewards/rejected": 0.692370593547821, + "step": 1123 + }, + { + "epoch": 0.07, + "learning_rate": 9.967205228883964e-08, + "logits/chosen": -2.289991617202759, + "logits/rejected": -2.2476906776428223, + "logps/chosen": -182.06402587890625, + "logps/rejected": -374.9913635253906, + "loss": 0.5715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3374267518520355, + "rewards/margins": 0.10107116401195526, + "rewards/rejected": 0.23635558784008026, + "step": 1124 + }, + { + "epoch": 0.07, + "learning_rate": 9.967097381200612e-08, + "logits/chosen": -2.2476656436920166, + "logits/rejected": -2.2157528400421143, + "logps/chosen": -117.00169372558594, + "logps/rejected": -321.5356140136719, + "loss": 0.6668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04354095458984375, + "rewards/margins": 0.00043487548828125, + "rewards/rejected": 0.0431060791015625, + "step": 1125 + }, + { + "epoch": 0.07, + "learning_rate": 9.966989357061666e-08, + "logits/chosen": -2.1923089027404785, + "logits/rejected": -2.1954851150512695, + "logps/chosen": -110.5819091796875, + "logps/rejected": -184.8779296875, + "loss": 0.6534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06390380859375, + "rewards/margins": 0.10081787407398224, + "rewards/rejected": -0.03691406175494194, + "step": 1126 + }, + { + "epoch": 0.07, + "learning_rate": 9.96688115647096e-08, + "logits/chosen": -2.2863476276397705, + "logits/rejected": -2.280693769454956, + "logps/chosen": -24.22802734375, + "logps/rejected": -155.95281982421875, + "loss": 0.6644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0044769286178052425, + "rewards/margins": 0.12212830036878586, + "rewards/rejected": -0.12660522758960724, + "step": 1127 + }, + { + "epoch": 0.07, + "learning_rate": 9.966772779432342e-08, + "logits/chosen": -2.2516751289367676, + "logits/rejected": -2.1061384677886963, + "logps/chosen": -176.13165283203125, + "logps/rejected": -406.4322814941406, + "loss": 0.6937, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06666412204504013, + "rewards/margins": -0.06414032727479935, + "rewards/rejected": 0.13080444931983948, + "step": 1128 + }, + { + "epoch": 0.07, + "learning_rate": 9.966664225949659e-08, + "logits/chosen": -2.3518049716949463, + "logits/rejected": -2.270343065261841, + "logps/chosen": -147.52914428710938, + "logps/rejected": -517.2796630859375, + "loss": 0.6715, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2123367339372635, + "rewards/margins": -0.193522647023201, + "rewards/rejected": 0.4058593809604645, + "step": 1129 + }, + { + "epoch": 0.07, + "learning_rate": 9.966555496026767e-08, + "logits/chosen": -2.0874664783477783, + "logits/rejected": -2.0697643756866455, + "logps/chosen": -49.295169830322266, + "logps/rejected": -272.62982177734375, + "loss": 0.666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08875274658203125, + "rewards/margins": 0.01644744724035263, + "rewards/rejected": 0.07230529934167862, + "step": 1130 + }, + { + "epoch": 0.07, + "learning_rate": 9.966446589667532e-08, + "logits/chosen": -2.1677446365356445, + "logits/rejected": -2.0147199630737305, + "logps/chosen": -203.5175323486328, + "logps/rejected": -583.3426513671875, + "loss": 0.5312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37299346923828125, + "rewards/margins": 0.2873428463935852, + "rewards/rejected": 0.08565063774585724, + "step": 1131 + }, + { + "epoch": 0.07, + "learning_rate": 9.966337506875818e-08, + "logits/chosen": -2.2897610664367676, + "logits/rejected": -2.2854955196380615, + "logps/chosen": -0.0014936106745153666, + "logps/rejected": -148.41845703125, + "loss": 0.6614, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5150336366787087e-05, + "rewards/margins": 0.13182057440280914, + "rewards/rejected": -0.131805419921875, + "step": 1132 + }, + { + "epoch": 0.07, + "learning_rate": 9.966228247655504e-08, + "logits/chosen": -2.207536458969116, + "logits/rejected": -2.2091610431671143, + "logps/chosen": -9.539632797241211, + "logps/rejected": -128.82972717285156, + "loss": 0.6465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03570957109332085, + "rewards/margins": 0.15240879356861115, + "rewards/rejected": -0.11669921875, + "step": 1133 + }, + { + "epoch": 0.07, + "learning_rate": 9.966118812010471e-08, + "logits/chosen": -2.1474082469940186, + "logits/rejected": -2.13921856880188, + "logps/chosen": -1.621537446975708, + "logps/rejected": -105.27361297607422, + "loss": 0.6215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007290124776773155, + "rewards/margins": 0.3096943199634552, + "rewards/rejected": -0.3089652955532074, + "step": 1134 + }, + { + "epoch": 0.07, + "learning_rate": 9.966009199944605e-08, + "logits/chosen": -2.1718332767486572, + "logits/rejected": -2.176382541656494, + "logps/chosen": -56.526893615722656, + "logps/rejected": -240.56216430664062, + "loss": 0.6318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09581146389245987, + "rewards/margins": 0.15143433213233948, + "rewards/rejected": -0.05562286451458931, + "step": 1135 + }, + { + "epoch": 0.07, + "learning_rate": 9.9658994114618e-08, + "logits/chosen": -2.0492687225341797, + "logits/rejected": -1.9843438863754272, + "logps/chosen": -300.14044189453125, + "logps/rejected": -444.10430908203125, + "loss": 0.5914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.381796270608902, + "rewards/margins": 0.037518322467803955, + "rewards/rejected": 0.344277948141098, + "step": 1136 + }, + { + "epoch": 0.07, + "learning_rate": 9.965789446565958e-08, + "logits/chosen": -2.2899668216705322, + "logits/rejected": -2.2271018028259277, + "logps/chosen": -11.638282775878906, + "logps/rejected": -306.06072998046875, + "loss": 0.6225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012438583187758923, + "rewards/margins": 0.2986660301685333, + "rewards/rejected": -0.28622743487358093, + "step": 1137 + }, + { + "epoch": 0.07, + "learning_rate": 9.965679305260986e-08, + "logits/chosen": -2.353222370147705, + "logits/rejected": -2.322493076324463, + "logps/chosen": -100.51148223876953, + "logps/rejected": -316.28131103515625, + "loss": 0.6334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06093292310833931, + "rewards/margins": 0.14270782470703125, + "rewards/rejected": -0.08177490532398224, + "step": 1138 + }, + { + "epoch": 0.07, + "learning_rate": 9.965568987550793e-08, + "logits/chosen": -2.0799105167388916, + "logits/rejected": -2.0855202674865723, + "logps/chosen": -258.95538330078125, + "logps/rejected": -414.32354736328125, + "loss": 0.4753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5774200558662415, + "rewards/margins": 0.37617188692092896, + "rewards/rejected": 0.2012481689453125, + "step": 1139 + }, + { + "epoch": 0.07, + "learning_rate": 9.9654584934393e-08, + "logits/chosen": -2.154977560043335, + "logits/rejected": -2.1422553062438965, + "logps/chosen": -212.6591796875, + "logps/rejected": -252.41932678222656, + "loss": 0.663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07071381062269211, + "rewards/margins": 0.04121246933937073, + "rewards/rejected": 0.02950134314596653, + "step": 1140 + }, + { + "epoch": 0.07, + "learning_rate": 9.965347822930434e-08, + "logits/chosen": -2.0118703842163086, + "logits/rejected": -2.006781578063965, + "logps/chosen": -52.750885009765625, + "logps/rejected": -137.13162231445312, + "loss": 0.6452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.093353271484375, + "rewards/margins": 0.08241729438304901, + "rewards/rejected": 0.010935974307358265, + "step": 1141 + }, + { + "epoch": 0.07, + "learning_rate": 9.965236976028126e-08, + "logits/chosen": -2.153484344482422, + "logits/rejected": -2.127140760421753, + "logps/chosen": -126.74897766113281, + "logps/rejected": -221.169677734375, + "loss": 0.6302, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2810623347759247, + "rewards/margins": -0.02953946590423584, + "rewards/rejected": 0.3106018006801605, + "step": 1142 + }, + { + "epoch": 0.07, + "learning_rate": 9.965125952736312e-08, + "logits/chosen": -2.2754316329956055, + "logits/rejected": -2.2771031856536865, + "logps/chosen": -19.777854919433594, + "logps/rejected": -108.39664459228516, + "loss": 0.6908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017731858417391777, + "rewards/margins": 0.024766160175204277, + "rewards/rejected": -0.042498018592596054, + "step": 1143 + }, + { + "epoch": 0.07, + "learning_rate": 9.965014753058938e-08, + "logits/chosen": -2.1847734451293945, + "logits/rejected": -2.173510789871216, + "logps/chosen": -29.985610961914062, + "logps/rejected": -111.50637817382812, + "loss": 0.6795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017384720966219902, + "rewards/margins": 0.04090690612792969, + "rewards/rejected": -0.023522187024354935, + "step": 1144 + }, + { + "epoch": 0.07, + "learning_rate": 9.964903376999952e-08, + "logits/chosen": -2.1025309562683105, + "logits/rejected": -2.128417491912842, + "logps/chosen": -335.9638977050781, + "logps/rejected": -351.89715576171875, + "loss": 0.5903, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3726959228515625, + "rewards/margins": -0.010174572467803955, + "rewards/rejected": 0.38287049531936646, + "step": 1145 + }, + { + "epoch": 0.07, + "learning_rate": 9.964791824563312e-08, + "logits/chosen": -1.9848151206970215, + "logits/rejected": -1.991339921951294, + "logps/chosen": -46.150169372558594, + "logps/rejected": -145.58816528320312, + "loss": 0.6562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07825928181409836, + "rewards/margins": 0.0604858435690403, + "rewards/rejected": 0.01777343824505806, + "step": 1146 + }, + { + "epoch": 0.07, + "learning_rate": 9.964680095752982e-08, + "logits/chosen": -2.1433870792388916, + "logits/rejected": -2.135824680328369, + "logps/chosen": -4.495455741882324, + "logps/rejected": -126.7412338256836, + "loss": 0.5839, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0046388148330152035, + "rewards/margins": 0.5051459074020386, + "rewards/rejected": -0.5097846984863281, + "step": 1147 + }, + { + "epoch": 0.07, + "learning_rate": 9.96456819057293e-08, + "logits/chosen": -2.2528445720672607, + "logits/rejected": -2.253220319747925, + "logps/chosen": -10.376235008239746, + "logps/rejected": -54.938812255859375, + "loss": 0.6597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01249475497752428, + "rewards/margins": 0.15212011337280273, + "rewards/rejected": -0.16461487114429474, + "step": 1148 + }, + { + "epoch": 0.07, + "learning_rate": 9.96445610902713e-08, + "logits/chosen": -2.25028920173645, + "logits/rejected": -2.2256484031677246, + "logps/chosen": -187.3162384033203, + "logps/rejected": -285.5826416015625, + "loss": 0.6042, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.37454530596733093, + "rewards/margins": -0.04267576336860657, + "rewards/rejected": 0.4172210693359375, + "step": 1149 + }, + { + "epoch": 0.07, + "learning_rate": 9.964343851119567e-08, + "logits/chosen": -2.1468405723571777, + "logits/rejected": -2.1437251567840576, + "logps/chosen": -4.114529132843018, + "logps/rejected": -65.31809997558594, + "loss": 0.6838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011458205990493298, + "rewards/margins": 0.009199905209243298, + "rewards/rejected": 0.00225830078125, + "step": 1150 + }, + { + "epoch": 0.07, + "learning_rate": 9.964231416854227e-08, + "logits/chosen": -2.187756299972534, + "logits/rejected": -2.1696410179138184, + "logps/chosen": -171.10174560546875, + "logps/rejected": -283.0006103515625, + "loss": 0.6535, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2954696714878082, + "rewards/margins": -0.18000641465187073, + "rewards/rejected": 0.47547608613967896, + "step": 1151 + }, + { + "epoch": 0.07, + "learning_rate": 9.964118806235104e-08, + "logits/chosen": -2.058328866958618, + "logits/rejected": -2.0589678287506104, + "logps/chosen": -346.8659362792969, + "logps/rejected": -397.48846435546875, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4622558653354645, + "rewards/margins": 0.08601075410842896, + "rewards/rejected": 0.3762451112270355, + "step": 1152 + }, + { + "epoch": 0.07, + "learning_rate": 9.9640060192662e-08, + "logits/chosen": -2.2033157348632812, + "logits/rejected": -2.2026023864746094, + "logps/chosen": -0.00010525976540520787, + "logps/rejected": -70.26290130615234, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.675342092421488e-07, + "rewards/margins": 0.01593179814517498, + "rewards/rejected": -0.015932464972138405, + "step": 1153 + }, + { + "epoch": 0.07, + "learning_rate": 9.963893055951519e-08, + "logits/chosen": -2.172302722930908, + "logits/rejected": -2.1790153980255127, + "logps/chosen": -29.67371368408203, + "logps/rejected": -215.90811157226562, + "loss": 0.7052, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04100227355957031, + "rewards/margins": -0.08503380417823792, + "rewards/rejected": 0.12603607773780823, + "step": 1154 + }, + { + "epoch": 0.07, + "learning_rate": 9.963779916295076e-08, + "logits/chosen": -2.0013082027435303, + "logits/rejected": -1.9966975450515747, + "logps/chosen": -0.003965874668210745, + "logps/rejected": -146.81951904296875, + "loss": 0.6831, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.4317359424894676e-05, + "rewards/margins": 0.04449621960520744, + "rewards/rejected": -0.04445190355181694, + "step": 1155 + }, + { + "epoch": 0.07, + "learning_rate": 9.963666600300891e-08, + "logits/chosen": -2.214144468307495, + "logits/rejected": -2.188174247741699, + "logps/chosen": -152.99171447753906, + "logps/rejected": -288.6130676269531, + "loss": 0.6191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15098419785499573, + "rewards/margins": 0.1328994780778885, + "rewards/rejected": 0.01808471791446209, + "step": 1156 + }, + { + "epoch": 0.07, + "learning_rate": 9.963553107972986e-08, + "logits/chosen": -2.19802188873291, + "logits/rejected": -2.1705429553985596, + "logps/chosen": -323.62005615234375, + "logps/rejected": -251.37020874023438, + "loss": 0.4541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.494619756937027, + "rewards/margins": 0.46185302734375, + "rewards/rejected": 0.03276672586798668, + "step": 1157 + }, + { + "epoch": 0.07, + "learning_rate": 9.963439439315397e-08, + "logits/chosen": -2.241255760192871, + "logits/rejected": -2.172213077545166, + "logps/chosen": -185.3231201171875, + "logps/rejected": -299.6951904296875, + "loss": 0.5737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.287619024515152, + "rewards/margins": 0.1277008056640625, + "rewards/rejected": 0.15991821885108948, + "step": 1158 + }, + { + "epoch": 0.07, + "learning_rate": 9.963325594332162e-08, + "logits/chosen": -2.1952145099639893, + "logits/rejected": -2.1890199184417725, + "logps/chosen": -8.32938003540039, + "logps/rejected": -225.84255981445312, + "loss": 0.6707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04677176475524902, + "rewards/margins": 0.12825265526771545, + "rewards/rejected": -0.17502442002296448, + "step": 1159 + }, + { + "epoch": 0.07, + "learning_rate": 9.963211573027321e-08, + "logits/chosen": -2.2503039836883545, + "logits/rejected": -2.229954242706299, + "logps/chosen": -18.670068740844727, + "logps/rejected": -99.9248046875, + "loss": 0.6654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05005302652716637, + "rewards/margins": 0.051430895924568176, + "rewards/rejected": -0.0013778686989098787, + "step": 1160 + }, + { + "epoch": 0.07, + "learning_rate": 9.963097375404928e-08, + "logits/chosen": -2.2119743824005127, + "logits/rejected": -2.218550205230713, + "logps/chosen": -19.280750274658203, + "logps/rejected": -140.93344116210938, + "loss": 0.651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01232070941478014, + "rewards/margins": 0.15742264688014984, + "rewards/rejected": -0.14510193467140198, + "step": 1161 + }, + { + "epoch": 0.07, + "learning_rate": 9.962983001469039e-08, + "logits/chosen": -2.277482509613037, + "logits/rejected": -2.2232589721679688, + "logps/chosen": -205.82289123535156, + "logps/rejected": -477.0029296875, + "loss": 0.5809, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.46031951904296875, + "rewards/margins": -0.04239654541015625, + "rewards/rejected": 0.502716064453125, + "step": 1162 + }, + { + "epoch": 0.07, + "learning_rate": 9.962868451223717e-08, + "logits/chosen": -2.125908136367798, + "logits/rejected": -2.100419282913208, + "logps/chosen": -18.028270721435547, + "logps/rejected": -155.5424041748047, + "loss": 0.7159, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03669166564941406, + "rewards/margins": -0.07239265739917755, + "rewards/rejected": 0.03570098802447319, + "step": 1163 + }, + { + "epoch": 0.07, + "learning_rate": 9.962753724673032e-08, + "logits/chosen": -2.109647750854492, + "logits/rejected": -2.1004083156585693, + "logps/chosen": -22.427433013916016, + "logps/rejected": -225.73159790039062, + "loss": 0.6305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000625228916760534, + "rewards/margins": 0.26727715134620667, + "rewards/rejected": -0.2666519284248352, + "step": 1164 + }, + { + "epoch": 0.07, + "learning_rate": 9.962638821821059e-08, + "logits/chosen": -2.1773526668548584, + "logits/rejected": -2.1529958248138428, + "logps/chosen": -4.145941734313965, + "logps/rejected": -95.57698059082031, + "loss": 0.6626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01237721461802721, + "rewards/margins": 0.13964685797691345, + "rewards/rejected": -0.1520240753889084, + "step": 1165 + }, + { + "epoch": 0.07, + "learning_rate": 9.96252374267188e-08, + "logits/chosen": -2.0754454135894775, + "logits/rejected": -2.062035322189331, + "logps/chosen": -220.11148071289062, + "logps/rejected": -340.17816162109375, + "loss": 0.5857, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5191131830215454, + "rewards/margins": -0.1303863525390625, + "rewards/rejected": 0.6494995355606079, + "step": 1166 + }, + { + "epoch": 0.07, + "learning_rate": 9.962408487229583e-08, + "logits/chosen": -2.1100196838378906, + "logits/rejected": -2.095506429672241, + "logps/chosen": -0.001548588857986033, + "logps/rejected": -211.8743133544922, + "loss": 0.6677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5497208494252845e-07, + "rewards/margins": 0.10422652959823608, + "rewards/rejected": -0.10422668606042862, + "step": 1167 + }, + { + "epoch": 0.07, + "learning_rate": 9.962293055498265e-08, + "logits/chosen": -2.2436575889587402, + "logits/rejected": -2.239798069000244, + "logps/chosen": -9.75047779083252, + "logps/rejected": -162.94839477539062, + "loss": 0.6738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024996472522616386, + "rewards/margins": 0.05331068113446236, + "rewards/rejected": -0.02831420861184597, + "step": 1168 + }, + { + "epoch": 0.07, + "learning_rate": 9.962177447482022e-08, + "logits/chosen": -2.075453519821167, + "logits/rejected": -2.070042610168457, + "logps/chosen": -20.17230796813965, + "logps/rejected": -98.09112548828125, + "loss": 0.7136, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01787586323916912, + "rewards/margins": -0.0828641876578331, + "rewards/rejected": 0.10074005275964737, + "step": 1169 + }, + { + "epoch": 0.07, + "learning_rate": 9.962061663184964e-08, + "logits/chosen": -2.019590139389038, + "logits/rejected": -2.0105032920837402, + "logps/chosen": -0.044116850942373276, + "logps/rejected": -166.86297607421875, + "loss": 0.6719, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.84660267829895e-06, + "rewards/margins": 0.0815676599740982, + "rewards/rejected": -0.08156280964612961, + "step": 1170 + }, + { + "epoch": 0.07, + "learning_rate": 9.961945702611204e-08, + "logits/chosen": -2.1257715225219727, + "logits/rejected": -2.12791109085083, + "logps/chosen": -7.001166820526123, + "logps/rejected": -106.89767456054688, + "loss": 0.6705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01248931884765625, + "rewards/margins": 0.07981567829847336, + "rewards/rejected": -0.06732635945081711, + "step": 1171 + }, + { + "epoch": 0.07, + "learning_rate": 9.96182956576486e-08, + "logits/chosen": -2.1847774982452393, + "logits/rejected": -2.1789190769195557, + "logps/chosen": -227.15982055664062, + "logps/rejected": -287.18865966796875, + "loss": 0.5886, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.40280458331108093, + "rewards/margins": -0.010095208883285522, + "rewards/rejected": 0.41289979219436646, + "step": 1172 + }, + { + "epoch": 0.07, + "learning_rate": 9.961713252650062e-08, + "logits/chosen": -2.1012911796569824, + "logits/rejected": -2.0490520000457764, + "logps/chosen": -188.10586547851562, + "logps/rejected": -377.09100341796875, + "loss": 0.6689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012223816476762295, + "rewards/margins": 0.05305633693933487, + "rewards/rejected": -0.04083251953125, + "step": 1173 + }, + { + "epoch": 0.07, + "learning_rate": 9.961596763270936e-08, + "logits/chosen": -2.045795440673828, + "logits/rejected": -2.0495808124542236, + "logps/chosen": -8.094134682323784e-05, + "logps/rejected": -82.77281951904297, + "loss": 0.6893, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.264685434738567e-07, + "rewards/margins": 0.014832532033324242, + "rewards/rejected": -0.01483230572193861, + "step": 1174 + }, + { + "epoch": 0.07, + "learning_rate": 9.961480097631625e-08, + "logits/chosen": -2.2405946254730225, + "logits/rejected": -2.1881654262542725, + "logps/chosen": -198.95790100097656, + "logps/rejected": -479.0954284667969, + "loss": 0.5699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4668777585029602, + "rewards/margins": 0.06483611464500427, + "rewards/rejected": 0.40204164385795593, + "step": 1175 + }, + { + "epoch": 0.07, + "learning_rate": 9.96136325573627e-08, + "logits/chosen": -2.1094069480895996, + "logits/rejected": -2.105804204940796, + "logps/chosen": -37.19611358642578, + "logps/rejected": -261.27264404296875, + "loss": 0.618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11383705586194992, + "rewards/margins": 0.17650794982910156, + "rewards/rejected": -0.06267090141773224, + "step": 1176 + }, + { + "epoch": 0.07, + "learning_rate": 9.961246237589025e-08, + "logits/chosen": -2.1496634483337402, + "logits/rejected": -2.1488845348358154, + "logps/chosen": -0.00010359009320382029, + "logps/rejected": -91.78962707519531, + "loss": 0.6668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5496844696372136e-07, + "rewards/margins": 0.10835205763578415, + "rewards/rejected": -0.10835190117359161, + "step": 1177 + }, + { + "epoch": 0.07, + "learning_rate": 9.961129043194044e-08, + "logits/chosen": -1.8948266506195068, + "logits/rejected": -1.8920964002609253, + "logps/chosen": -23.18659210205078, + "logps/rejected": -167.72311401367188, + "loss": 0.6284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04485473781824112, + "rewards/margins": 0.21034088730812073, + "rewards/rejected": -0.165486142039299, + "step": 1178 + }, + { + "epoch": 0.07, + "learning_rate": 9.961011672555492e-08, + "logits/chosen": -2.0124597549438477, + "logits/rejected": -2.009634017944336, + "logps/chosen": -151.37838745117188, + "logps/rejected": -363.67730712890625, + "loss": 0.6397, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.23773041367530823, + "rewards/margins": -0.025349438190460205, + "rewards/rejected": 0.26307985186576843, + "step": 1179 + }, + { + "epoch": 0.07, + "learning_rate": 9.96089412567754e-08, + "logits/chosen": -2.079693555831909, + "logits/rejected": -2.0821807384490967, + "logps/chosen": -31.011796951293945, + "logps/rejected": -34.60919189453125, + "loss": 0.7065, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012169075198471546, + "rewards/margins": -0.05139636993408203, + "rewards/rejected": 0.03922729566693306, + "step": 1180 + }, + { + "epoch": 0.07, + "learning_rate": 9.960776402564361e-08, + "logits/chosen": -2.27131724357605, + "logits/rejected": -2.2736170291900635, + "logps/chosen": -76.6966323852539, + "logps/rejected": -129.26600646972656, + "loss": 0.6773, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.032419588416814804, + "rewards/margins": -0.037111666053533554, + "rewards/rejected": 0.06953125447034836, + "step": 1181 + }, + { + "epoch": 0.07, + "learning_rate": 9.960658503220139e-08, + "logits/chosen": -2.163567543029785, + "logits/rejected": -2.154104471206665, + "logps/chosen": -7.805720806121826, + "logps/rejected": -79.22059631347656, + "loss": 0.685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01285643596202135, + "rewards/margins": 0.01180968340486288, + "rewards/rejected": 0.0010467529064044356, + "step": 1182 + }, + { + "epoch": 0.07, + "learning_rate": 9.960540427649062e-08, + "logits/chosen": -2.1489195823669434, + "logits/rejected": -2.1286098957061768, + "logps/chosen": -135.82020568847656, + "logps/rejected": -168.4611053466797, + "loss": 0.6768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02111968956887722, + "rewards/margins": 0.02509155310690403, + "rewards/rejected": -0.003971863072365522, + "step": 1183 + }, + { + "epoch": 0.07, + "learning_rate": 9.960422175855324e-08, + "logits/chosen": -2.213456630706787, + "logits/rejected": -2.197070360183716, + "logps/chosen": -0.00013994784967508167, + "logps/rejected": -162.2425537109375, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1562791542019113e-06, + "rewards/margins": -0.004967892076820135, + "rewards/rejected": 0.00496673583984375, + "step": 1184 + }, + { + "epoch": 0.07, + "learning_rate": 9.960303747843126e-08, + "logits/chosen": -2.0519585609436035, + "logits/rejected": -2.044762134552002, + "logps/chosen": -9.655851317802444e-05, + "logps/rejected": -189.20751953125, + "loss": 0.6179, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.298091754317284e-07, + "rewards/margins": 0.3275625705718994, + "rewards/rejected": -0.32756349444389343, + "step": 1185 + }, + { + "epoch": 0.07, + "learning_rate": 9.960185143616676e-08, + "logits/chosen": -2.2061188220977783, + "logits/rejected": -2.1807916164398193, + "logps/chosen": -0.0069052004255354404, + "logps/rejected": -212.67738342285156, + "loss": 0.6388, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.86061427788809e-05, + "rewards/margins": 0.23064497113227844, + "rewards/rejected": -0.230723574757576, + "step": 1186 + }, + { + "epoch": 0.07, + "learning_rate": 9.960066363180186e-08, + "logits/chosen": -1.9072078466415405, + "logits/rejected": -1.791545033454895, + "logps/chosen": -218.95791625976562, + "logps/rejected": -437.61212158203125, + "loss": 0.6086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1715545654296875, + "rewards/margins": 0.15177306532859802, + "rewards/rejected": 0.01978149451315403, + "step": 1187 + }, + { + "epoch": 0.07, + "learning_rate": 9.959947406537878e-08, + "logits/chosen": -2.1398727893829346, + "logits/rejected": -2.1164779663085938, + "logps/chosen": -38.8709602355957, + "logps/rejected": -214.5369873046875, + "loss": 0.6407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005584335420280695, + "rewards/margins": 0.22709999978542328, + "rewards/rejected": -0.23268432915210724, + "step": 1188 + }, + { + "epoch": 0.07, + "learning_rate": 9.959828273693975e-08, + "logits/chosen": -2.1577858924865723, + "logits/rejected": -2.1324520111083984, + "logps/chosen": -160.40426635742188, + "logps/rejected": -177.09075927734375, + "loss": 0.6147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14728699624538422, + "rewards/margins": 0.17252197861671448, + "rewards/rejected": -0.02523498609662056, + "step": 1189 + }, + { + "epoch": 0.07, + "learning_rate": 9.959708964652711e-08, + "logits/chosen": -2.121150493621826, + "logits/rejected": -2.0982651710510254, + "logps/chosen": -372.63232421875, + "logps/rejected": -342.95330810546875, + "loss": 0.5211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47120362520217896, + "rewards/margins": 0.19306641817092896, + "rewards/rejected": 0.27813720703125, + "step": 1190 + }, + { + "epoch": 0.07, + "learning_rate": 9.959589479418324e-08, + "logits/chosen": -2.2587854862213135, + "logits/rejected": -2.252671718597412, + "logps/chosen": -27.407163619995117, + "logps/rejected": -147.0172882080078, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02339611016213894, + "rewards/margins": 0.02674579806625843, + "rewards/rejected": -0.05014190822839737, + "step": 1191 + }, + { + "epoch": 0.07, + "learning_rate": 9.959469817995059e-08, + "logits/chosen": -2.1839301586151123, + "logits/rejected": -2.1786859035491943, + "logps/chosen": -38.66699981689453, + "logps/rejected": -148.72543334960938, + "loss": 0.6604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08826752007007599, + "rewards/margins": 0.04602203518152237, + "rewards/rejected": 0.04224548488855362, + "step": 1192 + }, + { + "epoch": 0.07, + "learning_rate": 9.959349980387166e-08, + "logits/chosen": -2.1040542125701904, + "logits/rejected": -2.0975306034088135, + "logps/chosen": -208.41476440429688, + "logps/rejected": -381.4786682128906, + "loss": 0.6489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07185059040784836, + "rewards/margins": 0.12129822373390198, + "rewards/rejected": -0.04944763332605362, + "step": 1193 + }, + { + "epoch": 0.07, + "learning_rate": 9.959229966598903e-08, + "logits/chosen": -2.3131301403045654, + "logits/rejected": -2.306469678878784, + "logps/chosen": -0.2268541157245636, + "logps/rejected": -80.7581787109375, + "loss": 0.6697, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4746189663128462e-05, + "rewards/margins": 0.09629720449447632, + "rewards/rejected": -0.09631194919347763, + "step": 1194 + }, + { + "epoch": 0.07, + "learning_rate": 9.959109776634535e-08, + "logits/chosen": -2.2187254428863525, + "logits/rejected": -2.21954083442688, + "logps/chosen": -0.18735966086387634, + "logps/rejected": -16.270830154418945, + "loss": 0.6919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001989996526390314, + "rewards/margins": 0.023579919710755348, + "rewards/rejected": -0.025569915771484375, + "step": 1195 + }, + { + "epoch": 0.07, + "learning_rate": 9.958989410498328e-08, + "logits/chosen": -1.9565702676773071, + "logits/rejected": -1.9466060400009155, + "logps/chosen": -12.53182315826416, + "logps/rejected": -169.15354919433594, + "loss": 0.6469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0062427520751953125, + "rewards/margins": 0.1866428405046463, + "rewards/rejected": -0.180400088429451, + "step": 1196 + }, + { + "epoch": 0.07, + "learning_rate": 9.958868868194562e-08, + "logits/chosen": -2.0357625484466553, + "logits/rejected": -2.0491702556610107, + "logps/chosen": -194.2522735595703, + "logps/rejected": -264.6894226074219, + "loss": 0.5884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2963211238384247, + "rewards/margins": 0.038807690143585205, + "rewards/rejected": 0.2575134336948395, + "step": 1197 + }, + { + "epoch": 0.07, + "learning_rate": 9.958748149727515e-08, + "logits/chosen": -2.147761821746826, + "logits/rejected": -2.015331506729126, + "logps/chosen": -223.3232421875, + "logps/rejected": -408.39434814453125, + "loss": 0.5633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.325662225484848, + "rewards/margins": 0.17488707602024078, + "rewards/rejected": 0.15077514946460724, + "step": 1198 + }, + { + "epoch": 0.07, + "learning_rate": 9.95862725510148e-08, + "logits/chosen": -2.1276609897613525, + "logits/rejected": -2.1144509315490723, + "logps/chosen": -131.02804565429688, + "logps/rejected": -272.567626953125, + "loss": 0.7216, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04074859619140625, + "rewards/margins": -0.08471222221851349, + "rewards/rejected": 0.04396362230181694, + "step": 1199 + }, + { + "epoch": 0.07, + "learning_rate": 9.958506184320748e-08, + "logits/chosen": -2.093074321746826, + "logits/rejected": -2.031036615371704, + "logps/chosen": -290.98687744140625, + "logps/rejected": -378.32794189453125, + "loss": 0.4968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5651885867118835, + "rewards/margins": 0.25550535321235657, + "rewards/rejected": 0.309683233499527, + "step": 1200 + }, + { + "epoch": 0.07, + "learning_rate": 9.958384937389622e-08, + "logits/chosen": -2.1366262435913086, + "logits/rejected": -2.128542900085449, + "logps/chosen": -5.014710426330566, + "logps/rejected": -158.29989624023438, + "loss": 0.6725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01664886437356472, + "rewards/margins": 0.10230560600757599, + "rewards/rejected": -0.11895447224378586, + "step": 1201 + }, + { + "epoch": 0.07, + "learning_rate": 9.95826351431241e-08, + "logits/chosen": -2.1983397006988525, + "logits/rejected": -2.193239688873291, + "logps/chosen": -14.759329795837402, + "logps/rejected": -103.24340057373047, + "loss": 0.6963, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.223419116693549e-05, + "rewards/margins": -0.012578392401337624, + "rewards/rejected": 0.01261062640696764, + "step": 1202 + }, + { + "epoch": 0.07, + "learning_rate": 9.958141915093424e-08, + "logits/chosen": -2.1602108478546143, + "logits/rejected": -2.135730028152466, + "logps/chosen": -202.58773803710938, + "logps/rejected": -391.8163757324219, + "loss": 0.6184, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3782196044921875, + "rewards/margins": -0.08001708984375, + "rewards/rejected": 0.4582366943359375, + "step": 1203 + }, + { + "epoch": 0.07, + "learning_rate": 9.958020139736983e-08, + "logits/chosen": -2.224998950958252, + "logits/rejected": -2.22092342376709, + "logps/chosen": -110.62155151367188, + "logps/rejected": -200.49203491210938, + "loss": 0.6572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07269287109375, + "rewards/margins": 0.08365020900964737, + "rewards/rejected": -0.010957336984574795, + "step": 1204 + }, + { + "epoch": 0.07, + "learning_rate": 9.957898188247416e-08, + "logits/chosen": -2.3281402587890625, + "logits/rejected": -2.311964273452759, + "logps/chosen": -52.91101837158203, + "logps/rejected": -126.82258605957031, + "loss": 0.7174, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0073188780806958675, + "rewards/margins": -0.066228486597538, + "rewards/rejected": 0.05890960618853569, + "step": 1205 + }, + { + "epoch": 0.07, + "learning_rate": 9.957776060629052e-08, + "logits/chosen": -2.259387493133545, + "logits/rejected": -2.25180983543396, + "logps/chosen": -28.06244659423828, + "logps/rejected": -193.64137268066406, + "loss": 0.6038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0069900513626635075, + "rewards/margins": 0.37937623262405396, + "rewards/rejected": -0.3723861873149872, + "step": 1206 + }, + { + "epoch": 0.07, + "learning_rate": 9.957653756886233e-08, + "logits/chosen": -2.1876420974731445, + "logits/rejected": -2.137744188308716, + "logps/chosen": -232.1430206298828, + "logps/rejected": -428.28302001953125, + "loss": 0.565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4339462220668793, + "rewards/margins": 0.045207202434539795, + "rewards/rejected": 0.3887390196323395, + "step": 1207 + }, + { + "epoch": 0.07, + "learning_rate": 9.957531277023301e-08, + "logits/chosen": -2.1110754013061523, + "logits/rejected": -2.0162572860717773, + "logps/chosen": -391.89166259765625, + "logps/rejected": -546.1688232421875, + "loss": 0.5766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4961791932582855, + "rewards/margins": 0.03040158748626709, + "rewards/rejected": 0.46577760577201843, + "step": 1208 + }, + { + "epoch": 0.07, + "learning_rate": 9.957408621044609e-08, + "logits/chosen": -2.0730795860290527, + "logits/rejected": -2.066431999206543, + "logps/chosen": -89.27769470214844, + "logps/rejected": -201.67352294921875, + "loss": 0.6484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11722870171070099, + "rewards/margins": 0.09598541259765625, + "rewards/rejected": 0.02124328725039959, + "step": 1209 + }, + { + "epoch": 0.07, + "learning_rate": 9.957285788954512e-08, + "logits/chosen": -2.031351327896118, + "logits/rejected": -2.0324392318725586, + "logps/chosen": -317.5078125, + "logps/rejected": -380.5851135253906, + "loss": 0.5193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.549639880657196, + "rewards/margins": 0.14405211806297302, + "rewards/rejected": 0.405587762594223, + "step": 1210 + }, + { + "epoch": 0.07, + "learning_rate": 9.957162780757377e-08, + "logits/chosen": -2.3096978664398193, + "logits/rejected": -2.278034210205078, + "logps/chosen": -51.89488220214844, + "logps/rejected": -308.39630126953125, + "loss": 0.6716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011408996768295765, + "rewards/margins": 0.12298126518726349, + "rewards/rejected": -0.13439026474952698, + "step": 1211 + }, + { + "epoch": 0.07, + "learning_rate": 9.957039596457571e-08, + "logits/chosen": -2.2820746898651123, + "logits/rejected": -2.2802116870880127, + "logps/chosen": -39.62150573730469, + "logps/rejected": -165.61280822753906, + "loss": 0.6264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013023758307099342, + "rewards/margins": 0.30325737595558167, + "rewards/rejected": -0.31628113985061646, + "step": 1212 + }, + { + "epoch": 0.07, + "learning_rate": 9.95691623605947e-08, + "logits/chosen": -2.1918201446533203, + "logits/rejected": -2.1588616371154785, + "logps/chosen": -230.84251403808594, + "logps/rejected": -478.5149230957031, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5632705688476562, + "rewards/margins": 0.43578946590423584, + "rewards/rejected": 0.12748108804225922, + "step": 1213 + }, + { + "epoch": 0.07, + "learning_rate": 9.95679269956746e-08, + "logits/chosen": -2.14095401763916, + "logits/rejected": -2.115396738052368, + "logps/chosen": -31.866121292114258, + "logps/rejected": -213.2785186767578, + "loss": 0.6559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03169422224164009, + "rewards/margins": 0.18209820985794067, + "rewards/rejected": -0.21379242837429047, + "step": 1214 + }, + { + "epoch": 0.07, + "learning_rate": 9.956668986985925e-08, + "logits/chosen": -1.9594165086746216, + "logits/rejected": -1.8964160680770874, + "logps/chosen": -265.30401611328125, + "logps/rejected": -428.59735107421875, + "loss": 0.6656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.095550537109375, + "rewards/margins": 0.0147705078125, + "rewards/rejected": 0.080780029296875, + "step": 1215 + }, + { + "epoch": 0.07, + "learning_rate": 9.956545098319262e-08, + "logits/chosen": -2.0726206302642822, + "logits/rejected": -2.0187482833862305, + "logps/chosen": -273.8394775390625, + "logps/rejected": -374.437744140625, + "loss": 0.5811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37352296710014343, + "rewards/margins": 0.06389161944389343, + "rewards/rejected": 0.30963134765625, + "step": 1216 + }, + { + "epoch": 0.07, + "learning_rate": 9.956421033571871e-08, + "logits/chosen": -2.2069103717803955, + "logits/rejected": -2.187018871307373, + "logps/chosen": -25.984535217285156, + "logps/rejected": -226.63833618164062, + "loss": 0.6569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026282502338290215, + "rewards/margins": 0.15531539916992188, + "rewards/rejected": -0.18159790337085724, + "step": 1217 + }, + { + "epoch": 0.07, + "learning_rate": 9.956296792748162e-08, + "logits/chosen": -2.1849582195281982, + "logits/rejected": -2.180248737335205, + "logps/chosen": -37.77948760986328, + "logps/rejected": -190.26229858398438, + "loss": 0.6407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022336577996611595, + "rewards/margins": 0.2702903747558594, + "rewards/rejected": -0.2926269471645355, + "step": 1218 + }, + { + "epoch": 0.07, + "learning_rate": 9.956172375852547e-08, + "logits/chosen": -1.9970972537994385, + "logits/rejected": -1.9997224807739258, + "logps/chosen": -261.4207458496094, + "logps/rejected": -354.74578857421875, + "loss": 0.625, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.18164673447608948, + "rewards/margins": -0.07828062772750854, + "rewards/rejected": 0.259927362203598, + "step": 1219 + }, + { + "epoch": 0.07, + "learning_rate": 9.956047782889445e-08, + "logits/chosen": -2.085944175720215, + "logits/rejected": -2.0588624477386475, + "logps/chosen": -254.85079956054688, + "logps/rejected": -349.2668762207031, + "loss": 0.535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46098023653030396, + "rewards/margins": 0.15346375107765198, + "rewards/rejected": 0.307516485452652, + "step": 1220 + }, + { + "epoch": 0.07, + "learning_rate": 9.955923013863285e-08, + "logits/chosen": -2.032149314880371, + "logits/rejected": -2.027029275894165, + "logps/chosen": -0.5380994081497192, + "logps/rejected": -135.65887451171875, + "loss": 0.6751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009656202979385853, + "rewards/margins": 0.08332475274801254, + "rewards/rejected": -0.09298095852136612, + "step": 1221 + }, + { + "epoch": 0.07, + "learning_rate": 9.955798068778496e-08, + "logits/chosen": -2.1085779666900635, + "logits/rejected": -2.089312791824341, + "logps/chosen": -157.12158203125, + "logps/rejected": -239.03671264648438, + "loss": 0.6032, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.324411004781723, + "rewards/margins": -0.039730846881866455, + "rewards/rejected": 0.3641418516635895, + "step": 1222 + }, + { + "epoch": 0.07, + "learning_rate": 9.955672947639518e-08, + "logits/chosen": -1.9650485515594482, + "logits/rejected": -1.9458463191986084, + "logps/chosen": -195.64801025390625, + "logps/rejected": -300.73223876953125, + "loss": 0.6649, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03865661844611168, + "rewards/margins": 0.02601013332605362, + "rewards/rejected": 0.012646484188735485, + "step": 1223 + }, + { + "epoch": 0.07, + "learning_rate": 9.955547650450798e-08, + "logits/chosen": -2.152101516723633, + "logits/rejected": -2.148885726928711, + "logps/chosen": -247.49261474609375, + "logps/rejected": -440.9991149902344, + "loss": 0.6173, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4818786680698395, + "rewards/margins": -0.17435303330421448, + "rewards/rejected": 0.656231701374054, + "step": 1224 + }, + { + "epoch": 0.07, + "learning_rate": 9.955422177216783e-08, + "logits/chosen": -2.1128368377685547, + "logits/rejected": -2.0871665477752686, + "logps/chosen": -169.25152587890625, + "logps/rejected": -380.34234619140625, + "loss": 0.6231, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.273580938577652, + "rewards/margins": -0.031225591897964478, + "rewards/rejected": 0.30480653047561646, + "step": 1225 + }, + { + "epoch": 0.07, + "learning_rate": 9.955296527941933e-08, + "logits/chosen": -2.11022686958313, + "logits/rejected": -2.0983457565307617, + "logps/chosen": -1.4781915524508804e-05, + "logps/rejected": -211.23150634765625, + "loss": 0.6612, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.384167707703e-08, + "rewards/margins": 0.12820585072040558, + "rewards/rejected": -0.12820588052272797, + "step": 1226 + }, + { + "epoch": 0.07, + "learning_rate": 9.955170702630712e-08, + "logits/chosen": -2.1414334774017334, + "logits/rejected": -2.108366012573242, + "logps/chosen": -90.97565460205078, + "logps/rejected": -291.22467041015625, + "loss": 0.6743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017475128173828125, + "rewards/margins": 0.0654808059334755, + "rewards/rejected": -0.08295593410730362, + "step": 1227 + }, + { + "epoch": 0.07, + "learning_rate": 9.955044701287589e-08, + "logits/chosen": -2.1190123558044434, + "logits/rejected": -2.0724525451660156, + "logps/chosen": -303.701416015625, + "logps/rejected": -372.4642028808594, + "loss": 0.6146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1961669921875, + "rewards/margins": 0.11224059760570526, + "rewards/rejected": 0.08392639458179474, + "step": 1228 + }, + { + "epoch": 0.07, + "learning_rate": 9.95491852391704e-08, + "logits/chosen": -2.221904754638672, + "logits/rejected": -2.2286643981933594, + "logps/chosen": -256.064697265625, + "logps/rejected": -262.5776062011719, + "loss": 0.56, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46096497774124146, + "rewards/margins": 0.09915468096733093, + "rewards/rejected": 0.3618102967739105, + "step": 1229 + }, + { + "epoch": 0.07, + "learning_rate": 9.954792170523549e-08, + "logits/chosen": -2.147153854370117, + "logits/rejected": -2.1232755184173584, + "logps/chosen": -50.12289047241211, + "logps/rejected": -293.55157470703125, + "loss": 0.5939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12813988327980042, + "rewards/margins": 0.2886531949043274, + "rewards/rejected": -0.16051331162452698, + "step": 1230 + }, + { + "epoch": 0.07, + "learning_rate": 9.954665641111604e-08, + "logits/chosen": -2.204802989959717, + "logits/rejected": -2.194855213165283, + "logps/chosen": -3.589205026626587, + "logps/rejected": -122.39932250976562, + "loss": 0.6867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014598369307350367, + "rewards/margins": 0.028621412813663483, + "rewards/rejected": -0.02876739576458931, + "step": 1231 + }, + { + "epoch": 0.07, + "learning_rate": 9.954538935685698e-08, + "logits/chosen": -2.1889779567718506, + "logits/rejected": -2.1759519577026367, + "logps/chosen": -325.4595947265625, + "logps/rejected": -487.47357177734375, + "loss": 0.6039, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.32016298174858093, + "rewards/margins": -0.008264154195785522, + "rewards/rejected": 0.32842713594436646, + "step": 1232 + }, + { + "epoch": 0.07, + "learning_rate": 9.954412054250334e-08, + "logits/chosen": -1.9226667881011963, + "logits/rejected": -1.969982624053955, + "logps/chosen": -293.58953857421875, + "logps/rejected": -406.1182861328125, + "loss": 0.5879, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4069152772426605, + "rewards/margins": -0.024359136819839478, + "rewards/rejected": 0.4312744140625, + "step": 1233 + }, + { + "epoch": 0.07, + "learning_rate": 9.95428499681002e-08, + "logits/chosen": -2.3553895950317383, + "logits/rejected": -2.3554859161376953, + "logps/chosen": -21.06290626525879, + "logps/rejected": -115.97520446777344, + "loss": 0.6716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011163902468979359, + "rewards/margins": 0.08421078324317932, + "rewards/rejected": -0.07304687798023224, + "step": 1234 + }, + { + "epoch": 0.07, + "learning_rate": 9.954157763369268e-08, + "logits/chosen": -2.0759201049804688, + "logits/rejected": -2.0536627769470215, + "logps/chosen": -229.70074462890625, + "logps/rejected": -359.8605651855469, + "loss": 0.6139, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3315109312534332, + "rewards/margins": -0.02158355712890625, + "rewards/rejected": 0.3530944883823395, + "step": 1235 + }, + { + "epoch": 0.07, + "learning_rate": 9.9540303539326e-08, + "logits/chosen": -1.9654158353805542, + "logits/rejected": -1.9749095439910889, + "logps/chosen": -179.9269561767578, + "logps/rejected": -202.02125549316406, + "loss": 0.653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08206939697265625, + "rewards/margins": 0.05949706956744194, + "rewards/rejected": 0.02257232740521431, + "step": 1236 + }, + { + "epoch": 0.07, + "learning_rate": 9.953902768504541e-08, + "logits/chosen": -2.3581595420837402, + "logits/rejected": -2.350888967514038, + "logps/chosen": -29.55323028564453, + "logps/rejected": -158.99600219726562, + "loss": 0.6445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020846938714385033, + "rewards/margins": 0.1867038756608963, + "rewards/rejected": -0.16585694253444672, + "step": 1237 + }, + { + "epoch": 0.07, + "learning_rate": 9.953775007089622e-08, + "logits/chosen": -1.9728670120239258, + "logits/rejected": -1.970834493637085, + "logps/chosen": -64.38216400146484, + "logps/rejected": -163.29513549804688, + "loss": 0.639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08315124362707138, + "rewards/margins": 0.11948546767234802, + "rewards/rejected": -0.03633422777056694, + "step": 1238 + }, + { + "epoch": 0.07, + "learning_rate": 9.953647069692383e-08, + "logits/chosen": -2.2794439792633057, + "logits/rejected": -2.2655487060546875, + "logps/chosen": -96.82283782958984, + "logps/rejected": -316.4519958496094, + "loss": 0.6673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035204317420721054, + "rewards/margins": 0.08699875324964523, + "rewards/rejected": -0.05179443582892418, + "step": 1239 + }, + { + "epoch": 0.07, + "learning_rate": 9.95351895631737e-08, + "logits/chosen": -2.057713508605957, + "logits/rejected": -2.060100555419922, + "logps/chosen": -52.80912399291992, + "logps/rejected": -99.390869140625, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017212677747011185, + "rewards/margins": 0.036957550793886185, + "rewards/rejected": -0.019744873046875, + "step": 1240 + }, + { + "epoch": 0.07, + "learning_rate": 9.953390666969135e-08, + "logits/chosen": -2.042327404022217, + "logits/rejected": -2.0105783939361572, + "logps/chosen": -126.0328140258789, + "logps/rejected": -269.89593505859375, + "loss": 0.6438, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.16142578423023224, + "rewards/margins": -0.06207886338233948, + "rewards/rejected": 0.22350464761257172, + "step": 1241 + }, + { + "epoch": 0.07, + "learning_rate": 9.953262201652233e-08, + "logits/chosen": -2.046966314315796, + "logits/rejected": -2.0430386066436768, + "logps/chosen": -243.18914794921875, + "logps/rejected": -282.9249572753906, + "loss": 0.6347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16551056504249573, + "rewards/margins": 0.09090118855237961, + "rewards/rejected": 0.07460937649011612, + "step": 1242 + }, + { + "epoch": 0.07, + "learning_rate": 9.953133560371227e-08, + "logits/chosen": -2.093454360961914, + "logits/rejected": -2.090372323989868, + "logps/chosen": -15.602693557739258, + "logps/rejected": -96.19522094726562, + "loss": 0.6765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006284809205681086, + "rewards/margins": 0.0563870444893837, + "rewards/rejected": -0.05010223388671875, + "step": 1243 + }, + { + "epoch": 0.07, + "learning_rate": 9.953004743130689e-08, + "logits/chosen": -2.2681357860565186, + "logits/rejected": -2.2401678562164307, + "logps/chosen": -38.4151496887207, + "logps/rejected": -169.29855346679688, + "loss": 0.6326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028692245483398438, + "rewards/margins": 0.29131507873535156, + "rewards/rejected": -0.32000732421875, + "step": 1244 + }, + { + "epoch": 0.07, + "learning_rate": 9.952875749935196e-08, + "logits/chosen": -2.3206276893615723, + "logits/rejected": -2.321904182434082, + "logps/chosen": -122.65106201171875, + "logps/rejected": -275.8856201171875, + "loss": 0.6275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030686188489198685, + "rewards/margins": 0.23462295532226562, + "rewards/rejected": -0.20393677055835724, + "step": 1245 + }, + { + "epoch": 0.07, + "learning_rate": 9.952746580789328e-08, + "logits/chosen": -2.099468231201172, + "logits/rejected": -2.093480348587036, + "logps/chosen": -18.842653274536133, + "logps/rejected": -269.9125671386719, + "loss": 0.6842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01845874823629856, + "rewards/margins": 0.010392951779067516, + "rewards/rejected": 0.008065796457231045, + "step": 1246 + }, + { + "epoch": 0.07, + "learning_rate": 9.952617235697676e-08, + "logits/chosen": -2.166985511779785, + "logits/rejected": -2.154362916946411, + "logps/chosen": -18.35594367980957, + "logps/rejected": -208.7789306640625, + "loss": 0.7043, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.000270652788458392, + "rewards/margins": -0.031858254224061966, + "rewards/rejected": 0.03212890774011612, + "step": 1247 + }, + { + "epoch": 0.07, + "learning_rate": 9.952487714664834e-08, + "logits/chosen": -2.2001307010650635, + "logits/rejected": -2.1903574466705322, + "logps/chosen": -3.7122035026550293, + "logps/rejected": -173.24920654296875, + "loss": 0.6568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012634850107133389, + "rewards/margins": 0.15747319161891937, + "rewards/rejected": -0.17010803520679474, + "step": 1248 + }, + { + "epoch": 0.07, + "learning_rate": 9.952358017695402e-08, + "logits/chosen": -2.0888736248016357, + "logits/rejected": -2.0321216583251953, + "logps/chosen": -172.6417694091797, + "logps/rejected": -367.91644287109375, + "loss": 0.5239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5204116702079773, + "rewards/margins": 0.13884124159812927, + "rewards/rejected": 0.381570428609848, + "step": 1249 + }, + { + "epoch": 0.07, + "learning_rate": 9.95222814479399e-08, + "logits/chosen": -2.2966084480285645, + "logits/rejected": -2.3010966777801514, + "logps/chosen": -50.47123718261719, + "logps/rejected": -174.06661987304688, + "loss": 0.6674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04680938646197319, + "rewards/margins": 0.03273925557732582, + "rewards/rejected": 0.014070129953324795, + "step": 1250 + }, + { + "epoch": 0.07, + "learning_rate": 9.95209809596521e-08, + "logits/chosen": -2.2995223999023438, + "logits/rejected": -2.2970023155212402, + "logps/chosen": -44.260398864746094, + "logps/rejected": -142.72767639160156, + "loss": 0.6362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06632500141859055, + "rewards/margins": 0.14507408440113068, + "rewards/rejected": -0.07874908298254013, + "step": 1251 + }, + { + "epoch": 0.07, + "learning_rate": 9.95196787121368e-08, + "logits/chosen": -2.0908477306365967, + "logits/rejected": -2.0793704986572266, + "logps/chosen": -152.91567993164062, + "logps/rejected": -356.9934387207031, + "loss": 0.6201, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.319448858499527, + "rewards/margins": -0.07018738985061646, + "rewards/rejected": 0.38963624835014343, + "step": 1252 + }, + { + "epoch": 0.07, + "learning_rate": 9.951837470544031e-08, + "logits/chosen": -2.225572347640991, + "logits/rejected": -2.219364881515503, + "logps/chosen": -0.14105495810508728, + "logps/rejected": -95.53262329101562, + "loss": 0.6748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00015438944683410227, + "rewards/margins": 0.07451962679624557, + "rewards/rejected": -0.07436523586511612, + "step": 1253 + }, + { + "epoch": 0.07, + "learning_rate": 9.951706893960892e-08, + "logits/chosen": -2.3012585639953613, + "logits/rejected": -2.292912244796753, + "logps/chosen": -24.02448081970215, + "logps/rejected": -89.14741516113281, + "loss": 0.747, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09039058536291122, + "rewards/margins": -0.12477321922779083, + "rewards/rejected": 0.03438263013958931, + "step": 1254 + }, + { + "epoch": 0.07, + "learning_rate": 9.951576141468902e-08, + "logits/chosen": -2.1756319999694824, + "logits/rejected": -2.154223918914795, + "logps/chosen": -14.49696159362793, + "logps/rejected": -232.5614471435547, + "loss": 0.6974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029253577813506126, + "rewards/margins": 0.049695394933223724, + "rewards/rejected": -0.078948974609375, + "step": 1255 + }, + { + "epoch": 0.07, + "learning_rate": 9.951445213072709e-08, + "logits/chosen": -2.166088342666626, + "logits/rejected": -2.1738126277923584, + "logps/chosen": -13.37224006652832, + "logps/rejected": -154.5919189453125, + "loss": 0.6782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00556526193395257, + "rewards/margins": 0.05413627624511719, + "rewards/rejected": -0.05970153957605362, + "step": 1256 + }, + { + "epoch": 0.07, + "learning_rate": 9.95131410877696e-08, + "logits/chosen": -2.057950019836426, + "logits/rejected": -2.0500710010528564, + "logps/chosen": -0.0040411315858364105, + "logps/rejected": -169.60562133789062, + "loss": 0.6178, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.525380690116435e-05, + "rewards/margins": 0.330470472574234, + "rewards/rejected": -0.3305557370185852, + "step": 1257 + }, + { + "epoch": 0.07, + "learning_rate": 9.951182828586315e-08, + "logits/chosen": -2.1780149936676025, + "logits/rejected": -2.1659350395202637, + "logps/chosen": -1.9064873456954956, + "logps/rejected": -108.42076110839844, + "loss": 0.676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010572361759841442, + "rewards/margins": 0.07911422848701477, + "rewards/rejected": -0.08968658745288849, + "step": 1258 + }, + { + "epoch": 0.07, + "learning_rate": 9.951051372505437e-08, + "logits/chosen": -2.283358573913574, + "logits/rejected": -2.270463228225708, + "logps/chosen": -45.74412536621094, + "logps/rejected": -174.63034057617188, + "loss": 0.638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012445831671357155, + "rewards/margins": 0.20372696220874786, + "rewards/rejected": -0.19128112494945526, + "step": 1259 + }, + { + "epoch": 0.07, + "learning_rate": 9.950919740538996e-08, + "logits/chosen": -2.045581817626953, + "logits/rejected": -1.9893527030944824, + "logps/chosen": -246.78836059570312, + "logps/rejected": -556.568603515625, + "loss": 0.6049, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3838607966899872, + "rewards/margins": -0.039027392864227295, + "rewards/rejected": 0.4228881895542145, + "step": 1260 + }, + { + "epoch": 0.07, + "learning_rate": 9.950787932691669e-08, + "logits/chosen": -2.3076553344726562, + "logits/rejected": -2.302628755569458, + "logps/chosen": -9.601865768432617, + "logps/rejected": -157.2549285888672, + "loss": 0.6274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.032219886779785156, + "rewards/margins": 0.25407660007476807, + "rewards/rejected": -0.22185669839382172, + "step": 1261 + }, + { + "epoch": 0.07, + "learning_rate": 9.950655948968136e-08, + "logits/chosen": -2.2834651470184326, + "logits/rejected": -2.294781446456909, + "logps/chosen": -189.0658416748047, + "logps/rejected": -259.8621826171875, + "loss": 0.6347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13328400254249573, + "rewards/margins": 0.10508270561695099, + "rewards/rejected": 0.02820129506289959, + "step": 1262 + }, + { + "epoch": 0.07, + "learning_rate": 9.95052378937309e-08, + "logits/chosen": -2.2864184379577637, + "logits/rejected": -2.2760403156280518, + "logps/chosen": -31.81456756591797, + "logps/rejected": -83.00204467773438, + "loss": 0.7213, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.050704002380371094, + "rewards/margins": -0.04248485714197159, + "rewards/rejected": -0.008219147101044655, + "step": 1263 + }, + { + "epoch": 0.07, + "learning_rate": 9.950391453911222e-08, + "logits/chosen": -2.170578956604004, + "logits/rejected": -2.15798020362854, + "logps/chosen": -140.43043518066406, + "logps/rejected": -374.3760986328125, + "loss": 0.6463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07827911525964737, + "rewards/margins": 0.10141754150390625, + "rewards/rejected": -0.02313842810690403, + "step": 1264 + }, + { + "epoch": 0.07, + "learning_rate": 9.950258942587233e-08, + "logits/chosen": -2.145209550857544, + "logits/rejected": -2.1257412433624268, + "logps/chosen": -181.07981872558594, + "logps/rejected": -214.4299774169922, + "loss": 0.7142, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0070816040970385075, + "rewards/margins": -0.05365295708179474, + "rewards/rejected": 0.04657135158777237, + "step": 1265 + }, + { + "epoch": 0.07, + "learning_rate": 9.950126255405834e-08, + "logits/chosen": -2.0722575187683105, + "logits/rejected": -2.0721771717071533, + "logps/chosen": -23.774879455566406, + "logps/rejected": -180.23590087890625, + "loss": 0.6642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013184929266571999, + "rewards/margins": 0.07920818030834198, + "rewards/rejected": -0.06602325290441513, + "step": 1266 + }, + { + "epoch": 0.07, + "learning_rate": 9.949993392371736e-08, + "logits/chosen": -2.003591537475586, + "logits/rejected": -1.9202687740325928, + "logps/chosen": -243.67503356933594, + "logps/rejected": -369.39739990234375, + "loss": 0.6644, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10411834716796875, + "rewards/margins": -0.022444158792495728, + "rewards/rejected": 0.12656250596046448, + "step": 1267 + }, + { + "epoch": 0.07, + "learning_rate": 9.949860353489661e-08, + "logits/chosen": -2.136141538619995, + "logits/rejected": -2.1295297145843506, + "logps/chosen": -160.42747497558594, + "logps/rejected": -303.9384460449219, + "loss": 0.5878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3604751527309418, + "rewards/margins": 0.06794583797454834, + "rewards/rejected": 0.29252931475639343, + "step": 1268 + }, + { + "epoch": 0.07, + "learning_rate": 9.949727138764332e-08, + "logits/chosen": -2.1994614601135254, + "logits/rejected": -2.170809268951416, + "logps/chosen": -48.09907531738281, + "logps/rejected": -277.6080627441406, + "loss": 0.6889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05663604661822319, + "rewards/margins": 0.07807770371437073, + "rewards/rejected": -0.13471375405788422, + "step": 1269 + }, + { + "epoch": 0.07, + "learning_rate": 9.949593748200484e-08, + "logits/chosen": -2.3996315002441406, + "logits/rejected": -2.4015939235687256, + "logps/chosen": -0.003816794604063034, + "logps/rejected": -65.67158508300781, + "loss": 0.7075, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1760508868974284e-06, + "rewards/margins": -0.05723002925515175, + "rewards/rejected": 0.057228852063417435, + "step": 1270 + }, + { + "epoch": 0.07, + "learning_rate": 9.949460181802854e-08, + "logits/chosen": -1.8901830911636353, + "logits/rejected": -1.8236963748931885, + "logps/chosen": -194.07298278808594, + "logps/rejected": -317.84246826171875, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05318145826458931, + "rewards/margins": 0.02508087269961834, + "rewards/rejected": 0.02810058556497097, + "step": 1271 + }, + { + "epoch": 0.07, + "learning_rate": 9.949326439576189e-08, + "logits/chosen": -2.3045074939727783, + "logits/rejected": -2.3119566440582275, + "logps/chosen": -14.417858123779297, + "logps/rejected": -140.51473999023438, + "loss": 0.6846, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08811922371387482, + "rewards/margins": -0.05224791169166565, + "rewards/rejected": 0.14036713540554047, + "step": 1272 + }, + { + "epoch": 0.07, + "learning_rate": 9.94919252152524e-08, + "logits/chosen": -2.282029628753662, + "logits/rejected": -2.2738800048828125, + "logps/chosen": -54.70420837402344, + "logps/rejected": -167.49313354492188, + "loss": 0.6393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13422012329101562, + "rewards/margins": 0.0730186402797699, + "rewards/rejected": 0.06120147928595543, + "step": 1273 + }, + { + "epoch": 0.07, + "learning_rate": 9.949058427654762e-08, + "logits/chosen": -2.1198348999023438, + "logits/rejected": -2.101660966873169, + "logps/chosen": -59.28822326660156, + "logps/rejected": -213.8439178466797, + "loss": 0.6344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019733428955078125, + "rewards/margins": 0.2500343322753906, + "rewards/rejected": -0.26976776123046875, + "step": 1274 + }, + { + "epoch": 0.07, + "learning_rate": 9.94892415796952e-08, + "logits/chosen": -2.1979730129241943, + "logits/rejected": -2.197662830352783, + "logps/chosen": -291.984375, + "logps/rejected": -445.072021484375, + "loss": 0.5173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.588488757610321, + "rewards/margins": 0.15075987577438354, + "rewards/rejected": 0.4377288818359375, + "step": 1275 + }, + { + "epoch": 0.07, + "learning_rate": 9.948789712474285e-08, + "logits/chosen": -1.7555549144744873, + "logits/rejected": -1.6747065782546997, + "logps/chosen": -286.39697265625, + "logps/rejected": -459.73590087890625, + "loss": 0.6323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11076965183019638, + "rewards/margins": 0.07753296196460724, + "rewards/rejected": 0.03323669359087944, + "step": 1276 + }, + { + "epoch": 0.07, + "learning_rate": 9.948655091173832e-08, + "logits/chosen": -2.1887624263763428, + "logits/rejected": -2.180530309677124, + "logps/chosen": -24.86859893798828, + "logps/rejected": -128.26736450195312, + "loss": 0.643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06540870666503906, + "rewards/margins": 0.1363811492919922, + "rewards/rejected": -0.07097244262695312, + "step": 1277 + }, + { + "epoch": 0.07, + "learning_rate": 9.948520294072944e-08, + "logits/chosen": -2.2574331760406494, + "logits/rejected": -2.2536447048187256, + "logps/chosen": -11.709007263183594, + "logps/rejected": -153.89927673339844, + "loss": 0.6069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0601838119328022, + "rewards/margins": 0.3120347261428833, + "rewards/rejected": -0.2518509030342102, + "step": 1278 + }, + { + "epoch": 0.07, + "learning_rate": 9.948385321176409e-08, + "logits/chosen": -2.0317022800445557, + "logits/rejected": -1.9008283615112305, + "logps/chosen": -296.99273681640625, + "logps/rejected": -727.406005859375, + "loss": 0.5721, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.669708251953125, + "rewards/margins": -0.10200196504592896, + "rewards/rejected": 0.771710216999054, + "step": 1279 + }, + { + "epoch": 0.07, + "learning_rate": 9.948250172489024e-08, + "logits/chosen": -1.984155297279358, + "logits/rejected": -1.9180432558059692, + "logps/chosen": -241.93087768554688, + "logps/rejected": -379.57818603515625, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7528152465820312, + "rewards/margins": 0.15443265438079834, + "rewards/rejected": 0.5983825922012329, + "step": 1280 + }, + { + "epoch": 0.07, + "learning_rate": 9.948114848015586e-08, + "logits/chosen": -2.0800442695617676, + "logits/rejected": -2.0220768451690674, + "logps/chosen": -268.17791748046875, + "logps/rejected": -542.1109008789062, + "loss": 0.4944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48096010088920593, + "rewards/margins": 0.2959686517715454, + "rewards/rejected": 0.18499146401882172, + "step": 1281 + }, + { + "epoch": 0.07, + "learning_rate": 9.947979347760905e-08, + "logits/chosen": -2.115814447402954, + "logits/rejected": -2.0952682495117188, + "logps/chosen": -225.08663940429688, + "logps/rejected": -386.5174560546875, + "loss": 0.5521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.206146240234375, + "rewards/margins": 0.3949340879917145, + "rewards/rejected": -0.18878784775733948, + "step": 1282 + }, + { + "epoch": 0.07, + "learning_rate": 9.947843671729795e-08, + "logits/chosen": -2.072068929672241, + "logits/rejected": -2.0684447288513184, + "logps/chosen": -214.74072265625, + "logps/rejected": -266.71875, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2753051817417145, + "rewards/margins": 0.2826171815395355, + "rewards/rejected": -0.007312011905014515, + "step": 1283 + }, + { + "epoch": 0.07, + "learning_rate": 9.947707819927075e-08, + "logits/chosen": -1.9931724071502686, + "logits/rejected": -1.9971486330032349, + "logps/chosen": -6.501626491546631, + "logps/rejected": -99.20262908935547, + "loss": 0.6186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024965478107333183, + "rewards/margins": 0.2950834333896637, + "rewards/rejected": -0.27011796832084656, + "step": 1284 + }, + { + "epoch": 0.07, + "learning_rate": 9.94757179235757e-08, + "logits/chosen": -2.158872365951538, + "logits/rejected": -2.157520055770874, + "logps/chosen": -13.569326400756836, + "logps/rejected": -45.15568161010742, + "loss": 0.7016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017825603485107422, + "rewards/margins": 0.0021115299314260483, + "rewards/rejected": -0.01993713341653347, + "step": 1285 + }, + { + "epoch": 0.07, + "learning_rate": 9.947435589026117e-08, + "logits/chosen": -1.8891863822937012, + "logits/rejected": -1.8950189352035522, + "logps/chosen": -255.34393310546875, + "logps/rejected": -242.964111328125, + "loss": 0.6769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01915130577981472, + "rewards/margins": 0.07827606052160263, + "rewards/rejected": -0.0974273681640625, + "step": 1286 + }, + { + "epoch": 0.07, + "learning_rate": 9.947299209937548e-08, + "logits/chosen": -2.3203699588775635, + "logits/rejected": -2.3006389141082764, + "logps/chosen": -150.4401092529297, + "logps/rejected": -328.4520568847656, + "loss": 0.6587, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3574722409248352, + "rewards/margins": -0.19584500789642334, + "rewards/rejected": 0.5533172488212585, + "step": 1287 + }, + { + "epoch": 0.07, + "learning_rate": 9.947162655096712e-08, + "logits/chosen": -2.0722784996032715, + "logits/rejected": -2.050806999206543, + "logps/chosen": -48.99272537231445, + "logps/rejected": -186.48971557617188, + "loss": 0.7469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19132576882839203, + "rewards/margins": 0.0016368776559829712, + "rewards/rejected": -0.192962646484375, + "step": 1288 + }, + { + "epoch": 0.08, + "learning_rate": 9.94702592450846e-08, + "logits/chosen": -2.1857426166534424, + "logits/rejected": -2.178748846054077, + "logps/chosen": -2.9650611877441406, + "logps/rejected": -108.87149047851562, + "loss": 0.6831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008860540576279163, + "rewards/margins": 0.014368963427841663, + "rewards/rejected": -0.0055084228515625, + "step": 1289 + }, + { + "epoch": 0.08, + "learning_rate": 9.94688901817765e-08, + "logits/chosen": -1.9542303085327148, + "logits/rejected": -1.950802206993103, + "logps/chosen": -51.012691497802734, + "logps/rejected": -168.01626586914062, + "loss": 0.6724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00771598843857646, + "rewards/margins": 0.06273079663515091, + "rewards/rejected": -0.07044678181409836, + "step": 1290 + }, + { + "epoch": 0.08, + "learning_rate": 9.946751936109143e-08, + "logits/chosen": -2.0596766471862793, + "logits/rejected": -2.0107345581054688, + "logps/chosen": -207.90164184570312, + "logps/rejected": -299.73675537109375, + "loss": 0.5754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19898223876953125, + "rewards/margins": 0.23246613144874573, + "rewards/rejected": -0.03348388895392418, + "step": 1291 + }, + { + "epoch": 0.08, + "learning_rate": 9.94661467830781e-08, + "logits/chosen": -2.093184232711792, + "logits/rejected": -2.0593762397766113, + "logps/chosen": -175.90093994140625, + "logps/rejected": -279.45263671875, + "loss": 0.6443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06119384989142418, + "rewards/margins": 0.1686248779296875, + "rewards/rejected": -0.10743103176355362, + "step": 1292 + }, + { + "epoch": 0.08, + "learning_rate": 9.946477244778526e-08, + "logits/chosen": -1.9786317348480225, + "logits/rejected": -1.9920666217803955, + "logps/chosen": -239.92955017089844, + "logps/rejected": -260.6810302734375, + "loss": 0.6357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05218658596277237, + "rewards/margins": 0.16286468505859375, + "rewards/rejected": -0.11067809909582138, + "step": 1293 + }, + { + "epoch": 0.08, + "learning_rate": 9.946339635526177e-08, + "logits/chosen": -2.1515746116638184, + "logits/rejected": -2.164912223815918, + "logps/chosen": -322.19561767578125, + "logps/rejected": -464.94122314453125, + "loss": 0.5175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5449493527412415, + "rewards/margins": 0.20080262422561646, + "rewards/rejected": 0.344146728515625, + "step": 1294 + }, + { + "epoch": 0.08, + "learning_rate": 9.946201850555647e-08, + "logits/chosen": -2.113661766052246, + "logits/rejected": -2.111400604248047, + "logps/chosen": -0.007444032467901707, + "logps/rejected": -107.82622528076172, + "loss": 0.6797, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.502750231149548e-07, + "rewards/margins": 0.05263080447912216, + "rewards/rejected": -0.052629854530096054, + "step": 1295 + }, + { + "epoch": 0.08, + "learning_rate": 9.946063889871833e-08, + "logits/chosen": -2.066720724105835, + "logits/rejected": -2.065749168395996, + "logps/chosen": -22.46726417541504, + "logps/rejected": -126.1124267578125, + "loss": 0.6498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06268539279699326, + "rewards/margins": 0.08109207451343536, + "rewards/rejected": -0.01840667799115181, + "step": 1296 + }, + { + "epoch": 0.08, + "learning_rate": 9.945925753479636e-08, + "logits/chosen": -2.3339908123016357, + "logits/rejected": -2.312175750732422, + "logps/chosen": -0.0005766989197582006, + "logps/rejected": -153.0244598388672, + "loss": 0.6251, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.398139703378547e-06, + "rewards/margins": 0.2936673164367676, + "rewards/rejected": -0.29367372393608093, + "step": 1297 + }, + { + "epoch": 0.08, + "learning_rate": 9.945787441383963e-08, + "logits/chosen": -2.205422878265381, + "logits/rejected": -2.2093183994293213, + "logps/chosen": -222.45010375976562, + "logps/rejected": -325.2585754394531, + "loss": 0.5254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5320984125137329, + "rewards/margins": 0.13580933213233948, + "rewards/rejected": 0.39628908038139343, + "step": 1298 + }, + { + "epoch": 0.08, + "learning_rate": 9.945648953589729e-08, + "logits/chosen": -2.03120493888855, + "logits/rejected": -1.9928643703460693, + "logps/chosen": -218.64805603027344, + "logps/rejected": -436.4791259765625, + "loss": 0.6112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16916047036647797, + "rewards/margins": 0.12592926621437073, + "rewards/rejected": 0.04323120042681694, + "step": 1299 + }, + { + "epoch": 0.08, + "learning_rate": 9.945510290101851e-08, + "logits/chosen": -2.1182472705841064, + "logits/rejected": -2.0469863414764404, + "logps/chosen": -240.794677734375, + "logps/rejected": -733.5773315429688, + "loss": 0.5685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5164520144462585, + "rewards/margins": 0.04087218642234802, + "rewards/rejected": 0.4755798280239105, + "step": 1300 + }, + { + "epoch": 0.08, + "learning_rate": 9.945371450925255e-08, + "logits/chosen": -2.0136940479278564, + "logits/rejected": -2.012216806411743, + "logps/chosen": -0.00333888060413301, + "logps/rejected": -76.1067123413086, + "loss": 0.6941, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3773149223416112e-05, + "rewards/margins": 0.001641723676584661, + "rewards/rejected": -0.0016654968494549394, + "step": 1301 + }, + { + "epoch": 0.08, + "learning_rate": 9.945232436064877e-08, + "logits/chosen": -2.1865687370300293, + "logits/rejected": -2.1393723487854004, + "logps/chosen": -219.98858642578125, + "logps/rejected": -296.2387390136719, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44739532470703125, + "rewards/margins": 0.225016787648201, + "rewards/rejected": 0.22237853705883026, + "step": 1302 + }, + { + "epoch": 0.08, + "learning_rate": 9.945093245525653e-08, + "logits/chosen": -2.1163885593414307, + "logits/rejected": -2.0976834297180176, + "logps/chosen": -31.604177474975586, + "logps/rejected": -232.37168884277344, + "loss": 0.6361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05566997453570366, + "rewards/margins": 0.15453167259693146, + "rewards/rejected": -0.0988616943359375, + "step": 1303 + }, + { + "epoch": 0.08, + "learning_rate": 9.944953879312527e-08, + "logits/chosen": -2.1916110515594482, + "logits/rejected": -2.196586847305298, + "logps/chosen": -4.053390026092529, + "logps/rejected": -30.412193298339844, + "loss": 0.6911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004692268557846546, + "rewards/margins": 0.004441261291503906, + "rewards/rejected": -0.009133529849350452, + "step": 1304 + }, + { + "epoch": 0.08, + "learning_rate": 9.94481433743045e-08, + "logits/chosen": -2.295689821243286, + "logits/rejected": -2.2598659992218018, + "logps/chosen": -85.32882690429688, + "logps/rejected": -281.4569396972656, + "loss": 0.6169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0325775146484375, + "rewards/margins": 0.27951353788375854, + "rewards/rejected": -0.24693603813648224, + "step": 1305 + }, + { + "epoch": 0.08, + "learning_rate": 9.944674619884381e-08, + "logits/chosen": -2.3599305152893066, + "logits/rejected": -2.360044479370117, + "logps/chosen": -21.29201316833496, + "logps/rejected": -116.73511505126953, + "loss": 0.6485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031928062438964844, + "rewards/margins": 0.15142422914505005, + "rewards/rejected": -0.11949615925550461, + "step": 1306 + }, + { + "epoch": 0.08, + "learning_rate": 9.944534726679283e-08, + "logits/chosen": -2.240661859512329, + "logits/rejected": -2.234161853790283, + "logps/chosen": -2.8013953851768747e-05, + "logps/rejected": -98.55488586425781, + "loss": 0.6663, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3841858265427618e-08, + "rewards/margins": 0.10965269058942795, + "rewards/rejected": -0.10965271294116974, + "step": 1307 + }, + { + "epoch": 0.08, + "learning_rate": 9.944394657820125e-08, + "logits/chosen": -2.0837204456329346, + "logits/rejected": -2.075176239013672, + "logps/chosen": -190.90121459960938, + "logps/rejected": -346.03131103515625, + "loss": 0.6475, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.26264649629592896, + "rewards/margins": -0.06447753310203552, + "rewards/rejected": 0.3271240293979645, + "step": 1308 + }, + { + "epoch": 0.08, + "learning_rate": 9.944254413311884e-08, + "logits/chosen": -1.9841408729553223, + "logits/rejected": -1.9863860607147217, + "logps/chosen": -7.038928031921387, + "logps/rejected": -158.9808349609375, + "loss": 0.6138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03741583973169327, + "rewards/margins": 0.3933977782726288, + "rewards/rejected": -0.43081361055374146, + "step": 1309 + }, + { + "epoch": 0.08, + "learning_rate": 9.944113993159538e-08, + "logits/chosen": -2.18277645111084, + "logits/rejected": -2.176220178604126, + "logps/chosen": -43.578765869140625, + "logps/rejected": -248.53305053710938, + "loss": 0.6208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.057647705078125, + "rewards/margins": 0.21523284912109375, + "rewards/rejected": -0.15758514404296875, + "step": 1310 + }, + { + "epoch": 0.08, + "learning_rate": 9.943973397368082e-08, + "logits/chosen": -1.9745440483093262, + "logits/rejected": -1.973909854888916, + "logps/chosen": -12.87063980102539, + "logps/rejected": -87.75851440429688, + "loss": 0.653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034304141998291016, + "rewards/margins": 0.12095499038696289, + "rewards/rejected": -0.08665084838867188, + "step": 1311 + }, + { + "epoch": 0.08, + "learning_rate": 9.943832625942505e-08, + "logits/chosen": -2.297041893005371, + "logits/rejected": -2.2737205028533936, + "logps/chosen": -86.9003677368164, + "logps/rejected": -369.4922180175781, + "loss": 0.5476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17697982490062714, + "rewards/margins": 0.4642173647880554, + "rewards/rejected": -0.2872375547885895, + "step": 1312 + }, + { + "epoch": 0.08, + "learning_rate": 9.94369167888781e-08, + "logits/chosen": -2.0165421962738037, + "logits/rejected": -1.998962640762329, + "logps/chosen": -253.3556671142578, + "logps/rejected": -581.9070434570312, + "loss": 0.4642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5942489504814148, + "rewards/margins": 0.3145614564418793, + "rewards/rejected": 0.2796874940395355, + "step": 1313 + }, + { + "epoch": 0.08, + "learning_rate": 9.943550556209008e-08, + "logits/chosen": -2.0595967769622803, + "logits/rejected": -2.0675487518310547, + "logps/chosen": -13.380297660827637, + "logps/rejected": -90.1358871459961, + "loss": 0.6793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04251556470990181, + "rewards/margins": 0.025702668353915215, + "rewards/rejected": 0.016812896355986595, + "step": 1314 + }, + { + "epoch": 0.08, + "learning_rate": 9.943409257911106e-08, + "logits/chosen": -2.0492136478424072, + "logits/rejected": -1.9959474802017212, + "logps/chosen": -304.4813232421875, + "logps/rejected": -475.0532531738281, + "loss": 0.6197, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.544537365436554, + "rewards/margins": -0.252838134765625, + "rewards/rejected": 0.797375500202179, + "step": 1315 + }, + { + "epoch": 0.08, + "learning_rate": 9.943267783999128e-08, + "logits/chosen": -2.1451923847198486, + "logits/rejected": -2.1435136795043945, + "logps/chosen": -19.170631408691406, + "logps/rejected": -96.02735900878906, + "loss": 0.7053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07763824611902237, + "rewards/margins": 0.02285919338464737, + "rewards/rejected": -0.10049743950366974, + "step": 1316 + }, + { + "epoch": 0.08, + "learning_rate": 9.943126134478094e-08, + "logits/chosen": -2.107750415802002, + "logits/rejected": -2.094609022140503, + "logps/chosen": -253.34796142578125, + "logps/rejected": -342.0937194824219, + "loss": 0.565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4815216064453125, + "rewards/margins": 0.0025238096714019775, + "rewards/rejected": 0.4789977967739105, + "step": 1317 + }, + { + "epoch": 0.08, + "learning_rate": 9.942984309353045e-08, + "logits/chosen": -2.3536150455474854, + "logits/rejected": -2.352789878845215, + "logps/chosen": -22.204191207885742, + "logps/rejected": -64.03655242919922, + "loss": 0.6326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11333771049976349, + "rewards/margins": 0.14087830483913422, + "rewards/rejected": -0.02754058875143528, + "step": 1318 + }, + { + "epoch": 0.08, + "learning_rate": 9.942842308629013e-08, + "logits/chosen": -2.1467721462249756, + "logits/rejected": -2.1339285373687744, + "logps/chosen": -182.60784912109375, + "logps/rejected": -333.47442626953125, + "loss": 0.6348, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3771469295024872, + "rewards/margins": -0.14562532305717468, + "rewards/rejected": 0.5227722525596619, + "step": 1319 + }, + { + "epoch": 0.08, + "learning_rate": 9.942700132311044e-08, + "logits/chosen": -2.247677803039551, + "logits/rejected": -2.2040538787841797, + "logps/chosen": -21.441165924072266, + "logps/rejected": -223.33877563476562, + "loss": 0.5533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039272308349609375, + "rewards/margins": 0.6190849542617798, + "rewards/rejected": -0.5798126459121704, + "step": 1320 + }, + { + "epoch": 0.08, + "learning_rate": 9.942557780404188e-08, + "logits/chosen": -2.0182342529296875, + "logits/rejected": -2.00142502784729, + "logps/chosen": -172.24139404296875, + "logps/rejected": -251.57330322265625, + "loss": 0.6182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08926086872816086, + "rewards/margins": 0.199371337890625, + "rewards/rejected": -0.11011047661304474, + "step": 1321 + }, + { + "epoch": 0.08, + "learning_rate": 9.942415252913504e-08, + "logits/chosen": -2.248272180557251, + "logits/rejected": -2.2457897663116455, + "logps/chosen": -33.120811462402344, + "logps/rejected": -167.93325805664062, + "loss": 0.6423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041744232177734375, + "rewards/margins": 0.15110093355178833, + "rewards/rejected": -0.10935669392347336, + "step": 1322 + }, + { + "epoch": 0.08, + "learning_rate": 9.942272549844055e-08, + "logits/chosen": -2.0734636783599854, + "logits/rejected": -2.0691025257110596, + "logps/chosen": -21.59931755065918, + "logps/rejected": -91.212158203125, + "loss": 0.6665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009985161013901234, + "rewards/margins": 0.06373577564954758, + "rewards/rejected": -0.05375061184167862, + "step": 1323 + }, + { + "epoch": 0.08, + "learning_rate": 9.942129671200907e-08, + "logits/chosen": -2.024623155593872, + "logits/rejected": -2.0198848247528076, + "logps/chosen": -5.958587646484375, + "logps/rejected": -55.51581954956055, + "loss": 0.6985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02760162390768528, + "rewards/margins": 0.003812408074736595, + "rewards/rejected": -0.031414031982421875, + "step": 1324 + }, + { + "epoch": 0.08, + "learning_rate": 9.941986616989143e-08, + "logits/chosen": -2.118077278137207, + "logits/rejected": -2.1015970706939697, + "logps/chosen": -52.3299674987793, + "logps/rejected": -204.13156127929688, + "loss": 0.6552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017561722546815872, + "rewards/margins": 0.13466912508010864, + "rewards/rejected": -0.15223084390163422, + "step": 1325 + }, + { + "epoch": 0.08, + "learning_rate": 9.941843387213836e-08, + "logits/chosen": -2.2254385948181152, + "logits/rejected": -2.207176446914673, + "logps/chosen": -2.2888018065714277e-05, + "logps/rejected": -203.788818359375, + "loss": 0.57, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.15253918315284e-08, + "rewards/margins": 0.5732514262199402, + "rewards/rejected": -0.5732513666152954, + "step": 1326 + }, + { + "epoch": 0.08, + "learning_rate": 9.94169998188008e-08, + "logits/chosen": -2.2803125381469727, + "logits/rejected": -2.276865243911743, + "logps/chosen": -2.58921480178833, + "logps/rejected": -138.47784423828125, + "loss": 0.6231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010848045349121094, + "rewards/margins": 0.293835312128067, + "rewards/rejected": -0.2939437925815582, + "step": 1327 + }, + { + "epoch": 0.08, + "learning_rate": 9.94155640099297e-08, + "logits/chosen": -1.9336966276168823, + "logits/rejected": -1.931288719177246, + "logps/chosen": -57.5152587890625, + "logps/rejected": -214.53421020507812, + "loss": 0.5669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03450126573443413, + "rewards/margins": 0.5258755087852478, + "rewards/rejected": -0.4913742244243622, + "step": 1328 + }, + { + "epoch": 0.08, + "learning_rate": 9.941412644557603e-08, + "logits/chosen": -2.2891664505004883, + "logits/rejected": -2.283160924911499, + "logps/chosen": -16.480243682861328, + "logps/rejected": -102.74440002441406, + "loss": 0.7039, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02372436597943306, + "rewards/margins": -0.0330810546875, + "rewards/rejected": 0.009356689639389515, + "step": 1329 + }, + { + "epoch": 0.08, + "learning_rate": 9.941268712579089e-08, + "logits/chosen": -2.3054399490356445, + "logits/rejected": -2.299320697784424, + "logps/chosen": -33.18330383300781, + "logps/rejected": -126.47606658935547, + "loss": 0.6428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019420623779296875, + "rewards/margins": 0.16266632080078125, + "rewards/rejected": -0.18208694458007812, + "step": 1330 + }, + { + "epoch": 0.08, + "learning_rate": 9.941124605062538e-08, + "logits/chosen": -2.10009503364563, + "logits/rejected": -2.070140838623047, + "logps/chosen": -224.36972045898438, + "logps/rejected": -398.77783203125, + "loss": 0.5932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32391053438186646, + "rewards/margins": 0.11368103325366974, + "rewards/rejected": 0.21022950112819672, + "step": 1331 + }, + { + "epoch": 0.08, + "learning_rate": 9.940980322013072e-08, + "logits/chosen": -2.0338640213012695, + "logits/rejected": -2.0173842906951904, + "logps/chosen": -35.82672882080078, + "logps/rejected": -226.61148071289062, + "loss": 0.6503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022183990105986595, + "rewards/margins": 0.15359267592430115, + "rewards/rejected": -0.13140869140625, + "step": 1332 + }, + { + "epoch": 0.08, + "learning_rate": 9.940835863435818e-08, + "logits/chosen": -2.243325710296631, + "logits/rejected": -2.2463839054107666, + "logps/chosen": -21.108551025390625, + "logps/rejected": -119.3553466796875, + "loss": 0.664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015323639381676912, + "rewards/margins": 0.12438774108886719, + "rewards/rejected": -0.1259201020002365, + "step": 1333 + }, + { + "epoch": 0.08, + "learning_rate": 9.940691229335904e-08, + "logits/chosen": -2.1780452728271484, + "logits/rejected": -2.1298460960388184, + "logps/chosen": -216.70498657226562, + "logps/rejected": -388.9949951171875, + "loss": 0.5085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6310455203056335, + "rewards/margins": 0.13864439725875854, + "rewards/rejected": 0.492401123046875, + "step": 1334 + }, + { + "epoch": 0.08, + "learning_rate": 9.940546419718472e-08, + "logits/chosen": -2.317394971847534, + "logits/rejected": -2.31801438331604, + "logps/chosen": -10.637380599975586, + "logps/rejected": -247.037353515625, + "loss": 0.6193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003074645937886089, + "rewards/margins": 0.32096022367477417, + "rewards/rejected": -0.321267694234848, + "step": 1335 + }, + { + "epoch": 0.08, + "learning_rate": 9.940401434588664e-08, + "logits/chosen": -2.1841697692871094, + "logits/rejected": -2.1380317211151123, + "logps/chosen": -205.129638671875, + "logps/rejected": -232.65634155273438, + "loss": 0.6173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14485931396484375, + "rewards/margins": 0.16767120361328125, + "rewards/rejected": -0.0228118896484375, + "step": 1336 + }, + { + "epoch": 0.08, + "learning_rate": 9.940256273951628e-08, + "logits/chosen": -2.1330924034118652, + "logits/rejected": -2.122293472290039, + "logps/chosen": -224.88340759277344, + "logps/rejected": -259.1893005371094, + "loss": 0.5533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4797500669956207, + "rewards/margins": 0.022801220417022705, + "rewards/rejected": 0.456948846578598, + "step": 1337 + }, + { + "epoch": 0.08, + "learning_rate": 9.940110937812526e-08, + "logits/chosen": -2.024876832962036, + "logits/rejected": -2.0395264625549316, + "logps/chosen": -351.07952880859375, + "logps/rejected": -447.8812255859375, + "loss": 0.5383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.664379894733429, + "rewards/margins": 0.02986752986907959, + "rewards/rejected": 0.6345123648643494, + "step": 1338 + }, + { + "epoch": 0.08, + "learning_rate": 9.939965426176519e-08, + "logits/chosen": -2.000150442123413, + "logits/rejected": -2.0090925693511963, + "logps/chosen": -293.13336181640625, + "logps/rejected": -326.4102783203125, + "loss": 0.68, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04732055589556694, + "rewards/margins": 0.046661376953125, + "rewards/rejected": 0.0006591796991415322, + "step": 1339 + }, + { + "epoch": 0.08, + "learning_rate": 9.939819739048776e-08, + "logits/chosen": -2.200171709060669, + "logits/rejected": -2.1841585636138916, + "logps/chosen": -40.016902923583984, + "logps/rejected": -184.420654296875, + "loss": 0.6831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004164505284279585, + "rewards/margins": 0.027411270886659622, + "rewards/rejected": -0.02324676513671875, + "step": 1340 + }, + { + "epoch": 0.08, + "learning_rate": 9.939673876434471e-08, + "logits/chosen": -2.1334073543548584, + "logits/rejected": -2.1354589462280273, + "logps/chosen": -9.214603778673336e-05, + "logps/rejected": -113.34416961669922, + "loss": 0.6528, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0728690824635123e-07, + "rewards/margins": 0.16853418946266174, + "rewards/rejected": -0.1685340851545334, + "step": 1341 + }, + { + "epoch": 0.08, + "learning_rate": 9.939527838338788e-08, + "logits/chosen": -2.035623550415039, + "logits/rejected": -2.025881052017212, + "logps/chosen": -13.92465591430664, + "logps/rejected": -99.9040756225586, + "loss": 0.6757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01360712107270956, + "rewards/margins": 0.07827749103307724, + "rewards/rejected": -0.09188461303710938, + "step": 1342 + }, + { + "epoch": 0.08, + "learning_rate": 9.939381624766916e-08, + "logits/chosen": -1.8988436460494995, + "logits/rejected": -1.8934266567230225, + "logps/chosen": -92.08797454833984, + "logps/rejected": -205.1725616455078, + "loss": 0.6325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12985458970069885, + "rewards/margins": 0.11925430595874786, + "rewards/rejected": 0.010600280947983265, + "step": 1343 + }, + { + "epoch": 0.08, + "learning_rate": 9.939235235724046e-08, + "logits/chosen": -1.8999179601669312, + "logits/rejected": -1.8289790153503418, + "logps/chosen": -286.671875, + "logps/rejected": -479.57293701171875, + "loss": 0.6561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00433349609375, + "rewards/margins": 0.17101135849952698, + "rewards/rejected": -0.16667786240577698, + "step": 1344 + }, + { + "epoch": 0.08, + "learning_rate": 9.93908867121538e-08, + "logits/chosen": -2.1909372806549072, + "logits/rejected": -2.170940399169922, + "logps/chosen": -0.00024805538123473525, + "logps/rejected": -239.33462524414062, + "loss": 0.5962, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5864283088594675e-06, + "rewards/margins": 0.42502182722091675, + "rewards/rejected": -0.4250244200229645, + "step": 1345 + }, + { + "epoch": 0.08, + "learning_rate": 9.938941931246126e-08, + "logits/chosen": -1.9963154792785645, + "logits/rejected": -1.9457005262374878, + "logps/chosen": -185.9613037109375, + "logps/rejected": -475.601806640625, + "loss": 0.5641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3968505859375, + "rewards/margins": 0.0988616943359375, + "rewards/rejected": 0.2979888916015625, + "step": 1346 + }, + { + "epoch": 0.08, + "learning_rate": 9.938795015821495e-08, + "logits/chosen": -1.9722024202346802, + "logits/rejected": -1.9132168292999268, + "logps/chosen": -182.42005920410156, + "logps/rejected": -377.0299072265625, + "loss": 0.6374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09901275485754013, + "rewards/margins": 0.12177276611328125, + "rewards/rejected": -0.02276000939309597, + "step": 1347 + }, + { + "epoch": 0.08, + "learning_rate": 9.938647924946706e-08, + "logits/chosen": -2.1714494228363037, + "logits/rejected": -2.1587891578674316, + "logps/chosen": -59.698978424072266, + "logps/rejected": -197.13438415527344, + "loss": 0.6992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025043487548828125, + "rewards/margins": 0.034105684608221054, + "rewards/rejected": -0.05914917215704918, + "step": 1348 + }, + { + "epoch": 0.08, + "learning_rate": 9.938500658626985e-08, + "logits/chosen": -2.1153738498687744, + "logits/rejected": -2.0831172466278076, + "logps/chosen": -144.31761169433594, + "logps/rejected": -296.0618591308594, + "loss": 0.6163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02122802846133709, + "rewards/margins": 0.35560303926467896, + "rewards/rejected": -0.3768310546875, + "step": 1349 + }, + { + "epoch": 0.08, + "learning_rate": 9.938353216867566e-08, + "logits/chosen": -2.234546184539795, + "logits/rejected": -2.2140650749206543, + "logps/chosen": -190.365234375, + "logps/rejected": -301.07354736328125, + "loss": 0.6115, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3619735836982727, + "rewards/margins": -0.07895660400390625, + "rewards/rejected": 0.44093018770217896, + "step": 1350 + }, + { + "epoch": 0.08, + "learning_rate": 9.938205599673682e-08, + "logits/chosen": -2.3321197032928467, + "logits/rejected": -2.327932596206665, + "logps/chosen": -0.013844850473105907, + "logps/rejected": -62.073829650878906, + "loss": 0.6916, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7413090467453e-05, + "rewards/margins": 0.006390351802110672, + "rewards/rejected": -0.006427764892578125, + "step": 1351 + }, + { + "epoch": 0.08, + "learning_rate": 9.938057807050582e-08, + "logits/chosen": -2.25657320022583, + "logits/rejected": -2.255286455154419, + "logps/chosen": -182.42691040039062, + "logps/rejected": -258.0939636230469, + "loss": 0.5749, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4982849061489105, + "rewards/margins": -0.016278117895126343, + "rewards/rejected": 0.5145630240440369, + "step": 1352 + }, + { + "epoch": 0.08, + "learning_rate": 9.937909839003512e-08, + "logits/chosen": -2.0845563411712646, + "logits/rejected": -2.0852630138397217, + "logps/chosen": -260.3214416503906, + "logps/rejected": -365.8252868652344, + "loss": 0.5388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49347230792045593, + "rewards/margins": 0.09074708819389343, + "rewards/rejected": 0.4027252197265625, + "step": 1353 + }, + { + "epoch": 0.08, + "learning_rate": 9.937761695537732e-08, + "logits/chosen": -2.1300554275512695, + "logits/rejected": -2.082735061645508, + "logps/chosen": -56.49367141723633, + "logps/rejected": -294.1830749511719, + "loss": 0.5722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04045143350958824, + "rewards/margins": 0.48671913146972656, + "rewards/rejected": -0.446267694234848, + "step": 1354 + }, + { + "epoch": 0.08, + "learning_rate": 9.937613376658502e-08, + "logits/chosen": -1.9919198751449585, + "logits/rejected": -2.032048463821411, + "logps/chosen": -224.77305603027344, + "logps/rejected": -302.56646728515625, + "loss": 0.6663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02203521691262722, + "rewards/margins": 0.08726349472999573, + "rewards/rejected": -0.06522827595472336, + "step": 1355 + }, + { + "epoch": 0.08, + "learning_rate": 9.937464882371094e-08, + "logits/chosen": -2.131202220916748, + "logits/rejected": -2.190444231033325, + "logps/chosen": -233.52120971679688, + "logps/rejected": -178.24008178710938, + "loss": 0.4631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6680054068565369, + "rewards/margins": 0.28110048174858093, + "rewards/rejected": 0.38690492510795593, + "step": 1356 + }, + { + "epoch": 0.08, + "learning_rate": 9.93731621268078e-08, + "logits/chosen": -2.2168545722961426, + "logits/rejected": -2.2117223739624023, + "logps/chosen": -103.12413024902344, + "logps/rejected": -254.21243286132812, + "loss": 0.6907, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0462799072265625, + "rewards/margins": -0.07742919772863388, + "rewards/rejected": 0.12370910495519638, + "step": 1357 + }, + { + "epoch": 0.08, + "learning_rate": 9.937167367592844e-08, + "logits/chosen": -2.108828544616699, + "logits/rejected": -2.1125826835632324, + "logps/chosen": -31.022523880004883, + "logps/rejected": -139.9717254638672, + "loss": 0.7391, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09181194752454758, + "rewards/margins": -0.10521679371595383, + "rewards/rejected": 0.01340484619140625, + "step": 1358 + }, + { + "epoch": 0.08, + "learning_rate": 9.937018347112573e-08, + "logits/chosen": -2.153000831604004, + "logits/rejected": -2.1498990058898926, + "logps/chosen": -0.04845263436436653, + "logps/rejected": -69.27890014648438, + "loss": 0.6407, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.509829785907641e-05, + "rewards/margins": 0.22151759266853333, + "rewards/rejected": -0.22143249213695526, + "step": 1359 + }, + { + "epoch": 0.08, + "learning_rate": 9.93686915124526e-08, + "logits/chosen": -2.218414068222046, + "logits/rejected": -2.2151753902435303, + "logps/chosen": -78.9791259765625, + "logps/rejected": -160.1187744140625, + "loss": 0.5624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20931091904640198, + "rewards/margins": 0.2928726375102997, + "rewards/rejected": -0.08356171101331711, + "step": 1360 + }, + { + "epoch": 0.08, + "learning_rate": 9.936719779996207e-08, + "logits/chosen": -2.2981016635894775, + "logits/rejected": -2.291104555130005, + "logps/chosen": -193.8251190185547, + "logps/rejected": -355.42010498046875, + "loss": 0.5948, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4530181884765625, + "rewards/margins": -0.016308605670928955, + "rewards/rejected": 0.46932679414749146, + "step": 1361 + }, + { + "epoch": 0.08, + "learning_rate": 9.936570233370718e-08, + "logits/chosen": -2.1416409015655518, + "logits/rejected": -2.03183913230896, + "logps/chosen": -249.31756591796875, + "logps/rejected": -333.11126708984375, + "loss": 0.5721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.230845645070076, + "rewards/margins": 0.247721865773201, + "rewards/rejected": -0.016876220703125, + "step": 1362 + }, + { + "epoch": 0.08, + "learning_rate": 9.936420511374108e-08, + "logits/chosen": -2.1470468044281006, + "logits/rejected": -2.1164228916168213, + "logps/chosen": -20.114858627319336, + "logps/rejected": -191.66470336914062, + "loss": 0.6234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0044990540482103825, + "rewards/margins": 0.3002723753452301, + "rewards/rejected": -0.30477142333984375, + "step": 1363 + }, + { + "epoch": 0.08, + "learning_rate": 9.936270614011695e-08, + "logits/chosen": -2.1101512908935547, + "logits/rejected": -2.1085264682769775, + "logps/chosen": -0.5717630982398987, + "logps/rejected": -86.57382202148438, + "loss": 0.6615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004783213138580322, + "rewards/margins": 0.10654941946268082, + "rewards/rejected": -0.1017662063241005, + "step": 1364 + }, + { + "epoch": 0.08, + "learning_rate": 9.936120541288804e-08, + "logits/chosen": -2.187126874923706, + "logits/rejected": -2.128554105758667, + "logps/chosen": -232.6761474609375, + "logps/rejected": -336.3529052734375, + "loss": 0.5065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5097808837890625, + "rewards/margins": 0.2582641541957855, + "rewards/rejected": 0.251516729593277, + "step": 1365 + }, + { + "epoch": 0.08, + "learning_rate": 9.935970293210764e-08, + "logits/chosen": -2.074474573135376, + "logits/rejected": -2.0229830741882324, + "logps/chosen": -227.98989868164062, + "logps/rejected": -434.7301330566406, + "loss": 0.6267, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4404586851596832, + "rewards/margins": -0.18883207440376282, + "rewards/rejected": 0.629290759563446, + "step": 1366 + }, + { + "epoch": 0.08, + "learning_rate": 9.935819869782918e-08, + "logits/chosen": -2.0146169662475586, + "logits/rejected": -1.9986120462417603, + "logps/chosen": -192.63583374023438, + "logps/rejected": -336.9893798828125, + "loss": 0.6163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08031006157398224, + "rewards/margins": 0.2687011957168579, + "rewards/rejected": -0.18839111924171448, + "step": 1367 + }, + { + "epoch": 0.08, + "learning_rate": 9.935669271010604e-08, + "logits/chosen": -2.1989102363586426, + "logits/rejected": -2.2014517784118652, + "logps/chosen": -41.993255615234375, + "logps/rejected": -110.03718566894531, + "loss": 0.6891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0063457489013671875, + "rewards/margins": 0.009560775943100452, + "rewards/rejected": -0.0032150268089026213, + "step": 1368 + }, + { + "epoch": 0.08, + "learning_rate": 9.935518496899175e-08, + "logits/chosen": -2.291740655899048, + "logits/rejected": -2.236729621887207, + "logps/chosen": -210.03497314453125, + "logps/rejected": -454.91778564453125, + "loss": 0.5839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3911544978618622, + "rewards/margins": 0.024809271097183228, + "rewards/rejected": 0.36634522676467896, + "step": 1369 + }, + { + "epoch": 0.08, + "learning_rate": 9.935367547453988e-08, + "logits/chosen": -2.0095040798187256, + "logits/rejected": -1.9688464403152466, + "logps/chosen": -272.4105224609375, + "logps/rejected": -350.56878662109375, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15372924506664276, + "rewards/margins": 0.11813963949680328, + "rewards/rejected": 0.03558960184454918, + "step": 1370 + }, + { + "epoch": 0.08, + "learning_rate": 9.935216422680403e-08, + "logits/chosen": -2.0178005695343018, + "logits/rejected": -2.0094997882843018, + "logps/chosen": -172.68528747558594, + "logps/rejected": -289.64208984375, + "loss": 0.5473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5869415402412415, + "rewards/margins": 0.03973996639251709, + "rewards/rejected": 0.5472015738487244, + "step": 1371 + }, + { + "epoch": 0.08, + "learning_rate": 9.93506512258379e-08, + "logits/chosen": -2.124148368835449, + "logits/rejected": -2.0826284885406494, + "logps/chosen": -153.35646057128906, + "logps/rejected": -311.9864196777344, + "loss": 0.6326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1680953949689865, + "rewards/margins": 0.02484893798828125, + "rewards/rejected": 0.14324645698070526, + "step": 1372 + }, + { + "epoch": 0.08, + "learning_rate": 9.934913647169523e-08, + "logits/chosen": -2.087419271469116, + "logits/rejected": -2.100865602493286, + "logps/chosen": -262.3250427246094, + "logps/rejected": -267.97918701171875, + "loss": 0.6916, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0190887451171875, + "rewards/margins": -0.03891906887292862, + "rewards/rejected": 0.05800781399011612, + "step": 1373 + }, + { + "epoch": 0.08, + "learning_rate": 9.934761996442987e-08, + "logits/chosen": -1.9986003637313843, + "logits/rejected": -1.9971543550491333, + "logps/chosen": -44.266902923583984, + "logps/rejected": -92.14395141601562, + "loss": 0.7091, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.039881136268377304, + "rewards/margins": -0.023735811933875084, + "rewards/rejected": -0.01614532433450222, + "step": 1374 + }, + { + "epoch": 0.08, + "learning_rate": 9.934610170409563e-08, + "logits/chosen": -2.0104212760925293, + "logits/rejected": -2.0060503482818604, + "logps/chosen": -72.9247817993164, + "logps/rejected": -234.68304443359375, + "loss": 0.6591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037911225110292435, + "rewards/margins": 0.2292778044939041, + "rewards/rejected": -0.26718902587890625, + "step": 1375 + }, + { + "epoch": 0.08, + "learning_rate": 9.934458169074648e-08, + "logits/chosen": -2.280184507369995, + "logits/rejected": -2.281303644180298, + "logps/chosen": -16.436782836914062, + "logps/rejected": -100.03858947753906, + "loss": 0.6632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04382724687457085, + "rewards/margins": 0.07021351158618927, + "rewards/rejected": -0.026386260986328125, + "step": 1376 + }, + { + "epoch": 0.08, + "learning_rate": 9.934305992443644e-08, + "logits/chosen": -2.0434117317199707, + "logits/rejected": -2.035874605178833, + "logps/chosen": -357.7965087890625, + "logps/rejected": -414.6343688964844, + "loss": 0.5443, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.552960216999054, + "rewards/margins": -0.00641477108001709, + "rewards/rejected": 0.559374988079071, + "step": 1377 + }, + { + "epoch": 0.08, + "learning_rate": 9.934153640521954e-08, + "logits/chosen": -2.112661600112915, + "logits/rejected": -2.100825548171997, + "logps/chosen": -47.01097106933594, + "logps/rejected": -143.94207763671875, + "loss": 0.5992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09487076103687286, + "rewards/margins": 0.31636887788772583, + "rewards/rejected": -0.22149811685085297, + "step": 1378 + }, + { + "epoch": 0.08, + "learning_rate": 9.934001113314991e-08, + "logits/chosen": -1.933445692062378, + "logits/rejected": -1.9167829751968384, + "logps/chosen": -298.04376220703125, + "logps/rejected": -327.02264404296875, + "loss": 0.5653, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.473776251077652, + "rewards/margins": -0.019406139850616455, + "rewards/rejected": 0.49318239092826843, + "step": 1379 + }, + { + "epoch": 0.08, + "learning_rate": 9.933848410828174e-08, + "logits/chosen": -2.1445508003234863, + "logits/rejected": -2.1360526084899902, + "logps/chosen": -141.09127807617188, + "logps/rejected": -229.27188110351562, + "loss": 0.5786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3715362548828125, + "rewards/margins": 0.049983203411102295, + "rewards/rejected": 0.3215530514717102, + "step": 1380 + }, + { + "epoch": 0.08, + "learning_rate": 9.933695533066928e-08, + "logits/chosen": -2.071794033050537, + "logits/rejected": -2.0670907497406006, + "logps/chosen": -45.557247161865234, + "logps/rejected": -195.48341369628906, + "loss": 0.6647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037136077880859375, + "rewards/margins": 0.0816551223397255, + "rewards/rejected": -0.04451904445886612, + "step": 1381 + }, + { + "epoch": 0.08, + "learning_rate": 9.933542480036683e-08, + "logits/chosen": -2.2103679180145264, + "logits/rejected": -2.18819260597229, + "logps/chosen": -250.59927368164062, + "logps/rejected": -371.22479248046875, + "loss": 0.5796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4147842526435852, + "rewards/margins": 0.031230181455612183, + "rewards/rejected": 0.383554071187973, + "step": 1382 + }, + { + "epoch": 0.08, + "learning_rate": 9.933389251742875e-08, + "logits/chosen": -2.119278907775879, + "logits/rejected": -2.123044967651367, + "logps/chosen": -217.7408447265625, + "logps/rejected": -335.8403625488281, + "loss": 0.5632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22788695991039276, + "rewards/margins": 0.32845765352249146, + "rewards/rejected": -0.1005706787109375, + "step": 1383 + }, + { + "epoch": 0.08, + "learning_rate": 9.93323584819095e-08, + "logits/chosen": -1.9669045209884644, + "logits/rejected": -1.940020203590393, + "logps/chosen": -239.4931640625, + "logps/rejected": -369.73876953125, + "loss": 0.5636, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5596084594726562, + "rewards/margins": -0.031199634075164795, + "rewards/rejected": 0.590808093547821, + "step": 1384 + }, + { + "epoch": 0.08, + "learning_rate": 9.933082269386356e-08, + "logits/chosen": -2.1751790046691895, + "logits/rejected": -2.125330686569214, + "logps/chosen": -112.03270721435547, + "logps/rejected": -321.3358154296875, + "loss": 0.5662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.240997314453125, + "rewards/margins": 0.2888641357421875, + "rewards/rejected": -0.0478668212890625, + "step": 1385 + }, + { + "epoch": 0.08, + "learning_rate": 9.932928515334551e-08, + "logits/chosen": -2.177863597869873, + "logits/rejected": -2.172450065612793, + "logps/chosen": -59.47837448120117, + "logps/rejected": -153.97860717773438, + "loss": 0.6638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020835876930505037, + "rewards/margins": 0.15430985391139984, + "rewards/rejected": -0.15639343857765198, + "step": 1386 + }, + { + "epoch": 0.08, + "learning_rate": 9.932774586040996e-08, + "logits/chosen": -2.1642940044403076, + "logits/rejected": -2.1575069427490234, + "logps/chosen": -217.8717498779297, + "logps/rejected": -236.04953002929688, + "loss": 0.5525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4064651429653168, + "rewards/margins": 0.17943724989891052, + "rewards/rejected": 0.22702789306640625, + "step": 1387 + }, + { + "epoch": 0.08, + "learning_rate": 9.932620481511156e-08, + "logits/chosen": -2.1648690700531006, + "logits/rejected": -2.085575819015503, + "logps/chosen": -221.4337158203125, + "logps/rejected": -341.6693420410156, + "loss": 0.3962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6994568109512329, + "rewards/margins": 0.6408295035362244, + "rewards/rejected": 0.05862731859087944, + "step": 1388 + }, + { + "epoch": 0.08, + "learning_rate": 9.93246620175051e-08, + "logits/chosen": -2.003018856048584, + "logits/rejected": -1.9532803297042847, + "logps/chosen": -258.7853698730469, + "logps/rejected": -351.08074951171875, + "loss": 0.6204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24457703530788422, + "rewards/margins": 0.02139587700366974, + "rewards/rejected": 0.22318115830421448, + "step": 1389 + }, + { + "epoch": 0.08, + "learning_rate": 9.932311746764536e-08, + "logits/chosen": -2.268951654434204, + "logits/rejected": -2.257842540740967, + "logps/chosen": -0.00015449027705471963, + "logps/rejected": -108.86186981201172, + "loss": 0.6492, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.582093755649112e-07, + "rewards/margins": 0.18438100814819336, + "rewards/rejected": -0.1843818724155426, + "step": 1390 + }, + { + "epoch": 0.08, + "learning_rate": 9.932157116558725e-08, + "logits/chosen": -2.1727652549743652, + "logits/rejected": -2.1687068939208984, + "logps/chosen": -22.314056396484375, + "logps/rejected": -144.65321350097656, + "loss": 0.6497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032419588416814804, + "rewards/margins": 0.22087937593460083, + "rewards/rejected": -0.25329896807670593, + "step": 1391 + }, + { + "epoch": 0.08, + "learning_rate": 9.932002311138565e-08, + "logits/chosen": -2.18241548538208, + "logits/rejected": -2.075044631958008, + "logps/chosen": -235.13690185546875, + "logps/rejected": -321.244873046875, + "loss": 0.5936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14070892333984375, + "rewards/margins": 0.27162933349609375, + "rewards/rejected": -0.13092041015625, + "step": 1392 + }, + { + "epoch": 0.08, + "learning_rate": 9.93184733050956e-08, + "logits/chosen": -1.945085048675537, + "logits/rejected": -1.927536129951477, + "logps/chosen": -195.9066925048828, + "logps/rejected": -337.17230224609375, + "loss": 0.6364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10041504353284836, + "rewards/margins": 0.12607727944850922, + "rewards/rejected": -0.02566223219037056, + "step": 1393 + }, + { + "epoch": 0.08, + "learning_rate": 9.931692174677213e-08, + "logits/chosen": -2.167012929916382, + "logits/rejected": -2.166853427886963, + "logps/chosen": -175.71441650390625, + "logps/rejected": -220.13046264648438, + "loss": 0.575, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4360183775424957, + "rewards/margins": -0.07231292128562927, + "rewards/rejected": 0.508331298828125, + "step": 1394 + }, + { + "epoch": 0.08, + "learning_rate": 9.931536843647034e-08, + "logits/chosen": -2.2671966552734375, + "logits/rejected": -2.256908893585205, + "logps/chosen": -13.927774429321289, + "logps/rejected": -212.89437866210938, + "loss": 0.6747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019312858348712325, + "rewards/margins": 0.062005139887332916, + "rewards/rejected": -0.0600738525390625, + "step": 1395 + }, + { + "epoch": 0.08, + "learning_rate": 9.931381337424546e-08, + "logits/chosen": -1.9895005226135254, + "logits/rejected": -1.965667963027954, + "logps/chosen": -292.21453857421875, + "logps/rejected": -402.803466796875, + "loss": 0.4562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5859436392784119, + "rewards/margins": 0.4245605766773224, + "rewards/rejected": 0.16138306260108948, + "step": 1396 + }, + { + "epoch": 0.08, + "learning_rate": 9.93122565601527e-08, + "logits/chosen": -2.081500291824341, + "logits/rejected": -2.08385968208313, + "logps/chosen": -3.3653507232666016, + "logps/rejected": -164.22540283203125, + "loss": 0.6516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006215048022568226, + "rewards/margins": 0.16699081659317017, + "rewards/rejected": -0.16077576577663422, + "step": 1397 + }, + { + "epoch": 0.08, + "learning_rate": 9.931069799424739e-08, + "logits/chosen": -2.0329291820526123, + "logits/rejected": -2.0283753871917725, + "logps/chosen": -0.0001450727431802079, + "logps/rejected": -139.57806396484375, + "loss": 0.6495, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.860804784177162e-07, + "rewards/margins": 0.18351107835769653, + "rewards/rejected": -0.18351136147975922, + "step": 1398 + }, + { + "epoch": 0.08, + "learning_rate": 9.930913767658486e-08, + "logits/chosen": -1.9258925914764404, + "logits/rejected": -1.948001503944397, + "logps/chosen": -264.5869445800781, + "logps/rejected": -314.43402099609375, + "loss": 0.6204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12665100395679474, + "rewards/margins": 0.08481750637292862, + "rewards/rejected": 0.04183349758386612, + "step": 1399 + }, + { + "epoch": 0.08, + "learning_rate": 9.930757560722057e-08, + "logits/chosen": -2.056319236755371, + "logits/rejected": -2.0583126544952393, + "logps/chosen": -0.03495368734002113, + "logps/rejected": -194.05767822265625, + "loss": 0.6889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001646235614316538, + "rewards/margins": 0.0285890381783247, + "rewards/rejected": -0.02875366248190403, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 9.930601178621e-08, + "logits/chosen": -2.2508699893951416, + "logits/rejected": -2.239475727081299, + "logps/chosen": -51.76594924926758, + "logps/rejected": -250.20401000976562, + "loss": 0.6022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08209724724292755, + "rewards/margins": 0.31004828214645386, + "rewards/rejected": -0.2279510498046875, + "step": 1401 + }, + { + "epoch": 0.08, + "learning_rate": 9.930444621360872e-08, + "logits/chosen": -2.1256089210510254, + "logits/rejected": -2.096177339553833, + "logps/chosen": -34.044395446777344, + "logps/rejected": -174.31484985351562, + "loss": 0.6136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009090423583984375, + "rewards/margins": 0.34827956557273865, + "rewards/rejected": -0.357369989156723, + "step": 1402 + }, + { + "epoch": 0.08, + "learning_rate": 9.930287888947234e-08, + "logits/chosen": -2.268324136734009, + "logits/rejected": -2.25119948387146, + "logps/chosen": -3.0831520557403564, + "logps/rejected": -167.9164276123047, + "loss": 0.6129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017309188842773438, + "rewards/margins": 0.3201623857021332, + "rewards/rejected": -0.3218933045864105, + "step": 1403 + }, + { + "epoch": 0.08, + "learning_rate": 9.930130981385651e-08, + "logits/chosen": -2.250636100769043, + "logits/rejected": -2.258138656616211, + "logps/chosen": -47.73619079589844, + "logps/rejected": -114.7903823852539, + "loss": 0.6538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03205261379480362, + "rewards/margins": 0.11956329643726349, + "rewards/rejected": -0.08751068264245987, + "step": 1404 + }, + { + "epoch": 0.08, + "learning_rate": 9.929973898681703e-08, + "logits/chosen": -2.0809714794158936, + "logits/rejected": -2.0878617763519287, + "logps/chosen": -229.52024841308594, + "logps/rejected": -320.9654541015625, + "loss": 0.531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5366775393486023, + "rewards/margins": 0.10894620418548584, + "rewards/rejected": 0.42773133516311646, + "step": 1405 + }, + { + "epoch": 0.08, + "learning_rate": 9.929816640840964e-08, + "logits/chosen": -2.20424222946167, + "logits/rejected": -2.1889595985412598, + "logps/chosen": -14.742182731628418, + "logps/rejected": -193.7241973876953, + "loss": 0.7293, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04632902145385742, + "rewards/margins": -0.08842039108276367, + "rewards/rejected": 0.04209136962890625, + "step": 1406 + }, + { + "epoch": 0.08, + "learning_rate": 9.929659207869026e-08, + "logits/chosen": -2.1075472831726074, + "logits/rejected": -2.139158248901367, + "logps/chosen": -186.13763427734375, + "logps/rejected": -189.56503295898438, + "loss": 0.5529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5027191042900085, + "rewards/margins": 0.09868162870407104, + "rewards/rejected": 0.4040374755859375, + "step": 1407 + }, + { + "epoch": 0.08, + "learning_rate": 9.929501599771479e-08, + "logits/chosen": -2.2823233604431152, + "logits/rejected": -2.251966714859009, + "logps/chosen": -12.817548751831055, + "logps/rejected": -142.25631713867188, + "loss": 0.6448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005312538240104914, + "rewards/margins": 0.2018091231584549, + "rewards/rejected": -0.19649659097194672, + "step": 1408 + }, + { + "epoch": 0.08, + "learning_rate": 9.929343816553921e-08, + "logits/chosen": -1.9971421957015991, + "logits/rejected": -1.995261788368225, + "logps/chosen": -261.76849365234375, + "logps/rejected": -450.3717956542969, + "loss": 0.5322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6600708365440369, + "rewards/margins": 0.008450329303741455, + "rewards/rejected": 0.6516205072402954, + "step": 1409 + }, + { + "epoch": 0.08, + "learning_rate": 9.92918585822196e-08, + "logits/chosen": -2.084101915359497, + "logits/rejected": -2.041229248046875, + "logps/chosen": -166.3473663330078, + "logps/rejected": -239.8544921875, + "loss": 0.6378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11906738579273224, + "rewards/margins": 0.08926544338464737, + "rewards/rejected": 0.02980194054543972, + "step": 1410 + }, + { + "epoch": 0.08, + "learning_rate": 9.929027724781205e-08, + "logits/chosen": -2.1470930576324463, + "logits/rejected": -2.137692451477051, + "logps/chosen": -186.59339904785156, + "logps/rejected": -209.90036010742188, + "loss": 0.6497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09773864597082138, + "rewards/margins": 0.10480956733226776, + "rewards/rejected": -0.0070709227584302425, + "step": 1411 + }, + { + "epoch": 0.08, + "learning_rate": 9.928869416237276e-08, + "logits/chosen": -1.9087002277374268, + "logits/rejected": -1.9171329736709595, + "logps/chosen": -279.77105712890625, + "logps/rejected": -288.28778076171875, + "loss": 0.6502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03154296800494194, + "rewards/margins": 0.06147460639476776, + "rewards/rejected": -0.02993164025247097, + "step": 1412 + }, + { + "epoch": 0.08, + "learning_rate": 9.928710932595794e-08, + "logits/chosen": -2.0699920654296875, + "logits/rejected": -2.0461483001708984, + "logps/chosen": -72.87358093261719, + "logps/rejected": -282.4990234375, + "loss": 0.562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030492400750517845, + "rewards/margins": 0.5907158255577087, + "rewards/rejected": -0.560223400592804, + "step": 1413 + }, + { + "epoch": 0.08, + "learning_rate": 9.928552273862392e-08, + "logits/chosen": -2.1080687046051025, + "logits/rejected": -2.0384156703948975, + "logps/chosen": -253.41490173339844, + "logps/rejected": -429.59979248046875, + "loss": 0.4797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6841689944267273, + "rewards/margins": 0.20443877577781677, + "rewards/rejected": 0.4797302186489105, + "step": 1414 + }, + { + "epoch": 0.08, + "learning_rate": 9.928393440042705e-08, + "logits/chosen": -2.1632323265075684, + "logits/rejected": -2.1747193336486816, + "logps/chosen": -1.1889498233795166, + "logps/rejected": -96.19596099853516, + "loss": 0.6813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013280630111694336, + "rewards/margins": 0.032972097396850586, + "rewards/rejected": -0.01969146728515625, + "step": 1415 + }, + { + "epoch": 0.08, + "learning_rate": 9.928234431142375e-08, + "logits/chosen": -1.9790431261062622, + "logits/rejected": -1.9587163925170898, + "logps/chosen": -45.8534049987793, + "logps/rejected": -287.6217346191406, + "loss": 0.6609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05951576307415962, + "rewards/margins": 0.18334007263183594, + "rewards/rejected": -0.24285583198070526, + "step": 1416 + }, + { + "epoch": 0.08, + "learning_rate": 9.928075247167051e-08, + "logits/chosen": -2.1756043434143066, + "logits/rejected": -2.145176410675049, + "logps/chosen": -242.85195922851562, + "logps/rejected": -396.4208984375, + "loss": 0.5353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6512802243232727, + "rewards/margins": 0.02834320068359375, + "rewards/rejected": 0.622937023639679, + "step": 1417 + }, + { + "epoch": 0.08, + "learning_rate": 9.92791588812239e-08, + "logits/chosen": -2.0536205768585205, + "logits/rejected": -2.0506129264831543, + "logps/chosen": -7.009328692220151e-05, + "logps/rejected": -175.31674194335938, + "loss": 0.619, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.840876724505506e-07, + "rewards/margins": 0.3141290545463562, + "rewards/rejected": -0.31412965059280396, + "step": 1418 + }, + { + "epoch": 0.08, + "learning_rate": 9.927756354014051e-08, + "logits/chosen": -2.0764055252075195, + "logits/rejected": -2.1145896911621094, + "logps/chosen": -263.86407470703125, + "logps/rejected": -375.9198913574219, + "loss": 0.61, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14790649712085724, + "rewards/margins": 0.12987975776195526, + "rewards/rejected": 0.01802673377096653, + "step": 1419 + }, + { + "epoch": 0.08, + "learning_rate": 9.927596644847702e-08, + "logits/chosen": -2.162545919418335, + "logits/rejected": -2.125250816345215, + "logps/chosen": -336.4817810058594, + "logps/rejected": -626.2180786132812, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7643219232559204, + "rewards/margins": 0.5898224115371704, + "rewards/rejected": 0.17449951171875, + "step": 1420 + }, + { + "epoch": 0.08, + "learning_rate": 9.927436760629016e-08, + "logits/chosen": -2.2032647132873535, + "logits/rejected": -2.2033114433288574, + "logps/chosen": -28.75920295715332, + "logps/rejected": -131.86488342285156, + "loss": 0.6823, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05039405822753906, + "rewards/margins": -0.0063304901123046875, + "rewards/rejected": 0.05672454833984375, + "step": 1421 + }, + { + "epoch": 0.08, + "learning_rate": 9.927276701363673e-08, + "logits/chosen": -2.174638509750366, + "logits/rejected": -2.169386863708496, + "logps/chosen": -0.4023882746696472, + "logps/rejected": -160.1632843017578, + "loss": 0.5629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003853571368381381, + "rewards/margins": 0.6188668012619019, + "rewards/rejected": -0.6227203607559204, + "step": 1422 + }, + { + "epoch": 0.08, + "learning_rate": 9.927116467057362e-08, + "logits/chosen": -2.1566948890686035, + "logits/rejected": -2.1115000247955322, + "logps/chosen": -156.45614624023438, + "logps/rejected": -249.43185424804688, + "loss": 0.6117, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.346395879983902, + "rewards/margins": -0.060089111328125, + "rewards/rejected": 0.406484991312027, + "step": 1423 + }, + { + "epoch": 0.08, + "learning_rate": 9.926956057715772e-08, + "logits/chosen": -2.1698925495147705, + "logits/rejected": -2.1621334552764893, + "logps/chosen": -0.18353979289531708, + "logps/rejected": -210.02659606933594, + "loss": 0.5855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003788089845329523, + "rewards/margins": 0.4945548176765442, + "rewards/rejected": -0.498342901468277, + "step": 1424 + }, + { + "epoch": 0.08, + "learning_rate": 9.926795473344602e-08, + "logits/chosen": -2.184706926345825, + "logits/rejected": -2.1634602546691895, + "logps/chosen": -19.01616668701172, + "logps/rejected": -190.9750518798828, + "loss": 0.6808, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0364343635737896, + "rewards/margins": -0.006926536560058594, + "rewards/rejected": 0.04336090013384819, + "step": 1425 + }, + { + "epoch": 0.08, + "learning_rate": 9.926634713949556e-08, + "logits/chosen": -2.274564504623413, + "logits/rejected": -2.2625958919525146, + "logps/chosen": -30.666500091552734, + "logps/rejected": -189.03289794921875, + "loss": 0.6491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009048843756318092, + "rewards/margins": 0.14409981667995453, + "rewards/rejected": -0.135050967335701, + "step": 1426 + }, + { + "epoch": 0.08, + "learning_rate": 9.926473779536348e-08, + "logits/chosen": -2.2922518253326416, + "logits/rejected": -2.2717783451080322, + "logps/chosen": -29.426477432250977, + "logps/rejected": -207.50901794433594, + "loss": 0.6531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026894569396972656, + "rewards/margins": 0.2026662826538086, + "rewards/rejected": -0.22956085205078125, + "step": 1427 + }, + { + "epoch": 0.08, + "learning_rate": 9.926312670110692e-08, + "logits/chosen": -2.158477306365967, + "logits/rejected": -2.148343563079834, + "logps/chosen": -2.132598876953125, + "logps/rejected": -184.19161987304688, + "loss": 0.6193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04503524303436279, + "rewards/margins": 0.2624211311340332, + "rewards/rejected": -0.21738587319850922, + "step": 1428 + }, + { + "epoch": 0.08, + "learning_rate": 9.926151385678314e-08, + "logits/chosen": -2.174013137817383, + "logits/rejected": -2.167456865310669, + "logps/chosen": -30.82765769958496, + "logps/rejected": -88.90530395507812, + "loss": 0.6382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03016338311135769, + "rewards/margins": 0.17898082733154297, + "rewards/rejected": -0.14881744980812073, + "step": 1429 + }, + { + "epoch": 0.08, + "learning_rate": 9.925989926244942e-08, + "logits/chosen": -2.2220571041107178, + "logits/rejected": -2.2071409225463867, + "logps/chosen": -8.192412376403809, + "logps/rejected": -139.8656005859375, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012927151285111904, + "rewards/margins": 0.0628998726606369, + "rewards/rejected": -0.07582702487707138, + "step": 1430 + }, + { + "epoch": 0.08, + "learning_rate": 9.925828291816313e-08, + "logits/chosen": -2.3299875259399414, + "logits/rejected": -2.330146551132202, + "logps/chosen": -30.2485408782959, + "logps/rejected": -205.10354614257812, + "loss": 0.627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055457498878240585, + "rewards/margins": 0.23063603043556213, + "rewards/rejected": -0.17517852783203125, + "step": 1431 + }, + { + "epoch": 0.08, + "learning_rate": 9.925666482398167e-08, + "logits/chosen": -2.3031647205352783, + "logits/rejected": -2.2919788360595703, + "logps/chosen": -65.5008316040039, + "logps/rejected": -235.521240234375, + "loss": 0.6443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034830477088689804, + "rewards/margins": 0.12249527871608734, + "rewards/rejected": -0.08766479790210724, + "step": 1432 + }, + { + "epoch": 0.08, + "learning_rate": 9.925504497996254e-08, + "logits/chosen": -2.120354413986206, + "logits/rejected": -1.911826252937317, + "logps/chosen": -203.25808715820312, + "logps/rejected": -525.603759765625, + "loss": 0.5667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16991424560546875, + "rewards/margins": 0.3132614195346832, + "rewards/rejected": -0.14334717392921448, + "step": 1433 + }, + { + "epoch": 0.08, + "learning_rate": 9.925342338616328e-08, + "logits/chosen": -1.9762907028198242, + "logits/rejected": -1.9403102397918701, + "logps/chosen": -135.75086975097656, + "logps/rejected": -340.4902038574219, + "loss": 0.5621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3769119381904602, + "rewards/margins": 0.036061108112335205, + "rewards/rejected": 0.340850830078125, + "step": 1434 + }, + { + "epoch": 0.08, + "learning_rate": 9.92518000426415e-08, + "logits/chosen": -2.0837013721466064, + "logits/rejected": -2.07016921043396, + "logps/chosen": -0.04965616762638092, + "logps/rejected": -267.632080078125, + "loss": 0.598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010843559866771102, + "rewards/margins": 0.40894371271133423, + "rewards/rejected": -0.4100280702114105, + "step": 1435 + }, + { + "epoch": 0.08, + "learning_rate": 9.925017494945486e-08, + "logits/chosen": -2.01425838470459, + "logits/rejected": -1.9825407266616821, + "logps/chosen": -123.3753433227539, + "logps/rejected": -242.36032104492188, + "loss": 0.609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11040039360523224, + "rewards/margins": 0.22939452528953552, + "rewards/rejected": -0.11899413913488388, + "step": 1436 + }, + { + "epoch": 0.08, + "learning_rate": 9.92485481066611e-08, + "logits/chosen": -2.1783881187438965, + "logits/rejected": -2.177319288253784, + "logps/chosen": -0.0009649332496337593, + "logps/rejected": -143.35372924804688, + "loss": 0.6392, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.179544703220017e-05, + "rewards/margins": 0.22857573628425598, + "rewards/rejected": -0.22853393852710724, + "step": 1437 + }, + { + "epoch": 0.08, + "learning_rate": 9.924691951431801e-08, + "logits/chosen": -2.169109582901001, + "logits/rejected": -2.1576948165893555, + "logps/chosen": -260.0761413574219, + "logps/rejected": -293.29144287109375, + "loss": 0.5076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5951294302940369, + "rewards/margins": 0.13682863116264343, + "rewards/rejected": 0.45830079913139343, + "step": 1438 + }, + { + "epoch": 0.08, + "learning_rate": 9.924528917248345e-08, + "logits/chosen": -2.2151238918304443, + "logits/rejected": -2.1641554832458496, + "logps/chosen": -179.05612182617188, + "logps/rejected": -400.7691345214844, + "loss": 0.5748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4231201112270355, + "rewards/margins": 0.033572375774383545, + "rewards/rejected": 0.389547735452652, + "step": 1439 + }, + { + "epoch": 0.08, + "learning_rate": 9.924365708121533e-08, + "logits/chosen": -2.05008602142334, + "logits/rejected": -2.010707378387451, + "logps/chosen": -340.65533447265625, + "logps/rejected": -568.0343627929688, + "loss": 0.5862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05842895433306694, + "rewards/margins": 0.3752807676792145, + "rewards/rejected": -0.31685182452201843, + "step": 1440 + }, + { + "epoch": 0.08, + "learning_rate": 9.924202324057162e-08, + "logits/chosen": -2.0476438999176025, + "logits/rejected": -2.0430116653442383, + "logps/chosen": -21.139324188232422, + "logps/rejected": -231.34414672851562, + "loss": 0.6687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.081517793238163, + "rewards/margins": 0.018129728734493256, + "rewards/rejected": 0.06338806450366974, + "step": 1441 + }, + { + "epoch": 0.08, + "learning_rate": 9.92403876506104e-08, + "logits/chosen": -2.2294788360595703, + "logits/rejected": -2.2321367263793945, + "logps/chosen": -29.790943145751953, + "logps/rejected": -87.79740142822266, + "loss": 0.7173, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08153744041919708, + "rewards/margins": -0.006171993911266327, + "rewards/rejected": -0.07536544650793076, + "step": 1442 + }, + { + "epoch": 0.08, + "learning_rate": 9.923875031138973e-08, + "logits/chosen": -2.1235129833221436, + "logits/rejected": -2.10214900970459, + "logps/chosen": -247.89865112304688, + "logps/rejected": -331.0387268066406, + "loss": 0.5237, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7463104128837585, + "rewards/margins": -0.02183842658996582, + "rewards/rejected": 0.7681488394737244, + "step": 1443 + }, + { + "epoch": 0.08, + "learning_rate": 9.92371112229678e-08, + "logits/chosen": -2.127902030944824, + "logits/rejected": -2.086472511291504, + "logps/chosen": -207.10067749023438, + "logps/rejected": -372.46441650390625, + "loss": 0.6421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07704468071460724, + "rewards/margins": 0.29391783475875854, + "rewards/rejected": -0.370962530374527, + "step": 1444 + }, + { + "epoch": 0.08, + "learning_rate": 9.923547038540285e-08, + "logits/chosen": -2.1643123626708984, + "logits/rejected": -2.127732515335083, + "logps/chosen": -219.13253784179688, + "logps/rejected": -499.3170166015625, + "loss": 0.4956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4856308102607727, + "rewards/margins": 0.21068879961967468, + "rewards/rejected": 0.274942010641098, + "step": 1445 + }, + { + "epoch": 0.08, + "learning_rate": 9.923382779875315e-08, + "logits/chosen": -2.1679046154022217, + "logits/rejected": -2.1658120155334473, + "logps/chosen": -51.555816650390625, + "logps/rejected": -145.49375915527344, + "loss": 0.6558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00507278461009264, + "rewards/margins": 0.1375877410173416, + "rewards/rejected": -0.13251495361328125, + "step": 1446 + }, + { + "epoch": 0.08, + "learning_rate": 9.923218346307704e-08, + "logits/chosen": -2.135582685470581, + "logits/rejected": -2.109915256500244, + "logps/chosen": -249.30340576171875, + "logps/rejected": -488.406494140625, + "loss": 0.3945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8366028070449829, + "rewards/margins": 0.5302520990371704, + "rewards/rejected": 0.3063507080078125, + "step": 1447 + }, + { + "epoch": 0.08, + "learning_rate": 9.923053737843297e-08, + "logits/chosen": -2.303018569946289, + "logits/rejected": -2.3001868724823, + "logps/chosen": -0.0002805910480674356, + "logps/rejected": -190.18577575683594, + "loss": 0.6213, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1920928955078125e-07, + "rewards/margins": 0.3080476224422455, + "rewards/rejected": -0.30804750323295593, + "step": 1448 + }, + { + "epoch": 0.08, + "learning_rate": 9.92288895448794e-08, + "logits/chosen": -2.114572525024414, + "logits/rejected": -2.103283643722534, + "logps/chosen": -32.622859954833984, + "logps/rejected": -194.60812377929688, + "loss": 0.6622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0052032470703125, + "rewards/margins": 0.09622650593519211, + "rewards/rejected": -0.09102325886487961, + "step": 1449 + }, + { + "epoch": 0.08, + "learning_rate": 9.922723996247487e-08, + "logits/chosen": -2.3023881912231445, + "logits/rejected": -2.302168130874634, + "logps/chosen": -8.93468952178955, + "logps/rejected": -94.27183532714844, + "loss": 0.6147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013149452395737171, + "rewards/margins": 0.2490621656179428, + "rewards/rejected": -0.2622116208076477, + "step": 1450 + }, + { + "epoch": 0.08, + "learning_rate": 9.922558863127796e-08, + "logits/chosen": -2.058234691619873, + "logits/rejected": -2.0600335597991943, + "logps/chosen": -0.8406435251235962, + "logps/rejected": -47.540496826171875, + "loss": 0.6366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006804668810218573, + "rewards/margins": 0.24736288189888, + "rewards/rejected": -0.2541675567626953, + "step": 1451 + }, + { + "epoch": 0.08, + "learning_rate": 9.922393555134738e-08, + "logits/chosen": -2.1321659088134766, + "logits/rejected": -2.0521624088287354, + "logps/chosen": -158.07464599609375, + "logps/rejected": -373.5738830566406, + "loss": 0.575, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3603515625, + "rewards/margins": -0.0208587646484375, + "rewards/rejected": 0.3812103271484375, + "step": 1452 + }, + { + "epoch": 0.08, + "learning_rate": 9.922228072274182e-08, + "logits/chosen": -2.1596922874450684, + "logits/rejected": -2.1612296104431152, + "logps/chosen": -0.0873323529958725, + "logps/rejected": -71.90460205078125, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018565893406048417, + "rewards/margins": 0.027345681563019753, + "rewards/rejected": -0.02920227125287056, + "step": 1453 + }, + { + "epoch": 0.08, + "learning_rate": 9.922062414552007e-08, + "logits/chosen": -2.2722296714782715, + "logits/rejected": -2.2577762603759766, + "logps/chosen": -58.60193634033203, + "logps/rejected": -252.27037048339844, + "loss": 0.5787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12137603759765625, + "rewards/margins": 0.31378328800201416, + "rewards/rejected": -0.19240723550319672, + "step": 1454 + }, + { + "epoch": 0.08, + "learning_rate": 9.921896581974098e-08, + "logits/chosen": -1.970741629600525, + "logits/rejected": -1.8790249824523926, + "logps/chosen": -228.78994750976562, + "logps/rejected": -375.95306396484375, + "loss": 0.5596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.374594122171402, + "rewards/margins": 0.15437011420726776, + "rewards/rejected": 0.22022400796413422, + "step": 1455 + }, + { + "epoch": 0.08, + "learning_rate": 9.921730574546348e-08, + "logits/chosen": -2.1528303623199463, + "logits/rejected": -2.1455187797546387, + "logps/chosen": -139.28692626953125, + "logps/rejected": -232.32235717773438, + "loss": 0.595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28049468994140625, + "rewards/margins": 0.12841033935546875, + "rewards/rejected": 0.1520843505859375, + "step": 1456 + }, + { + "epoch": 0.08, + "learning_rate": 9.921564392274653e-08, + "logits/chosen": -2.114199638366699, + "logits/rejected": -2.116349458694458, + "logps/chosen": -42.568660736083984, + "logps/rejected": -181.1446533203125, + "loss": 0.645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020849227905273438, + "rewards/margins": 0.1821269989013672, + "rewards/rejected": -0.16127777099609375, + "step": 1457 + }, + { + "epoch": 0.08, + "learning_rate": 9.921398035164918e-08, + "logits/chosen": -2.1225476264953613, + "logits/rejected": -2.1191670894622803, + "logps/chosen": -290.60650634765625, + "logps/rejected": -286.9884033203125, + "loss": 0.5643, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6942352652549744, + "rewards/margins": -0.12627863883972168, + "rewards/rejected": 0.820513904094696, + "step": 1458 + }, + { + "epoch": 0.08, + "learning_rate": 9.92123150322305e-08, + "logits/chosen": -2.0709197521209717, + "logits/rejected": -2.0455009937286377, + "logps/chosen": -176.10604858398438, + "logps/rejected": -291.30340576171875, + "loss": 0.5912, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.413909912109375, + "rewards/margins": -0.06054076552391052, + "rewards/rejected": 0.4744506776332855, + "step": 1459 + }, + { + "epoch": 0.08, + "learning_rate": 9.921064796454968e-08, + "logits/chosen": -1.98763108253479, + "logits/rejected": -1.9870988130569458, + "logps/chosen": -37.95140838623047, + "logps/rejected": -197.79550170898438, + "loss": 0.6786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017298126593232155, + "rewards/margins": 0.042285919189453125, + "rewards/rejected": -0.02498779259622097, + "step": 1460 + }, + { + "epoch": 0.09, + "learning_rate": 9.92089791486659e-08, + "logits/chosen": -2.0535426139831543, + "logits/rejected": -1.994773268699646, + "logps/chosen": -265.60479736328125, + "logps/rejected": -537.6215209960938, + "loss": 0.5451, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7265869379043579, + "rewards/margins": -0.07285767793655396, + "rewards/rejected": 0.7994446158409119, + "step": 1461 + }, + { + "epoch": 0.09, + "learning_rate": 9.92073085846385e-08, + "logits/chosen": -2.1205334663391113, + "logits/rejected": -2.118610143661499, + "logps/chosen": -0.00727113476023078, + "logps/rejected": -149.9384002685547, + "loss": 0.6696, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.86843738751486e-05, + "rewards/margins": 0.09576939791440964, + "rewards/rejected": -0.09584808349609375, + "step": 1462 + }, + { + "epoch": 0.09, + "learning_rate": 9.920563627252679e-08, + "logits/chosen": -2.180143117904663, + "logits/rejected": -1.9424480199813843, + "logps/chosen": -378.866943359375, + "logps/rejected": -773.507568359375, + "loss": 0.471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39176636934280396, + "rewards/margins": 0.541912853717804, + "rewards/rejected": -0.150146484375, + "step": 1463 + }, + { + "epoch": 0.09, + "learning_rate": 9.92039622123902e-08, + "logits/chosen": -2.2363457679748535, + "logits/rejected": -2.2390835285186768, + "logps/chosen": -37.34797286987305, + "logps/rejected": -94.96221923828125, + "loss": 0.7183, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.12551270425319672, + "rewards/margins": -0.003921516239643097, + "rewards/rejected": -0.12159118801355362, + "step": 1464 + }, + { + "epoch": 0.09, + "learning_rate": 9.920228640428819e-08, + "logits/chosen": -2.1422624588012695, + "logits/rejected": -2.1282968521118164, + "logps/chosen": -265.9990234375, + "logps/rejected": -408.9992980957031, + "loss": 0.5, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5228424072265625, + "rewards/margins": 0.201812744140625, + "rewards/rejected": 0.3210296630859375, + "step": 1465 + }, + { + "epoch": 0.09, + "learning_rate": 9.920060884828028e-08, + "logits/chosen": -2.0805959701538086, + "logits/rejected": -2.069227695465088, + "logps/chosen": -79.2852783203125, + "logps/rejected": -209.30075073242188, + "loss": 0.6245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.096435546875, + "rewards/margins": 0.2041977047920227, + "rewards/rejected": -0.10776215046644211, + "step": 1466 + }, + { + "epoch": 0.09, + "learning_rate": 9.919892954442608e-08, + "logits/chosen": -2.1365323066711426, + "logits/rejected": -2.1209864616394043, + "logps/chosen": -166.62069702148438, + "logps/rejected": -204.82705688476562, + "loss": 0.6755, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.056671142578125, + "rewards/margins": -0.0390472412109375, + "rewards/rejected": 0.0957183837890625, + "step": 1467 + }, + { + "epoch": 0.09, + "learning_rate": 9.919724849278524e-08, + "logits/chosen": -2.065737247467041, + "logits/rejected": -2.048102378845215, + "logps/chosen": -236.61412048339844, + "logps/rejected": -310.62127685546875, + "loss": 0.5733, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6642501950263977, + "rewards/margins": -0.1178848147392273, + "rewards/rejected": 0.782135009765625, + "step": 1468 + }, + { + "epoch": 0.09, + "learning_rate": 9.91955656934175e-08, + "logits/chosen": -2.3177225589752197, + "logits/rejected": -2.308004856109619, + "logps/chosen": -122.07967376708984, + "logps/rejected": -217.41439819335938, + "loss": 0.6891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015911102294921875, + "rewards/margins": 0.027077484875917435, + "rewards/rejected": -0.011166381649672985, + "step": 1469 + }, + { + "epoch": 0.09, + "learning_rate": 9.91938811463826e-08, + "logits/chosen": -2.2396247386932373, + "logits/rejected": -2.241265296936035, + "logps/chosen": -5.947433948516846, + "logps/rejected": -102.96363830566406, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032778263557702303, + "rewards/margins": 0.025723790749907494, + "rewards/rejected": -0.029001617804169655, + "step": 1470 + }, + { + "epoch": 0.09, + "learning_rate": 9.919219485174043e-08, + "logits/chosen": -2.247074604034424, + "logits/rejected": -2.2188680171966553, + "logps/chosen": -99.42234802246094, + "logps/rejected": -286.8542175292969, + "loss": 0.6325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12386017292737961, + "rewards/margins": 0.20800018310546875, + "rewards/rejected": -0.08414001762866974, + "step": 1471 + }, + { + "epoch": 0.09, + "learning_rate": 9.919050680955087e-08, + "logits/chosen": -2.0212039947509766, + "logits/rejected": -2.0261340141296387, + "logps/chosen": -24.26480484008789, + "logps/rejected": -172.42047119140625, + "loss": 0.6823, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04008808359503746, + "rewards/margins": -0.023621942847967148, + "rewards/rejected": 0.06371002644300461, + "step": 1472 + }, + { + "epoch": 0.09, + "learning_rate": 9.91888170198739e-08, + "logits/chosen": -2.2287604808807373, + "logits/rejected": -2.199455499649048, + "logps/chosen": -236.97450256347656, + "logps/rejected": -355.64691162109375, + "loss": 0.4515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7435318231582642, + "rewards/margins": 0.31023409962654114, + "rewards/rejected": 0.433297723531723, + "step": 1473 + }, + { + "epoch": 0.09, + "learning_rate": 9.918712548276952e-08, + "logits/chosen": -2.3000853061676025, + "logits/rejected": -2.2821226119995117, + "logps/chosen": -54.39433288574219, + "logps/rejected": -148.4169158935547, + "loss": 0.7089, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.041481781750917435, + "rewards/margins": -0.0006568916141986847, + "rewards/rejected": -0.04082489013671875, + "step": 1474 + }, + { + "epoch": 0.09, + "learning_rate": 9.918543219829784e-08, + "logits/chosen": -2.212947130203247, + "logits/rejected": -2.188164234161377, + "logps/chosen": -131.85560607910156, + "logps/rejected": -213.79661560058594, + "loss": 0.6238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06249694898724556, + "rewards/margins": 0.1526229828596115, + "rewards/rejected": -0.09012603759765625, + "step": 1475 + }, + { + "epoch": 0.09, + "learning_rate": 9.918373716651905e-08, + "logits/chosen": -2.242525815963745, + "logits/rejected": -2.193732500076294, + "logps/chosen": -112.56288146972656, + "logps/rejected": -222.8038787841797, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025817870628088713, + "rewards/margins": 0.0284347552806139, + "rewards/rejected": -0.03101654164493084, + "step": 1476 + }, + { + "epoch": 0.09, + "learning_rate": 9.91820403874933e-08, + "logits/chosen": -2.2483272552490234, + "logits/rejected": -2.2315032482147217, + "logps/chosen": -2.5984530448913574, + "logps/rejected": -226.3890838623047, + "loss": 0.5283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002972602960653603, + "rewards/margins": 0.8040591478347778, + "rewards/rejected": -0.8043563961982727, + "step": 1477 + }, + { + "epoch": 0.09, + "learning_rate": 9.91803418612809e-08, + "logits/chosen": -2.276047706604004, + "logits/rejected": -2.257023572921753, + "logps/chosen": -183.26571655273438, + "logps/rejected": -332.67327880859375, + "loss": 0.5137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6816849112510681, + "rewards/margins": 0.10644686222076416, + "rewards/rejected": 0.575238049030304, + "step": 1478 + }, + { + "epoch": 0.09, + "learning_rate": 9.917864158794222e-08, + "logits/chosen": -2.102189779281616, + "logits/rejected": -2.1405653953552246, + "logps/chosen": -203.48110961914062, + "logps/rejected": -240.3383331298828, + "loss": 0.6004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29782867431640625, + "rewards/margins": 0.07080993056297302, + "rewards/rejected": 0.22701874375343323, + "step": 1479 + }, + { + "epoch": 0.09, + "learning_rate": 9.917693956753761e-08, + "logits/chosen": -2.085498809814453, + "logits/rejected": -2.0850467681884766, + "logps/chosen": -286.9912109375, + "logps/rejected": -341.92376708984375, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.814666748046875, + "rewards/margins": 0.38261106610298157, + "rewards/rejected": 0.43205568194389343, + "step": 1480 + }, + { + "epoch": 0.09, + "learning_rate": 9.917523580012755e-08, + "logits/chosen": -2.1830005645751953, + "logits/rejected": -2.1890549659729004, + "logps/chosen": -157.29693603515625, + "logps/rejected": -267.5555114746094, + "loss": 0.592, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4521026611328125, + "rewards/margins": -0.1141510009765625, + "rewards/rejected": 0.566253662109375, + "step": 1481 + }, + { + "epoch": 0.09, + "learning_rate": 9.917353028577257e-08, + "logits/chosen": -2.284623146057129, + "logits/rejected": -2.252750873565674, + "logps/chosen": -213.22622680664062, + "logps/rejected": -286.4601745605469, + "loss": 0.6108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14110107719898224, + "rewards/margins": 0.14467163383960724, + "rewards/rejected": -0.003570556640625, + "step": 1482 + }, + { + "epoch": 0.09, + "learning_rate": 9.917182302453328e-08, + "logits/chosen": -2.3024845123291016, + "logits/rejected": -2.295109987258911, + "logps/chosen": -59.39759826660156, + "logps/rejected": -170.85653686523438, + "loss": 0.5636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17846451699733734, + "rewards/margins": 0.38332444429397583, + "rewards/rejected": -0.2048599272966385, + "step": 1483 + }, + { + "epoch": 0.09, + "learning_rate": 9.91701140164703e-08, + "logits/chosen": -2.1190145015716553, + "logits/rejected": -2.131141424179077, + "logps/chosen": -118.52761840820312, + "logps/rejected": -228.41757202148438, + "loss": 0.6719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08043060451745987, + "rewards/margins": 0.01903533935546875, + "rewards/rejected": 0.06139526516199112, + "step": 1484 + }, + { + "epoch": 0.09, + "learning_rate": 9.916840326164437e-08, + "logits/chosen": -2.166043519973755, + "logits/rejected": -2.1622138023376465, + "logps/chosen": -0.08869726210832596, + "logps/rejected": -119.15524291992188, + "loss": 0.6715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013881579507142305, + "rewards/margins": 0.08669320493936539, + "rewards/rejected": -0.08808135986328125, + "step": 1485 + }, + { + "epoch": 0.09, + "learning_rate": 9.916669076011623e-08, + "logits/chosen": -2.192064046859741, + "logits/rejected": -2.1648290157318115, + "logps/chosen": -164.5810546875, + "logps/rejected": -227.19569396972656, + "loss": 0.6169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15156708657741547, + "rewards/margins": 0.21422424912452698, + "rewards/rejected": -0.06265716999769211, + "step": 1486 + }, + { + "epoch": 0.09, + "learning_rate": 9.916497651194673e-08, + "logits/chosen": -2.1495420932769775, + "logits/rejected": -2.1508877277374268, + "logps/chosen": -56.03919219970703, + "logps/rejected": -140.06723022460938, + "loss": 0.6677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05098572000861168, + "rewards/margins": 0.07148437947034836, + "rewards/rejected": -0.02049865759909153, + "step": 1487 + }, + { + "epoch": 0.09, + "learning_rate": 9.916326051719679e-08, + "logits/chosen": -2.326582431793213, + "logits/rejected": -2.3224685192108154, + "logps/chosen": -37.1910514831543, + "logps/rejected": -209.23727416992188, + "loss": 0.6042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03294410929083824, + "rewards/margins": 0.406350314617157, + "rewards/rejected": -0.4392944276332855, + "step": 1488 + }, + { + "epoch": 0.09, + "learning_rate": 9.916154277592734e-08, + "logits/chosen": -2.0699284076690674, + "logits/rejected": -2.060600757598877, + "logps/chosen": -69.2894287109375, + "logps/rejected": -213.57525634765625, + "loss": 0.637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02242889441549778, + "rewards/margins": 0.2714248597621918, + "rewards/rejected": -0.293853759765625, + "step": 1489 + }, + { + "epoch": 0.09, + "learning_rate": 9.915982328819942e-08, + "logits/chosen": -2.3603992462158203, + "logits/rejected": -2.3725974559783936, + "logps/chosen": -135.25613403320312, + "logps/rejected": -446.6890563964844, + "loss": 0.5424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29882508516311646, + "rewards/margins": 0.24408569931983948, + "rewards/rejected": 0.05473938211798668, + "step": 1490 + }, + { + "epoch": 0.09, + "learning_rate": 9.915810205407411e-08, + "logits/chosen": -2.0334360599517822, + "logits/rejected": -2.074420690536499, + "logps/chosen": -154.72991943359375, + "logps/rejected": -218.40478515625, + "loss": 0.5711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37385255098342896, + "rewards/margins": 0.0175933837890625, + "rewards/rejected": 0.35625916719436646, + "step": 1491 + }, + { + "epoch": 0.09, + "learning_rate": 9.915637907361255e-08, + "logits/chosen": -2.122605085372925, + "logits/rejected": -2.107327938079834, + "logps/chosen": -6.461036537075415e-05, + "logps/rejected": -189.40435791015625, + "loss": 0.5541, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8145373082443257e-07, + "rewards/margins": 0.6617527008056641, + "rewards/rejected": -0.6617523431777954, + "step": 1492 + }, + { + "epoch": 0.09, + "learning_rate": 9.915465434687596e-08, + "logits/chosen": -2.0754871368408203, + "logits/rejected": -2.012289524078369, + "logps/chosen": -212.98289489746094, + "logps/rejected": -384.68560791015625, + "loss": 0.4843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7462829947471619, + "rewards/margins": 0.17750859260559082, + "rewards/rejected": 0.568774402141571, + "step": 1493 + }, + { + "epoch": 0.09, + "learning_rate": 9.915292787392561e-08, + "logits/chosen": -1.9322916269302368, + "logits/rejected": -1.9286859035491943, + "logps/chosen": -198.51255798339844, + "logps/rejected": -283.9610595703125, + "loss": 0.5405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44474029541015625, + "rewards/margins": 0.13702544569969177, + "rewards/rejected": 0.3077148497104645, + "step": 1494 + }, + { + "epoch": 0.09, + "learning_rate": 9.915119965482282e-08, + "logits/chosen": -2.039715051651001, + "logits/rejected": -1.8933701515197754, + "logps/chosen": -188.9820556640625, + "logps/rejected": -447.9283752441406, + "loss": 0.5771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18035888671875, + "rewards/margins": 0.28069764375686646, + "rewards/rejected": -0.10033874958753586, + "step": 1495 + }, + { + "epoch": 0.09, + "learning_rate": 9.9149469689629e-08, + "logits/chosen": -2.14750075340271, + "logits/rejected": -2.1254467964172363, + "logps/chosen": -2.874335765838623, + "logps/rejected": -185.14443969726562, + "loss": 0.6895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00010702609870349988, + "rewards/margins": 0.006735444068908691, + "rewards/rejected": -0.0066284178756177425, + "step": 1496 + }, + { + "epoch": 0.09, + "learning_rate": 9.914773797840561e-08, + "logits/chosen": -2.1475491523742676, + "logits/rejected": -2.0849785804748535, + "logps/chosen": -199.42930603027344, + "logps/rejected": -395.15185546875, + "loss": 0.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6301132440567017, + "rewards/margins": 0.6966415643692017, + "rewards/rejected": -0.0665283203125, + "step": 1497 + }, + { + "epoch": 0.09, + "learning_rate": 9.914600452121415e-08, + "logits/chosen": -2.0825283527374268, + "logits/rejected": -2.0816760063171387, + "logps/chosen": -16.220203399658203, + "logps/rejected": -32.695213317871094, + "loss": 0.6704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03386097028851509, + "rewards/margins": 0.05380573123693466, + "rewards/rejected": -0.01994476281106472, + "step": 1498 + }, + { + "epoch": 0.09, + "learning_rate": 9.914426931811621e-08, + "logits/chosen": -2.315528631210327, + "logits/rejected": -2.3171000480651855, + "logps/chosen": -2.7347753047943115, + "logps/rejected": -185.6190948486328, + "loss": 0.579, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012030362850055099, + "rewards/margins": 0.5240456461906433, + "rewards/rejected": -0.5252487063407898, + "step": 1499 + }, + { + "epoch": 0.09, + "learning_rate": 9.914253236917342e-08, + "logits/chosen": -2.266162872314453, + "logits/rejected": -2.246595859527588, + "logps/chosen": -237.9843292236328, + "logps/rejected": -396.0100402832031, + "loss": 0.4903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6092926263809204, + "rewards/margins": 0.18259888887405396, + "rewards/rejected": 0.42669373750686646, + "step": 1500 + }, + { + "epoch": 0.09, + "learning_rate": 9.914079367444753e-08, + "logits/chosen": -2.1985459327697754, + "logits/rejected": -2.2047195434570312, + "logps/chosen": -95.27941131591797, + "logps/rejected": -192.21803283691406, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13657303154468536, + "rewards/margins": 0.13792800903320312, + "rewards/rejected": -0.0013549805153161287, + "step": 1501 + }, + { + "epoch": 0.09, + "learning_rate": 9.913905323400024e-08, + "logits/chosen": -2.170544147491455, + "logits/rejected": -2.1596198081970215, + "logps/chosen": -230.67601013183594, + "logps/rejected": -418.4290466308594, + "loss": 0.5409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4398910701274872, + "rewards/margins": 0.15805208683013916, + "rewards/rejected": 0.281838983297348, + "step": 1502 + }, + { + "epoch": 0.09, + "learning_rate": 9.913731104789344e-08, + "logits/chosen": -2.129746437072754, + "logits/rejected": -2.1224541664123535, + "logps/chosen": -0.00010561545786913484, + "logps/rejected": -160.35986328125, + "loss": 0.6755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1918745990158186e-08, + "rewards/margins": 0.0723724216222763, + "rewards/rejected": -0.0723724365234375, + "step": 1503 + }, + { + "epoch": 0.09, + "learning_rate": 9.913556711618898e-08, + "logits/chosen": -2.1091320514678955, + "logits/rejected": -2.093367576599121, + "logps/chosen": -297.89306640625, + "logps/rejected": -389.8099365234375, + "loss": 0.4832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6659790277481079, + "rewards/margins": 0.16272586584091187, + "rewards/rejected": 0.503253161907196, + "step": 1504 + }, + { + "epoch": 0.09, + "learning_rate": 9.913382143894884e-08, + "logits/chosen": -2.114271402359009, + "logits/rejected": -2.0962166786193848, + "logps/chosen": -101.73297119140625, + "logps/rejected": -193.3394775390625, + "loss": 0.6199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03305969387292862, + "rewards/margins": 0.41118013858795166, + "rewards/rejected": -0.4442398250102997, + "step": 1505 + }, + { + "epoch": 0.09, + "learning_rate": 9.9132074016235e-08, + "logits/chosen": -2.2648861408233643, + "logits/rejected": -2.2597639560699463, + "logps/chosen": -12.197848320007324, + "logps/rejected": -246.60348510742188, + "loss": 0.5679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03151969984173775, + "rewards/margins": 0.5182140469551086, + "rewards/rejected": -0.4866943359375, + "step": 1506 + }, + { + "epoch": 0.09, + "learning_rate": 9.913032484810959e-08, + "logits/chosen": -1.9690117835998535, + "logits/rejected": -1.9580981731414795, + "logps/chosen": -25.88313865661621, + "logps/rejected": -234.39610290527344, + "loss": 0.6535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028081512078642845, + "rewards/margins": 0.15138626098632812, + "rewards/rejected": -0.12330474704504013, + "step": 1507 + }, + { + "epoch": 0.09, + "learning_rate": 9.91285739346347e-08, + "logits/chosen": -2.2853686809539795, + "logits/rejected": -2.28755259513855, + "logps/chosen": -13.754483222961426, + "logps/rejected": -157.5960693359375, + "loss": 0.6521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03738689422607422, + "rewards/margins": 0.12798747420310974, + "rewards/rejected": -0.09060058742761612, + "step": 1508 + }, + { + "epoch": 0.09, + "learning_rate": 9.912682127587255e-08, + "logits/chosen": -2.227008819580078, + "logits/rejected": -2.229034662246704, + "logps/chosen": -115.55626678466797, + "logps/rejected": -240.41986083984375, + "loss": 0.6212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055162813514471054, + "rewards/margins": 0.21707230806350708, + "rewards/rejected": -0.16190949082374573, + "step": 1509 + }, + { + "epoch": 0.09, + "learning_rate": 9.912506687188542e-08, + "logits/chosen": -1.9875495433807373, + "logits/rejected": -1.9377869367599487, + "logps/chosen": -256.6007080078125, + "logps/rejected": -482.6127624511719, + "loss": 0.4048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.740863025188446, + "rewards/margins": 0.5473175048828125, + "rewards/rejected": 0.19354553520679474, + "step": 1510 + }, + { + "epoch": 0.09, + "learning_rate": 9.91233107227356e-08, + "logits/chosen": -2.3547918796539307, + "logits/rejected": -2.3488101959228516, + "logps/chosen": -3.597775459289551, + "logps/rejected": -129.47515869140625, + "loss": 0.6283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03138725832104683, + "rewards/margins": 0.2479827105998993, + "rewards/rejected": -0.21659545600414276, + "step": 1511 + }, + { + "epoch": 0.09, + "learning_rate": 9.91215528284855e-08, + "logits/chosen": -1.9934872388839722, + "logits/rejected": -1.9717137813568115, + "logps/chosen": -192.6834716796875, + "logps/rejected": -285.4946594238281, + "loss": 0.6287, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2953018248081207, + "rewards/margins": -0.03509369492530823, + "rewards/rejected": 0.33039551973342896, + "step": 1512 + }, + { + "epoch": 0.09, + "learning_rate": 9.911979318919756e-08, + "logits/chosen": -1.987196922302246, + "logits/rejected": -1.9770010709762573, + "logps/chosen": -236.900146484375, + "logps/rejected": -313.2168884277344, + "loss": 0.625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13788604736328125, + "rewards/margins": 0.11076202243566513, + "rewards/rejected": 0.02712402306497097, + "step": 1513 + }, + { + "epoch": 0.09, + "learning_rate": 9.91180318049343e-08, + "logits/chosen": -2.1196208000183105, + "logits/rejected": -2.1151514053344727, + "logps/chosen": -89.29304504394531, + "logps/rejected": -245.25765991210938, + "loss": 0.6594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035858154296875, + "rewards/margins": 0.22136077284812927, + "rewards/rejected": -0.2572189271450043, + "step": 1514 + }, + { + "epoch": 0.09, + "learning_rate": 9.91162686757583e-08, + "logits/chosen": -2.0188636779785156, + "logits/rejected": -2.005537509918213, + "logps/chosen": -0.036531947553157806, + "logps/rejected": -216.2064971923828, + "loss": 0.5939, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000807357020676136, + "rewards/margins": 0.44714340567588806, + "rewards/rejected": -0.4479507505893707, + "step": 1515 + }, + { + "epoch": 0.09, + "learning_rate": 9.911450380173216e-08, + "logits/chosen": -2.3612284660339355, + "logits/rejected": -2.354332685470581, + "logps/chosen": -0.07575605809688568, + "logps/rejected": -147.3600616455078, + "loss": 0.6577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003659919020719826, + "rewards/margins": 0.14755119383335114, + "rewards/rejected": -0.14791718125343323, + "step": 1516 + }, + { + "epoch": 0.09, + "learning_rate": 9.911273718291861e-08, + "logits/chosen": -2.2663400173187256, + "logits/rejected": -2.252309560775757, + "logps/chosen": -59.474098205566406, + "logps/rejected": -197.89834594726562, + "loss": 0.6332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050323486328125, + "rewards/margins": 0.19786988198757172, + "rewards/rejected": -0.14754639565944672, + "step": 1517 + }, + { + "epoch": 0.09, + "learning_rate": 9.911096881938041e-08, + "logits/chosen": -2.2939188480377197, + "logits/rejected": -2.3175950050354004, + "logps/chosen": -289.5107116699219, + "logps/rejected": -393.2245178222656, + "loss": 0.5146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7070465087890625, + "rewards/margins": 0.06805723905563354, + "rewards/rejected": 0.638989269733429, + "step": 1518 + }, + { + "epoch": 0.09, + "learning_rate": 9.910919871118036e-08, + "logits/chosen": -1.8944010734558105, + "logits/rejected": -1.799641728401184, + "logps/chosen": -192.22537231445312, + "logps/rejected": -318.3851013183594, + "loss": 0.5621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19305114448070526, + "rewards/margins": 0.3436035215854645, + "rewards/rejected": -0.15055237710475922, + "step": 1519 + }, + { + "epoch": 0.09, + "learning_rate": 9.910742685838136e-08, + "logits/chosen": -2.146165370941162, + "logits/rejected": -2.085683822631836, + "logps/chosen": -279.2488098144531, + "logps/rejected": -390.7994079589844, + "loss": 0.4804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.396340936422348, + "rewards/margins": 0.47999876737594604, + "rewards/rejected": -0.08365783840417862, + "step": 1520 + }, + { + "epoch": 0.09, + "learning_rate": 9.910565326104634e-08, + "logits/chosen": -2.1134932041168213, + "logits/rejected": -2.1177866458892822, + "logps/chosen": -223.86837768554688, + "logps/rejected": -388.66217041015625, + "loss": 0.512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5474166870117188, + "rewards/margins": 0.09941253066062927, + "rewards/rejected": 0.4480041563510895, + "step": 1521 + }, + { + "epoch": 0.09, + "learning_rate": 9.910387791923831e-08, + "logits/chosen": -2.2140443325042725, + "logits/rejected": -2.175994396209717, + "logps/chosen": -16.936506271362305, + "logps/rejected": -204.5853271484375, + "loss": 0.6158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04578571394085884, + "rewards/margins": 0.24953632056713104, + "rewards/rejected": -0.2037506103515625, + "step": 1522 + }, + { + "epoch": 0.09, + "learning_rate": 9.910210083302035e-08, + "logits/chosen": -2.2763607501983643, + "logits/rejected": -2.315279483795166, + "logps/chosen": -219.102783203125, + "logps/rejected": -331.041015625, + "loss": 0.4332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5678375363349915, + "rewards/margins": 0.45962831377983093, + "rewards/rejected": 0.10820923000574112, + "step": 1523 + }, + { + "epoch": 0.09, + "learning_rate": 9.910032200245557e-08, + "logits/chosen": -2.0926504135131836, + "logits/rejected": -2.047420024871826, + "logps/chosen": -276.1971740722656, + "logps/rejected": -473.52288818359375, + "loss": 0.4894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6042999625205994, + "rewards/margins": 0.19305726885795593, + "rewards/rejected": 0.41124269366264343, + "step": 1524 + }, + { + "epoch": 0.09, + "learning_rate": 9.90985414276072e-08, + "logits/chosen": -2.2993106842041016, + "logits/rejected": -2.3019673824310303, + "logps/chosen": -7.095255374908447, + "logps/rejected": -119.64905548095703, + "loss": 0.6408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010738849639892578, + "rewards/margins": 0.1953076422214508, + "rewards/rejected": -0.18456879258155823, + "step": 1525 + }, + { + "epoch": 0.09, + "learning_rate": 9.909675910853845e-08, + "logits/chosen": -2.1099472045898438, + "logits/rejected": -2.0769100189208984, + "logps/chosen": -260.67584228515625, + "logps/rejected": -370.2943420410156, + "loss": 0.4642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6439880728721619, + "rewards/margins": 0.26137393712997437, + "rewards/rejected": 0.3826141357421875, + "step": 1526 + }, + { + "epoch": 0.09, + "learning_rate": 9.909497504531268e-08, + "logits/chosen": -2.2484776973724365, + "logits/rejected": -2.2478299140930176, + "logps/chosen": -60.56999969482422, + "logps/rejected": -114.83954620361328, + "loss": 0.6363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13784293830394745, + "rewards/margins": 0.10552787780761719, + "rewards/rejected": 0.03231506422162056, + "step": 1527 + }, + { + "epoch": 0.09, + "learning_rate": 9.909318923799324e-08, + "logits/chosen": -2.0295889377593994, + "logits/rejected": -2.00166392326355, + "logps/chosen": -240.5760498046875, + "logps/rejected": -344.59136962890625, + "loss": 0.5916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20564423501491547, + "rewards/margins": 0.18904267251491547, + "rewards/rejected": 0.0166015625, + "step": 1528 + }, + { + "epoch": 0.09, + "learning_rate": 9.909140168664356e-08, + "logits/chosen": -1.9908862113952637, + "logits/rejected": -1.9405518770217896, + "logps/chosen": -252.1392364501953, + "logps/rejected": -410.1578674316406, + "loss": 0.5011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4113571345806122, + "rewards/margins": 0.4417007565498352, + "rewards/rejected": -0.03034362755715847, + "step": 1529 + }, + { + "epoch": 0.09, + "learning_rate": 9.908961239132719e-08, + "logits/chosen": -2.3013017177581787, + "logits/rejected": -2.287649154663086, + "logps/chosen": -11.961394309997559, + "logps/rejected": -137.92794799804688, + "loss": 0.6557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018764400854706764, + "rewards/margins": 0.13889379799365997, + "rewards/rejected": -0.12012939900159836, + "step": 1530 + }, + { + "epoch": 0.09, + "learning_rate": 9.908782135210763e-08, + "logits/chosen": -2.202589511871338, + "logits/rejected": -2.1735682487487793, + "logps/chosen": -174.79226684570312, + "logps/rejected": -242.84947204589844, + "loss": 0.5481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4346984922885895, + "rewards/margins": 0.10814666748046875, + "rewards/rejected": 0.3265518248081207, + "step": 1531 + }, + { + "epoch": 0.09, + "learning_rate": 9.908602856904858e-08, + "logits/chosen": -2.180187463760376, + "logits/rejected": -2.1829967498779297, + "logps/chosen": -75.31986999511719, + "logps/rejected": -219.92813110351562, + "loss": 0.6001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08524399250745773, + "rewards/margins": 0.3004600703716278, + "rewards/rejected": -0.21521607041358948, + "step": 1532 + }, + { + "epoch": 0.09, + "learning_rate": 9.908423404221365e-08, + "logits/chosen": -2.1386570930480957, + "logits/rejected": -2.1105451583862305, + "logps/chosen": -277.6499938964844, + "logps/rejected": -411.8894348144531, + "loss": 0.527, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6418396234512329, + "rewards/margins": -0.00256955623626709, + "rewards/rejected": 0.6444091796875, + "step": 1533 + }, + { + "epoch": 0.09, + "learning_rate": 9.908243777166665e-08, + "logits/chosen": -2.038905382156372, + "logits/rejected": -1.843180537223816, + "logps/chosen": -231.42051696777344, + "logps/rejected": -616.214111328125, + "loss": 0.5115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22830353677272797, + "rewards/margins": 0.6186966300010681, + "rewards/rejected": -0.39039307832717896, + "step": 1534 + }, + { + "epoch": 0.09, + "learning_rate": 9.908063975747138e-08, + "logits/chosen": -2.162825584411621, + "logits/rejected": -2.1616461277008057, + "logps/chosen": -1.0548149347305298, + "logps/rejected": -150.4432373046875, + "loss": 0.6334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01335611380636692, + "rewards/margins": 0.24989023804664612, + "rewards/rejected": -0.23653411865234375, + "step": 1535 + }, + { + "epoch": 0.09, + "learning_rate": 9.907883999969169e-08, + "logits/chosen": -2.015894889831543, + "logits/rejected": -1.980987310409546, + "logps/chosen": -271.4244079589844, + "logps/rejected": -380.22454833984375, + "loss": 0.5325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30937501788139343, + "rewards/margins": 0.38830873370170593, + "rewards/rejected": -0.0789337158203125, + "step": 1536 + }, + { + "epoch": 0.09, + "learning_rate": 9.907703849839154e-08, + "logits/chosen": -2.1244025230407715, + "logits/rejected": -2.1129343509674072, + "logps/chosen": -16.496488571166992, + "logps/rejected": -172.80441284179688, + "loss": 0.6094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07785721123218536, + "rewards/margins": 0.23758621513843536, + "rewards/rejected": -0.15972900390625, + "step": 1537 + }, + { + "epoch": 0.09, + "learning_rate": 9.907523525363492e-08, + "logits/chosen": -1.9496512413024902, + "logits/rejected": -1.9510889053344727, + "logps/chosen": -9.610803604125977, + "logps/rejected": -117.68313598632812, + "loss": 0.6438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03093709982931614, + "rewards/margins": 0.13971243798732758, + "rewards/rejected": -0.10877533257007599, + "step": 1538 + }, + { + "epoch": 0.09, + "learning_rate": 9.90734302654859e-08, + "logits/chosen": -2.0675644874572754, + "logits/rejected": -2.0377089977264404, + "logps/chosen": -194.2565460205078, + "logps/rejected": -401.01922607421875, + "loss": 0.553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3639968931674957, + "rewards/margins": 0.162638857960701, + "rewards/rejected": 0.20135803520679474, + "step": 1539 + }, + { + "epoch": 0.09, + "learning_rate": 9.907162353400857e-08, + "logits/chosen": -2.1539077758789062, + "logits/rejected": -2.149902820587158, + "logps/chosen": -24.356901168823242, + "logps/rejected": -184.186279296875, + "loss": 0.5886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001317977876169607, + "rewards/margins": 0.4134908616542816, + "rewards/rejected": -0.4133590757846832, + "step": 1540 + }, + { + "epoch": 0.09, + "learning_rate": 9.906981505926716e-08, + "logits/chosen": -2.000596761703491, + "logits/rejected": -1.9542378187179565, + "logps/chosen": -46.867401123046875, + "logps/rejected": -363.6170654296875, + "loss": 0.6444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019069671398028731, + "rewards/margins": 0.21056480705738068, + "rewards/rejected": -0.20865784585475922, + "step": 1541 + }, + { + "epoch": 0.09, + "learning_rate": 9.906800484132588e-08, + "logits/chosen": -2.1608903408050537, + "logits/rejected": -2.1615121364593506, + "logps/chosen": -11.840371131896973, + "logps/rejected": -46.84916687011719, + "loss": 0.6801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007069587591104209, + "rewards/margins": 0.05242757871747017, + "rewards/rejected": -0.05313453823328018, + "step": 1542 + }, + { + "epoch": 0.09, + "learning_rate": 9.906619288024906e-08, + "logits/chosen": -2.108240842819214, + "logits/rejected": -2.104419708251953, + "logps/chosen": -12.097443580627441, + "logps/rejected": -82.64981079101562, + "loss": 0.6724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026244640350341797, + "rewards/margins": 0.09065122902393341, + "rewards/rejected": -0.06440658867359161, + "step": 1543 + }, + { + "epoch": 0.09, + "learning_rate": 9.906437917610105e-08, + "logits/chosen": -2.172896385192871, + "logits/rejected": -2.1592960357666016, + "logps/chosen": -45.18367004394531, + "logps/rejected": -163.52908325195312, + "loss": 0.6103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09670715779066086, + "rewards/margins": 0.2403564453125, + "rewards/rejected": -0.14364929497241974, + "step": 1544 + }, + { + "epoch": 0.09, + "learning_rate": 9.90625637289463e-08, + "logits/chosen": -2.080052137374878, + "logits/rejected": -2.076773166656494, + "logps/chosen": -0.37236467003822327, + "logps/rejected": -60.22458267211914, + "loss": 0.6294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003732288023456931, + "rewards/margins": 0.2778942286968231, + "rewards/rejected": -0.28162652254104614, + "step": 1545 + }, + { + "epoch": 0.09, + "learning_rate": 9.906074653884928e-08, + "logits/chosen": -2.0390846729278564, + "logits/rejected": -2.0303735733032227, + "logps/chosen": -118.50833129882812, + "logps/rejected": -297.4930725097656, + "loss": 0.638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029055023565888405, + "rewards/margins": 0.1873619109392166, + "rewards/rejected": -0.15830688178539276, + "step": 1546 + }, + { + "epoch": 0.09, + "learning_rate": 9.905892760587458e-08, + "logits/chosen": -2.238210678100586, + "logits/rejected": -2.2198848724365234, + "logps/chosen": -87.7953109741211, + "logps/rejected": -193.08889770507812, + "loss": 0.5971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14225082099437714, + "rewards/margins": 0.24569624662399292, + "rewards/rejected": -0.10344543308019638, + "step": 1547 + }, + { + "epoch": 0.09, + "learning_rate": 9.90571069300868e-08, + "logits/chosen": -2.173264265060425, + "logits/rejected": -2.1406948566436768, + "logps/chosen": -156.25692749023438, + "logps/rejected": -268.74774169921875, + "loss": 0.5771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05940094217658043, + "rewards/margins": 0.41954195499420166, + "rewards/rejected": -0.36014100909233093, + "step": 1548 + }, + { + "epoch": 0.09, + "learning_rate": 9.905528451155061e-08, + "logits/chosen": -2.1257941722869873, + "logits/rejected": -2.121511697769165, + "logps/chosen": -153.4712371826172, + "logps/rejected": -315.443115234375, + "loss": 0.5271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27627870440483093, + "rewards/margins": 0.41242069005966187, + "rewards/rejected": -0.13614197075366974, + "step": 1549 + }, + { + "epoch": 0.09, + "learning_rate": 9.905346035033076e-08, + "logits/chosen": -2.1445820331573486, + "logits/rejected": -2.1278648376464844, + "logps/chosen": -152.87020874023438, + "logps/rejected": -257.3262939453125, + "loss": 0.6184, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3927505612373352, + "rewards/margins": -0.13516998291015625, + "rewards/rejected": 0.5279205441474915, + "step": 1550 + }, + { + "epoch": 0.09, + "learning_rate": 9.905163444649203e-08, + "logits/chosen": -2.1849043369293213, + "logits/rejected": -2.163726806640625, + "logps/chosen": -279.35601806640625, + "logps/rejected": -431.80706787109375, + "loss": 0.4334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.74151611328125, + "rewards/margins": 0.3841491639614105, + "rewards/rejected": 0.3573669493198395, + "step": 1551 + }, + { + "epoch": 0.09, + "learning_rate": 9.904980680009933e-08, + "logits/chosen": -2.1406195163726807, + "logits/rejected": -2.1405327320098877, + "logps/chosen": -205.55499267578125, + "logps/rejected": -297.327880859375, + "loss": 0.625, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5103347897529602, + "rewards/margins": -0.20640718936920166, + "rewards/rejected": 0.7167419791221619, + "step": 1552 + }, + { + "epoch": 0.09, + "learning_rate": 9.904797741121757e-08, + "logits/chosen": -2.1310126781463623, + "logits/rejected": -2.113680124282837, + "logps/chosen": -209.51361083984375, + "logps/rejected": -357.3629150390625, + "loss": 0.5973, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5450103878974915, + "rewards/margins": -0.1613556146621704, + "rewards/rejected": 0.7063660025596619, + "step": 1553 + }, + { + "epoch": 0.09, + "learning_rate": 9.904614627991172e-08, + "logits/chosen": -2.0959365367889404, + "logits/rejected": -2.057872772216797, + "logps/chosen": -149.33102416992188, + "logps/rejected": -271.45068359375, + "loss": 0.5348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.455087274312973, + "rewards/margins": 0.15436705946922302, + "rewards/rejected": 0.30072021484375, + "step": 1554 + }, + { + "epoch": 0.09, + "learning_rate": 9.904431340624684e-08, + "logits/chosen": -1.948965072631836, + "logits/rejected": -1.975054144859314, + "logps/chosen": -429.8159484863281, + "logps/rejected": -523.497314453125, + "loss": 0.3662, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0824432373046875, + "rewards/margins": 0.4466766119003296, + "rewards/rejected": 0.6357666254043579, + "step": 1555 + }, + { + "epoch": 0.09, + "learning_rate": 9.904247879028806e-08, + "logits/chosen": -2.239442825317383, + "logits/rejected": -2.2240006923675537, + "logps/chosen": -3.1574859619140625, + "logps/rejected": -147.94625854492188, + "loss": 0.586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00371723179705441, + "rewards/margins": 0.46248453855514526, + "rewards/rejected": -0.4662017822265625, + "step": 1556 + }, + { + "epoch": 0.09, + "learning_rate": 9.904064243210054e-08, + "logits/chosen": -2.108903408050537, + "logits/rejected": -2.0910520553588867, + "logps/chosen": -170.22579956054688, + "logps/rejected": -371.79632568359375, + "loss": 0.5741, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.46702882647514343, + "rewards/margins": -0.04861447215080261, + "rewards/rejected": 0.515643298625946, + "step": 1557 + }, + { + "epoch": 0.09, + "learning_rate": 9.90388043317495e-08, + "logits/chosen": -2.010437250137329, + "logits/rejected": -1.9855061769485474, + "logps/chosen": -183.87193298339844, + "logps/rejected": -417.67425537109375, + "loss": 0.6071, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.35822296142578125, + "rewards/margins": -0.12779083847999573, + "rewards/rejected": 0.486013799905777, + "step": 1558 + }, + { + "epoch": 0.09, + "learning_rate": 9.903696448930027e-08, + "logits/chosen": -2.072937488555908, + "logits/rejected": -1.927290678024292, + "logps/chosen": -315.29058837890625, + "logps/rejected": -501.59124755859375, + "loss": 0.5762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18534241616725922, + "rewards/margins": 0.20363464951515198, + "rewards/rejected": -0.01829223707318306, + "step": 1559 + }, + { + "epoch": 0.09, + "learning_rate": 9.903512290481819e-08, + "logits/chosen": -2.2263362407684326, + "logits/rejected": -2.1983065605163574, + "logps/chosen": -0.13003651797771454, + "logps/rejected": -246.37774658203125, + "loss": 0.5921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000900310289580375, + "rewards/margins": 0.4463607668876648, + "rewards/rejected": -0.4472610652446747, + "step": 1560 + }, + { + "epoch": 0.09, + "learning_rate": 9.903327957836868e-08, + "logits/chosen": -2.231961488723755, + "logits/rejected": -2.1919186115264893, + "logps/chosen": -251.49203491210938, + "logps/rejected": -418.17919921875, + "loss": 0.4403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6256378293037415, + "rewards/margins": 0.4271637201309204, + "rewards/rejected": 0.19847412407398224, + "step": 1561 + }, + { + "epoch": 0.09, + "learning_rate": 9.903143451001726e-08, + "logits/chosen": -2.091195821762085, + "logits/rejected": -2.092740058898926, + "logps/chosen": -0.5939332842826843, + "logps/rejected": -68.91995239257812, + "loss": 0.6925, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.003693276783451438, + "rewards/margins": -0.013448446989059448, + "rewards/rejected": 0.01714172400534153, + "step": 1562 + }, + { + "epoch": 0.09, + "learning_rate": 9.902958769982943e-08, + "logits/chosen": -1.9255263805389404, + "logits/rejected": -1.9206048250198364, + "logps/chosen": -129.9193572998047, + "logps/rejected": -279.20806884765625, + "loss": 0.5785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2529251277446747, + "rewards/margins": 0.13412630558013916, + "rewards/rejected": 0.11879882961511612, + "step": 1563 + }, + { + "epoch": 0.09, + "learning_rate": 9.90277391478708e-08, + "logits/chosen": -2.0158979892730713, + "logits/rejected": -2.0485410690307617, + "logps/chosen": -314.1103515625, + "logps/rejected": -307.0650634765625, + "loss": 0.4578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7038604617118835, + "rewards/margins": 0.3053741157054901, + "rewards/rejected": 0.39848634600639343, + "step": 1564 + }, + { + "epoch": 0.09, + "learning_rate": 9.902588885420706e-08, + "logits/chosen": -2.1696219444274902, + "logits/rejected": -2.164966583251953, + "logps/chosen": -0.014768826775252819, + "logps/rejected": -258.9293518066406, + "loss": 0.6299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00018489612557459623, + "rewards/margins": 0.2630602717399597, + "rewards/rejected": -0.26287537813186646, + "step": 1565 + }, + { + "epoch": 0.09, + "learning_rate": 9.902403681890396e-08, + "logits/chosen": -2.2859339714050293, + "logits/rejected": -2.2847182750701904, + "logps/chosen": -0.0006919459556229413, + "logps/rejected": -102.86721801757812, + "loss": 0.6897, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0372740436869208e-06, + "rewards/margins": 0.013832343742251396, + "rewards/rejected": -0.013834381476044655, + "step": 1566 + }, + { + "epoch": 0.09, + "learning_rate": 9.902218304202725e-08, + "logits/chosen": -2.078266143798828, + "logits/rejected": -2.0677950382232666, + "logps/chosen": -181.35336303710938, + "logps/rejected": -264.9619445800781, + "loss": 0.5239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5185180902481079, + "rewards/margins": 0.1390533745288849, + "rewards/rejected": 0.379464715719223, + "step": 1567 + }, + { + "epoch": 0.09, + "learning_rate": 9.902032752364281e-08, + "logits/chosen": -2.2394044399261475, + "logits/rejected": -2.2390217781066895, + "logps/chosen": -0.00031514489091932774, + "logps/rejected": -47.79743576049805, + "loss": 0.6322, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.005436833016574e-06, + "rewards/margins": 0.26016995310783386, + "rewards/rejected": -0.260174959897995, + "step": 1568 + }, + { + "epoch": 0.09, + "learning_rate": 9.901847026381656e-08, + "logits/chosen": -2.042938709259033, + "logits/rejected": -1.9275575876235962, + "logps/chosen": -244.7642059326172, + "logps/rejected": -335.35174560546875, + "loss": 0.5195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38481447100639343, + "rewards/margins": 0.38079530000686646, + "rewards/rejected": 0.0040191649459302425, + "step": 1569 + }, + { + "epoch": 0.09, + "learning_rate": 9.901661126261445e-08, + "logits/chosen": -2.057603597640991, + "logits/rejected": -2.039248466491699, + "logps/chosen": -83.24199676513672, + "logps/rejected": -252.7203369140625, + "loss": 0.62, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.042052458971738815, + "rewards/margins": 0.387546569108963, + "rewards/rejected": -0.42959901690483093, + "step": 1570 + }, + { + "epoch": 0.09, + "learning_rate": 9.901475052010255e-08, + "logits/chosen": -2.1570351123809814, + "logits/rejected": -2.1621625423431396, + "logps/chosen": -8.809453720459715e-05, + "logps/rejected": -161.0736083984375, + "loss": 0.6585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8119084188583656e-06, + "rewards/margins": 0.1437670737504959, + "rewards/rejected": -0.14376525580883026, + "step": 1571 + }, + { + "epoch": 0.09, + "learning_rate": 9.901288803634697e-08, + "logits/chosen": -2.1299564838409424, + "logits/rejected": -2.1398627758026123, + "logps/chosen": -7.024867534637451, + "logps/rejected": -155.0001220703125, + "loss": 0.6614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01791844330728054, + "rewards/margins": 0.15988002717494965, + "rewards/rejected": -0.17779846489429474, + "step": 1572 + }, + { + "epoch": 0.09, + "learning_rate": 9.901102381141384e-08, + "logits/chosen": -2.214966058731079, + "logits/rejected": -2.122659206390381, + "logps/chosen": -226.07656860351562, + "logps/rejected": -412.4556579589844, + "loss": 0.4524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3262008726596832, + "rewards/margins": 0.7060623168945312, + "rewards/rejected": -0.379861444234848, + "step": 1573 + }, + { + "epoch": 0.09, + "learning_rate": 9.900915784536941e-08, + "logits/chosen": -2.1524295806884766, + "logits/rejected": -2.149000644683838, + "logps/chosen": -5.853101902175695e-05, + "logps/rejected": -235.61920166015625, + "loss": 0.5627, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3377872910023143e-07, + "rewards/margins": 0.6238598227500916, + "rewards/rejected": -0.6238601803779602, + "step": 1574 + }, + { + "epoch": 0.09, + "learning_rate": 9.900729013827997e-08, + "logits/chosen": -1.8770326375961304, + "logits/rejected": -1.8796929121017456, + "logps/chosen": -301.3018798828125, + "logps/rejected": -338.83074951171875, + "loss": 0.5018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6366333365440369, + "rewards/margins": 0.08651429414749146, + "rewards/rejected": 0.5501190423965454, + "step": 1575 + }, + { + "epoch": 0.09, + "learning_rate": 9.900542069021187e-08, + "logits/chosen": -2.201253652572632, + "logits/rejected": -2.2056517601013184, + "logps/chosen": -0.04237491264939308, + "logps/rejected": -215.97811889648438, + "loss": 0.598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003472302050795406, + "rewards/margins": 0.41646337509155273, + "rewards/rejected": -0.4168106019496918, + "step": 1576 + }, + { + "epoch": 0.09, + "learning_rate": 9.90035495012315e-08, + "logits/chosen": -2.2244837284088135, + "logits/rejected": -2.2228808403015137, + "logps/chosen": -45.508304595947266, + "logps/rejected": -249.77609252929688, + "loss": 0.6642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04863853380084038, + "rewards/margins": 0.06090354919433594, + "rewards/rejected": -0.012265014462172985, + "step": 1577 + }, + { + "epoch": 0.09, + "learning_rate": 9.900167657140536e-08, + "logits/chosen": -2.2411768436431885, + "logits/rejected": -2.214045286178589, + "logps/chosen": -14.540826797485352, + "logps/rejected": -237.92074584960938, + "loss": 0.5148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.038756560534238815, + "rewards/margins": 0.855791449546814, + "rewards/rejected": -0.817034900188446, + "step": 1578 + }, + { + "epoch": 0.09, + "learning_rate": 9.899980190079997e-08, + "logits/chosen": -2.1267056465148926, + "logits/rejected": -2.114560604095459, + "logps/chosen": -72.77131652832031, + "logps/rejected": -244.63209533691406, + "loss": 0.5542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.053656768053770065, + "rewards/margins": 0.8127983212471008, + "rewards/rejected": -0.866455078125, + "step": 1579 + }, + { + "epoch": 0.09, + "learning_rate": 9.899792548948193e-08, + "logits/chosen": -2.1197474002838135, + "logits/rejected": -2.0952513217926025, + "logps/chosen": -229.8765869140625, + "logps/rejected": -418.53839111328125, + "loss": 0.5662, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6143768429756165, + "rewards/margins": -0.09350889921188354, + "rewards/rejected": 0.7078857421875, + "step": 1580 + }, + { + "epoch": 0.09, + "learning_rate": 9.899604733751791e-08, + "logits/chosen": -2.062986135482788, + "logits/rejected": -2.010037899017334, + "logps/chosen": -306.5240478515625, + "logps/rejected": -464.49847412109375, + "loss": 0.4918, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8571869134902954, + "rewards/margins": -0.028607189655303955, + "rewards/rejected": 0.8857941031455994, + "step": 1581 + }, + { + "epoch": 0.09, + "learning_rate": 9.899416744497461e-08, + "logits/chosen": -2.0996785163879395, + "logits/rejected": -2.091402053833008, + "logps/chosen": -236.5646514892578, + "logps/rejected": -282.03668212890625, + "loss": 0.6346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16905365884304047, + "rewards/margins": 0.05761566758155823, + "rewards/rejected": 0.11143799126148224, + "step": 1582 + }, + { + "epoch": 0.09, + "learning_rate": 9.899228581191883e-08, + "logits/chosen": -2.0934534072875977, + "logits/rejected": -2.0914227962493896, + "logps/chosen": -2.342743396759033, + "logps/rejected": -129.25054931640625, + "loss": 0.6521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02184760570526123, + "rewards/margins": 0.18884041905403137, + "rewards/rejected": -0.2106880247592926, + "step": 1583 + }, + { + "epoch": 0.09, + "learning_rate": 9.899040243841742e-08, + "logits/chosen": -2.1306824684143066, + "logits/rejected": -2.1053457260131836, + "logps/chosen": -193.3266143798828, + "logps/rejected": -409.67626953125, + "loss": 0.5594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13370513916015625, + "rewards/margins": 0.4961532652378082, + "rewards/rejected": -0.362448126077652, + "step": 1584 + }, + { + "epoch": 0.09, + "learning_rate": 9.898851732453728e-08, + "logits/chosen": -2.2004222869873047, + "logits/rejected": -2.205782651901245, + "logps/chosen": -3.921934330719523e-05, + "logps/rejected": -19.77519416809082, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.867612907830335e-07, + "rewards/margins": -0.010852456092834473, + "rewards/rejected": 0.010851669125258923, + "step": 1585 + }, + { + "epoch": 0.09, + "learning_rate": 9.898663047034537e-08, + "logits/chosen": -2.342262029647827, + "logits/rejected": -2.332489252090454, + "logps/chosen": -220.87680053710938, + "logps/rejected": -327.48089599609375, + "loss": 0.6371, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5015091300010681, + "rewards/margins": -0.2927718758583069, + "rewards/rejected": 0.794281005859375, + "step": 1586 + }, + { + "epoch": 0.09, + "learning_rate": 9.898474187590872e-08, + "logits/chosen": -2.249239683151245, + "logits/rejected": -2.2529258728027344, + "logps/chosen": -21.83833885192871, + "logps/rejected": -62.27472686767578, + "loss": 0.7218, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0669044479727745, + "rewards/margins": -0.049897000193595886, + "rewards/rejected": -0.01700744591653347, + "step": 1587 + }, + { + "epoch": 0.09, + "learning_rate": 9.898285154129443e-08, + "logits/chosen": -2.024477005004883, + "logits/rejected": -1.9792901277542114, + "logps/chosen": -182.04649353027344, + "logps/rejected": -265.05267333984375, + "loss": 0.6074, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15748444199562073, + "rewards/margins": 0.18512116372585297, + "rewards/rejected": -0.02763671986758709, + "step": 1588 + }, + { + "epoch": 0.09, + "learning_rate": 9.898095946656966e-08, + "logits/chosen": -2.1967434883117676, + "logits/rejected": -2.194236993789673, + "logps/chosen": -19.592893600463867, + "logps/rejected": -129.51304626464844, + "loss": 0.7046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12889881432056427, + "rewards/margins": 0.06428661942481995, + "rewards/rejected": -0.19318543374538422, + "step": 1589 + }, + { + "epoch": 0.09, + "learning_rate": 9.897906565180161e-08, + "logits/chosen": -2.0630898475646973, + "logits/rejected": -2.0185909271240234, + "logps/chosen": -195.1591796875, + "logps/rejected": -255.24526977539062, + "loss": 0.6649, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12780456244945526, + "rewards/margins": -0.034829720854759216, + "rewards/rejected": 0.16263428330421448, + "step": 1590 + }, + { + "epoch": 0.09, + "learning_rate": 9.897717009705757e-08, + "logits/chosen": -2.3253326416015625, + "logits/rejected": -2.3210294246673584, + "logps/chosen": -3.2171545028686523, + "logps/rejected": -75.6500473022461, + "loss": 0.6969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024175453931093216, + "rewards/margins": 0.02877635881304741, + "rewards/rejected": -0.052951812744140625, + "step": 1591 + }, + { + "epoch": 0.09, + "learning_rate": 9.897527280240488e-08, + "logits/chosen": -2.287689447402954, + "logits/rejected": -2.2750067710876465, + "logps/chosen": -1.0541859865188599, + "logps/rejected": -99.51840209960938, + "loss": 0.6723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000610053539276123, + "rewards/margins": 0.06829651445150375, + "rewards/rejected": -0.06768646091222763, + "step": 1592 + }, + { + "epoch": 0.09, + "learning_rate": 9.897337376791092e-08, + "logits/chosen": -2.2700817584991455, + "logits/rejected": -2.2773234844207764, + "logps/chosen": -7.1316680908203125, + "logps/rejected": -70.76285552978516, + "loss": 0.6736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012216567993164062, + "rewards/margins": 0.08113536983728409, + "rewards/rejected": -0.0823570266366005, + "step": 1593 + }, + { + "epoch": 0.09, + "learning_rate": 9.897147299364318e-08, + "logits/chosen": -2.222811698913574, + "logits/rejected": -2.208974838256836, + "logps/chosen": -5.138031482696533, + "logps/rejected": -208.7291717529297, + "loss": 0.5752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0373656265437603, + "rewards/margins": 0.595016598701477, + "rewards/rejected": -0.6323822140693665, + "step": 1594 + }, + { + "epoch": 0.09, + "learning_rate": 9.896957047966917e-08, + "logits/chosen": -1.987684965133667, + "logits/rejected": -1.9827274084091187, + "logps/chosen": -287.93475341796875, + "logps/rejected": -407.88848876953125, + "loss": 0.537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4908508360385895, + "rewards/margins": 0.11766663193702698, + "rewards/rejected": 0.3731842041015625, + "step": 1595 + }, + { + "epoch": 0.09, + "learning_rate": 9.896766622605648e-08, + "logits/chosen": -2.1428346633911133, + "logits/rejected": -2.123814105987549, + "logps/chosen": -254.1787109375, + "logps/rejected": -390.751953125, + "loss": 0.3781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9382110834121704, + "rewards/margins": 0.5693114995956421, + "rewards/rejected": 0.36889955401420593, + "step": 1596 + }, + { + "epoch": 0.09, + "learning_rate": 9.896576023287276e-08, + "logits/chosen": -2.091787576675415, + "logits/rejected": -2.1098618507385254, + "logps/chosen": -275.932373046875, + "logps/rejected": -308.8154296875, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9531616568565369, + "rewards/margins": 0.5057312250137329, + "rewards/rejected": 0.44743043184280396, + "step": 1597 + }, + { + "epoch": 0.09, + "learning_rate": 9.896385250018573e-08, + "logits/chosen": -2.144458055496216, + "logits/rejected": -2.130751609802246, + "logps/chosen": -38.537479400634766, + "logps/rejected": -189.94955444335938, + "loss": 0.5679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09236755222082138, + "rewards/margins": 0.4392242431640625, + "rewards/rejected": -0.3468566834926605, + "step": 1598 + }, + { + "epoch": 0.09, + "learning_rate": 9.896194302806313e-08, + "logits/chosen": -2.1398112773895264, + "logits/rejected": -2.1303813457489014, + "logps/chosen": -1.3742786645889282, + "logps/rejected": -104.22947692871094, + "loss": 0.7025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010358822531998158, + "rewards/margins": 0.012681948952376842, + "rewards/rejected": -0.023040771484375, + "step": 1599 + }, + { + "epoch": 0.09, + "learning_rate": 9.896003181657284e-08, + "logits/chosen": -2.132572650909424, + "logits/rejected": -2.055431365966797, + "logps/chosen": -219.9115753173828, + "logps/rejected": -433.95391845703125, + "loss": 0.5693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.144154354929924, + "rewards/margins": 0.33391571044921875, + "rewards/rejected": -0.18976135551929474, + "step": 1600 + }, + { + "epoch": 0.09, + "learning_rate": 9.895811886578272e-08, + "logits/chosen": -2.1053709983825684, + "logits/rejected": -2.0824759006500244, + "logps/chosen": -220.1091766357422, + "logps/rejected": -347.83660888671875, + "loss": 0.5282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3744400143623352, + "rewards/margins": 0.3251327574253082, + "rewards/rejected": 0.04930725321173668, + "step": 1601 + }, + { + "epoch": 0.09, + "learning_rate": 9.895620417576073e-08, + "logits/chosen": -2.0869226455688477, + "logits/rejected": -2.096087694168091, + "logps/chosen": -17.073949813842773, + "logps/rejected": -232.0670166015625, + "loss": 0.5835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023220444098114967, + "rewards/margins": 0.4569515287876129, + "rewards/rejected": -0.4337310791015625, + "step": 1602 + }, + { + "epoch": 0.09, + "learning_rate": 9.89542877465749e-08, + "logits/chosen": -2.0878169536590576, + "logits/rejected": -2.089623212814331, + "logps/chosen": -11.492847442626953, + "logps/rejected": -148.44354248046875, + "loss": 0.6843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005089473910629749, + "rewards/margins": 0.018119145184755325, + "rewards/rejected": -0.0232086181640625, + "step": 1603 + }, + { + "epoch": 0.09, + "learning_rate": 9.895236957829332e-08, + "logits/chosen": -1.9790995121002197, + "logits/rejected": -1.970094084739685, + "logps/chosen": -323.38250732421875, + "logps/rejected": -491.92523193359375, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.588909924030304, + "rewards/margins": 0.514300525188446, + "rewards/rejected": 0.07460937649011612, + "step": 1604 + }, + { + "epoch": 0.09, + "learning_rate": 9.89504496709841e-08, + "logits/chosen": -2.116826057434082, + "logits/rejected": -2.0581552982330322, + "logps/chosen": -171.5723876953125, + "logps/rejected": -636.0881958007812, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3517166078090668, + "rewards/margins": 1.540950059890747, + "rewards/rejected": -1.189233422279358, + "step": 1605 + }, + { + "epoch": 0.09, + "learning_rate": 9.894852802471547e-08, + "logits/chosen": -2.046389102935791, + "logits/rejected": -2.066789388656616, + "logps/chosen": -154.28085327148438, + "logps/rejected": -212.65484619140625, + "loss": 0.5753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18000030517578125, + "rewards/margins": 0.32200777530670166, + "rewards/rejected": -0.14200745522975922, + "step": 1606 + }, + { + "epoch": 0.09, + "learning_rate": 9.89466046395557e-08, + "logits/chosen": -2.3019566535949707, + "logits/rejected": -2.2724218368530273, + "logps/chosen": -162.82745361328125, + "logps/rejected": -341.0810241699219, + "loss": 0.5966, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5120514035224915, + "rewards/margins": -0.13848876953125, + "rewards/rejected": 0.6505401730537415, + "step": 1607 + }, + { + "epoch": 0.09, + "learning_rate": 9.89446795155731e-08, + "logits/chosen": -2.0890941619873047, + "logits/rejected": -2.0795280933380127, + "logps/chosen": -18.37706756591797, + "logps/rejected": -223.997802734375, + "loss": 0.5679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021274948492646217, + "rewards/margins": 0.5518520474433899, + "rewards/rejected": -0.5305771231651306, + "step": 1608 + }, + { + "epoch": 0.09, + "learning_rate": 9.894275265283609e-08, + "logits/chosen": -2.285921335220337, + "logits/rejected": -2.2804815769195557, + "logps/chosen": -0.8291102051734924, + "logps/rejected": -58.003517150878906, + "loss": 0.6948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01438035350292921, + "rewards/margins": 0.004363543353974819, + "rewards/rejected": -0.01874389685690403, + "step": 1609 + }, + { + "epoch": 0.09, + "learning_rate": 9.894082405141308e-08, + "logits/chosen": -2.102940320968628, + "logits/rejected": -2.086841106414795, + "logps/chosen": -238.46804809570312, + "logps/rejected": -348.62347412109375, + "loss": 0.5218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6568542718887329, + "rewards/margins": 0.0326995849609375, + "rewards/rejected": 0.6241546869277954, + "step": 1610 + }, + { + "epoch": 0.09, + "learning_rate": 9.893889371137262e-08, + "logits/chosen": -1.9708709716796875, + "logits/rejected": -1.9710413217544556, + "logps/chosen": -226.04025268554688, + "logps/rejected": -366.54290771484375, + "loss": 0.523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36500856280326843, + "rewards/margins": 0.42886966466903687, + "rewards/rejected": -0.06386108696460724, + "step": 1611 + }, + { + "epoch": 0.09, + "learning_rate": 9.893696163278325e-08, + "logits/chosen": -2.2117695808410645, + "logits/rejected": -2.209362030029297, + "logps/chosen": -15.882533073425293, + "logps/rejected": -60.31694412231445, + "loss": 0.6871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.049764540046453476, + "rewards/margins": 0.06146517023444176, + "rewards/rejected": -0.11122971028089523, + "step": 1612 + }, + { + "epoch": 0.09, + "learning_rate": 9.893502781571364e-08, + "logits/chosen": -2.1703221797943115, + "logits/rejected": -2.1691155433654785, + "logps/chosen": -222.7252197265625, + "logps/rejected": -266.7315979003906, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7041412591934204, + "rewards/margins": 0.32447510957717896, + "rewards/rejected": 0.37966614961624146, + "step": 1613 + }, + { + "epoch": 0.09, + "learning_rate": 9.893309226023247e-08, + "logits/chosen": -2.1468875408172607, + "logits/rejected": -2.115981101989746, + "logps/chosen": -196.91317749023438, + "logps/rejected": -272.75299072265625, + "loss": 0.5132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6735290884971619, + "rewards/margins": 0.12230837345123291, + "rewards/rejected": 0.551220715045929, + "step": 1614 + }, + { + "epoch": 0.09, + "learning_rate": 9.893115496640852e-08, + "logits/chosen": -2.156647205352783, + "logits/rejected": -2.1637401580810547, + "logps/chosen": -4.7599263191223145, + "logps/rejected": -154.9557342529297, + "loss": 0.6332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01238784845918417, + "rewards/margins": 0.24183882772922516, + "rewards/rejected": -0.229450985789299, + "step": 1615 + }, + { + "epoch": 0.09, + "learning_rate": 9.892921593431058e-08, + "logits/chosen": -2.1790082454681396, + "logits/rejected": -2.1777262687683105, + "logps/chosen": -7.049933433532715, + "logps/rejected": -167.44252014160156, + "loss": 0.6214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04643554612994194, + "rewards/margins": 0.26718902587890625, + "rewards/rejected": -0.220753476023674, + "step": 1616 + }, + { + "epoch": 0.09, + "learning_rate": 9.892727516400755e-08, + "logits/chosen": -2.1688711643218994, + "logits/rejected": -2.163085699081421, + "logps/chosen": -17.47882080078125, + "logps/rejected": -168.00601196289062, + "loss": 0.7286, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08320999145507812, + "rewards/margins": -0.08873061835765839, + "rewards/rejected": 0.005520630162209272, + "step": 1617 + }, + { + "epoch": 0.09, + "learning_rate": 9.892533265556838e-08, + "logits/chosen": -1.9120337963104248, + "logits/rejected": -1.807682991027832, + "logps/chosen": -282.20562744140625, + "logps/rejected": -533.1504516601562, + "loss": 0.5265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18040771782398224, + "rewards/margins": 0.60552978515625, + "rewards/rejected": -0.42512208223342896, + "step": 1618 + }, + { + "epoch": 0.09, + "learning_rate": 9.892338840906208e-08, + "logits/chosen": -2.1511099338531494, + "logits/rejected": -2.109104633331299, + "logps/chosen": -290.54364013671875, + "logps/rejected": -347.0637512207031, + "loss": 0.5382, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7946228384971619, + "rewards/margins": -0.09357601404190063, + "rewards/rejected": 0.8881988525390625, + "step": 1619 + }, + { + "epoch": 0.09, + "learning_rate": 9.892144242455771e-08, + "logits/chosen": -2.208930492401123, + "logits/rejected": -2.165877103805542, + "logps/chosen": -194.7503662109375, + "logps/rejected": -441.3500061035156, + "loss": 0.5656, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6942047476768494, + "rewards/margins": -0.14324951171875, + "rewards/rejected": 0.8374542593955994, + "step": 1620 + }, + { + "epoch": 0.09, + "learning_rate": 9.891949470212441e-08, + "logits/chosen": -2.094109058380127, + "logits/rejected": -2.0520620346069336, + "logps/chosen": -220.75970458984375, + "logps/rejected": -453.1935119628906, + "loss": 0.5274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6698212027549744, + "rewards/margins": 0.03625798225402832, + "rewards/rejected": 0.633563220500946, + "step": 1621 + }, + { + "epoch": 0.09, + "learning_rate": 9.891754524183135e-08, + "logits/chosen": -2.2020699977874756, + "logits/rejected": -2.143972396850586, + "logps/chosen": -188.84120178222656, + "logps/rejected": -439.24212646484375, + "loss": 0.4969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6213638186454773, + "rewards/margins": 0.19837185740470886, + "rewards/rejected": 0.42299196124076843, + "step": 1622 + }, + { + "epoch": 0.09, + "learning_rate": 9.891559404374783e-08, + "logits/chosen": -1.9807099103927612, + "logits/rejected": -1.9763619899749756, + "logps/chosen": -14.477320671081543, + "logps/rejected": -62.93973922729492, + "loss": 0.6866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008715152740478516, + "rewards/margins": 0.0035521509125828743, + "rewards/rejected": -0.01226730365306139, + "step": 1623 + }, + { + "epoch": 0.09, + "learning_rate": 9.891364110794312e-08, + "logits/chosen": -2.3409481048583984, + "logits/rejected": -2.3298301696777344, + "logps/chosen": -53.60544204711914, + "logps/rejected": -203.23599243164062, + "loss": 0.611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06925811618566513, + "rewards/margins": 0.2576705813407898, + "rewards/rejected": -0.18841247260570526, + "step": 1624 + }, + { + "epoch": 0.09, + "learning_rate": 9.891168643448661e-08, + "logits/chosen": -2.222609758377075, + "logits/rejected": -2.212423086166382, + "logps/chosen": -0.0017301248153671622, + "logps/rejected": -142.45883178710938, + "loss": 0.6632, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.247318050445756e-05, + "rewards/margins": 0.12336704879999161, + "rewards/rejected": -0.12337952107191086, + "step": 1625 + }, + { + "epoch": 0.09, + "learning_rate": 9.890973002344775e-08, + "logits/chosen": -2.1489930152893066, + "logits/rejected": -2.0704774856567383, + "logps/chosen": -245.4435577392578, + "logps/rejected": -421.47564697265625, + "loss": 0.5401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3966568112373352, + "rewards/margins": 0.23908844590187073, + "rewards/rejected": 0.15756836533546448, + "step": 1626 + }, + { + "epoch": 0.09, + "learning_rate": 9.890777187489603e-08, + "logits/chosen": -2.05867600440979, + "logits/rejected": -2.0407466888427734, + "logps/chosen": -222.7783660888672, + "logps/rejected": -246.96340942382812, + "loss": 0.5379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6462570428848267, + "rewards/margins": 0.046751439571380615, + "rewards/rejected": 0.599505603313446, + "step": 1627 + }, + { + "epoch": 0.09, + "learning_rate": 9.890581198890104e-08, + "logits/chosen": -2.1863696575164795, + "logits/rejected": -2.139439344406128, + "logps/chosen": -162.5279998779297, + "logps/rejected": -428.8101806640625, + "loss": 0.5238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5866379141807556, + "rewards/margins": 0.17768403887748718, + "rewards/rejected": 0.40895387530326843, + "step": 1628 + }, + { + "epoch": 0.09, + "learning_rate": 9.890385036553237e-08, + "logits/chosen": -2.1616358757019043, + "logits/rejected": -2.165137767791748, + "logps/chosen": -0.06481539458036423, + "logps/rejected": -46.82947540283203, + "loss": 0.6803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007150575402192771, + "rewards/margins": 0.052862364798784256, + "rewards/rejected": -0.053577423095703125, + "step": 1629 + }, + { + "epoch": 0.09, + "learning_rate": 9.890188700485973e-08, + "logits/chosen": -2.1610190868377686, + "logits/rejected": -2.1579668521881104, + "logps/chosen": -30.28073501586914, + "logps/rejected": -52.39888000488281, + "loss": 0.6816, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08295708149671555, + "rewards/margins": -0.026198193430900574, + "rewards/rejected": 0.10915527492761612, + "step": 1630 + }, + { + "epoch": 0.09, + "learning_rate": 9.889992190695285e-08, + "logits/chosen": -2.268202543258667, + "logits/rejected": -2.2341110706329346, + "logps/chosen": -100.27662658691406, + "logps/rejected": -206.43496704101562, + "loss": 0.5891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.053769685328006744, + "rewards/margins": 0.3428550660610199, + "rewards/rejected": -0.28908538818359375, + "step": 1631 + }, + { + "epoch": 0.09, + "learning_rate": 9.889795507188154e-08, + "logits/chosen": -2.202650785446167, + "logits/rejected": -2.200413942337036, + "logps/chosen": -26.95840835571289, + "logps/rejected": -180.21937561035156, + "loss": 0.6376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037183381617069244, + "rewards/margins": 0.12831956148147583, + "rewards/rejected": -0.09113617241382599, + "step": 1632 + }, + { + "epoch": 0.1, + "learning_rate": 9.889598649971569e-08, + "logits/chosen": -1.9995015859603882, + "logits/rejected": -2.001262903213501, + "logps/chosen": -0.17199182510375977, + "logps/rejected": -47.56972122192383, + "loss": 0.6821, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031269758474081755, + "rewards/margins": 0.04279854893684387, + "rewards/rejected": -0.04592552408576012, + "step": 1633 + }, + { + "epoch": 0.1, + "learning_rate": 9.889401619052522e-08, + "logits/chosen": -2.1700973510742188, + "logits/rejected": -2.1907222270965576, + "logps/chosen": -304.23760986328125, + "logps/rejected": -282.3768310546875, + "loss": 0.4253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8279663324356079, + "rewards/margins": 0.41923218965530396, + "rewards/rejected": 0.40873414278030396, + "step": 1634 + }, + { + "epoch": 0.1, + "learning_rate": 9.889204414438012e-08, + "logits/chosen": -2.1146912574768066, + "logits/rejected": -2.128993511199951, + "logps/chosen": -148.8400115966797, + "logps/rejected": -298.0125427246094, + "loss": 0.6341, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3299759030342102, + "rewards/margins": -0.1986190676689148, + "rewards/rejected": 0.528594970703125, + "step": 1635 + }, + { + "epoch": 0.1, + "learning_rate": 9.889007036135047e-08, + "logits/chosen": -1.9766039848327637, + "logits/rejected": -1.968820333480835, + "logps/chosen": -14.073670387268066, + "logps/rejected": -76.04275512695312, + "loss": 0.6885, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03696126863360405, + "rewards/margins": -0.03825235739350319, + "rewards/rejected": 0.07521362602710724, + "step": 1636 + }, + { + "epoch": 0.1, + "learning_rate": 9.888809484150638e-08, + "logits/chosen": -2.197988271713257, + "logits/rejected": -2.1950743198394775, + "logps/chosen": -1.8070544004440308, + "logps/rejected": -163.66783142089844, + "loss": 0.6779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04917565733194351, + "rewards/margins": 0.11100959032773972, + "rewards/rejected": -0.16018524765968323, + "step": 1637 + }, + { + "epoch": 0.1, + "learning_rate": 9.888611758491801e-08, + "logits/chosen": -2.0347559452056885, + "logits/rejected": -2.0424373149871826, + "logps/chosen": -0.20201672613620758, + "logps/rejected": -110.03208923339844, + "loss": 0.6577, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0265579792067e-07, + "rewards/margins": 0.1471414715051651, + "rewards/rejected": -0.1471412628889084, + "step": 1638 + }, + { + "epoch": 0.1, + "learning_rate": 9.888413859165562e-08, + "logits/chosen": -2.235527992248535, + "logits/rejected": -2.268674612045288, + "logps/chosen": -258.39202880859375, + "logps/rejected": -349.73504638671875, + "loss": 0.5309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6283844113349915, + "rewards/margins": 0.00997316837310791, + "rewards/rejected": 0.6184112429618835, + "step": 1639 + }, + { + "epoch": 0.1, + "learning_rate": 9.88821578617895e-08, + "logits/chosen": -2.267019033432007, + "logits/rejected": -2.2678017616271973, + "logps/chosen": -0.001057082787156105, + "logps/rejected": -103.97700500488281, + "loss": 0.6826, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4138993214583024e-05, + "rewards/margins": 0.035585593432188034, + "rewards/rejected": -0.03555145487189293, + "step": 1640 + }, + { + "epoch": 0.1, + "learning_rate": 9.888017539539004e-08, + "logits/chosen": -2.0460317134857178, + "logits/rejected": -2.020826816558838, + "logps/chosen": -248.4324493408203, + "logps/rejected": -353.8171691894531, + "loss": 0.6162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2228958159685135, + "rewards/margins": 0.08835296332836151, + "rewards/rejected": 0.13454285264015198, + "step": 1641 + }, + { + "epoch": 0.1, + "learning_rate": 9.887819119252764e-08, + "logits/chosen": -2.4210987091064453, + "logits/rejected": -2.386396646499634, + "logps/chosen": -0.00038666007458232343, + "logps/rejected": -207.63229370117188, + "loss": 0.5656, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.760575267137028e-06, + "rewards/margins": 0.5906854271888733, + "rewards/rejected": -0.5906952023506165, + "step": 1642 + }, + { + "epoch": 0.1, + "learning_rate": 9.88762052532728e-08, + "logits/chosen": -2.090043067932129, + "logits/rejected": -2.075599431991577, + "logps/chosen": -176.5633544921875, + "logps/rejected": -353.98883056640625, + "loss": 0.5211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49959108233451843, + "rewards/margins": 0.13243409991264343, + "rewards/rejected": 0.367156982421875, + "step": 1643 + }, + { + "epoch": 0.1, + "learning_rate": 9.887421757769607e-08, + "logits/chosen": -2.2206616401672363, + "logits/rejected": -2.217250347137451, + "logps/chosen": -17.574451446533203, + "logps/rejected": -194.6432647705078, + "loss": 0.5109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024708176031708717, + "rewards/margins": 0.8695796728134155, + "rewards/rejected": -0.8448715209960938, + "step": 1644 + }, + { + "epoch": 0.1, + "learning_rate": 9.887222816586806e-08, + "logits/chosen": -2.184623956680298, + "logits/rejected": -2.186917304992676, + "logps/chosen": -5.6504199164919555e-05, + "logps/rejected": -133.9423370361328, + "loss": 0.646, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.430504283916889e-07, + "rewards/margins": 0.19743362069129944, + "rewards/rejected": -0.1974334716796875, + "step": 1645 + }, + { + "epoch": 0.1, + "learning_rate": 9.887023701785946e-08, + "logits/chosen": -2.195085048675537, + "logits/rejected": -2.2040750980377197, + "logps/chosen": -257.2721252441406, + "logps/rejected": -406.5067443847656, + "loss": 0.4764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6000640988349915, + "rewards/margins": 0.31531068682670593, + "rewards/rejected": 0.2847534120082855, + "step": 1646 + }, + { + "epoch": 0.1, + "learning_rate": 9.886824413374098e-08, + "logits/chosen": -2.2964882850646973, + "logits/rejected": -2.3039650917053223, + "logps/chosen": -40.849098205566406, + "logps/rejected": -78.38731384277344, + "loss": 0.6712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.052616119384765625, + "rewards/margins": 0.09826965630054474, + "rewards/rejected": -0.15088577568531036, + "step": 1647 + }, + { + "epoch": 0.1, + "learning_rate": 9.886624951358343e-08, + "logits/chosen": -2.1822402477264404, + "logits/rejected": -2.1804733276367188, + "logps/chosen": -216.990966796875, + "logps/rejected": -299.9859924316406, + "loss": 0.4996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5648300051689148, + "rewards/margins": 0.19370266795158386, + "rewards/rejected": 0.37112733721733093, + "step": 1648 + }, + { + "epoch": 0.1, + "learning_rate": 9.886425315745766e-08, + "logits/chosen": -2.172434091567993, + "logits/rejected": -2.1697020530700684, + "logps/chosen": -3.2384896278381348, + "logps/rejected": -91.43580627441406, + "loss": 0.6808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005374789237976074, + "rewards/margins": 0.055931735783815384, + "rewards/rejected": -0.05055694654583931, + "step": 1649 + }, + { + "epoch": 0.1, + "learning_rate": 9.88622550654346e-08, + "logits/chosen": -2.2275173664093018, + "logits/rejected": -2.2270376682281494, + "logps/chosen": -144.00994873046875, + "logps/rejected": -200.16168212890625, + "loss": 0.6033, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.37548828125, + "rewards/margins": -0.060925304889678955, + "rewards/rejected": 0.43641358613967896, + "step": 1650 + }, + { + "epoch": 0.1, + "learning_rate": 9.886025523758525e-08, + "logits/chosen": -2.1635348796844482, + "logits/rejected": -2.1660876274108887, + "logps/chosen": -12.059266090393066, + "logps/rejected": -52.169063568115234, + "loss": 0.7008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06383724510669708, + "rewards/margins": 0.03352908790111542, + "rewards/rejected": -0.0973663330078125, + "step": 1651 + }, + { + "epoch": 0.1, + "learning_rate": 9.885825367398061e-08, + "logits/chosen": -2.0404000282287598, + "logits/rejected": -2.035578966140747, + "logps/chosen": -21.031354904174805, + "logps/rejected": -97.69352722167969, + "loss": 0.5866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003731346223503351, + "rewards/margins": 0.4313657879829407, + "rewards/rejected": -0.42763444781303406, + "step": 1652 + }, + { + "epoch": 0.1, + "learning_rate": 9.88562503746918e-08, + "logits/chosen": -2.1665592193603516, + "logits/rejected": -2.101121187210083, + "logps/chosen": -373.5834655761719, + "logps/rejected": -478.5835876464844, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0287903547286987, + "rewards/margins": 0.5682617425918579, + "rewards/rejected": 0.46052858233451843, + "step": 1653 + }, + { + "epoch": 0.1, + "learning_rate": 9.885424533979001e-08, + "logits/chosen": -2.0405168533325195, + "logits/rejected": -2.017235040664673, + "logps/chosen": -272.31109619140625, + "logps/rejected": -415.93572998046875, + "loss": 0.5539, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6605896353721619, + "rewards/margins": -0.08638304471969604, + "rewards/rejected": 0.7469726800918579, + "step": 1654 + }, + { + "epoch": 0.1, + "learning_rate": 9.885223856934646e-08, + "logits/chosen": -2.0972654819488525, + "logits/rejected": -2.0362815856933594, + "logps/chosen": -233.2012481689453, + "logps/rejected": -315.4992980957031, + "loss": 0.5578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3169265687465668, + "rewards/margins": 0.23548735678195953, + "rewards/rejected": 0.08143921196460724, + "step": 1655 + }, + { + "epoch": 0.1, + "learning_rate": 9.885023006343243e-08, + "logits/chosen": -2.130516767501831, + "logits/rejected": -2.1260857582092285, + "logps/chosen": -0.4749862551689148, + "logps/rejected": -147.27224731445312, + "loss": 0.6922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016106490045785904, + "rewards/margins": 0.022852249443531036, + "rewards/rejected": -0.03895873948931694, + "step": 1656 + }, + { + "epoch": 0.1, + "learning_rate": 9.884821982211926e-08, + "logits/chosen": -2.0799899101257324, + "logits/rejected": -2.088897705078125, + "logps/chosen": -207.70021057128906, + "logps/rejected": -273.1600646972656, + "loss": 0.5709, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5315994620323181, + "rewards/margins": -0.04160308837890625, + "rewards/rejected": 0.5732025504112244, + "step": 1657 + }, + { + "epoch": 0.1, + "learning_rate": 9.88462078454784e-08, + "logits/chosen": -2.2180051803588867, + "logits/rejected": -2.200045108795166, + "logps/chosen": -51.57860565185547, + "logps/rejected": -251.03228759765625, + "loss": 0.56, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050843048840761185, + "rewards/margins": 0.5600380301475525, + "rewards/rejected": -0.5091949701309204, + "step": 1658 + }, + { + "epoch": 0.1, + "learning_rate": 9.884419413358129e-08, + "logits/chosen": -1.983872652053833, + "logits/rejected": -1.9812581539154053, + "logps/chosen": -0.0018354958156123757, + "logps/rejected": -28.788557052612305, + "loss": 0.6895, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6042922399938107e-05, + "rewards/margins": 0.012703030370175838, + "rewards/rejected": -0.012729072943329811, + "step": 1659 + }, + { + "epoch": 0.1, + "learning_rate": 9.88421786864995e-08, + "logits/chosen": -2.0212414264678955, + "logits/rejected": -1.9244569540023804, + "logps/chosen": -183.4560546875, + "logps/rejected": -418.2779846191406, + "loss": 0.6917, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07302703708410263, + "rewards/margins": -0.06827545911073685, + "rewards/rejected": 0.14130249619483948, + "step": 1660 + }, + { + "epoch": 0.1, + "learning_rate": 9.884016150430459e-08, + "logits/chosen": -2.2793188095092773, + "logits/rejected": -2.273061513900757, + "logps/chosen": -193.85400390625, + "logps/rejected": -378.5992431640625, + "loss": 0.4532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5301971435546875, + "rewards/margins": 0.48394471406936646, + "rewards/rejected": 0.04625244066119194, + "step": 1661 + }, + { + "epoch": 0.1, + "learning_rate": 9.883814258706824e-08, + "logits/chosen": -2.1506052017211914, + "logits/rejected": -2.1534218788146973, + "logps/chosen": -1.5965913534164429, + "logps/rejected": -69.68772888183594, + "loss": 0.6859, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05647050216794014, + "rewards/margins": -0.02602003887295723, + "rewards/rejected": 0.08249054104089737, + "step": 1662 + }, + { + "epoch": 0.1, + "learning_rate": 9.883612193486217e-08, + "logits/chosen": -2.1058261394500732, + "logits/rejected": -2.1077053546905518, + "logps/chosen": -31.34552764892578, + "logps/rejected": -125.35377502441406, + "loss": 0.653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06818580627441406, + "rewards/margins": 0.06823845207691193, + "rewards/rejected": -5.264282299322076e-05, + "step": 1663 + }, + { + "epoch": 0.1, + "learning_rate": 9.883409954775819e-08, + "logits/chosen": -2.2016336917877197, + "logits/rejected": -2.1914680004119873, + "logps/chosen": -0.2301734834909439, + "logps/rejected": -161.22254943847656, + "loss": 0.6208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0026533768977969885, + "rewards/margins": 0.2998274564743042, + "rewards/rejected": -0.2971740663051605, + "step": 1664 + }, + { + "epoch": 0.1, + "learning_rate": 9.88320754258281e-08, + "logits/chosen": -2.0095338821411133, + "logits/rejected": -1.955196499824524, + "logps/chosen": -195.2829132080078, + "logps/rejected": -359.4385681152344, + "loss": 0.5225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23977355659008026, + "rewards/margins": 0.4968475103378296, + "rewards/rejected": -0.2570739686489105, + "step": 1665 + }, + { + "epoch": 0.1, + "learning_rate": 9.883004956914383e-08, + "logits/chosen": -2.1033785343170166, + "logits/rejected": -2.1248199939727783, + "logps/chosen": -262.78253173828125, + "logps/rejected": -329.515625, + "loss": 0.4886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.69989013671875, + "rewards/margins": 0.15089112520217896, + "rewards/rejected": 0.548999011516571, + "step": 1666 + }, + { + "epoch": 0.1, + "learning_rate": 9.882802197777735e-08, + "logits/chosen": -2.0442376136779785, + "logits/rejected": -2.03706955909729, + "logps/chosen": -85.00725555419922, + "logps/rejected": -212.8280029296875, + "loss": 0.5479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10545425862073898, + "rewards/margins": 0.5089057683944702, + "rewards/rejected": -0.403451532125473, + "step": 1667 + }, + { + "epoch": 0.1, + "learning_rate": 9.882599265180068e-08, + "logits/chosen": -2.159808397293091, + "logits/rejected": -2.147526264190674, + "logps/chosen": -32.21452331542969, + "logps/rejected": -153.10438537597656, + "loss": 0.6098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1375095397233963, + "rewards/margins": 0.23715706169605255, + "rewards/rejected": -0.09964752197265625, + "step": 1668 + }, + { + "epoch": 0.1, + "learning_rate": 9.882396159128592e-08, + "logits/chosen": -2.228459596633911, + "logits/rejected": -2.228220224380493, + "logps/chosen": -0.01716206595301628, + "logps/rejected": -223.93386840820312, + "loss": 0.653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019861385226249695, + "rewards/margins": 0.16414465010166168, + "rewards/rejected": -0.16434326767921448, + "step": 1669 + }, + { + "epoch": 0.1, + "learning_rate": 9.882192879630522e-08, + "logits/chosen": -2.1929779052734375, + "logits/rejected": -2.193999767303467, + "logps/chosen": -7.009283065795898, + "logps/rejected": -211.12185668945312, + "loss": 0.5613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037316180765628815, + "rewards/margins": 0.6198052167892456, + "rewards/rejected": -0.582489013671875, + "step": 1670 + }, + { + "epoch": 0.1, + "learning_rate": 9.88198942669308e-08, + "logits/chosen": -2.033684730529785, + "logits/rejected": -2.019047498703003, + "logps/chosen": -316.49835205078125, + "logps/rejected": -400.6744384765625, + "loss": 0.5485, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6027923822402954, + "rewards/margins": -0.053277552127838135, + "rewards/rejected": 0.6560699343681335, + "step": 1671 + }, + { + "epoch": 0.1, + "learning_rate": 9.881785800323493e-08, + "logits/chosen": -2.1585776805877686, + "logits/rejected": -2.136047601699829, + "logps/chosen": -235.37310791015625, + "logps/rejected": -453.2972717285156, + "loss": 0.4975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7235031127929688, + "rewards/margins": 0.11240994930267334, + "rewards/rejected": 0.6110931634902954, + "step": 1672 + }, + { + "epoch": 0.1, + "learning_rate": 9.881582000528994e-08, + "logits/chosen": -1.9982949495315552, + "logits/rejected": -2.002100944519043, + "logps/chosen": -32.89273452758789, + "logps/rejected": -146.26734924316406, + "loss": 0.6695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008295821957290173, + "rewards/margins": 0.14551430940628052, + "rewards/rejected": -0.15381012856960297, + "step": 1673 + }, + { + "epoch": 0.1, + "learning_rate": 9.881378027316825e-08, + "logits/chosen": -2.177717447280884, + "logits/rejected": -2.154323101043701, + "logps/chosen": -255.63406372070312, + "logps/rejected": -318.36004638671875, + "loss": 0.5135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.625866711139679, + "rewards/margins": 0.06528931856155396, + "rewards/rejected": 0.560577392578125, + "step": 1674 + }, + { + "epoch": 0.1, + "learning_rate": 9.881173880694231e-08, + "logits/chosen": -2.170842170715332, + "logits/rejected": -2.176506519317627, + "logps/chosen": -49.75054168701172, + "logps/rejected": -173.17623901367188, + "loss": 0.5656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024999236688017845, + "rewards/margins": 0.5631889700889587, + "rewards/rejected": -0.538189709186554, + "step": 1675 + }, + { + "epoch": 0.1, + "learning_rate": 9.880969560668464e-08, + "logits/chosen": -2.0389394760131836, + "logits/rejected": -2.0632317066192627, + "logps/chosen": -296.8769226074219, + "logps/rejected": -380.4984130859375, + "loss": 0.3681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7043426632881165, + "rewards/margins": 0.7558013796806335, + "rewards/rejected": -0.05145874246954918, + "step": 1676 + }, + { + "epoch": 0.1, + "learning_rate": 9.880765067246783e-08, + "logits/chosen": -2.0645241737365723, + "logits/rejected": -2.047785758972168, + "logps/chosen": -78.95256805419922, + "logps/rejected": -194.9278106689453, + "loss": 0.6481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001044464181177318, + "rewards/margins": 0.1817070096731186, + "rewards/rejected": -0.18066254258155823, + "step": 1677 + }, + { + "epoch": 0.1, + "learning_rate": 9.880560400436453e-08, + "logits/chosen": -2.035964250564575, + "logits/rejected": -1.991142988204956, + "logps/chosen": -279.5910339355469, + "logps/rejected": -479.18121337890625, + "loss": 0.5556, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7651702761650085, + "rewards/margins": -0.181488037109375, + "rewards/rejected": 0.9466583132743835, + "step": 1678 + }, + { + "epoch": 0.1, + "learning_rate": 9.880355560244744e-08, + "logits/chosen": -2.192678451538086, + "logits/rejected": -2.182486057281494, + "logps/chosen": -111.84239959716797, + "logps/rejected": -340.796875, + "loss": 0.5717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21478271484375, + "rewards/margins": 0.30059510469436646, + "rewards/rejected": -0.08581238240003586, + "step": 1679 + }, + { + "epoch": 0.1, + "learning_rate": 9.880150546678933e-08, + "logits/chosen": -2.0430853366851807, + "logits/rejected": -2.034691572189331, + "logps/chosen": -76.68620300292969, + "logps/rejected": -180.92672729492188, + "loss": 0.6807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07475509494543076, + "rewards/margins": 0.11011124402284622, + "rewards/rejected": -0.18486633896827698, + "step": 1680 + }, + { + "epoch": 0.1, + "learning_rate": 9.879945359746303e-08, + "logits/chosen": -2.214778423309326, + "logits/rejected": -2.2015538215637207, + "logps/chosen": -15.875421524047852, + "logps/rejected": -209.78793334960938, + "loss": 0.5563, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0054324017546605e-06, + "rewards/margins": 0.6510946750640869, + "rewards/rejected": -0.6510986685752869, + "step": 1681 + }, + { + "epoch": 0.1, + "learning_rate": 9.879739999454142e-08, + "logits/chosen": -2.187305450439453, + "logits/rejected": -2.1733434200286865, + "logps/chosen": -215.5996856689453, + "logps/rejected": -376.08349609375, + "loss": 0.4924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6795318722724915, + "rewards/margins": 0.11707460880279541, + "rewards/rejected": 0.562457263469696, + "step": 1682 + }, + { + "epoch": 0.1, + "learning_rate": 9.879534465809749e-08, + "logits/chosen": -2.1304314136505127, + "logits/rejected": -2.1613001823425293, + "logps/chosen": -142.40081787109375, + "logps/rejected": -354.9796142578125, + "loss": 0.5184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4393295347690582, + "rewards/margins": 0.20369720458984375, + "rewards/rejected": 0.23563233017921448, + "step": 1683 + }, + { + "epoch": 0.1, + "learning_rate": 9.879328758820423e-08, + "logits/chosen": -2.2522401809692383, + "logits/rejected": -2.2427895069122314, + "logps/chosen": -12.825222969055176, + "logps/rejected": -182.12147521972656, + "loss": 0.6381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01819915883243084, + "rewards/margins": 0.2345932126045227, + "rewards/rejected": -0.21639405190944672, + "step": 1684 + }, + { + "epoch": 0.1, + "learning_rate": 9.87912287849347e-08, + "logits/chosen": -1.9344295263290405, + "logits/rejected": -1.9335284233093262, + "logps/chosen": -250.4500732421875, + "logps/rejected": -454.3707275390625, + "loss": 0.6445, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6172119379043579, + "rewards/margins": -0.3904540538787842, + "rewards/rejected": 1.007665991783142, + "step": 1685 + }, + { + "epoch": 0.1, + "learning_rate": 9.878916824836209e-08, + "logits/chosen": -2.27284574508667, + "logits/rejected": -2.254457473754883, + "logps/chosen": -95.78667449951172, + "logps/rejected": -313.3369445800781, + "loss": 0.6574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14881591498851776, + "rewards/margins": 0.03861694037914276, + "rewards/rejected": 0.110198974609375, + "step": 1686 + }, + { + "epoch": 0.1, + "learning_rate": 9.878710597855954e-08, + "logits/chosen": -2.155956268310547, + "logits/rejected": -2.064526081085205, + "logps/chosen": -267.0471496582031, + "logps/rejected": -395.25299072265625, + "loss": 0.647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22123108804225922, + "rewards/margins": 0.016641244292259216, + "rewards/rejected": 0.20458984375, + "step": 1687 + }, + { + "epoch": 0.1, + "learning_rate": 9.878504197560037e-08, + "logits/chosen": -2.091411590576172, + "logits/rejected": -2.0811939239501953, + "logps/chosen": -18.763427734375, + "logps/rejected": -170.60565185546875, + "loss": 0.5853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022446060553193092, + "rewards/margins": 0.44880715012550354, + "rewards/rejected": -0.426361083984375, + "step": 1688 + }, + { + "epoch": 0.1, + "learning_rate": 9.878297623955787e-08, + "logits/chosen": -2.183915853500366, + "logits/rejected": -2.1810572147369385, + "logps/chosen": -229.12216186523438, + "logps/rejected": -307.6157531738281, + "loss": 0.4167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7278305292129517, + "rewards/margins": 0.4345504939556122, + "rewards/rejected": 0.2932800352573395, + "step": 1689 + }, + { + "epoch": 0.1, + "learning_rate": 9.878090877050543e-08, + "logits/chosen": -2.0899767875671387, + "logits/rejected": -2.0754647254943848, + "logps/chosen": -136.30917358398438, + "logps/rejected": -304.1742248535156, + "loss": 0.6369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02269439771771431, + "rewards/margins": 0.13620758056640625, + "rewards/rejected": -0.11351318657398224, + "step": 1690 + }, + { + "epoch": 0.1, + "learning_rate": 9.877883956851648e-08, + "logits/chosen": -2.06365704536438, + "logits/rejected": -2.0636119842529297, + "logps/chosen": -58.57050704956055, + "logps/rejected": -150.9473876953125, + "loss": 0.6072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10643196105957031, + "rewards/margins": 0.2562626004219055, + "rewards/rejected": -0.149830624461174, + "step": 1691 + }, + { + "epoch": 0.1, + "learning_rate": 9.877676863366456e-08, + "logits/chosen": -1.9534460306167603, + "logits/rejected": -1.9197875261306763, + "logps/chosen": -215.71507263183594, + "logps/rejected": -339.9449768066406, + "loss": 0.5402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24956512451171875, + "rewards/margins": 0.3333938717842102, + "rewards/rejected": -0.08382873982191086, + "step": 1692 + }, + { + "epoch": 0.1, + "learning_rate": 9.877469596602322e-08, + "logits/chosen": -2.1303024291992188, + "logits/rejected": -2.105581760406494, + "logps/chosen": -10.156018257141113, + "logps/rejected": -175.03680419921875, + "loss": 0.6438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005055904388427734, + "rewards/margins": 0.192674919962883, + "rewards/rejected": -0.18761901557445526, + "step": 1693 + }, + { + "epoch": 0.1, + "learning_rate": 9.877262156566611e-08, + "logits/chosen": -2.092164993286133, + "logits/rejected": -2.085268259048462, + "logps/chosen": -3.2447381019592285, + "logps/rejected": -72.02674865722656, + "loss": 0.6593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011004090309143066, + "rewards/margins": 0.11373617500066757, + "rewards/rejected": -0.1027320846915245, + "step": 1694 + }, + { + "epoch": 0.1, + "learning_rate": 9.87705454326669e-08, + "logits/chosen": -2.0534305572509766, + "logits/rejected": -2.0751326084136963, + "logps/chosen": -260.9158935546875, + "logps/rejected": -447.67645263671875, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8625427484512329, + "rewards/margins": 0.2475219964981079, + "rewards/rejected": 0.615020751953125, + "step": 1695 + }, + { + "epoch": 0.1, + "learning_rate": 9.876846756709935e-08, + "logits/chosen": -2.2175824642181396, + "logits/rejected": -2.2046568393707275, + "logps/chosen": -25.938426971435547, + "logps/rejected": -188.8253631591797, + "loss": 0.5906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012510490603744984, + "rewards/margins": 0.4470197558403015, + "rewards/rejected": -0.43450927734375, + "step": 1696 + }, + { + "epoch": 0.1, + "learning_rate": 9.876638796903727e-08, + "logits/chosen": -2.2193617820739746, + "logits/rejected": -2.214796304702759, + "logps/chosen": -5.172842502593994, + "logps/rejected": -57.719261169433594, + "loss": 0.6819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010695457604015246, + "rewards/margins": 0.01143708173185587, + "rewards/rejected": -0.01154403667896986, + "step": 1697 + }, + { + "epoch": 0.1, + "learning_rate": 9.876430663855456e-08, + "logits/chosen": -2.255481004714966, + "logits/rejected": -2.236147403717041, + "logps/chosen": -172.67478942871094, + "logps/rejected": -279.7394714355469, + "loss": 0.6396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2109527587890625, + "rewards/margins": 0.02579040825366974, + "rewards/rejected": 0.18516235053539276, + "step": 1698 + }, + { + "epoch": 0.1, + "learning_rate": 9.876222357572514e-08, + "logits/chosen": -2.1300530433654785, + "logits/rejected": -2.1270389556884766, + "logps/chosen": -0.07206539809703827, + "logps/rejected": -118.05497741699219, + "loss": 0.6894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008035294595174491, + "rewards/margins": 0.014085234142839909, + "rewards/rejected": -0.014888763427734375, + "step": 1699 + }, + { + "epoch": 0.1, + "learning_rate": 9.876013878062304e-08, + "logits/chosen": -2.0165700912475586, + "logits/rejected": -2.051574945449829, + "logps/chosen": -241.19248962402344, + "logps/rejected": -370.9674987792969, + "loss": 0.4909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6603226065635681, + "rewards/margins": 0.17762911319732666, + "rewards/rejected": 0.48269349336624146, + "step": 1700 + }, + { + "epoch": 0.1, + "learning_rate": 9.875805225332228e-08, + "logits/chosen": -2.139216423034668, + "logits/rejected": -2.1287567615509033, + "logps/chosen": -28.83072280883789, + "logps/rejected": -144.677978515625, + "loss": 0.6507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012218476040288806, + "rewards/margins": 0.11937981098890305, + "rewards/rejected": -0.11815796047449112, + "step": 1701 + }, + { + "epoch": 0.1, + "learning_rate": 9.8755963993897e-08, + "logits/chosen": -2.1468420028686523, + "logits/rejected": -2.145587682723999, + "logps/chosen": -78.06985473632812, + "logps/rejected": -141.9705352783203, + "loss": 0.7101, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.12382125854492188, + "rewards/margins": -0.035396575927734375, + "rewards/rejected": -0.0884246826171875, + "step": 1702 + }, + { + "epoch": 0.1, + "learning_rate": 9.87538740024214e-08, + "logits/chosen": -1.9781932830810547, + "logits/rejected": -1.9741566181182861, + "logps/chosen": -301.32684326171875, + "logps/rejected": -356.2266845703125, + "loss": 0.5573, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4614929258823395, + "rewards/margins": -0.013400256633758545, + "rewards/rejected": 0.474893182516098, + "step": 1703 + }, + { + "epoch": 0.1, + "learning_rate": 9.875178227896969e-08, + "logits/chosen": -2.2234413623809814, + "logits/rejected": -2.215501070022583, + "logps/chosen": -27.02788543701172, + "logps/rejected": -235.06582641601562, + "loss": 0.6903, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05661430582404137, + "rewards/margins": 0.08756598830223083, + "rewards/rejected": -0.1441802978515625, + "step": 1704 + }, + { + "epoch": 0.1, + "learning_rate": 9.874968882361623e-08, + "logits/chosen": -2.1049530506134033, + "logits/rejected": -2.0945117473602295, + "logps/chosen": -49.11569595336914, + "logps/rejected": -258.80462646484375, + "loss": 0.5891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12607231736183167, + "rewards/margins": 0.39084893465042114, + "rewards/rejected": -0.2647766172885895, + "step": 1705 + }, + { + "epoch": 0.1, + "learning_rate": 9.874759363643534e-08, + "logits/chosen": -2.2251105308532715, + "logits/rejected": -2.1919140815734863, + "logps/chosen": -269.67938232421875, + "logps/rejected": -356.7605285644531, + "loss": 0.4911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.842987060546875, + "rewards/margins": 0.06365352869033813, + "rewards/rejected": 0.7793335318565369, + "step": 1706 + }, + { + "epoch": 0.1, + "learning_rate": 9.874549671750148e-08, + "logits/chosen": -1.9500572681427002, + "logits/rejected": -1.9146331548690796, + "logps/chosen": -330.33697509765625, + "logps/rejected": -493.307861328125, + "loss": 0.4744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35976868867874146, + "rewards/margins": 0.5344604849815369, + "rewards/rejected": -0.17469178140163422, + "step": 1707 + }, + { + "epoch": 0.1, + "learning_rate": 9.874339806688915e-08, + "logits/chosen": -2.1716814041137695, + "logits/rejected": -2.1373541355133057, + "logps/chosen": -218.52493286132812, + "logps/rejected": -340.97076416015625, + "loss": 0.5122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5541015863418579, + "rewards/margins": 0.17583009600639343, + "rewards/rejected": 0.3782714903354645, + "step": 1708 + }, + { + "epoch": 0.1, + "learning_rate": 9.874129768467288e-08, + "logits/chosen": -2.3024022579193115, + "logits/rejected": -2.303224802017212, + "logps/chosen": -0.005288400687277317, + "logps/rejected": -143.57752990722656, + "loss": 0.5822, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.940648305928335e-05, + "rewards/margins": 0.5057050585746765, + "rewards/rejected": -0.5058044791221619, + "step": 1709 + }, + { + "epoch": 0.1, + "learning_rate": 9.873919557092729e-08, + "logits/chosen": -2.074932336807251, + "logits/rejected": -2.0367190837860107, + "logps/chosen": -220.52796936035156, + "logps/rejected": -489.66473388671875, + "loss": 0.5483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2699173092842102, + "rewards/margins": 0.2866348326206207, + "rewards/rejected": -0.01671752892434597, + "step": 1710 + }, + { + "epoch": 0.1, + "learning_rate": 9.873709172572709e-08, + "logits/chosen": -2.3007826805114746, + "logits/rejected": -2.2925894260406494, + "logps/chosen": -41.57087707519531, + "logps/rejected": -213.97763061523438, + "loss": 0.6407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03728294372558594, + "rewards/margins": 0.26322975754737854, + "rewards/rejected": -0.3005127012729645, + "step": 1711 + }, + { + "epoch": 0.1, + "learning_rate": 9.873498614914697e-08, + "logits/chosen": -2.249678611755371, + "logits/rejected": -2.2310259342193604, + "logps/chosen": -15.183341979980469, + "logps/rejected": -279.1602478027344, + "loss": 0.5841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007267952314577997, + "rewards/margins": 0.4912022650241852, + "rewards/rejected": -0.49047547578811646, + "step": 1712 + }, + { + "epoch": 0.1, + "learning_rate": 9.873287884126176e-08, + "logits/chosen": -2.1244101524353027, + "logits/rejected": -2.0991451740264893, + "logps/chosen": -227.40667724609375, + "logps/rejected": -311.9728698730469, + "loss": 0.4843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3400817811489105, + "rewards/margins": 0.47641295194625854, + "rewards/rejected": -0.13633118569850922, + "step": 1713 + }, + { + "epoch": 0.1, + "learning_rate": 9.873076980214632e-08, + "logits/chosen": -2.1538760662078857, + "logits/rejected": -2.1458420753479004, + "logps/chosen": -0.0025215651839971542, + "logps/rejected": -103.47773742675781, + "loss": 0.6709, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.665083881467581e-05, + "rewards/margins": 0.07970713824033737, + "rewards/rejected": -0.07976379245519638, + "step": 1714 + }, + { + "epoch": 0.1, + "learning_rate": 9.872865903187555e-08, + "logits/chosen": -1.9366437196731567, + "logits/rejected": -1.9454925060272217, + "logps/chosen": -185.5262908935547, + "logps/rejected": -280.19976806640625, + "loss": 0.5683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.271463006734848, + "rewards/margins": 0.20633849501609802, + "rewards/rejected": 0.06512451171875, + "step": 1715 + }, + { + "epoch": 0.1, + "learning_rate": 9.872654653052448e-08, + "logits/chosen": -2.1881632804870605, + "logits/rejected": -2.1877527236938477, + "logps/chosen": -0.22565238177776337, + "logps/rejected": -71.14009094238281, + "loss": 0.6578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0026477889623492956, + "rewards/margins": 0.11609993875026703, + "rewards/rejected": -0.11345215141773224, + "step": 1716 + }, + { + "epoch": 0.1, + "learning_rate": 9.872443229816811e-08, + "logits/chosen": -2.1808719635009766, + "logits/rejected": -2.141761541366577, + "logps/chosen": -204.07058715820312, + "logps/rejected": -358.642333984375, + "loss": 0.5202, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6556243896484375, + "rewards/margins": 0.09449154138565063, + "rewards/rejected": 0.5611328482627869, + "step": 1717 + }, + { + "epoch": 0.1, + "learning_rate": 9.872231633488157e-08, + "logits/chosen": -2.065239906311035, + "logits/rejected": -2.068556070327759, + "logps/chosen": -32.72564697265625, + "logps/rejected": -206.7661590576172, + "loss": 0.6245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05314788967370987, + "rewards/margins": 0.2576156556606293, + "rewards/rejected": -0.2044677734375, + "step": 1718 + }, + { + "epoch": 0.1, + "learning_rate": 9.872019864074004e-08, + "logits/chosen": -2.161208152770996, + "logits/rejected": -2.1614909172058105, + "logps/chosen": -0.00016605394193902612, + "logps/rejected": -148.94439697265625, + "loss": 0.6899, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.549630610497843e-06, + "rewards/margins": 0.013247656635940075, + "rewards/rejected": -0.013249206356704235, + "step": 1719 + }, + { + "epoch": 0.1, + "learning_rate": 9.871807921581873e-08, + "logits/chosen": -2.1428749561309814, + "logits/rejected": -2.141059160232544, + "logps/chosen": -52.459312438964844, + "logps/rejected": -113.62528991699219, + "loss": 0.6988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05956001207232475, + "rewards/margins": 0.003721240907907486, + "rewards/rejected": -0.06328125298023224, + "step": 1720 + }, + { + "epoch": 0.1, + "learning_rate": 9.871595806019294e-08, + "logits/chosen": -2.2495110034942627, + "logits/rejected": -2.257453441619873, + "logps/chosen": -117.95671844482422, + "logps/rejected": -226.192138671875, + "loss": 0.6489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.038379669189453125, + "rewards/margins": 0.2844352722167969, + "rewards/rejected": -0.32281494140625, + "step": 1721 + }, + { + "epoch": 0.1, + "learning_rate": 9.871383517393802e-08, + "logits/chosen": -2.2066104412078857, + "logits/rejected": -2.099245071411133, + "logps/chosen": -220.357666015625, + "logps/rejected": -639.668701171875, + "loss": 0.4309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7464889883995056, + "rewards/margins": 0.4383957087993622, + "rewards/rejected": 0.30809327960014343, + "step": 1722 + }, + { + "epoch": 0.1, + "learning_rate": 9.87117105571294e-08, + "logits/chosen": -2.092522621154785, + "logits/rejected": -2.077383041381836, + "logps/chosen": -50.58378601074219, + "logps/rejected": -212.77532958984375, + "loss": 0.6271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10202331840991974, + "rewards/margins": 0.13313141465187073, + "rewards/rejected": -0.03110809437930584, + "step": 1723 + }, + { + "epoch": 0.1, + "learning_rate": 9.870958420984253e-08, + "logits/chosen": -2.1033241748809814, + "logits/rejected": -2.0801584720611572, + "logps/chosen": -152.74696350097656, + "logps/rejected": -240.08485412597656, + "loss": 0.5194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3344711363315582, + "rewards/margins": 0.269400030374527, + "rewards/rejected": 0.06507110595703125, + "step": 1724 + }, + { + "epoch": 0.1, + "learning_rate": 9.870745613215299e-08, + "logits/chosen": -2.269718885421753, + "logits/rejected": -2.262350082397461, + "logps/chosen": -17.207653045654297, + "logps/rejected": -110.68081665039062, + "loss": 0.6433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004287719784770161, + "rewards/margins": 0.2086074948310852, + "rewards/rejected": -0.20903626084327698, + "step": 1725 + }, + { + "epoch": 0.1, + "learning_rate": 9.870532632413632e-08, + "logits/chosen": -2.1125807762145996, + "logits/rejected": -2.1150872707366943, + "logps/chosen": -7.961741924285889, + "logps/rejected": -117.23284912109375, + "loss": 0.6608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05140218883752823, + "rewards/margins": 0.18856897950172424, + "rewards/rejected": -0.23997116088867188, + "step": 1726 + }, + { + "epoch": 0.1, + "learning_rate": 9.870319478586824e-08, + "logits/chosen": -2.0709729194641113, + "logits/rejected": -2.0631837844848633, + "logps/chosen": -3.686936616897583, + "logps/rejected": -175.01739501953125, + "loss": 0.6832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04482767730951309, + "rewards/margins": 0.09007527679204941, + "rewards/rejected": -0.1349029541015625, + "step": 1727 + }, + { + "epoch": 0.1, + "learning_rate": 9.870106151742445e-08, + "logits/chosen": -2.067556858062744, + "logits/rejected": -2.186919689178467, + "logps/chosen": -286.738525390625, + "logps/rejected": -316.5452880859375, + "loss": 0.5648, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.555889904499054, + "rewards/margins": -0.1143798828125, + "rewards/rejected": 0.670269787311554, + "step": 1728 + }, + { + "epoch": 0.1, + "learning_rate": 9.869892651888072e-08, + "logits/chosen": -2.002917766571045, + "logits/rejected": -1.9927151203155518, + "logps/chosen": -37.14567947387695, + "logps/rejected": -124.81594848632812, + "loss": 0.6695, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08373375236988068, + "rewards/margins": -0.021084971725940704, + "rewards/rejected": 0.10481872409582138, + "step": 1729 + }, + { + "epoch": 0.1, + "learning_rate": 9.869678979031291e-08, + "logits/chosen": -2.0082643032073975, + "logits/rejected": -1.9931819438934326, + "logps/chosen": -75.21381378173828, + "logps/rejected": -233.42860412597656, + "loss": 0.6505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03295593336224556, + "rewards/margins": 0.143107607960701, + "rewards/rejected": -0.11015167087316513, + "step": 1730 + }, + { + "epoch": 0.1, + "learning_rate": 9.869465133179693e-08, + "logits/chosen": -2.208322048187256, + "logits/rejected": -2.1962852478027344, + "logps/chosen": -60.648773193359375, + "logps/rejected": -224.10133361816406, + "loss": 0.5813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01367874164134264, + "rewards/margins": 0.5034645199775696, + "rewards/rejected": -0.5171432495117188, + "step": 1731 + }, + { + "epoch": 0.1, + "learning_rate": 9.869251114340873e-08, + "logits/chosen": -1.9598556756973267, + "logits/rejected": -1.9544612169265747, + "logps/chosen": -196.52552795410156, + "logps/rejected": -273.67779541015625, + "loss": 0.406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8042449951171875, + "rewards/margins": 0.4219116270542145, + "rewards/rejected": 0.382333368062973, + "step": 1732 + }, + { + "epoch": 0.1, + "learning_rate": 9.869036922522437e-08, + "logits/chosen": -2.046569585800171, + "logits/rejected": -2.0524866580963135, + "logps/chosen": -191.78155517578125, + "logps/rejected": -326.5077819824219, + "loss": 0.635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13081054389476776, + "rewards/margins": 0.08502501994371414, + "rewards/rejected": 0.04578552395105362, + "step": 1733 + }, + { + "epoch": 0.1, + "learning_rate": 9.868822557731993e-08, + "logits/chosen": -2.1093196868896484, + "logits/rejected": -2.072425603866577, + "logps/chosen": -226.4033203125, + "logps/rejected": -273.2245178222656, + "loss": 0.514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3742019832134247, + "rewards/margins": 0.28696441650390625, + "rewards/rejected": 0.08723755180835724, + "step": 1734 + }, + { + "epoch": 0.1, + "learning_rate": 9.868608019977154e-08, + "logits/chosen": -2.24186110496521, + "logits/rejected": -2.2481071949005127, + "logps/chosen": -32.18073272705078, + "logps/rejected": -190.80880737304688, + "loss": 0.62, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01428985595703125, + "rewards/margins": 0.2877853512763977, + "rewards/rejected": -0.27349549531936646, + "step": 1735 + }, + { + "epoch": 0.1, + "learning_rate": 9.868393309265543e-08, + "logits/chosen": -2.025520086288452, + "logits/rejected": -2.015679359436035, + "logps/chosen": -161.40220642089844, + "logps/rejected": -308.0974426269531, + "loss": 0.5292, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7861679196357727, + "rewards/margins": -0.03694915771484375, + "rewards/rejected": 0.8231170773506165, + "step": 1736 + }, + { + "epoch": 0.1, + "learning_rate": 9.868178425604789e-08, + "logits/chosen": -2.0516507625579834, + "logits/rejected": -1.9477014541625977, + "logps/chosen": -365.2603454589844, + "logps/rejected": -751.535400390625, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7724029421806335, + "rewards/margins": 1.1418243646621704, + "rewards/rejected": -0.3694213926792145, + "step": 1737 + }, + { + "epoch": 0.1, + "learning_rate": 9.867963369002525e-08, + "logits/chosen": -2.1198954582214355, + "logits/rejected": -2.115596294403076, + "logps/chosen": -0.3663156032562256, + "logps/rejected": -89.34466552734375, + "loss": 0.6627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028382360469549894, + "rewards/margins": 0.13636159896850586, + "rewards/rejected": -0.13919983804225922, + "step": 1738 + }, + { + "epoch": 0.1, + "learning_rate": 9.86774813946639e-08, + "logits/chosen": -2.0045034885406494, + "logits/rejected": -1.9764578342437744, + "logps/chosen": -224.14901733398438, + "logps/rejected": -289.63128662109375, + "loss": 0.5456, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6862503290176392, + "rewards/margins": -0.07720792293548584, + "rewards/rejected": 0.763458251953125, + "step": 1739 + }, + { + "epoch": 0.1, + "learning_rate": 9.867532737004029e-08, + "logits/chosen": -2.1864426136016846, + "logits/rejected": -2.1775553226470947, + "logps/chosen": -95.39627075195312, + "logps/rejected": -200.21243286132812, + "loss": 0.958, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5211570858955383, + "rewards/margins": -0.42027512192726135, + "rewards/rejected": -0.10088195651769638, + "step": 1740 + }, + { + "epoch": 0.1, + "learning_rate": 9.867317161623096e-08, + "logits/chosen": -1.935263752937317, + "logits/rejected": -1.9312496185302734, + "logps/chosen": -297.4481201171875, + "logps/rejected": -351.2957763671875, + "loss": 0.5544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11118774861097336, + "rewards/margins": 0.45787355303764343, + "rewards/rejected": -0.3466857969760895, + "step": 1741 + }, + { + "epoch": 0.1, + "learning_rate": 9.867101413331249e-08, + "logits/chosen": -2.0077526569366455, + "logits/rejected": -2.021766424179077, + "logps/chosen": -249.73243713378906, + "logps/rejected": -300.9961242675781, + "loss": 0.5192, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6286666989326477, + "rewards/margins": -0.002430737018585205, + "rewards/rejected": 0.6310974359512329, + "step": 1742 + }, + { + "epoch": 0.1, + "learning_rate": 9.866885492136154e-08, + "logits/chosen": -2.1625566482543945, + "logits/rejected": -2.1602485179901123, + "logps/chosen": -126.15240478515625, + "logps/rejected": -294.89349365234375, + "loss": 0.6119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2511795163154602, + "rewards/margins": 0.028355419635772705, + "rewards/rejected": 0.2228240966796875, + "step": 1743 + }, + { + "epoch": 0.1, + "learning_rate": 9.866669398045478e-08, + "logits/chosen": -2.054384469985962, + "logits/rejected": -2.0536766052246094, + "logps/chosen": -25.60009002685547, + "logps/rejected": -87.78463745117188, + "loss": 0.6828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01928863488137722, + "rewards/margins": 0.039107512682676315, + "rewards/rejected": -0.019818877801299095, + "step": 1744 + }, + { + "epoch": 0.1, + "learning_rate": 9.866453131066898e-08, + "logits/chosen": -2.1333649158477783, + "logits/rejected": -2.153831958770752, + "logps/chosen": -217.1703643798828, + "logps/rejected": -280.52978515625, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7281265258789062, + "rewards/margins": 0.8116836547851562, + "rewards/rejected": -0.08355712890625, + "step": 1745 + }, + { + "epoch": 0.1, + "learning_rate": 9.8662366912081e-08, + "logits/chosen": -2.107515335083008, + "logits/rejected": -2.100776195526123, + "logps/chosen": -40.51976013183594, + "logps/rejected": -186.1960906982422, + "loss": 0.6748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02640075795352459, + "rewards/margins": 0.00934753566980362, + "rewards/rejected": 0.01705322228372097, + "step": 1746 + }, + { + "epoch": 0.1, + "learning_rate": 9.866020078476774e-08, + "logits/chosen": -1.959017276763916, + "logits/rejected": -1.95979642868042, + "logps/chosen": -55.89524841308594, + "logps/rejected": -203.82394409179688, + "loss": 0.6621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00702323904260993, + "rewards/margins": 0.17519646883010864, + "rewards/rejected": -0.16817322373390198, + "step": 1747 + }, + { + "epoch": 0.1, + "learning_rate": 9.86580329288061e-08, + "logits/chosen": -2.1131057739257812, + "logits/rejected": -2.1129839420318604, + "logps/chosen": -10.195178985595703, + "logps/rejected": -21.237262725830078, + "loss": 0.6734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016743851825594902, + "rewards/margins": 0.08798827975988388, + "rewards/rejected": -0.10473213344812393, + "step": 1748 + }, + { + "epoch": 0.1, + "learning_rate": 9.86558633442731e-08, + "logits/chosen": -2.1395938396453857, + "logits/rejected": -2.1418395042419434, + "logps/chosen": -64.88555145263672, + "logps/rejected": -75.61601257324219, + "loss": 0.784, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2761497497558594, + "rewards/margins": -0.09806366264820099, + "rewards/rejected": -0.1780860871076584, + "step": 1749 + }, + { + "epoch": 0.1, + "learning_rate": 9.865369203124585e-08, + "logits/chosen": -2.1966400146484375, + "logits/rejected": -2.177058458328247, + "logps/chosen": -227.0587921142578, + "logps/rejected": -407.63580322265625, + "loss": 0.5804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21595001220703125, + "rewards/margins": 0.19739532470703125, + "rewards/rejected": 0.0185546875, + "step": 1750 + }, + { + "epoch": 0.1, + "learning_rate": 9.865151898980148e-08, + "logits/chosen": -2.283895969390869, + "logits/rejected": -2.277984142303467, + "logps/chosen": -16.623294830322266, + "logps/rejected": -144.28546142578125, + "loss": 0.5925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0020593644585460424, + "rewards/margins": 0.4477701187133789, + "rewards/rejected": -0.4457107484340668, + "step": 1751 + }, + { + "epoch": 0.1, + "learning_rate": 9.864934422001715e-08, + "logits/chosen": -1.941006064414978, + "logits/rejected": -1.9230927228927612, + "logps/chosen": -79.41753387451172, + "logps/rejected": -152.70773315429688, + "loss": 0.5873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11132126301527023, + "rewards/margins": 0.32436448335647583, + "rewards/rejected": -0.213043212890625, + "step": 1752 + }, + { + "epoch": 0.1, + "learning_rate": 9.864716772197017e-08, + "logits/chosen": -1.816964864730835, + "logits/rejected": -1.826894760131836, + "logps/chosen": -22.25373077392578, + "logps/rejected": -153.84222412109375, + "loss": 0.6519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03928127512335777, + "rewards/margins": 0.10797786712646484, + "rewards/rejected": -0.06869659572839737, + "step": 1753 + }, + { + "epoch": 0.1, + "learning_rate": 9.864498949573782e-08, + "logits/chosen": -1.9786343574523926, + "logits/rejected": -1.9595798254013062, + "logps/chosen": -243.53172302246094, + "logps/rejected": -452.75872802734375, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5586776733398438, + "rewards/margins": 0.6867965459823608, + "rewards/rejected": -0.12811890244483948, + "step": 1754 + }, + { + "epoch": 0.1, + "learning_rate": 9.86428095413975e-08, + "logits/chosen": -2.131291389465332, + "logits/rejected": -2.125422716140747, + "logps/chosen": -107.61521911621094, + "logps/rejected": -211.87713623046875, + "loss": 0.6049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19271545112133026, + "rewards/margins": 0.11120299994945526, + "rewards/rejected": 0.081512451171875, + "step": 1755 + }, + { + "epoch": 0.1, + "learning_rate": 9.864062785902664e-08, + "logits/chosen": -2.0370330810546875, + "logits/rejected": -2.0194571018218994, + "logps/chosen": -218.7263641357422, + "logps/rejected": -305.1832275390625, + "loss": 0.5702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26178285479545593, + "rewards/margins": 0.17819520831108093, + "rewards/rejected": 0.083587646484375, + "step": 1756 + }, + { + "epoch": 0.1, + "learning_rate": 9.863844444870276e-08, + "logits/chosen": -2.177579402923584, + "logits/rejected": -2.173882007598877, + "logps/chosen": -89.0626449584961, + "logps/rejected": -287.86187744140625, + "loss": 0.6165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07871093600988388, + "rewards/margins": 0.21052247285842896, + "rewards/rejected": -0.13181152939796448, + "step": 1757 + }, + { + "epoch": 0.1, + "learning_rate": 9.863625931050342e-08, + "logits/chosen": -2.018186330795288, + "logits/rejected": -1.9991048574447632, + "logps/chosen": -23.46489715576172, + "logps/rejected": -123.7620849609375, + "loss": 0.66, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09530601650476456, + "rewards/margins": 0.19883614778518677, + "rewards/rejected": -0.2941421568393707, + "step": 1758 + }, + { + "epoch": 0.1, + "learning_rate": 9.863407244450622e-08, + "logits/chosen": -1.9999490976333618, + "logits/rejected": -1.9913426637649536, + "logps/chosen": -238.77914428710938, + "logps/rejected": -320.41375732421875, + "loss": 0.4759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8226303458213806, + "rewards/margins": 0.21671295166015625, + "rewards/rejected": 0.6059173941612244, + "step": 1759 + }, + { + "epoch": 0.1, + "learning_rate": 9.86318838507889e-08, + "logits/chosen": -2.135326385498047, + "logits/rejected": -2.1287293434143066, + "logps/chosen": -6.876168251037598, + "logps/rejected": -75.23163604736328, + "loss": 0.6771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024976778775453568, + "rewards/margins": 0.0398067981004715, + "rewards/rejected": -0.01483001746237278, + "step": 1760 + }, + { + "epoch": 0.1, + "learning_rate": 9.862969352942916e-08, + "logits/chosen": -2.1712591648101807, + "logits/rejected": -2.176724672317505, + "logps/chosen": -61.701805114746094, + "logps/rejected": -232.07183837890625, + "loss": 0.6305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0809326171875, + "rewards/margins": 0.10706482082605362, + "rewards/rejected": -0.02613220177590847, + "step": 1761 + }, + { + "epoch": 0.1, + "learning_rate": 9.862750148050484e-08, + "logits/chosen": -2.2563321590423584, + "logits/rejected": -2.2589473724365234, + "logps/chosen": -57.611629486083984, + "logps/rejected": -195.156982421875, + "loss": 0.5376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06959877163171768, + "rewards/margins": 0.5788730978965759, + "rewards/rejected": -0.5092743039131165, + "step": 1762 + }, + { + "epoch": 0.1, + "learning_rate": 9.862530770409381e-08, + "logits/chosen": -1.9625810384750366, + "logits/rejected": -1.9587831497192383, + "logps/chosen": -26.20934295654297, + "logps/rejected": -96.31590270996094, + "loss": 0.6981, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.029014205560088158, + "rewards/margins": -0.04552192986011505, + "rewards/rejected": 0.07453613728284836, + "step": 1763 + }, + { + "epoch": 0.1, + "learning_rate": 9.8623112200274e-08, + "logits/chosen": -1.9907817840576172, + "logits/rejected": -2.0009236335754395, + "logps/chosen": -27.644634246826172, + "logps/rejected": -209.88882446289062, + "loss": 0.6828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003909111022949219, + "rewards/margins": 0.04140911251306534, + "rewards/rejected": -0.03750000149011612, + "step": 1764 + }, + { + "epoch": 0.1, + "learning_rate": 9.862091496912339e-08, + "logits/chosen": -2.1394598484039307, + "logits/rejected": -2.1474180221557617, + "logps/chosen": -141.79397583007812, + "logps/rejected": -216.57521057128906, + "loss": 0.5295, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.70037841796875, + "rewards/margins": -0.03643953800201416, + "rewards/rejected": 0.7368179559707642, + "step": 1765 + }, + { + "epoch": 0.1, + "learning_rate": 9.861871601072004e-08, + "logits/chosen": -2.019789457321167, + "logits/rejected": -2.0230813026428223, + "logps/chosen": -14.838152885437012, + "logps/rejected": -98.54835510253906, + "loss": 0.6967, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014459229074418545, + "rewards/margins": -0.02462158352136612, + "rewards/rejected": 0.010162353515625, + "step": 1766 + }, + { + "epoch": 0.1, + "learning_rate": 9.86165153251421e-08, + "logits/chosen": -1.9931367635726929, + "logits/rejected": -1.9122203588485718, + "logps/chosen": -176.584716796875, + "logps/rejected": -403.09228515625, + "loss": 0.5717, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.45735475420951843, + "rewards/margins": -0.08211973309516907, + "rewards/rejected": 0.5394744873046875, + "step": 1767 + }, + { + "epoch": 0.1, + "learning_rate": 9.861431291246773e-08, + "logits/chosen": -2.041954755783081, + "logits/rejected": -2.027339220046997, + "logps/chosen": -31.754310607910156, + "logps/rejected": -200.9687957763672, + "loss": 0.6987, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07303352653980255, + "rewards/margins": -0.0657070130109787, + "rewards/rejected": 0.13874053955078125, + "step": 1768 + }, + { + "epoch": 0.1, + "learning_rate": 9.861210877277515e-08, + "logits/chosen": -2.1056318283081055, + "logits/rejected": -2.095015525817871, + "logps/chosen": -242.05670166015625, + "logps/rejected": -333.47357177734375, + "loss": 0.4692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8469452261924744, + "rewards/margins": 0.1708770990371704, + "rewards/rejected": 0.676068127155304, + "step": 1769 + }, + { + "epoch": 0.1, + "learning_rate": 9.860990290614269e-08, + "logits/chosen": -1.9575884342193604, + "logits/rejected": -1.9499971866607666, + "logps/chosen": -230.62789916992188, + "logps/rejected": -307.5919189453125, + "loss": 0.5232, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7535476684570312, + "rewards/margins": -0.00981903076171875, + "rewards/rejected": 0.76336669921875, + "step": 1770 + }, + { + "epoch": 0.1, + "learning_rate": 9.86076953126487e-08, + "logits/chosen": -2.1741976737976074, + "logits/rejected": -2.1740541458129883, + "logps/chosen": -21.960988998413086, + "logps/rejected": -54.487693786621094, + "loss": 0.6976, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02441253699362278, + "rewards/margins": -0.03388214111328125, + "rewards/rejected": 0.05829467996954918, + "step": 1771 + }, + { + "epoch": 0.1, + "learning_rate": 9.860548599237161e-08, + "logits/chosen": -1.9811193943023682, + "logits/rejected": -1.923215627670288, + "logps/chosen": -329.79400634765625, + "logps/rejected": -542.6094360351562, + "loss": 0.4545, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.070367455482483, + "rewards/margins": 0.1141357421875, + "rewards/rejected": 0.9562317132949829, + "step": 1772 + }, + { + "epoch": 0.1, + "learning_rate": 9.86032749453899e-08, + "logits/chosen": -2.0730361938476562, + "logits/rejected": -2.063305377960205, + "logps/chosen": -213.86270141601562, + "logps/rejected": -367.6566162109375, + "loss": 0.6049, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6376587152481079, + "rewards/margins": -0.2409210205078125, + "rewards/rejected": 0.8785797357559204, + "step": 1773 + }, + { + "epoch": 0.1, + "learning_rate": 9.860106217178212e-08, + "logits/chosen": -2.1207430362701416, + "logits/rejected": -2.111043930053711, + "logps/chosen": -173.3484344482422, + "logps/rejected": -247.28402709960938, + "loss": 0.5817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19402313232421875, + "rewards/margins": 0.23905333876609802, + "rewards/rejected": -0.04503021389245987, + "step": 1774 + }, + { + "epoch": 0.1, + "learning_rate": 9.859884767162687e-08, + "logits/chosen": -2.257478713989258, + "logits/rejected": -2.2613959312438965, + "logps/chosen": -21.32624053955078, + "logps/rejected": -149.38287353515625, + "loss": 0.6667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04706115648150444, + "rewards/margins": 0.11784820258617401, + "rewards/rejected": -0.16490936279296875, + "step": 1775 + }, + { + "epoch": 0.1, + "learning_rate": 9.859663144500283e-08, + "logits/chosen": -2.075852632522583, + "logits/rejected": -2.074606418609619, + "logps/chosen": -32.450469970703125, + "logps/rejected": -66.00724029541016, + "loss": 0.6954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036583710461854935, + "rewards/margins": 0.02592620626091957, + "rewards/rejected": -0.0625099167227745, + "step": 1776 + }, + { + "epoch": 0.1, + "learning_rate": 9.859441349198874e-08, + "logits/chosen": -2.244568347930908, + "logits/rejected": -2.243441343307495, + "logps/chosen": -0.00011145677126478404, + "logps/rejected": -78.75588989257812, + "loss": 0.6688, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0728108890134536e-07, + "rewards/margins": 0.09906300902366638, + "rewards/rejected": -0.09906311333179474, + "step": 1777 + }, + { + "epoch": 0.1, + "learning_rate": 9.859219381266337e-08, + "logits/chosen": -2.014220952987671, + "logits/rejected": -2.008603811264038, + "logps/chosen": -0.018886826932430267, + "logps/rejected": -98.54631042480469, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00018723196990322322, + "rewards/margins": 0.02886536344885826, + "rewards/rejected": -0.028678132221102715, + "step": 1778 + }, + { + "epoch": 0.1, + "learning_rate": 9.858997240710558e-08, + "logits/chosen": -2.373769998550415, + "logits/rejected": -2.375542640686035, + "logps/chosen": -70.24226379394531, + "logps/rejected": -237.2095489501953, + "loss": 0.4951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22211609780788422, + "rewards/margins": 0.6633270382881165, + "rewards/rejected": -0.44121095538139343, + "step": 1779 + }, + { + "epoch": 0.1, + "learning_rate": 9.85877492753943e-08, + "logits/chosen": -2.0806291103363037, + "logits/rejected": -1.9905339479446411, + "logps/chosen": -193.44558715820312, + "logps/rejected": -462.8778076171875, + "loss": 0.3942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5891128778457642, + "rewards/margins": 0.6843460202217102, + "rewards/rejected": -0.09523315727710724, + "step": 1780 + }, + { + "epoch": 0.1, + "learning_rate": 9.85855244176085e-08, + "logits/chosen": -2.203223705291748, + "logits/rejected": -2.1947078704833984, + "logps/chosen": -40.75498580932617, + "logps/rejected": -174.85252380371094, + "loss": 0.5473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18357506394386292, + "rewards/margins": 0.35281944274902344, + "rewards/rejected": -0.16924439370632172, + "step": 1781 + }, + { + "epoch": 0.1, + "learning_rate": 9.858329783382722e-08, + "logits/chosen": -2.1237475872039795, + "logits/rejected": -2.1205084323883057, + "logps/chosen": -20.441431045532227, + "logps/rejected": -128.1186065673828, + "loss": 0.6731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013209342956542969, + "rewards/margins": 0.07654666900634766, + "rewards/rejected": -0.08975601196289062, + "step": 1782 + }, + { + "epoch": 0.1, + "learning_rate": 9.858106952412955e-08, + "logits/chosen": -2.130948781967163, + "logits/rejected": -2.131563425064087, + "logps/chosen": -54.45125961303711, + "logps/rejected": -139.44578552246094, + "loss": 0.665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031955719459801912, + "rewards/margins": 0.16302147507667542, + "rewards/rejected": -0.16621704399585724, + "step": 1783 + }, + { + "epoch": 0.1, + "learning_rate": 9.857883948859465e-08, + "logits/chosen": -2.0451314449310303, + "logits/rejected": -2.0455434322357178, + "logps/chosen": -22.343971252441406, + "logps/rejected": -87.12324523925781, + "loss": 0.6648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.042684175074100494, + "rewards/margins": 0.1726531982421875, + "rewards/rejected": -0.2153373807668686, + "step": 1784 + }, + { + "epoch": 0.1, + "learning_rate": 9.857660772730176e-08, + "logits/chosen": -2.0606191158294678, + "logits/rejected": -2.0333807468414307, + "logps/chosen": -329.5361022949219, + "logps/rejected": -509.2061462402344, + "loss": 0.4853, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0926300287246704, + "rewards/margins": -0.07404172420501709, + "rewards/rejected": 1.1666717529296875, + "step": 1785 + }, + { + "epoch": 0.1, + "learning_rate": 9.857437424033013e-08, + "logits/chosen": -2.001209259033203, + "logits/rejected": -1.9671775102615356, + "logps/chosen": -249.69837951660156, + "logps/rejected": -351.590087890625, + "loss": 0.506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7053634524345398, + "rewards/margins": 0.03426361083984375, + "rewards/rejected": 0.671099841594696, + "step": 1786 + }, + { + "epoch": 0.1, + "learning_rate": 9.857213902775914e-08, + "logits/chosen": -2.2508885860443115, + "logits/rejected": -2.231872081756592, + "logps/chosen": -30.105819702148438, + "logps/rejected": -215.96514892578125, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11485081166028976, + "rewards/margins": 0.46447089314460754, + "rewards/rejected": -0.3496200740337372, + "step": 1787 + }, + { + "epoch": 0.1, + "learning_rate": 9.856990208966817e-08, + "logits/chosen": -2.1923201084136963, + "logits/rejected": -2.1859028339385986, + "logps/chosen": -0.9897783398628235, + "logps/rejected": -189.1728973388672, + "loss": 0.6149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021530389494728297, + "rewards/margins": 0.3553694188594818, + "rewards/rejected": -0.3555847108364105, + "step": 1788 + }, + { + "epoch": 0.1, + "learning_rate": 9.85676634261367e-08, + "logits/chosen": -2.1583034992218018, + "logits/rejected": -2.092332124710083, + "logps/chosen": -365.150390625, + "logps/rejected": -611.81396484375, + "loss": 0.4769, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2666138410568237, + "rewards/margins": -0.07844233512878418, + "rewards/rejected": 1.345056176185608, + "step": 1789 + }, + { + "epoch": 0.1, + "learning_rate": 9.856542303724425e-08, + "logits/chosen": -2.050388813018799, + "logits/rejected": -2.0431630611419678, + "logps/chosen": -42.80609130859375, + "logps/rejected": -120.85664367675781, + "loss": 0.6647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04526214674115181, + "rewards/margins": 0.08263702690601349, + "rewards/rejected": -0.127899169921875, + "step": 1790 + }, + { + "epoch": 0.1, + "learning_rate": 9.856318092307042e-08, + "logits/chosen": -2.1255857944488525, + "logits/rejected": -2.081146717071533, + "logps/chosen": -237.25637817382812, + "logps/rejected": -565.5111694335938, + "loss": 0.4617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6133056879043579, + "rewards/margins": 0.374786376953125, + "rewards/rejected": 0.23851929605007172, + "step": 1791 + }, + { + "epoch": 0.1, + "learning_rate": 9.856093708369486e-08, + "logits/chosen": -2.163566827774048, + "logits/rejected": -2.182335138320923, + "logps/chosen": -173.5377197265625, + "logps/rejected": -332.8367614746094, + "loss": 0.5014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35996705293655396, + "rewards/margins": 0.4400726556777954, + "rewards/rejected": -0.08010559529066086, + "step": 1792 + }, + { + "epoch": 0.1, + "learning_rate": 9.855869151919727e-08, + "logits/chosen": -2.215135335922241, + "logits/rejected": -2.2187438011169434, + "logps/chosen": -33.00028991699219, + "logps/rejected": -187.9149627685547, + "loss": 0.6057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.065948486328125, + "rewards/margins": 0.3261581361293793, + "rewards/rejected": -0.2602096498012543, + "step": 1793 + }, + { + "epoch": 0.1, + "learning_rate": 9.855644422965743e-08, + "logits/chosen": -2.1703929901123047, + "logits/rejected": -2.1757919788360596, + "logps/chosen": -184.40060424804688, + "logps/rejected": -172.2841796875, + "loss": 0.5695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1459396332502365, + "rewards/margins": 0.2581726014614105, + "rewards/rejected": -0.11223297566175461, + "step": 1794 + }, + { + "epoch": 0.1, + "learning_rate": 9.855419521515517e-08, + "logits/chosen": -2.1169323921203613, + "logits/rejected": -2.0709733963012695, + "logps/chosen": -224.40626525878906, + "logps/rejected": -368.70416259765625, + "loss": 0.4398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7213851809501648, + "rewards/margins": 0.29408109188079834, + "rewards/rejected": 0.42730408906936646, + "step": 1795 + }, + { + "epoch": 0.1, + "learning_rate": 9.855194447577042e-08, + "logits/chosen": -2.3154003620147705, + "logits/rejected": -2.2959070205688477, + "logps/chosen": -180.3695526123047, + "logps/rejected": -300.6953125, + "loss": 0.7156, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2863143980503082, + "rewards/margins": -0.4387619197368622, + "rewards/rejected": 0.7250763177871704, + "step": 1796 + }, + { + "epoch": 0.1, + "learning_rate": 9.854969201158307e-08, + "logits/chosen": -2.296921491622925, + "logits/rejected": -2.2971115112304688, + "logps/chosen": -26.92763328552246, + "logps/rejected": -139.26315307617188, + "loss": 0.6301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12054824829101562, + "rewards/margins": 0.09811325371265411, + "rewards/rejected": 0.02243499830365181, + "step": 1797 + }, + { + "epoch": 0.1, + "learning_rate": 9.85474378226732e-08, + "logits/chosen": -2.129330635070801, + "logits/rejected": -2.1250152587890625, + "logps/chosen": -51.833717346191406, + "logps/rejected": -299.42584228515625, + "loss": 0.5792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06904220581054688, + "rewards/margins": 0.4436393678188324, + "rewards/rejected": -0.3745971620082855, + "step": 1798 + }, + { + "epoch": 0.1, + "learning_rate": 9.854518190912087e-08, + "logits/chosen": -2.0911388397216797, + "logits/rejected": -2.0282375812530518, + "logps/chosen": -172.0991973876953, + "logps/rejected": -343.12445068359375, + "loss": 0.6279, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2616867125034332, + "rewards/margins": -0.04001006484031677, + "rewards/rejected": 0.30169677734375, + "step": 1799 + }, + { + "epoch": 0.1, + "learning_rate": 9.85429242710062e-08, + "logits/chosen": -1.957519769668579, + "logits/rejected": -1.8978976011276245, + "logps/chosen": -239.67633056640625, + "logps/rejected": -498.5669250488281, + "loss": 0.3775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7073883414268494, + "rewards/margins": 0.7112793326377869, + "rewards/rejected": -0.0038909912109375, + "step": 1800 + }, + { + "epoch": 0.1, + "learning_rate": 9.854066490840944e-08, + "logits/chosen": -2.1119863986968994, + "logits/rejected": -2.1013336181640625, + "logps/chosen": -38.652503967285156, + "logps/rejected": -163.81199645996094, + "loss": 0.6573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019989013671875, + "rewards/margins": 0.08860321342945099, + "rewards/rejected": -0.09060211479663849, + "step": 1801 + }, + { + "epoch": 0.1, + "learning_rate": 9.853840382141081e-08, + "logits/chosen": -2.072343587875366, + "logits/rejected": -2.0434603691101074, + "logps/chosen": -256.90740966796875, + "logps/rejected": -346.41009521484375, + "loss": 0.5546, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.668365478515625, + "rewards/margins": -0.09221190214157104, + "rewards/rejected": 0.760577380657196, + "step": 1802 + }, + { + "epoch": 0.1, + "learning_rate": 9.853614101009065e-08, + "logits/chosen": -2.067595958709717, + "logits/rejected": -2.0783421993255615, + "logps/chosen": -223.99591064453125, + "logps/rejected": -271.018798828125, + "loss": 0.5243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6471222043037415, + "rewards/margins": 0.011227428913116455, + "rewards/rejected": 0.635894775390625, + "step": 1803 + }, + { + "epoch": 0.1, + "learning_rate": 9.853387647452934e-08, + "logits/chosen": -2.067171812057495, + "logits/rejected": -2.0633928775787354, + "logps/chosen": -246.07244873046875, + "logps/rejected": -332.7003173828125, + "loss": 0.5821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20790405571460724, + "rewards/margins": 0.19177857041358948, + "rewards/rejected": 0.01612548902630806, + "step": 1804 + }, + { + "epoch": 0.11, + "learning_rate": 9.853161021480733e-08, + "logits/chosen": -1.955950379371643, + "logits/rejected": -1.9512131214141846, + "logps/chosen": -42.781951904296875, + "logps/rejected": -86.40010070800781, + "loss": 0.651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07220077514648438, + "rewards/margins": 0.19097900390625, + "rewards/rejected": -0.2631797790527344, + "step": 1805 + }, + { + "epoch": 0.11, + "learning_rate": 9.852934223100513e-08, + "logits/chosen": -2.1315600872039795, + "logits/rejected": -2.1245086193084717, + "logps/chosen": -222.6830596923828, + "logps/rejected": -305.74444580078125, + "loss": 0.6339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21549224853515625, + "rewards/margins": 0.034925833344459534, + "rewards/rejected": 0.18056641519069672, + "step": 1806 + }, + { + "epoch": 0.11, + "learning_rate": 9.852707252320332e-08, + "logits/chosen": -1.9554908275604248, + "logits/rejected": -1.9840199947357178, + "logps/chosen": -361.54095458984375, + "logps/rejected": -403.6378173828125, + "loss": 0.6076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26424866914749146, + "rewards/margins": 0.09010925889015198, + "rewards/rejected": 0.17413941025733948, + "step": 1807 + }, + { + "epoch": 0.11, + "learning_rate": 9.852480109148252e-08, + "logits/chosen": -2.0457026958465576, + "logits/rejected": -2.000229597091675, + "logps/chosen": -309.9869384765625, + "logps/rejected": -423.9073181152344, + "loss": 0.6266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1116790771484375, + "rewards/margins": 0.08424682915210724, + "rewards/rejected": 0.02743225172162056, + "step": 1808 + }, + { + "epoch": 0.11, + "learning_rate": 9.852252793592342e-08, + "logits/chosen": -2.118380069732666, + "logits/rejected": -2.1030335426330566, + "logps/chosen": -82.96397399902344, + "logps/rejected": -214.0634307861328, + "loss": 0.6735, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1285247802734375, + "rewards/margins": -0.04571075737476349, + "rewards/rejected": 0.174235537648201, + "step": 1809 + }, + { + "epoch": 0.11, + "learning_rate": 9.852025305660678e-08, + "logits/chosen": -2.086158514022827, + "logits/rejected": -2.077327013015747, + "logps/chosen": -29.867002487182617, + "logps/rejected": -251.33641052246094, + "loss": 0.6199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035888671875, + "rewards/margins": 0.299765020608902, + "rewards/rejected": -0.263876348733902, + "step": 1810 + }, + { + "epoch": 0.11, + "learning_rate": 9.851797645361342e-08, + "logits/chosen": -2.184727191925049, + "logits/rejected": -2.1808536052703857, + "logps/chosen": -36.91178894042969, + "logps/rejected": -160.39576721191406, + "loss": 0.7349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15069428086280823, + "rewards/margins": 0.006811514496803284, + "rewards/rejected": -0.1575057953596115, + "step": 1811 + }, + { + "epoch": 0.11, + "learning_rate": 9.851569812702422e-08, + "logits/chosen": -2.097790002822876, + "logits/rejected": -2.034999370574951, + "logps/chosen": -254.1571807861328, + "logps/rejected": -412.7576904296875, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8328628540039062, + "rewards/margins": 0.4630996584892273, + "rewards/rejected": 0.36976319551467896, + "step": 1812 + }, + { + "epoch": 0.11, + "learning_rate": 9.851341807692008e-08, + "logits/chosen": -2.0117695331573486, + "logits/rejected": -1.9772570133209229, + "logps/chosen": -172.6371612548828, + "logps/rejected": -298.6866760253906, + "loss": 0.5727, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5374771356582642, + "rewards/margins": -0.11390531063079834, + "rewards/rejected": 0.6513824462890625, + "step": 1813 + }, + { + "epoch": 0.11, + "learning_rate": 9.851113630338205e-08, + "logits/chosen": -2.046025037765503, + "logits/rejected": -1.988344430923462, + "logps/chosen": -245.13217163085938, + "logps/rejected": -426.695556640625, + "loss": 0.3609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8245788812637329, + "rewards/margins": 0.6378173828125, + "rewards/rejected": 0.18676148355007172, + "step": 1814 + }, + { + "epoch": 0.11, + "learning_rate": 9.850885280649115e-08, + "logits/chosen": -2.2307381629943848, + "logits/rejected": -2.2324166297912598, + "logps/chosen": -14.09427547454834, + "logps/rejected": -190.99118041992188, + "loss": 0.6129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05682821199297905, + "rewards/margins": 0.26396626234054565, + "rewards/rejected": -0.2071380615234375, + "step": 1815 + }, + { + "epoch": 0.11, + "learning_rate": 9.850656758632852e-08, + "logits/chosen": -2.268745183944702, + "logits/rejected": -2.240922212600708, + "logps/chosen": -46.09962463378906, + "logps/rejected": -365.8843994140625, + "loss": 0.5565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05623321607708931, + "rewards/margins": 0.5557724237442017, + "rewards/rejected": -0.49953919649124146, + "step": 1816 + }, + { + "epoch": 0.11, + "learning_rate": 9.850428064297533e-08, + "logits/chosen": -2.0630905628204346, + "logits/rejected": -2.078040599822998, + "logps/chosen": -259.24261474609375, + "logps/rejected": -306.324951171875, + "loss": 0.4203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7767090201377869, + "rewards/margins": 0.35621342062950134, + "rewards/rejected": 0.4204955995082855, + "step": 1817 + }, + { + "epoch": 0.11, + "learning_rate": 9.850199197651285e-08, + "logits/chosen": -2.1991970539093018, + "logits/rejected": -2.152045249938965, + "logps/chosen": -209.29281616210938, + "logps/rejected": -377.885498046875, + "loss": 0.5335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5194366574287415, + "rewards/margins": 0.10027465224266052, + "rewards/rejected": 0.41916200518608093, + "step": 1818 + }, + { + "epoch": 0.11, + "learning_rate": 9.849970158702234e-08, + "logits/chosen": -2.2284576892852783, + "logits/rejected": -2.2368507385253906, + "logps/chosen": -132.3824920654297, + "logps/rejected": -271.0688171386719, + "loss": 0.6712, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12676087021827698, + "rewards/margins": -0.03998717665672302, + "rewards/rejected": 0.166748046875, + "step": 1819 + }, + { + "epoch": 0.11, + "learning_rate": 9.849740947458521e-08, + "logits/chosen": -2.1960251331329346, + "logits/rejected": -2.202357769012451, + "logps/chosen": -9.284908294677734, + "logps/rejected": -99.31649780273438, + "loss": 0.6728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013118267059326172, + "rewards/margins": 0.0974888801574707, + "rewards/rejected": -0.11060714721679688, + "step": 1820 + }, + { + "epoch": 0.11, + "learning_rate": 9.849511563928286e-08, + "logits/chosen": -2.0742878913879395, + "logits/rejected": -2.0686349868774414, + "logps/chosen": -20.24775505065918, + "logps/rejected": -118.81853485107422, + "loss": 0.6073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07013358920812607, + "rewards/margins": 0.23351630568504333, + "rewards/rejected": -0.16338272392749786, + "step": 1821 + }, + { + "epoch": 0.11, + "learning_rate": 9.849282008119679e-08, + "logits/chosen": -2.176934242248535, + "logits/rejected": -2.1771318912506104, + "logps/chosen": -0.3252103626728058, + "logps/rejected": -183.26376342773438, + "loss": 0.5617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006185659673064947, + "rewards/margins": 0.6277468800544739, + "rewards/rejected": -0.6339325308799744, + "step": 1822 + }, + { + "epoch": 0.11, + "learning_rate": 9.849052280040857e-08, + "logits/chosen": -1.9598941802978516, + "logits/rejected": -1.9090059995651245, + "logps/chosen": -219.3662872314453, + "logps/rejected": -360.7811279296875, + "loss": 0.5966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14484405517578125, + "rewards/margins": 0.1725814789533615, + "rewards/rejected": -0.02773742750287056, + "step": 1823 + }, + { + "epoch": 0.11, + "learning_rate": 9.848822379699975e-08, + "logits/chosen": -2.123319149017334, + "logits/rejected": -2.123605728149414, + "logps/chosen": -31.37078857421875, + "logps/rejected": -224.10557556152344, + "loss": 0.4994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12835197150707245, + "rewards/margins": 0.8016628623008728, + "rewards/rejected": -0.6733108758926392, + "step": 1824 + }, + { + "epoch": 0.11, + "learning_rate": 9.848592307105206e-08, + "logits/chosen": -2.0657742023468018, + "logits/rejected": -2.057983636856079, + "logps/chosen": -47.93931579589844, + "logps/rejected": -155.53713989257812, + "loss": 0.622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01750030554831028, + "rewards/margins": 0.19130095839500427, + "rewards/rejected": -0.20880126953125, + "step": 1825 + }, + { + "epoch": 0.11, + "learning_rate": 9.848362062264722e-08, + "logits/chosen": -1.9907444715499878, + "logits/rejected": -1.946032166481018, + "logps/chosen": -213.92318725585938, + "logps/rejected": -288.1441650390625, + "loss": 0.4447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8575378656387329, + "rewards/margins": 0.1951690912246704, + "rewards/rejected": 0.6623687744140625, + "step": 1826 + }, + { + "epoch": 0.11, + "learning_rate": 9.848131645186699e-08, + "logits/chosen": -1.904072880744934, + "logits/rejected": -1.9097474813461304, + "logps/chosen": -0.0005623718607239425, + "logps/rejected": -111.51632690429688, + "loss": 0.6783, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8053664108447265e-06, + "rewards/margins": 0.015357269905507565, + "rewards/rejected": -0.01536407507956028, + "step": 1827 + }, + { + "epoch": 0.11, + "learning_rate": 9.847901055879326e-08, + "logits/chosen": -2.158872365951538, + "logits/rejected": -2.152637481689453, + "logps/chosen": -30.41823959350586, + "logps/rejected": -151.19898986816406, + "loss": 0.5613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06518860161304474, + "rewards/margins": 0.5329391360282898, + "rewards/rejected": -0.46775054931640625, + "step": 1828 + }, + { + "epoch": 0.11, + "learning_rate": 9.847670294350796e-08, + "logits/chosen": -2.2798218727111816, + "logits/rejected": -2.275743007659912, + "logps/chosen": -0.003265473758801818, + "logps/rejected": -99.41104125976562, + "loss": 0.6759, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.1056122427107766e-05, + "rewards/margins": 0.07032684981822968, + "rewards/rejected": -0.07028579711914062, + "step": 1829 + }, + { + "epoch": 0.11, + "learning_rate": 9.847439360609302e-08, + "logits/chosen": -2.133169174194336, + "logits/rejected": -2.1267971992492676, + "logps/chosen": -68.16622924804688, + "logps/rejected": -195.9503173828125, + "loss": 0.6107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.141368106007576, + "rewards/margins": 0.21641846001148224, + "rewards/rejected": -0.07505035400390625, + "step": 1830 + }, + { + "epoch": 0.11, + "learning_rate": 9.847208254663052e-08, + "logits/chosen": -2.3244059085845947, + "logits/rejected": -2.326545476913452, + "logps/chosen": -0.003164896974340081, + "logps/rejected": -139.44247436523438, + "loss": 0.6421, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.800096601480618e-05, + "rewards/margins": 0.21357741951942444, + "rewards/rejected": -0.21361541748046875, + "step": 1831 + }, + { + "epoch": 0.11, + "learning_rate": 9.846976976520254e-08, + "logits/chosen": -2.245515823364258, + "logits/rejected": -2.236604928970337, + "logps/chosen": -0.0006840219721198082, + "logps/rejected": -80.45426940917969, + "loss": 0.6475, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1427403851248528e-07, + "rewards/margins": 0.19170935451984406, + "rewards/rejected": -0.19170914590358734, + "step": 1832 + }, + { + "epoch": 0.11, + "learning_rate": 9.846745526189126e-08, + "logits/chosen": -2.118790864944458, + "logits/rejected": -2.150689125061035, + "logps/chosen": -222.64108276367188, + "logps/rejected": -227.35910034179688, + "loss": 0.6014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17316284775733948, + "rewards/margins": 0.21856842935085297, + "rewards/rejected": -0.04540557786822319, + "step": 1833 + }, + { + "epoch": 0.11, + "learning_rate": 9.846513903677886e-08, + "logits/chosen": -2.0043118000030518, + "logits/rejected": -2.023073434829712, + "logps/chosen": -325.62554931640625, + "logps/rejected": -537.9315795898438, + "loss": 0.4585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46244508028030396, + "rewards/margins": 0.57171630859375, + "rewards/rejected": -0.10927124321460724, + "step": 1834 + }, + { + "epoch": 0.11, + "learning_rate": 9.846282108994768e-08, + "logits/chosen": -2.256190061569214, + "logits/rejected": -2.229687213897705, + "logps/chosen": -209.63482666015625, + "logps/rejected": -343.7170104980469, + "loss": 0.5628, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6701812744140625, + "rewards/margins": -0.1021575927734375, + "rewards/rejected": 0.7723388671875, + "step": 1835 + }, + { + "epoch": 0.11, + "learning_rate": 9.846050142148002e-08, + "logits/chosen": -1.9441910982131958, + "logits/rejected": -1.943332314491272, + "logps/chosen": -6.556649208068848, + "logps/rejected": -28.221450805664062, + "loss": 0.6745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030803490430116653, + "rewards/margins": 0.028275681659579277, + "rewards/rejected": 0.0025278092361986637, + "step": 1836 + }, + { + "epoch": 0.11, + "learning_rate": 9.845818003145831e-08, + "logits/chosen": -2.2344810962677, + "logits/rejected": -2.2127275466918945, + "logps/chosen": -34.512123107910156, + "logps/rejected": -255.86387634277344, + "loss": 0.523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04267120361328125, + "rewards/margins": 0.9258026480674744, + "rewards/rejected": -0.9684738516807556, + "step": 1837 + }, + { + "epoch": 0.11, + "learning_rate": 9.845585691996502e-08, + "logits/chosen": -2.1883132457733154, + "logits/rejected": -2.1768076419830322, + "logps/chosen": -60.883399963378906, + "logps/rejected": -131.37611389160156, + "loss": 0.6858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015534973703324795, + "rewards/margins": 0.04642333835363388, + "rewards/rejected": -0.06195831298828125, + "step": 1838 + }, + { + "epoch": 0.11, + "learning_rate": 9.845353208708268e-08, + "logits/chosen": -2.073258876800537, + "logits/rejected": -2.0738582611083984, + "logps/chosen": -255.47178649902344, + "logps/rejected": -293.0946044921875, + "loss": 0.572, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4471176266670227, + "rewards/margins": -0.02011871337890625, + "rewards/rejected": 0.46723634004592896, + "step": 1839 + }, + { + "epoch": 0.11, + "learning_rate": 9.845120553289383e-08, + "logits/chosen": -2.1998095512390137, + "logits/rejected": -2.189826488494873, + "logps/chosen": -52.489967346191406, + "logps/rejected": -155.27865600585938, + "loss": 0.6014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09407539665699005, + "rewards/margins": 0.24157296121120453, + "rewards/rejected": -0.14749756455421448, + "step": 1840 + }, + { + "epoch": 0.11, + "learning_rate": 9.844887725748118e-08, + "logits/chosen": -2.4094035625457764, + "logits/rejected": -2.4066171646118164, + "logps/chosen": -0.008601521141827106, + "logps/rejected": -239.24037170410156, + "loss": 0.5489, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.321323886979371e-05, + "rewards/margins": 0.6952391266822815, + "rewards/rejected": -0.6953323483467102, + "step": 1841 + }, + { + "epoch": 0.11, + "learning_rate": 9.844654726092742e-08, + "logits/chosen": -2.0917205810546875, + "logits/rejected": -2.0481855869293213, + "logps/chosen": -238.90655517578125, + "logps/rejected": -371.37078857421875, + "loss": 0.5817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08845062553882599, + "rewards/margins": 0.3421921133995056, + "rewards/rejected": -0.25374147295951843, + "step": 1842 + }, + { + "epoch": 0.11, + "learning_rate": 9.844421554331532e-08, + "logits/chosen": -2.1619253158569336, + "logits/rejected": -2.1851234436035156, + "logps/chosen": -132.36793518066406, + "logps/rejected": -284.53607177734375, + "loss": 0.5706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10060882568359375, + "rewards/margins": 0.4177688658237457, + "rewards/rejected": -0.317160040140152, + "step": 1843 + }, + { + "epoch": 0.11, + "learning_rate": 9.844188210472772e-08, + "logits/chosen": -2.112417459487915, + "logits/rejected": -2.1081857681274414, + "logps/chosen": -34.81557083129883, + "logps/rejected": -176.49717712402344, + "loss": 0.6726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0063499449752271175, + "rewards/margins": 0.058002471923828125, + "rewards/rejected": -0.05165252834558487, + "step": 1844 + }, + { + "epoch": 0.11, + "learning_rate": 9.843954694524752e-08, + "logits/chosen": -2.2233076095581055, + "logits/rejected": -2.2155404090881348, + "logps/chosen": -8.809813499450684, + "logps/rejected": -225.3572540283203, + "loss": 0.4945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06288766860961914, + "rewards/margins": 0.9335846900939941, + "rewards/rejected": -0.870697021484375, + "step": 1845 + }, + { + "epoch": 0.11, + "learning_rate": 9.843721006495766e-08, + "logits/chosen": -2.116626262664795, + "logits/rejected": -2.1164565086364746, + "logps/chosen": -54.95465850830078, + "logps/rejected": -137.58908081054688, + "loss": 0.5732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16593895852565765, + "rewards/margins": 0.33608973026275635, + "rewards/rejected": -0.1701507568359375, + "step": 1846 + }, + { + "epoch": 0.11, + "learning_rate": 9.843487146394116e-08, + "logits/chosen": -2.102713108062744, + "logits/rejected": -2.1044301986694336, + "logps/chosen": -24.061601638793945, + "logps/rejected": -109.72261047363281, + "loss": 0.6882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03210468217730522, + "rewards/margins": 0.04581127688288689, + "rewards/rejected": -0.07791595906019211, + "step": 1847 + }, + { + "epoch": 0.11, + "learning_rate": 9.843253114228111e-08, + "logits/chosen": -2.083296537399292, + "logits/rejected": -2.087648868560791, + "logps/chosen": -32.177310943603516, + "logps/rejected": -219.9412841796875, + "loss": 0.5724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10649146884679794, + "rewards/margins": 0.36703377962112427, + "rewards/rejected": -0.2605423033237457, + "step": 1848 + }, + { + "epoch": 0.11, + "learning_rate": 9.843018910006066e-08, + "logits/chosen": -2.010941743850708, + "logits/rejected": -2.007052421569824, + "logps/chosen": -42.95244598388672, + "logps/rejected": -190.90234375, + "loss": 0.674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024675751104950905, + "rewards/margins": 0.025473786517977715, + "rewards/rejected": -0.0007980346563272178, + "step": 1849 + }, + { + "epoch": 0.11, + "learning_rate": 9.842784533736299e-08, + "logits/chosen": -1.9586737155914307, + "logits/rejected": -1.9675301313400269, + "logps/chosen": -138.69456481933594, + "logps/rejected": -224.53955078125, + "loss": 0.7362, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.14058838784694672, + "rewards/margins": -0.030941776931285858, + "rewards/rejected": -0.10964661091566086, + "step": 1850 + }, + { + "epoch": 0.11, + "learning_rate": 9.842549985427135e-08, + "logits/chosen": -2.208338737487793, + "logits/rejected": -2.2065086364746094, + "logps/chosen": -44.95831298828125, + "logps/rejected": -109.737548828125, + "loss": 0.6552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.053578950464725494, + "rewards/margins": 0.0910286009311676, + "rewards/rejected": -0.03744964674115181, + "step": 1851 + }, + { + "epoch": 0.11, + "learning_rate": 9.842315265086909e-08, + "logits/chosen": -2.2074146270751953, + "logits/rejected": -2.120471239089966, + "logps/chosen": -205.24920654296875, + "logps/rejected": -373.20404052734375, + "loss": 0.6482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14897461235523224, + "rewards/margins": 0.04471130669116974, + "rewards/rejected": 0.1042633056640625, + "step": 1852 + }, + { + "epoch": 0.11, + "learning_rate": 9.84208037272396e-08, + "logits/chosen": -2.0129687786102295, + "logits/rejected": -2.017587900161743, + "logps/chosen": -32.31787872314453, + "logps/rejected": -144.94256591796875, + "loss": 0.629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05025367811322212, + "rewards/margins": 0.21086005866527557, + "rewards/rejected": -0.16060638427734375, + "step": 1853 + }, + { + "epoch": 0.11, + "learning_rate": 9.841845308346629e-08, + "logits/chosen": -2.2796289920806885, + "logits/rejected": -2.260831594467163, + "logps/chosen": -10.421210289001465, + "logps/rejected": -280.05401611328125, + "loss": 0.5078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004861068911850452, + "rewards/margins": 1.0107699632644653, + "rewards/rejected": -1.015631079673767, + "step": 1854 + }, + { + "epoch": 0.11, + "learning_rate": 9.841610071963269e-08, + "logits/chosen": -2.01646089553833, + "logits/rejected": -2.010751247406006, + "logps/chosen": -327.0754699707031, + "logps/rejected": -342.9495544433594, + "loss": 0.4778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.960528552532196, + "rewards/margins": 0.002062976360321045, + "rewards/rejected": 0.958465576171875, + "step": 1855 + }, + { + "epoch": 0.11, + "learning_rate": 9.841374663582237e-08, + "logits/chosen": -2.197953224182129, + "logits/rejected": -2.1828932762145996, + "logps/chosen": -7.42662523407489e-05, + "logps/rejected": -162.1580810546875, + "loss": 0.5995, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.79464562836074e-07, + "rewards/margins": 0.4067208170890808, + "rewards/rejected": -0.4067215025424957, + "step": 1856 + }, + { + "epoch": 0.11, + "learning_rate": 9.841139083211894e-08, + "logits/chosen": -2.167288303375244, + "logits/rejected": -2.1036033630371094, + "logps/chosen": -204.4041290283203, + "logps/rejected": -503.6902770996094, + "loss": 0.3578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9970413446426392, + "rewards/margins": 0.5609359741210938, + "rewards/rejected": 0.436105340719223, + "step": 1857 + }, + { + "epoch": 0.11, + "learning_rate": 9.840903330860611e-08, + "logits/chosen": -1.9924657344818115, + "logits/rejected": -1.968820571899414, + "logps/chosen": -86.68002319335938, + "logps/rejected": -202.12704467773438, + "loss": 0.5537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20041199028491974, + "rewards/margins": 0.4605697989463806, + "rewards/rejected": -0.2601577937602997, + "step": 1858 + }, + { + "epoch": 0.11, + "learning_rate": 9.840667406536762e-08, + "logits/chosen": -2.269103765487671, + "logits/rejected": -2.2702102661132812, + "logps/chosen": -39.60801696777344, + "logps/rejected": -196.5592041015625, + "loss": 0.6453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06282692402601242, + "rewards/margins": 0.15271645784378052, + "rewards/rejected": -0.0898895263671875, + "step": 1859 + }, + { + "epoch": 0.11, + "learning_rate": 9.840431310248729e-08, + "logits/chosen": -2.2247660160064697, + "logits/rejected": -2.207961082458496, + "logps/chosen": -2.4977192878723145, + "logps/rejected": -70.25555419921875, + "loss": 0.6694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019801020622253418, + "rewards/margins": 0.08952529728412628, + "rewards/rejected": -0.06972427666187286, + "step": 1860 + }, + { + "epoch": 0.11, + "learning_rate": 9.8401950420049e-08, + "logits/chosen": -2.126878261566162, + "logits/rejected": -1.9788435697555542, + "logps/chosen": -205.74118041992188, + "logps/rejected": -489.3896484375, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1407623291015625, + "rewards/margins": 0.343170166015625, + "rewards/rejected": -0.2024078369140625, + "step": 1861 + }, + { + "epoch": 0.11, + "learning_rate": 9.839958601813664e-08, + "logits/chosen": -2.157135009765625, + "logits/rejected": -2.144014835357666, + "logps/chosen": -238.14866638183594, + "logps/rejected": -395.1307373046875, + "loss": 0.6117, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.39554595947265625, + "rewards/margins": -0.08810272812843323, + "rewards/rejected": 0.4836486876010895, + "step": 1862 + }, + { + "epoch": 0.11, + "learning_rate": 9.839721989683427e-08, + "logits/chosen": -2.1341357231140137, + "logits/rejected": -2.1167330741882324, + "logps/chosen": -217.0616455078125, + "logps/rejected": -400.1790466308594, + "loss": 0.5618, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.48487550020217896, + "rewards/margins": -0.07827150821685791, + "rewards/rejected": 0.5631470084190369, + "step": 1863 + }, + { + "epoch": 0.11, + "learning_rate": 9.839485205622588e-08, + "logits/chosen": -2.1346426010131836, + "logits/rejected": -2.123713731765747, + "logps/chosen": -0.0014724229695275426, + "logps/rejected": -204.54798889160156, + "loss": 0.5905, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4185206964612007e-06, + "rewards/margins": 0.4662659764289856, + "rewards/rejected": -0.4662674069404602, + "step": 1864 + }, + { + "epoch": 0.11, + "learning_rate": 9.839248249639565e-08, + "logits/chosen": -2.2595226764678955, + "logits/rejected": -2.2564868927001953, + "logps/chosen": -168.0581512451172, + "logps/rejected": -297.2675476074219, + "loss": 0.4911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5381485223770142, + "rewards/margins": 0.18448030948638916, + "rewards/rejected": 0.353668212890625, + "step": 1865 + }, + { + "epoch": 0.11, + "learning_rate": 9.839011121742771e-08, + "logits/chosen": -2.2179551124572754, + "logits/rejected": -2.213702917098999, + "logps/chosen": -56.75483322143555, + "logps/rejected": -143.49069213867188, + "loss": 0.5945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1077808365225792, + "rewards/margins": 0.41380348801612854, + "rewards/rejected": -0.30602264404296875, + "step": 1866 + }, + { + "epoch": 0.11, + "learning_rate": 9.838773821940632e-08, + "logits/chosen": -2.2164785861968994, + "logits/rejected": -2.199770927429199, + "logps/chosen": -15.676332473754883, + "logps/rejected": -202.76947021484375, + "loss": 0.6445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0023539543617516756, + "rewards/margins": 0.21942701935768127, + "rewards/rejected": -0.21707306802272797, + "step": 1867 + }, + { + "epoch": 0.11, + "learning_rate": 9.838536350241579e-08, + "logits/chosen": -1.9464725255966187, + "logits/rejected": -1.9404116868972778, + "logps/chosen": -267.46533203125, + "logps/rejected": -395.086669921875, + "loss": 0.4009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.813244640827179, + "rewards/margins": 0.4734649658203125, + "rewards/rejected": 0.33977967500686646, + "step": 1868 + }, + { + "epoch": 0.11, + "learning_rate": 9.838298706654045e-08, + "logits/chosen": -2.1749520301818848, + "logits/rejected": -2.163365602493286, + "logps/chosen": -0.08867602050304413, + "logps/rejected": -131.99139404296875, + "loss": 0.6407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021585523791145533, + "rewards/margins": 0.1985970139503479, + "rewards/rejected": -0.19881287217140198, + "step": 1869 + }, + { + "epoch": 0.11, + "learning_rate": 9.838060891186476e-08, + "logits/chosen": -2.1150271892547607, + "logits/rejected": -2.1030077934265137, + "logps/chosen": -167.09530639648438, + "logps/rejected": -384.0501708984375, + "loss": 0.7289, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3730407655239105, + "rewards/margins": -0.49435123801231384, + "rewards/rejected": 0.8673920035362244, + "step": 1870 + }, + { + "epoch": 0.11, + "learning_rate": 9.837822903847318e-08, + "logits/chosen": -2.18196439743042, + "logits/rejected": -2.183166980743408, + "logps/chosen": -0.0032312015537172556, + "logps/rejected": -236.28140258789062, + "loss": 0.5147, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.07061481080018e-05, + "rewards/margins": 0.9166651368141174, + "rewards/rejected": -0.916735827922821, + "step": 1871 + }, + { + "epoch": 0.11, + "learning_rate": 9.837584744645027e-08, + "logits/chosen": -2.0308573246002197, + "logits/rejected": -2.0303843021392822, + "logps/chosen": -20.54157257080078, + "logps/rejected": -118.60255432128906, + "loss": 0.6004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0313020721077919, + "rewards/margins": 0.4348905682563782, + "rewards/rejected": -0.4661926329135895, + "step": 1872 + }, + { + "epoch": 0.11, + "learning_rate": 9.837346413588061e-08, + "logits/chosen": -2.17348051071167, + "logits/rejected": -2.1362428665161133, + "logps/chosen": -233.49131774902344, + "logps/rejected": -319.69866943359375, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5880874991416931, + "rewards/margins": 0.822059690952301, + "rewards/rejected": -0.23397217690944672, + "step": 1873 + }, + { + "epoch": 0.11, + "learning_rate": 9.83710791068489e-08, + "logits/chosen": -2.15087890625, + "logits/rejected": -2.150592088699341, + "logps/chosen": -6.003548622131348, + "logps/rejected": -166.99472045898438, + "loss": 0.5837, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.659393274167087e-05, + "rewards/margins": 0.4948045611381531, + "rewards/rejected": -0.494821161031723, + "step": 1874 + }, + { + "epoch": 0.11, + "learning_rate": 9.836869235943983e-08, + "logits/chosen": -2.1055121421813965, + "logits/rejected": -2.104125738143921, + "logps/chosen": -10.56773853302002, + "logps/rejected": -156.64077758789062, + "loss": 0.6666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03388586267828941, + "rewards/margins": 0.06099005043506622, + "rewards/rejected": -0.02710418775677681, + "step": 1875 + }, + { + "epoch": 0.11, + "learning_rate": 9.836630389373823e-08, + "logits/chosen": -2.043290853500366, + "logits/rejected": -2.0030879974365234, + "logps/chosen": -201.00845336914062, + "logps/rejected": -493.7919616699219, + "loss": 0.4864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6847625970840454, + "rewards/margins": 0.1417328119277954, + "rewards/rejected": 0.54302978515625, + "step": 1876 + }, + { + "epoch": 0.11, + "learning_rate": 9.836391370982893e-08, + "logits/chosen": -2.1527557373046875, + "logits/rejected": -2.1434733867645264, + "logps/chosen": -39.32090759277344, + "logps/rejected": -188.59031677246094, + "loss": 0.5437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2617015838623047, + "rewards/margins": 0.38934746384620667, + "rewards/rejected": -0.12764587998390198, + "step": 1877 + }, + { + "epoch": 0.11, + "learning_rate": 9.836152180779683e-08, + "logits/chosen": -2.25590181350708, + "logits/rejected": -2.2428183555603027, + "logps/chosen": -13.597670555114746, + "logps/rejected": -237.87945556640625, + "loss": 0.54, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018049145117402077, + "rewards/margins": 0.7220362424850464, + "rewards/rejected": -0.7039871215820312, + "step": 1878 + }, + { + "epoch": 0.11, + "learning_rate": 9.835912818772691e-08, + "logits/chosen": -1.9569294452667236, + "logits/rejected": -1.9104037284851074, + "logps/chosen": -201.07118225097656, + "logps/rejected": -539.2378540039062, + "loss": 0.4404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6893936395645142, + "rewards/margins": 0.3963577449321747, + "rewards/rejected": 0.2930358946323395, + "step": 1879 + }, + { + "epoch": 0.11, + "learning_rate": 9.835673284970423e-08, + "logits/chosen": -1.9705636501312256, + "logits/rejected": -1.9700300693511963, + "logps/chosen": -23.322509765625, + "logps/rejected": -134.779052734375, + "loss": 0.6711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.042397499084472656, + "rewards/margins": 0.16215725243091583, + "rewards/rejected": -0.2045547515153885, + "step": 1880 + }, + { + "epoch": 0.11, + "learning_rate": 9.835433579381385e-08, + "logits/chosen": -2.2207391262054443, + "logits/rejected": -2.193145990371704, + "logps/chosen": -226.08474731445312, + "logps/rejected": -352.42669677734375, + "loss": 0.4272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8634597659111023, + "rewards/margins": 0.26047515869140625, + "rewards/rejected": 0.602984607219696, + "step": 1881 + }, + { + "epoch": 0.11, + "learning_rate": 9.835193702014092e-08, + "logits/chosen": -2.0025734901428223, + "logits/rejected": -2.0707826614379883, + "logps/chosen": -242.69554138183594, + "logps/rejected": -287.0820617675781, + "loss": 0.4709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5803207755088806, + "rewards/margins": 0.2554825246334076, + "rewards/rejected": 0.324838250875473, + "step": 1882 + }, + { + "epoch": 0.11, + "learning_rate": 9.83495365287707e-08, + "logits/chosen": -2.210142135620117, + "logits/rejected": -2.1739370822906494, + "logps/chosen": -287.2879638671875, + "logps/rejected": -353.6888427734375, + "loss": 0.47, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7151764035224915, + "rewards/margins": 0.1783905029296875, + "rewards/rejected": 0.536785900592804, + "step": 1883 + }, + { + "epoch": 0.11, + "learning_rate": 9.834713431978842e-08, + "logits/chosen": -1.9835234880447388, + "logits/rejected": -1.9546661376953125, + "logps/chosen": -303.90936279296875, + "logps/rejected": -399.69140625, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.083581566810608, + "rewards/margins": 0.6268036365509033, + "rewards/rejected": 0.456777960062027, + "step": 1884 + }, + { + "epoch": 0.11, + "learning_rate": 9.834473039327943e-08, + "logits/chosen": -2.2179577350616455, + "logits/rejected": -2.2175142765045166, + "logps/chosen": -5.504420757293701, + "logps/rejected": -179.84561157226562, + "loss": 0.5446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024901723489165306, + "rewards/margins": 0.6840509176254272, + "rewards/rejected": -0.659149169921875, + "step": 1885 + }, + { + "epoch": 0.11, + "learning_rate": 9.834232474932916e-08, + "logits/chosen": -2.268094301223755, + "logits/rejected": -2.221356153488159, + "logps/chosen": -178.79214477539062, + "logps/rejected": -370.26971435546875, + "loss": 0.5302, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7589569091796875, + "rewards/margins": -0.034304797649383545, + "rewards/rejected": 0.793261706829071, + "step": 1886 + }, + { + "epoch": 0.11, + "learning_rate": 9.833991738802304e-08, + "logits/chosen": -2.220994472503662, + "logits/rejected": -2.2006843090057373, + "logps/chosen": -177.76263427734375, + "logps/rejected": -341.2847900390625, + "loss": 0.4951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.557965099811554, + "rewards/margins": 0.20668336749076843, + "rewards/rejected": 0.3512817323207855, + "step": 1887 + }, + { + "epoch": 0.11, + "learning_rate": 9.833750830944659e-08, + "logits/chosen": -2.141756772994995, + "logits/rejected": -2.097097635269165, + "logps/chosen": -187.31137084960938, + "logps/rejected": -422.66241455078125, + "loss": 0.4641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7486343383789062, + "rewards/margins": 0.24717557430267334, + "rewards/rejected": 0.5014587640762329, + "step": 1888 + }, + { + "epoch": 0.11, + "learning_rate": 9.83350975136854e-08, + "logits/chosen": -2.1995677947998047, + "logits/rejected": -2.186659574508667, + "logps/chosen": -163.28565979003906, + "logps/rejected": -217.139892578125, + "loss": 0.5711, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5567657351493835, + "rewards/margins": -0.0767822265625, + "rewards/rejected": 0.6335479617118835, + "step": 1889 + }, + { + "epoch": 0.11, + "learning_rate": 9.833268500082512e-08, + "logits/chosen": -2.165492296218872, + "logits/rejected": -2.162048816680908, + "logps/chosen": -48.24079513549805, + "logps/rejected": -121.98497009277344, + "loss": 0.6265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0698162093758583, + "rewards/margins": 0.16788597404956818, + "rewards/rejected": -0.09806976467370987, + "step": 1890 + }, + { + "epoch": 0.11, + "learning_rate": 9.833027077095145e-08, + "logits/chosen": -2.0604825019836426, + "logits/rejected": -2.045799732208252, + "logps/chosen": -1.4708698987960815, + "logps/rejected": -168.23712158203125, + "loss": 0.5937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007633090135641396, + "rewards/margins": 0.4483654201030731, + "rewards/rejected": -0.4491287171840668, + "step": 1891 + }, + { + "epoch": 0.11, + "learning_rate": 9.832785482415016e-08, + "logits/chosen": -1.9779136180877686, + "logits/rejected": -1.979459285736084, + "logps/chosen": -194.0645751953125, + "logps/rejected": -289.10601806640625, + "loss": 0.5542, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.51324462890625, + "rewards/margins": -0.056640625, + "rewards/rejected": 0.56988525390625, + "step": 1892 + }, + { + "epoch": 0.11, + "learning_rate": 9.832543716050705e-08, + "logits/chosen": -2.225146770477295, + "logits/rejected": -2.188551425933838, + "logps/chosen": -61.54095458984375, + "logps/rejected": -239.31829833984375, + "loss": 0.563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05395698547363281, + "rewards/margins": 0.5200992822647095, + "rewards/rejected": -0.4661422669887543, + "step": 1893 + }, + { + "epoch": 0.11, + "learning_rate": 9.832301778010804e-08, + "logits/chosen": -2.157393217086792, + "logits/rejected": -2.158482074737549, + "logps/chosen": -25.673538208007812, + "logps/rejected": -53.99533462524414, + "loss": 0.6444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13632355630397797, + "rewards/margins": 0.04839363694190979, + "rewards/rejected": 0.08792991936206818, + "step": 1894 + }, + { + "epoch": 0.11, + "learning_rate": 9.832059668303906e-08, + "logits/chosen": -2.2271580696105957, + "logits/rejected": -2.221390962600708, + "logps/chosen": -0.0004874715523328632, + "logps/rejected": -115.04193115234375, + "loss": 0.666, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2405019333527889e-05, + "rewards/margins": 0.11258982867002487, + "rewards/rejected": -0.11260223388671875, + "step": 1895 + }, + { + "epoch": 0.11, + "learning_rate": 9.831817386938612e-08, + "logits/chosen": -2.264577627182007, + "logits/rejected": -2.264923095703125, + "logps/chosen": -33.69742202758789, + "logps/rejected": -178.56874084472656, + "loss": 0.5711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07858047634363174, + "rewards/margins": 0.48973312973976135, + "rewards/rejected": -0.4111526608467102, + "step": 1896 + }, + { + "epoch": 0.11, + "learning_rate": 9.83157493392353e-08, + "logits/chosen": -2.1516377925872803, + "logits/rejected": -2.1415114402770996, + "logps/chosen": -222.73117065429688, + "logps/rejected": -303.92193603515625, + "loss": 0.4624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8033370971679688, + "rewards/margins": 0.15246731042861938, + "rewards/rejected": 0.6508697867393494, + "step": 1897 + }, + { + "epoch": 0.11, + "learning_rate": 9.831332309267273e-08, + "logits/chosen": -2.2730331420898438, + "logits/rejected": -2.2610576152801514, + "logps/chosen": -5.0718841552734375, + "logps/rejected": -146.762939453125, + "loss": 0.64, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.038113784044981, + "rewards/margins": 0.18121834099292755, + "rewards/rejected": -0.14310455322265625, + "step": 1898 + }, + { + "epoch": 0.11, + "learning_rate": 9.831089512978459e-08, + "logits/chosen": -2.043665647506714, + "logits/rejected": -2.0210297107696533, + "logps/chosen": -179.2559814453125, + "logps/rejected": -290.00457763671875, + "loss": 0.4982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6549011468887329, + "rewards/margins": 0.16630861163139343, + "rewards/rejected": 0.4885925352573395, + "step": 1899 + }, + { + "epoch": 0.11, + "learning_rate": 9.830846545065714e-08, + "logits/chosen": -2.1640212535858154, + "logits/rejected": -2.16481614112854, + "logps/chosen": -113.60050964355469, + "logps/rejected": -127.59474182128906, + "loss": 0.7174, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.011322021484375, + "rewards/margins": -0.041741181164979935, + "rewards/rejected": 0.030419159680604935, + "step": 1900 + }, + { + "epoch": 0.11, + "learning_rate": 9.830603405537671e-08, + "logits/chosen": -2.075395107269287, + "logits/rejected": -2.0709869861602783, + "logps/chosen": -0.020628558471798897, + "logps/rejected": -136.14175415039062, + "loss": 0.536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005755357560701668, + "rewards/margins": 0.7568484544754028, + "rewards/rejected": -0.7562729120254517, + "step": 1901 + }, + { + "epoch": 0.11, + "learning_rate": 9.830360094402965e-08, + "logits/chosen": -2.124307632446289, + "logits/rejected": -2.116915225982666, + "logps/chosen": -20.53544807434082, + "logps/rejected": -113.9892578125, + "loss": 0.696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05812683328986168, + "rewards/margins": 0.04912262037396431, + "rewards/rejected": -0.10724945366382599, + "step": 1902 + }, + { + "epoch": 0.11, + "learning_rate": 9.830116611670241e-08, + "logits/chosen": -2.067674160003662, + "logits/rejected": -2.0497679710388184, + "logps/chosen": -41.47068786621094, + "logps/rejected": -204.21763610839844, + "loss": 0.5627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1006397232413292, + "rewards/margins": 0.49637338519096375, + "rewards/rejected": -0.39573365449905396, + "step": 1903 + }, + { + "epoch": 0.11, + "learning_rate": 9.829872957348147e-08, + "logits/chosen": -2.025548219680786, + "logits/rejected": -1.9476101398468018, + "logps/chosen": -172.17623901367188, + "logps/rejected": -361.27557373046875, + "loss": 0.4571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7160888910293579, + "rewards/margins": 0.3203491270542145, + "rewards/rejected": 0.39573976397514343, + "step": 1904 + }, + { + "epoch": 0.11, + "learning_rate": 9.82962913144534e-08, + "logits/chosen": -2.110179901123047, + "logits/rejected": -2.0984907150268555, + "logps/chosen": -25.8609619140625, + "logps/rejected": -110.18830871582031, + "loss": 0.6338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14426708221435547, + "rewards/margins": 0.08608607947826385, + "rewards/rejected": 0.058180999010801315, + "step": 1905 + }, + { + "epoch": 0.11, + "learning_rate": 9.829385133970484e-08, + "logits/chosen": -2.237151622772217, + "logits/rejected": -2.22634220123291, + "logps/chosen": -24.55573844909668, + "logps/rejected": -103.9046401977539, + "loss": 0.6669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017525101080536842, + "rewards/margins": 0.16080132126808167, + "rewards/rejected": -0.14327621459960938, + "step": 1906 + }, + { + "epoch": 0.11, + "learning_rate": 9.829140964932243e-08, + "logits/chosen": -2.1780223846435547, + "logits/rejected": -2.2070860862731934, + "logps/chosen": -158.11273193359375, + "logps/rejected": -190.63766479492188, + "loss": 0.5771, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3875274658203125, + "rewards/margins": -0.07953187823295593, + "rewards/rejected": 0.46705934405326843, + "step": 1907 + }, + { + "epoch": 0.11, + "learning_rate": 9.828896624339294e-08, + "logits/chosen": -2.08343768119812, + "logits/rejected": -2.032949924468994, + "logps/chosen": -268.83331298828125, + "logps/rejected": -669.677490234375, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5192718505859375, + "rewards/margins": 0.6996185183525085, + "rewards/rejected": -0.18034668266773224, + "step": 1908 + }, + { + "epoch": 0.11, + "learning_rate": 9.828652112200318e-08, + "logits/chosen": -2.036447525024414, + "logits/rejected": -2.016761541366577, + "logps/chosen": -70.03716278076172, + "logps/rejected": -268.618896484375, + "loss": 0.5234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12061386555433273, + "rewards/margins": 0.6618614196777344, + "rewards/rejected": -0.541247546672821, + "step": 1909 + }, + { + "epoch": 0.11, + "learning_rate": 9.828407428523997e-08, + "logits/chosen": -2.0540497303009033, + "logits/rejected": -2.0322587490081787, + "logps/chosen": -176.9465789794922, + "logps/rejected": -207.5920867919922, + "loss": 0.5966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1920822113752365, + "rewards/margins": 0.1782272309064865, + "rewards/rejected": 0.01385498046875, + "step": 1910 + }, + { + "epoch": 0.11, + "learning_rate": 9.828162573319026e-08, + "logits/chosen": -2.0902976989746094, + "logits/rejected": -2.0189688205718994, + "logps/chosen": -242.98760986328125, + "logps/rejected": -345.9964599609375, + "loss": 0.5532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18081359565258026, + "rewards/margins": 0.427032470703125, + "rewards/rejected": -0.24621887505054474, + "step": 1911 + }, + { + "epoch": 0.11, + "learning_rate": 9.827917546594105e-08, + "logits/chosen": -2.0557608604431152, + "logits/rejected": -2.0540401935577393, + "logps/chosen": -41.97508239746094, + "logps/rejected": -90.5237045288086, + "loss": 0.658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016664505004882812, + "rewards/margins": 0.10989570617675781, + "rewards/rejected": -0.12656021118164062, + "step": 1912 + }, + { + "epoch": 0.11, + "learning_rate": 9.827672348357934e-08, + "logits/chosen": -1.9224389791488647, + "logits/rejected": -1.9156184196472168, + "logps/chosen": -64.59428405761719, + "logps/rejected": -193.48617553710938, + "loss": 0.6264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03659362718462944, + "rewards/margins": 0.18174439668655396, + "rewards/rejected": -0.14515076577663422, + "step": 1913 + }, + { + "epoch": 0.11, + "learning_rate": 9.827426978619228e-08, + "logits/chosen": -2.1383004188537598, + "logits/rejected": -2.1424617767333984, + "logps/chosen": -40.084922790527344, + "logps/rejected": -202.71803283691406, + "loss": 0.6646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07046394795179367, + "rewards/margins": 0.18821066617965698, + "rewards/rejected": -0.25867462158203125, + "step": 1914 + }, + { + "epoch": 0.11, + "learning_rate": 9.827181437386702e-08, + "logits/chosen": -2.163382053375244, + "logits/rejected": -2.1575980186462402, + "logps/chosen": -4.220972061157227, + "logps/rejected": -113.31285095214844, + "loss": 0.668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012301779352128506, + "rewards/margins": 0.07728943973779678, + "rewards/rejected": -0.08959122002124786, + "step": 1915 + }, + { + "epoch": 0.11, + "learning_rate": 9.82693572466908e-08, + "logits/chosen": -2.1932027339935303, + "logits/rejected": -2.1601626873016357, + "logps/chosen": -297.58099365234375, + "logps/rejected": -449.6003723144531, + "loss": 0.4397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.991406261920929, + "rewards/margins": 0.1790984869003296, + "rewards/rejected": 0.8123077750205994, + "step": 1916 + }, + { + "epoch": 0.11, + "learning_rate": 9.826689840475089e-08, + "logits/chosen": -2.052792549133301, + "logits/rejected": -2.041276693344116, + "logps/chosen": -285.44329833984375, + "logps/rejected": -540.7166748046875, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9263977408409119, + "rewards/margins": 0.696075439453125, + "rewards/rejected": 0.23032227158546448, + "step": 1917 + }, + { + "epoch": 0.11, + "learning_rate": 9.826443784813466e-08, + "logits/chosen": -2.1800057888031006, + "logits/rejected": -2.100315570831299, + "logps/chosen": -131.1634521484375, + "logps/rejected": -360.0722961425781, + "loss": 0.612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08894958347082138, + "rewards/margins": 0.21624755859375, + "rewards/rejected": -0.12729798257350922, + "step": 1918 + }, + { + "epoch": 0.11, + "learning_rate": 9.82619755769295e-08, + "logits/chosen": -2.1696813106536865, + "logits/rejected": -2.1574060916900635, + "logps/chosen": -49.78791427612305, + "logps/rejected": -227.10333251953125, + "loss": 0.5454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08728867024183273, + "rewards/margins": 0.6592888236045837, + "rewards/rejected": -0.5720001459121704, + "step": 1919 + }, + { + "epoch": 0.11, + "learning_rate": 9.82595115912229e-08, + "logits/chosen": -2.179183006286621, + "logits/rejected": -2.1416473388671875, + "logps/chosen": -11.210066795349121, + "logps/rejected": -271.0221862792969, + "loss": 0.5359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03421440348029137, + "rewards/margins": 0.8319630026817322, + "rewards/rejected": -0.8661773800849915, + "step": 1920 + }, + { + "epoch": 0.11, + "learning_rate": 9.825704589110237e-08, + "logits/chosen": -2.052788019180298, + "logits/rejected": -2.0539815425872803, + "logps/chosen": -0.04469885304570198, + "logps/rejected": -188.089111328125, + "loss": 0.5792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013714785454794765, + "rewards/margins": 0.5120043754577637, + "rewards/rejected": -0.5133758783340454, + "step": 1921 + }, + { + "epoch": 0.11, + "learning_rate": 9.825457847665554e-08, + "logits/chosen": -2.216245174407959, + "logits/rejected": -2.2092807292938232, + "logps/chosen": -0.05710793286561966, + "logps/rejected": -186.69158935546875, + "loss": 0.621, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9653310207322647e-07, + "rewards/margins": 0.3157406747341156, + "rewards/rejected": -0.3157409727573395, + "step": 1922 + }, + { + "epoch": 0.11, + "learning_rate": 9.825210934797004e-08, + "logits/chosen": -2.2296125888824463, + "logits/rejected": -2.2257423400878906, + "logps/chosen": -1.720537781715393, + "logps/rejected": -112.7789535522461, + "loss": 0.669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10055731981992722, + "rewards/margins": 0.21073874831199646, + "rewards/rejected": -0.3112960755825043, + "step": 1923 + }, + { + "epoch": 0.11, + "learning_rate": 9.824963850513358e-08, + "logits/chosen": -2.080709218978882, + "logits/rejected": -2.0903189182281494, + "logps/chosen": -16.78982162475586, + "logps/rejected": -78.66725158691406, + "loss": 0.6818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03541164472699165, + "rewards/margins": 0.0892190933227539, + "rewards/rejected": -0.12463074177503586, + "step": 1924 + }, + { + "epoch": 0.11, + "learning_rate": 9.824716594823395e-08, + "logits/chosen": -1.9030791521072388, + "logits/rejected": -1.9057496786117554, + "logps/chosen": -301.96453857421875, + "logps/rejected": -426.7705383300781, + "loss": 0.501, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7608307003974915, + "rewards/margins": -0.005017101764678955, + "rewards/rejected": 0.7658478021621704, + "step": 1925 + }, + { + "epoch": 0.11, + "learning_rate": 9.824469167735898e-08, + "logits/chosen": -2.227638006210327, + "logits/rejected": -2.226564407348633, + "logps/chosen": -19.50104331970215, + "logps/rejected": -45.14573669433594, + "loss": 0.7274, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03631114959716797, + "rewards/margins": -0.12346325069665909, + "rewards/rejected": 0.08715210109949112, + "step": 1926 + }, + { + "epoch": 0.11, + "learning_rate": 9.824221569259659e-08, + "logits/chosen": -2.227458953857422, + "logits/rejected": -2.1966543197631836, + "logps/chosen": -146.29205322265625, + "logps/rejected": -338.80322265625, + "loss": 0.5983, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4192825257778168, + "rewards/margins": -0.11706694960594177, + "rewards/rejected": 0.5363494753837585, + "step": 1927 + }, + { + "epoch": 0.11, + "learning_rate": 9.823973799403469e-08, + "logits/chosen": -2.2902467250823975, + "logits/rejected": -2.2824573516845703, + "logps/chosen": -28.6923770904541, + "logps/rejected": -117.68630981445312, + "loss": 0.6554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03029308281838894, + "rewards/margins": 0.16106052696704865, + "rewards/rejected": -0.19135360419750214, + "step": 1928 + }, + { + "epoch": 0.11, + "learning_rate": 9.823725858176136e-08, + "logits/chosen": -2.1729648113250732, + "logits/rejected": -2.1759486198425293, + "logps/chosen": -50.96340560913086, + "logps/rejected": -224.40945434570312, + "loss": 0.6508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04900245741009712, + "rewards/margins": 0.29850271344184875, + "rewards/rejected": -0.3475051820278168, + "step": 1929 + }, + { + "epoch": 0.11, + "learning_rate": 9.823477745586463e-08, + "logits/chosen": -1.958603024482727, + "logits/rejected": -1.9257878065109253, + "logps/chosen": -330.5366516113281, + "logps/rejected": -468.4842529296875, + "loss": 0.4696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9232727289199829, + "rewards/margins": 0.08723145723342896, + "rewards/rejected": 0.836041271686554, + "step": 1930 + }, + { + "epoch": 0.11, + "learning_rate": 9.823229461643267e-08, + "logits/chosen": -2.0306906700134277, + "logits/rejected": -2.00186824798584, + "logps/chosen": -135.76185607910156, + "logps/rejected": -185.40711975097656, + "loss": 0.6427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15495911240577698, + "rewards/margins": 0.04401245713233948, + "rewards/rejected": 0.1109466552734375, + "step": 1931 + }, + { + "epoch": 0.11, + "learning_rate": 9.822981006355366e-08, + "logits/chosen": -2.1364548206329346, + "logits/rejected": -2.1074442863464355, + "logps/chosen": -287.8447265625, + "logps/rejected": -395.76812744140625, + "loss": 0.4102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.797045886516571, + "rewards/margins": 0.44612425565719604, + "rewards/rejected": 0.350921630859375, + "step": 1932 + }, + { + "epoch": 0.11, + "learning_rate": 9.82273237973159e-08, + "logits/chosen": -2.008213520050049, + "logits/rejected": -2.0217390060424805, + "logps/chosen": -174.8038330078125, + "logps/rejected": -160.8114471435547, + "loss": 0.5092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5576385855674744, + "rewards/margins": 0.1607910394668579, + "rewards/rejected": 0.39684754610061646, + "step": 1933 + }, + { + "epoch": 0.11, + "learning_rate": 9.82248358178077e-08, + "logits/chosen": -2.276975154876709, + "logits/rejected": -2.230323076248169, + "logps/chosen": -148.9346466064453, + "logps/rejected": -326.44873046875, + "loss": 0.501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5446578860282898, + "rewards/margins": 0.09859463572502136, + "rewards/rejected": 0.44606325030326843, + "step": 1934 + }, + { + "epoch": 0.11, + "learning_rate": 9.822234612511742e-08, + "logits/chosen": -2.1307783126831055, + "logits/rejected": -2.1341397762298584, + "logps/chosen": -46.536415100097656, + "logps/rejected": -156.29002380371094, + "loss": 0.639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039423372596502304, + "rewards/margins": 0.19894029200077057, + "rewards/rejected": -0.15951691567897797, + "step": 1935 + }, + { + "epoch": 0.11, + "learning_rate": 9.821985471933353e-08, + "logits/chosen": -2.021848678588867, + "logits/rejected": -2.0137722492218018, + "logps/chosen": -0.00012910005170851946, + "logps/rejected": -109.39064025878906, + "loss": 0.6231, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7403465335519286e-06, + "rewards/margins": 0.30301645398139954, + "rewards/rejected": -0.303018182516098, + "step": 1936 + }, + { + "epoch": 0.11, + "learning_rate": 9.821736160054452e-08, + "logits/chosen": -2.098966121673584, + "logits/rejected": -2.08552622795105, + "logps/chosen": -279.80316162109375, + "logps/rejected": -399.38543701171875, + "loss": 0.2723, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0005066394805908, + "rewards/margins": 1.1187317371368408, + "rewards/rejected": -0.11822509765625, + "step": 1937 + }, + { + "epoch": 0.11, + "learning_rate": 9.8214866768839e-08, + "logits/chosen": -2.188187837600708, + "logits/rejected": -2.1883254051208496, + "logps/chosen": -8.388007164001465, + "logps/rejected": -101.87944030761719, + "loss": 0.6579, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012514877133071423, + "rewards/margins": 0.1602863371372223, + "rewards/rejected": -0.172801211476326, + "step": 1938 + }, + { + "epoch": 0.11, + "learning_rate": 9.821237022430555e-08, + "logits/chosen": -2.003774642944336, + "logits/rejected": -2.02113938331604, + "logps/chosen": -189.48001098632812, + "logps/rejected": -288.0155029296875, + "loss": 0.5339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48825380206108093, + "rewards/margins": 0.09429016709327698, + "rewards/rejected": 0.39396363496780396, + "step": 1939 + }, + { + "epoch": 0.11, + "learning_rate": 9.820987196703287e-08, + "logits/chosen": -2.10453724861145, + "logits/rejected": -2.100141763687134, + "logps/chosen": -1.9907904061255977e-05, + "logps/rejected": -165.23805236816406, + "loss": 0.5958, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.218634390123043e-07, + "rewards/margins": 0.4475109577178955, + "rewards/rejected": -0.4475112855434418, + "step": 1940 + }, + { + "epoch": 0.11, + "learning_rate": 9.820737199710974e-08, + "logits/chosen": -2.067018985748291, + "logits/rejected": -2.0255932807922363, + "logps/chosen": -203.3121795654297, + "logps/rejected": -332.6728515625, + "loss": 0.581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10544891655445099, + "rewards/margins": 0.3396407961845398, + "rewards/rejected": -0.23419189453125, + "step": 1941 + }, + { + "epoch": 0.11, + "learning_rate": 9.820487031462493e-08, + "logits/chosen": -2.0532479286193848, + "logits/rejected": -2.0439765453338623, + "logps/chosen": -348.5098876953125, + "logps/rejected": -392.89501953125, + "loss": 0.5105, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1125916242599487, + "rewards/margins": -0.21727287769317627, + "rewards/rejected": 1.329864501953125, + "step": 1942 + }, + { + "epoch": 0.11, + "learning_rate": 9.820236691966732e-08, + "logits/chosen": -2.169626474380493, + "logits/rejected": -2.1792635917663574, + "logps/chosen": -34.44849395751953, + "logps/rejected": -178.5330352783203, + "loss": 0.5441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0945255309343338, + "rewards/margins": 0.5678058862686157, + "rewards/rejected": -0.4732803404331207, + "step": 1943 + }, + { + "epoch": 0.11, + "learning_rate": 9.819986181232587e-08, + "logits/chosen": -2.0204293727874756, + "logits/rejected": -1.8826647996902466, + "logps/chosen": -225.25341796875, + "logps/rejected": -688.987548828125, + "loss": 0.508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7200683951377869, + "rewards/margins": 0.05419313907623291, + "rewards/rejected": 0.665875256061554, + "step": 1944 + }, + { + "epoch": 0.11, + "learning_rate": 9.819735499268956e-08, + "logits/chosen": -2.2757387161254883, + "logits/rejected": -2.287263870239258, + "logps/chosen": -14.726764678955078, + "logps/rejected": -299.9801025390625, + "loss": 0.5489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03472938761115074, + "rewards/margins": 0.6767551302909851, + "rewards/rejected": -0.6420257687568665, + "step": 1945 + }, + { + "epoch": 0.11, + "learning_rate": 9.819484646084744e-08, + "logits/chosen": -2.204540729522705, + "logits/rejected": -2.192401885986328, + "logps/chosen": -0.0012313994811847806, + "logps/rejected": -196.50643920898438, + "loss": 0.5236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4554761694162153e-05, + "rewards/margins": 0.8491103053092957, + "rewards/rejected": -0.8491348624229431, + "step": 1946 + }, + { + "epoch": 0.11, + "learning_rate": 9.819233621688862e-08, + "logits/chosen": -2.1057333946228027, + "logits/rejected": -2.041268825531006, + "logps/chosen": -232.11520385742188, + "logps/rejected": -506.6239013671875, + "loss": 0.4515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.875030517578125, + "rewards/margins": 0.13274532556533813, + "rewards/rejected": 0.7422851920127869, + "step": 1947 + }, + { + "epoch": 0.11, + "learning_rate": 9.81898242609023e-08, + "logits/chosen": -2.2405519485473633, + "logits/rejected": -2.194301128387451, + "logps/chosen": -151.56878662109375, + "logps/rejected": -407.7980041503906, + "loss": 0.5789, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5406631827354431, + "rewards/margins": -0.14540553092956543, + "rewards/rejected": 0.6860687136650085, + "step": 1948 + }, + { + "epoch": 0.11, + "learning_rate": 9.818731059297767e-08, + "logits/chosen": -2.2236554622650146, + "logits/rejected": -2.207026720046997, + "logps/chosen": -10.267657279968262, + "logps/rejected": -163.25650024414062, + "loss": 0.5384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009385394863784313, + "rewards/margins": 0.7555468678474426, + "rewards/rejected": -0.7649322748184204, + "step": 1949 + }, + { + "epoch": 0.11, + "learning_rate": 9.818479521320408e-08, + "logits/chosen": -2.181295871734619, + "logits/rejected": -2.14890718460083, + "logps/chosen": -240.41970825195312, + "logps/rejected": -330.766845703125, + "loss": 0.412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9062469601631165, + "rewards/margins": 0.3290008306503296, + "rewards/rejected": 0.5772461295127869, + "step": 1950 + }, + { + "epoch": 0.11, + "learning_rate": 9.818227812167087e-08, + "logits/chosen": -2.1702277660369873, + "logits/rejected": -2.166452646255493, + "logps/chosen": -20.856019973754883, + "logps/rejected": -174.07997131347656, + "loss": 0.6466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1035928726196289, + "rewards/margins": 0.3289159834384918, + "rewards/rejected": -0.4325088560581207, + "step": 1951 + }, + { + "epoch": 0.11, + "learning_rate": 9.817975931846744e-08, + "logits/chosen": -2.197118043899536, + "logits/rejected": -2.200432538986206, + "logps/chosen": -38.08660888671875, + "logps/rejected": -103.71880340576172, + "loss": 0.5768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20228691399097443, + "rewards/margins": 0.311178982257843, + "rewards/rejected": -0.108892060816288, + "step": 1952 + }, + { + "epoch": 0.11, + "learning_rate": 9.81772388036833e-08, + "logits/chosen": -2.099416494369507, + "logits/rejected": -2.079936981201172, + "logps/chosen": -175.26559448242188, + "logps/rejected": -326.093994140625, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.594647228717804, + "rewards/margins": 0.693988025188446, + "rewards/rejected": -0.09934081882238388, + "step": 1953 + }, + { + "epoch": 0.11, + "learning_rate": 9.817471657740798e-08, + "logits/chosen": -2.0348072052001953, + "logits/rejected": -2.0201146602630615, + "logps/chosen": -52.27684020996094, + "logps/rejected": -286.1573486328125, + "loss": 0.5648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28682443499565125, + "rewards/margins": 0.25686532258987427, + "rewards/rejected": 0.02995910681784153, + "step": 1954 + }, + { + "epoch": 0.11, + "learning_rate": 9.817219263973107e-08, + "logits/chosen": -2.1472809314727783, + "logits/rejected": -2.2052159309387207, + "logps/chosen": -129.98391723632812, + "logps/rejected": -192.97027587890625, + "loss": 0.6006, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.33094483613967896, + "rewards/margins": -0.011593610048294067, + "rewards/rejected": 0.342538446187973, + "step": 1955 + }, + { + "epoch": 0.11, + "learning_rate": 9.816966699074226e-08, + "logits/chosen": -2.164719820022583, + "logits/rejected": -2.1650097370147705, + "logps/chosen": -1.8298546075820923, + "logps/rejected": -32.566097259521484, + "loss": 0.7002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03500726446509361, + "rewards/margins": 0.005407165735960007, + "rewards/rejected": -0.04041443020105362, + "step": 1956 + }, + { + "epoch": 0.11, + "learning_rate": 9.816713963053123e-08, + "logits/chosen": -2.1347978115081787, + "logits/rejected": -2.0954625606536865, + "logps/chosen": -135.55624389648438, + "logps/rejected": -300.6548767089844, + "loss": 0.5638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15477906167507172, + "rewards/margins": 0.4611603021621704, + "rewards/rejected": -0.3063812255859375, + "step": 1957 + }, + { + "epoch": 0.11, + "learning_rate": 9.816461055918779e-08, + "logits/chosen": -2.324049472808838, + "logits/rejected": -2.314842939376831, + "logps/chosen": -13.62354850769043, + "logps/rejected": -102.52236938476562, + "loss": 0.5778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0730811133980751, + "rewards/margins": 0.4369162619113922, + "rewards/rejected": -0.3638351559638977, + "step": 1958 + }, + { + "epoch": 0.11, + "learning_rate": 9.81620797768018e-08, + "logits/chosen": -2.1734511852264404, + "logits/rejected": -2.156301259994507, + "logps/chosen": -163.93814086914062, + "logps/rejected": -313.6593933105469, + "loss": 0.5654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.259765625, + "rewards/margins": 0.286917120218277, + "rewards/rejected": -0.02715148963034153, + "step": 1959 + }, + { + "epoch": 0.11, + "learning_rate": 9.815954728346313e-08, + "logits/chosen": -2.0153167247772217, + "logits/rejected": -1.9984807968139648, + "logps/chosen": -212.06109619140625, + "logps/rejected": -212.63677978515625, + "loss": 0.5819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22229157388210297, + "rewards/margins": 0.20418396592140198, + "rewards/rejected": 0.01810760609805584, + "step": 1960 + }, + { + "epoch": 0.11, + "learning_rate": 9.815701307926179e-08, + "logits/chosen": -2.297593832015991, + "logits/rejected": -2.2949025630950928, + "logps/chosen": -12.444865226745605, + "logps/rejected": -189.42892456054688, + "loss": 0.67, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0096282958984375, + "rewards/margins": 0.07589111477136612, + "rewards/rejected": -0.06626281887292862, + "step": 1961 + }, + { + "epoch": 0.11, + "learning_rate": 9.815447716428777e-08, + "logits/chosen": -2.193030595779419, + "logits/rejected": -2.1903746128082275, + "logps/chosen": -4.17488956451416, + "logps/rejected": -103.73419189453125, + "loss": 0.6312, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.084442065912299e-05, + "rewards/margins": 0.2628541886806488, + "rewards/rejected": -0.2629150450229645, + "step": 1962 + }, + { + "epoch": 0.11, + "learning_rate": 9.815193953863118e-08, + "logits/chosen": -2.1307663917541504, + "logits/rejected": -2.1266744136810303, + "logps/chosen": -23.401573181152344, + "logps/rejected": -120.46405029296875, + "loss": 0.6948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04153022915124893, + "rewards/margins": 0.0005222335457801819, + "rewards/rejected": 0.04100799560546875, + "step": 1963 + }, + { + "epoch": 0.11, + "learning_rate": 9.814940020238216e-08, + "logits/chosen": -2.2116446495056152, + "logits/rejected": -2.2076425552368164, + "logps/chosen": -12.6251802444458, + "logps/rejected": -209.14749145507812, + "loss": 0.4835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04999885708093643, + "rewards/margins": 1.055840015411377, + "rewards/rejected": -1.0058411359786987, + "step": 1964 + }, + { + "epoch": 0.11, + "learning_rate": 9.814685915563093e-08, + "logits/chosen": -2.155961751937866, + "logits/rejected": -2.157630443572998, + "logps/chosen": -29.13968849182129, + "logps/rejected": -144.285400390625, + "loss": 0.5738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04356556013226509, + "rewards/margins": 0.49363425374031067, + "rewards/rejected": -0.4500686824321747, + "step": 1965 + }, + { + "epoch": 0.11, + "learning_rate": 9.814431639846771e-08, + "logits/chosen": -2.0831823348999023, + "logits/rejected": -2.0882046222686768, + "logps/chosen": -32.05919647216797, + "logps/rejected": -179.32122802734375, + "loss": 0.6245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18467120826244354, + "rewards/margins": 0.6622953414916992, + "rewards/rejected": -0.846966564655304, + "step": 1966 + }, + { + "epoch": 0.11, + "learning_rate": 9.814177193098292e-08, + "logits/chosen": -2.106841802597046, + "logits/rejected": -2.1203396320343018, + "logps/chosen": -80.35684204101562, + "logps/rejected": -189.01199340820312, + "loss": 0.6248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028585052117705345, + "rewards/margins": 0.29711687564849854, + "rewards/rejected": -0.32570192217826843, + "step": 1967 + }, + { + "epoch": 0.11, + "learning_rate": 9.813922575326689e-08, + "logits/chosen": -2.04986572265625, + "logits/rejected": -2.0463335514068604, + "logps/chosen": -191.33993530273438, + "logps/rejected": -254.16053771972656, + "loss": 0.5396, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7837738394737244, + "rewards/margins": -0.12511134147644043, + "rewards/rejected": 0.9088851809501648, + "step": 1968 + }, + { + "epoch": 0.11, + "learning_rate": 9.813667786541006e-08, + "logits/chosen": -2.1444454193115234, + "logits/rejected": -2.133364200592041, + "logps/chosen": -0.004823677707463503, + "logps/rejected": -357.24835205078125, + "loss": 0.5365, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.101696660858579e-05, + "rewards/margins": 0.7608498930931091, + "rewards/rejected": -0.7609009146690369, + "step": 1969 + }, + { + "epoch": 0.11, + "learning_rate": 9.813412826750301e-08, + "logits/chosen": -2.0389628410339355, + "logits/rejected": -1.950748324394226, + "logps/chosen": -154.89007568359375, + "logps/rejected": -339.01812744140625, + "loss": 0.6475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.162486270070076, + "rewards/margins": 0.0016281157732009888, + "rewards/rejected": 0.160858154296875, + "step": 1970 + }, + { + "epoch": 0.11, + "learning_rate": 9.813157695963626e-08, + "logits/chosen": -2.191659927368164, + "logits/rejected": -2.1954641342163086, + "logps/chosen": -0.1620798110961914, + "logps/rejected": -64.15152740478516, + "loss": 0.6792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00648388871923089, + "rewards/margins": 0.05001263692975044, + "rewards/rejected": -0.04352874681353569, + "step": 1971 + }, + { + "epoch": 0.11, + "learning_rate": 9.812902394190046e-08, + "logits/chosen": -2.000349521636963, + "logits/rejected": -1.990246057510376, + "logps/chosen": -329.85076904296875, + "logps/rejected": -371.27398681640625, + "loss": 0.5118, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8470398187637329, + "rewards/margins": -0.05507504940032959, + "rewards/rejected": 0.9021148681640625, + "step": 1972 + }, + { + "epoch": 0.11, + "learning_rate": 9.81264692143863e-08, + "logits/chosen": -2.238614797592163, + "logits/rejected": -2.2212727069854736, + "logps/chosen": -8.516915321350098, + "logps/rejected": -100.07064056396484, + "loss": 0.6249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011905098333954811, + "rewards/margins": 0.27463170886039734, + "rewards/rejected": -0.2627266049385071, + "step": 1973 + }, + { + "epoch": 0.11, + "learning_rate": 9.812391277718453e-08, + "logits/chosen": -1.9820975065231323, + "logits/rejected": -1.9746204614639282, + "logps/chosen": -24.630168914794922, + "logps/rejected": -135.07069396972656, + "loss": 0.6659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031239701434969902, + "rewards/margins": 0.05162086710333824, + "rewards/rejected": -0.02038116566836834, + "step": 1974 + }, + { + "epoch": 0.11, + "learning_rate": 9.8121354630386e-08, + "logits/chosen": -2.0282511711120605, + "logits/rejected": -2.0307540893554688, + "logps/chosen": -42.73822021484375, + "logps/rejected": -209.32525634765625, + "loss": 0.6569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08080215752124786, + "rewards/margins": 0.08442459255456924, + "rewards/rejected": -0.003622436663135886, + "step": 1975 + }, + { + "epoch": 0.11, + "learning_rate": 9.811879477408156e-08, + "logits/chosen": -2.2220215797424316, + "logits/rejected": -2.1953442096710205, + "logps/chosen": -8.126106262207031, + "logps/rejected": -264.70599365234375, + "loss": 0.5247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0726439505815506, + "rewards/margins": 0.7511076927185059, + "rewards/rejected": -0.6784637570381165, + "step": 1976 + }, + { + "epoch": 0.12, + "learning_rate": 9.811623320836215e-08, + "logits/chosen": -2.1130411624908447, + "logits/rejected": -2.0477802753448486, + "logps/chosen": -299.4449462890625, + "logps/rejected": -480.3886413574219, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.940600574016571, + "rewards/margins": 0.9978362917900085, + "rewards/rejected": -0.0572357177734375, + "step": 1977 + }, + { + "epoch": 0.12, + "learning_rate": 9.811366993331878e-08, + "logits/chosen": -2.1231369972229004, + "logits/rejected": -2.083940029144287, + "logps/chosen": -171.90872192382812, + "logps/rejected": -311.96002197265625, + "loss": 0.4481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7648559808731079, + "rewards/margins": 0.21382445096969604, + "rewards/rejected": 0.5510315299034119, + "step": 1978 + }, + { + "epoch": 0.12, + "learning_rate": 9.81111049490425e-08, + "logits/chosen": -2.211941957473755, + "logits/rejected": -2.2037365436553955, + "logps/chosen": -71.38934326171875, + "logps/rejected": -214.31301879882812, + "loss": 0.541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0707855224609375, + "rewards/margins": 0.6622344851493835, + "rewards/rejected": -0.591448962688446, + "step": 1979 + }, + { + "epoch": 0.12, + "learning_rate": 9.810853825562445e-08, + "logits/chosen": -2.2241885662078857, + "logits/rejected": -2.198535442352295, + "logps/chosen": -15.000103950500488, + "logps/rejected": -156.89920043945312, + "loss": 0.4902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01249914150685072, + "rewards/margins": 1.0752921104431152, + "rewards/rejected": -1.0627930164337158, + "step": 1980 + }, + { + "epoch": 0.12, + "learning_rate": 9.810596985315576e-08, + "logits/chosen": -2.119999885559082, + "logits/rejected": -2.085975408554077, + "logps/chosen": -5.054286956787109, + "logps/rejected": -296.77752685546875, + "loss": 0.4553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0074821473099291325, + "rewards/margins": 1.3972053527832031, + "rewards/rejected": -1.404687523841858, + "step": 1981 + }, + { + "epoch": 0.12, + "learning_rate": 9.810339974172774e-08, + "logits/chosen": -2.191702127456665, + "logits/rejected": -2.191406488418579, + "logps/chosen": -0.004617456812411547, + "logps/rejected": -23.7554874420166, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.31859399820678e-05, + "rewards/margins": 0.012524548918008804, + "rewards/rejected": -0.012451362796127796, + "step": 1982 + }, + { + "epoch": 0.12, + "learning_rate": 9.810082792143166e-08, + "logits/chosen": -2.104675769805908, + "logits/rejected": -2.05287766456604, + "logps/chosen": -283.8869323730469, + "logps/rejected": -549.9722290039062, + "loss": 0.3915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1023895740509033, + "rewards/margins": 0.2835724353790283, + "rewards/rejected": 0.818817138671875, + "step": 1983 + }, + { + "epoch": 0.12, + "learning_rate": 9.809825439235889e-08, + "logits/chosen": -2.125380277633667, + "logits/rejected": -2.1177175045013428, + "logps/chosen": -31.77694320678711, + "logps/rejected": -241.93373107910156, + "loss": 0.5896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06683845818042755, + "rewards/margins": 0.3260349631309509, + "rewards/rejected": -0.2591964900493622, + "step": 1984 + }, + { + "epoch": 0.12, + "learning_rate": 9.809567915460083e-08, + "logits/chosen": -2.32542085647583, + "logits/rejected": -2.3235981464385986, + "logps/chosen": -13.959676742553711, + "logps/rejected": -72.20317077636719, + "loss": 0.6503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09304237365722656, + "rewards/margins": 0.04048652574419975, + "rewards/rejected": 0.05255584791302681, + "step": 1985 + }, + { + "epoch": 0.12, + "learning_rate": 9.8093102208249e-08, + "logits/chosen": -2.2021446228027344, + "logits/rejected": -2.1954615116119385, + "logps/chosen": -11.142730712890625, + "logps/rejected": -231.135009765625, + "loss": 0.5842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02274322509765625, + "rewards/margins": 0.4717758297920227, + "rewards/rejected": -0.44903260469436646, + "step": 1986 + }, + { + "epoch": 0.12, + "learning_rate": 9.809052355339494e-08, + "logits/chosen": -1.969260573387146, + "logits/rejected": -1.9721596240997314, + "logps/chosen": -31.33348274230957, + "logps/rejected": -130.93756103515625, + "loss": 0.7212, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.061248015612363815, + "rewards/margins": -0.15084151923656464, + "rewards/rejected": 0.21208953857421875, + "step": 1987 + }, + { + "epoch": 0.12, + "learning_rate": 9.808794319013022e-08, + "logits/chosen": -2.049908399581909, + "logits/rejected": -2.045287609100342, + "logps/chosen": -2.658435106277466, + "logps/rejected": -142.5759735107422, + "loss": 0.6827, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0047720433212816715, + "rewards/margins": 0.028227854520082474, + "rewards/rejected": -0.02345581166446209, + "step": 1988 + }, + { + "epoch": 0.12, + "learning_rate": 9.808536111854656e-08, + "logits/chosen": -2.096223831176758, + "logits/rejected": -2.0973703861236572, + "logps/chosen": -9.511625289916992, + "logps/rejected": -203.98138427734375, + "loss": 0.6178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007007789798080921, + "rewards/margins": 0.2681146562099457, + "rewards/rejected": -0.2611068785190582, + "step": 1989 + }, + { + "epoch": 0.12, + "learning_rate": 9.808277733873564e-08, + "logits/chosen": -2.029231548309326, + "logits/rejected": -2.057297468185425, + "logps/chosen": -200.0992431640625, + "logps/rejected": -404.2942199707031, + "loss": 0.3374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.771228015422821, + "rewards/margins": 0.8618804812431335, + "rewards/rejected": -0.0906524658203125, + "step": 1990 + }, + { + "epoch": 0.12, + "learning_rate": 9.808019185078928e-08, + "logits/chosen": -2.1994686126708984, + "logits/rejected": -2.170969247817993, + "logps/chosen": -10.868066787719727, + "logps/rejected": -185.73175048828125, + "loss": 0.5513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001288127969019115, + "rewards/margins": 0.6894879341125488, + "rewards/rejected": -0.6907760500907898, + "step": 1991 + }, + { + "epoch": 0.12, + "learning_rate": 9.807760465479933e-08, + "logits/chosen": -2.2190613746643066, + "logits/rejected": -2.2121126651763916, + "logps/chosen": -45.54833221435547, + "logps/rejected": -226.44940185546875, + "loss": 0.6233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012308502569794655, + "rewards/margins": 0.1699317991733551, + "rewards/rejected": -0.157623291015625, + "step": 1992 + }, + { + "epoch": 0.12, + "learning_rate": 9.807501575085767e-08, + "logits/chosen": -1.9890884160995483, + "logits/rejected": -1.9847164154052734, + "logps/chosen": -5.53879451751709, + "logps/rejected": -212.89173889160156, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012241936288774014, + "rewards/margins": 0.7247205972671509, + "rewards/rejected": -0.7124786376953125, + "step": 1993 + }, + { + "epoch": 0.12, + "learning_rate": 9.80724251390563e-08, + "logits/chosen": -2.045433759689331, + "logits/rejected": -2.046961784362793, + "logps/chosen": -127.09730529785156, + "logps/rejected": -226.77838134765625, + "loss": 0.6427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04681854322552681, + "rewards/margins": 0.28810882568359375, + "rewards/rejected": -0.33492738008499146, + "step": 1994 + }, + { + "epoch": 0.12, + "learning_rate": 9.806983281948724e-08, + "logits/chosen": -2.277824878692627, + "logits/rejected": -2.2713100910186768, + "logps/chosen": -20.829015731811523, + "logps/rejected": -216.14559936523438, + "loss": 0.5244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03567314147949219, + "rewards/margins": 0.8131901025772095, + "rewards/rejected": -0.8488632440567017, + "step": 1995 + }, + { + "epoch": 0.12, + "learning_rate": 9.806723879224259e-08, + "logits/chosen": -2.1186695098876953, + "logits/rejected": -2.110577344894409, + "logps/chosen": -19.84121322631836, + "logps/rejected": -137.15216064453125, + "loss": 0.5779, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050361063331365585, + "rewards/margins": 0.4545297622680664, + "rewards/rejected": -0.4041686952114105, + "step": 1996 + }, + { + "epoch": 0.12, + "learning_rate": 9.806464305741449e-08, + "logits/chosen": -2.1529362201690674, + "logits/rejected": -2.1372904777526855, + "logps/chosen": -33.35831069946289, + "logps/rejected": -358.8145751953125, + "loss": 0.4918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17319945991039276, + "rewards/margins": 0.769561767578125, + "rewards/rejected": -0.596362292766571, + "step": 1997 + }, + { + "epoch": 0.12, + "learning_rate": 9.806204561509517e-08, + "logits/chosen": -2.1342320442199707, + "logits/rejected": -2.1051418781280518, + "logps/chosen": -217.71487426757812, + "logps/rejected": -313.86651611328125, + "loss": 0.4936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9056793451309204, + "rewards/margins": 0.029803454875946045, + "rewards/rejected": 0.8758758902549744, + "step": 1998 + }, + { + "epoch": 0.12, + "learning_rate": 9.805944646537688e-08, + "logits/chosen": -2.120920419692993, + "logits/rejected": -2.1095974445343018, + "logps/chosen": -180.59945678710938, + "logps/rejected": -301.30438232421875, + "loss": 0.5952, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7812027335166931, + "rewards/margins": -0.3629196286201477, + "rewards/rejected": 1.1441223621368408, + "step": 1999 + }, + { + "epoch": 0.12, + "learning_rate": 9.805684560835196e-08, + "logits/chosen": -2.1998696327209473, + "logits/rejected": -2.1290907859802246, + "logps/chosen": -178.44761657714844, + "logps/rejected": -322.0555419921875, + "loss": 0.5343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2758956849575043, + "rewards/margins": 0.4651290774345398, + "rewards/rejected": -0.18923340737819672, + "step": 2000 + }, + { + "epoch": 0.12, + "learning_rate": 9.805424304411282e-08, + "logits/chosen": -2.1178765296936035, + "logits/rejected": -2.1086180210113525, + "logps/chosen": -190.25665283203125, + "logps/rejected": -335.28204345703125, + "loss": 0.4665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6659942865371704, + "rewards/margins": 0.2804870903491974, + "rewards/rejected": 0.385507196187973, + "step": 2001 + }, + { + "epoch": 0.12, + "learning_rate": 9.805163877275192e-08, + "logits/chosen": -2.2099456787109375, + "logits/rejected": -2.166126012802124, + "logps/chosen": -201.4737548828125, + "logps/rejected": -318.5533447265625, + "loss": 0.4135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6617141962051392, + "rewards/margins": 0.5015366077423096, + "rewards/rejected": 0.16017761826515198, + "step": 2002 + }, + { + "epoch": 0.12, + "learning_rate": 9.804903279436177e-08, + "logits/chosen": -2.258310079574585, + "logits/rejected": -2.2264747619628906, + "logps/chosen": -44.90190124511719, + "logps/rejected": -192.80267333984375, + "loss": 0.5835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030809402465820312, + "rewards/margins": 0.5249156951904297, + "rewards/rejected": -0.55572509765625, + "step": 2003 + }, + { + "epoch": 0.12, + "learning_rate": 9.804642510903491e-08, + "logits/chosen": -2.2635996341705322, + "logits/rejected": -2.2618794441223145, + "logps/chosen": -0.030355991795659065, + "logps/rejected": -173.2464599609375, + "loss": 0.6127, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.930421710014343e-06, + "rewards/margins": 0.3403637409210205, + "rewards/rejected": -0.3403686583042145, + "step": 2004 + }, + { + "epoch": 0.12, + "learning_rate": 9.804381571686404e-08, + "logits/chosen": -2.1914095878601074, + "logits/rejected": -2.1776561737060547, + "logps/chosen": -24.4577693939209, + "logps/rejected": -219.96902465820312, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.047788240015506744, + "rewards/margins": 0.9229728579521179, + "rewards/rejected": -0.9707611203193665, + "step": 2005 + }, + { + "epoch": 0.12, + "learning_rate": 9.80412046179418e-08, + "logits/chosen": -2.2206859588623047, + "logits/rejected": -2.198235273361206, + "logps/chosen": -205.3797607421875, + "logps/rejected": -403.1770324707031, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7826172113418579, + "rewards/margins": 0.8293304443359375, + "rewards/rejected": -0.04671325907111168, + "step": 2006 + }, + { + "epoch": 0.12, + "learning_rate": 9.803859181236101e-08, + "logits/chosen": -2.1799373626708984, + "logits/rejected": -2.176996946334839, + "logps/chosen": -196.012451171875, + "logps/rejected": -242.73532104492188, + "loss": 0.5721, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6430389285087585, + "rewards/margins": -0.2016083002090454, + "rewards/rejected": 0.844647228717804, + "step": 2007 + }, + { + "epoch": 0.12, + "learning_rate": 9.803597730021443e-08, + "logits/chosen": -2.035256862640381, + "logits/rejected": -2.0189566612243652, + "logps/chosen": -26.397720336914062, + "logps/rejected": -200.31748962402344, + "loss": 0.667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028442000970244408, + "rewards/margins": 0.09193229675292969, + "rewards/rejected": -0.06349029392004013, + "step": 2008 + }, + { + "epoch": 0.12, + "learning_rate": 9.803336108159499e-08, + "logits/chosen": -2.1033575534820557, + "logits/rejected": -2.0862224102020264, + "logps/chosen": -51.91827392578125, + "logps/rejected": -182.1893768310547, + "loss": 0.5947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05063018947839737, + "rewards/margins": 0.49316105246543884, + "rewards/rejected": -0.5437912344932556, + "step": 2009 + }, + { + "epoch": 0.12, + "learning_rate": 9.803074315659558e-08, + "logits/chosen": -2.152250051498413, + "logits/rejected": -2.132455348968506, + "logps/chosen": -297.38665771484375, + "logps/rejected": -479.67266845703125, + "loss": 0.4772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.702410876750946, + "rewards/margins": 0.1849273443222046, + "rewards/rejected": 0.5174835324287415, + "step": 2010 + }, + { + "epoch": 0.12, + "learning_rate": 9.802812352530925e-08, + "logits/chosen": -1.9923969507217407, + "logits/rejected": -1.9861347675323486, + "logps/chosen": -24.889156341552734, + "logps/rejected": -137.53536987304688, + "loss": 0.7038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0803913101553917, + "rewards/margins": 0.04931756108999252, + "rewards/rejected": -0.12970887124538422, + "step": 2011 + }, + { + "epoch": 0.12, + "learning_rate": 9.802550218782904e-08, + "logits/chosen": -2.0645010471343994, + "logits/rejected": -2.1249303817749023, + "logps/chosen": -341.23681640625, + "logps/rejected": -591.897216796875, + "loss": 0.3655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3604492247104645, + "rewards/margins": 0.9743286371231079, + "rewards/rejected": -0.613879382610321, + "step": 2012 + }, + { + "epoch": 0.12, + "learning_rate": 9.802287914424807e-08, + "logits/chosen": -2.1017630100250244, + "logits/rejected": -2.0967965126037598, + "logps/chosen": -84.93505096435547, + "logps/rejected": -406.49053955078125, + "loss": 0.4618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05576324462890625, + "rewards/margins": 1.1719528436660767, + "rewards/rejected": -1.1161895990371704, + "step": 2013 + }, + { + "epoch": 0.12, + "learning_rate": 9.802025439465953e-08, + "logits/chosen": -1.9223942756652832, + "logits/rejected": -1.9270734786987305, + "logps/chosen": -228.5166015625, + "logps/rejected": -324.9542236328125, + "loss": 0.6004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3107238709926605, + "rewards/margins": 0.10511168837547302, + "rewards/rejected": 0.2056121826171875, + "step": 2014 + }, + { + "epoch": 0.12, + "learning_rate": 9.801762793915664e-08, + "logits/chosen": -2.2992076873779297, + "logits/rejected": -2.3014423847198486, + "logps/chosen": -0.00011563080624910071, + "logps/rejected": -119.38330841064453, + "loss": 0.6289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3838220286620526e-08, + "rewards/margins": 0.2759803533554077, + "rewards/rejected": -0.2759803831577301, + "step": 2015 + }, + { + "epoch": 0.12, + "learning_rate": 9.801499977783277e-08, + "logits/chosen": -2.044149160385132, + "logits/rejected": -2.0399909019470215, + "logps/chosen": -183.6174774169922, + "logps/rejected": -269.020751953125, + "loss": 0.4904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6386123895645142, + "rewards/margins": 0.1643112301826477, + "rewards/rejected": 0.47430115938186646, + "step": 2016 + }, + { + "epoch": 0.12, + "learning_rate": 9.80123699107812e-08, + "logits/chosen": -2.1977779865264893, + "logits/rejected": -2.1748902797698975, + "logps/chosen": -50.1643180847168, + "logps/rejected": -164.02194213867188, + "loss": 0.6412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18668250739574432, + "rewards/margins": 0.015837475657463074, + "rewards/rejected": 0.17084503173828125, + "step": 2017 + }, + { + "epoch": 0.12, + "learning_rate": 9.800973833809542e-08, + "logits/chosen": -2.202406406402588, + "logits/rejected": -2.199234962463379, + "logps/chosen": -0.0001329114311374724, + "logps/rejected": -83.71075439453125, + "loss": 0.6147, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.15197529643774e-07, + "rewards/margins": 0.3427574634552002, + "rewards/rejected": -0.3427581787109375, + "step": 2018 + }, + { + "epoch": 0.12, + "learning_rate": 9.800710505986888e-08, + "logits/chosen": -1.9125945568084717, + "logits/rejected": -1.8326239585876465, + "logps/chosen": -178.48846435546875, + "logps/rejected": -345.62274169921875, + "loss": 0.5351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1405181884765625, + "rewards/margins": 0.420928955078125, + "rewards/rejected": -0.2804107666015625, + "step": 2019 + }, + { + "epoch": 0.12, + "learning_rate": 9.800447007619515e-08, + "logits/chosen": -1.97763991355896, + "logits/rejected": -1.9828574657440186, + "logps/chosen": -239.05349731445312, + "logps/rejected": -332.4259033203125, + "loss": 0.4778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8574112057685852, + "rewards/margins": 0.0930984616279602, + "rewards/rejected": 0.764312744140625, + "step": 2020 + }, + { + "epoch": 0.12, + "learning_rate": 9.800183338716782e-08, + "logits/chosen": -1.9800009727478027, + "logits/rejected": -2.019127130508423, + "logps/chosen": -202.06430053710938, + "logps/rejected": -389.71002197265625, + "loss": 0.5123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.56732177734375, + "rewards/margins": 0.16236570477485657, + "rewards/rejected": 0.40495607256889343, + "step": 2021 + }, + { + "epoch": 0.12, + "learning_rate": 9.799919499288056e-08, + "logits/chosen": -2.2739031314849854, + "logits/rejected": -2.260702610015869, + "logps/chosen": -85.93933868408203, + "logps/rejected": -309.6423034667969, + "loss": 0.506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20305557548999786, + "rewards/margins": 0.6803291440010071, + "rewards/rejected": -0.477273553609848, + "step": 2022 + }, + { + "epoch": 0.12, + "learning_rate": 9.799655489342714e-08, + "logits/chosen": -2.146611452102661, + "logits/rejected": -2.122415542602539, + "logps/chosen": -338.7117919921875, + "logps/rejected": -438.8226318359375, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9704346060752869, + "rewards/margins": 1.1837372779846191, + "rewards/rejected": -0.2133026123046875, + "step": 2023 + }, + { + "epoch": 0.12, + "learning_rate": 9.799391308890127e-08, + "logits/chosen": -2.2516045570373535, + "logits/rejected": -2.251195192337036, + "logps/chosen": -9.183357238769531, + "logps/rejected": -86.28215789794922, + "loss": 0.6327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05085888132452965, + "rewards/margins": 0.16868801414966583, + "rewards/rejected": -0.11782913655042648, + "step": 2024 + }, + { + "epoch": 0.12, + "learning_rate": 9.799126957939686e-08, + "logits/chosen": -2.089618444442749, + "logits/rejected": -2.0040080547332764, + "logps/chosen": -250.39425659179688, + "logps/rejected": -392.1312255859375, + "loss": 0.5547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17051391303539276, + "rewards/margins": 0.4111267030239105, + "rewards/rejected": -0.24061278998851776, + "step": 2025 + }, + { + "epoch": 0.12, + "learning_rate": 9.798862436500781e-08, + "logits/chosen": -1.8623034954071045, + "logits/rejected": -1.8586595058441162, + "logps/chosen": -52.09937286376953, + "logps/rejected": -201.06039428710938, + "loss": 0.6534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021217346657067537, + "rewards/margins": 0.17538681626319885, + "rewards/rejected": -0.17750854790210724, + "step": 2026 + }, + { + "epoch": 0.12, + "learning_rate": 9.798597744582809e-08, + "logits/chosen": -2.187229633331299, + "logits/rejected": -2.1824281215667725, + "logps/chosen": -17.608545303344727, + "logps/rejected": -159.6573028564453, + "loss": 0.6228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014064789284020662, + "rewards/margins": 0.26630058884620667, + "rewards/rejected": -0.2648940980434418, + "step": 2027 + }, + { + "epoch": 0.12, + "learning_rate": 9.79833288219517e-08, + "logits/chosen": -2.1958606243133545, + "logits/rejected": -2.183570146560669, + "logps/chosen": -24.361665725708008, + "logps/rejected": -259.6455078125, + "loss": 0.5902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06001262739300728, + "rewards/margins": 0.3826139569282532, + "rewards/rejected": -0.322601318359375, + "step": 2028 + }, + { + "epoch": 0.12, + "learning_rate": 9.798067849347278e-08, + "logits/chosen": -2.0560717582702637, + "logits/rejected": -2.0314741134643555, + "logps/chosen": -278.2265319824219, + "logps/rejected": -425.8825378417969, + "loss": 0.4337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6805633902549744, + "rewards/margins": 0.4297180473804474, + "rewards/rejected": 0.250845342874527, + "step": 2029 + }, + { + "epoch": 0.12, + "learning_rate": 9.797802646048546e-08, + "logits/chosen": -2.085493803024292, + "logits/rejected": -2.067279815673828, + "logps/chosen": -234.28170776367188, + "logps/rejected": -374.4241943359375, + "loss": 0.4438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.75048828125, + "rewards/margins": 0.20565181970596313, + "rewards/rejected": 0.5448364615440369, + "step": 2030 + }, + { + "epoch": 0.12, + "learning_rate": 9.797537272308394e-08, + "logits/chosen": -2.0158166885375977, + "logits/rejected": -2.029426097869873, + "logps/chosen": -174.35585021972656, + "logps/rejected": -181.73348999023438, + "loss": 0.6636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09713287651538849, + "rewards/margins": 0.03050994873046875, + "rewards/rejected": 0.06662292778491974, + "step": 2031 + }, + { + "epoch": 0.12, + "learning_rate": 9.797271728136251e-08, + "logits/chosen": -1.9834471940994263, + "logits/rejected": -1.9655448198318481, + "logps/chosen": -194.29449462890625, + "logps/rejected": -385.3536071777344, + "loss": 0.5726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.291299432516098, + "rewards/margins": 0.04047238826751709, + "rewards/rejected": 0.25082704424858093, + "step": 2032 + }, + { + "epoch": 0.12, + "learning_rate": 9.797006013541552e-08, + "logits/chosen": -2.0985729694366455, + "logits/rejected": -2.080975294113159, + "logps/chosen": -66.604736328125, + "logps/rejected": -255.68203735351562, + "loss": 0.5685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07955856621265411, + "rewards/margins": 0.5211891531944275, + "rewards/rejected": -0.4416305720806122, + "step": 2033 + }, + { + "epoch": 0.12, + "learning_rate": 9.796740128533733e-08, + "logits/chosen": -2.330057382583618, + "logits/rejected": -2.319896697998047, + "logps/chosen": -27.31109046936035, + "logps/rejected": -218.90420532226562, + "loss": 0.5239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0569646842777729, + "rewards/margins": 0.7846319675445557, + "rewards/rejected": -0.7276672720909119, + "step": 2034 + }, + { + "epoch": 0.12, + "learning_rate": 9.796474073122243e-08, + "logits/chosen": -2.2849574089050293, + "logits/rejected": -2.288999557495117, + "logps/chosen": -5.978400230407715, + "logps/rejected": -114.63451385498047, + "loss": 0.5784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024891043081879616, + "rewards/margins": 0.4949761927127838, + "rewards/rejected": -0.47008514404296875, + "step": 2035 + }, + { + "epoch": 0.12, + "learning_rate": 9.79620784731653e-08, + "logits/chosen": -2.2177913188934326, + "logits/rejected": -2.1860077381134033, + "logps/chosen": -173.82662963867188, + "logps/rejected": -464.6094055175781, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39651185274124146, + "rewards/margins": 1.119317650794983, + "rewards/rejected": -0.7228057980537415, + "step": 2036 + }, + { + "epoch": 0.12, + "learning_rate": 9.795941451126055e-08, + "logits/chosen": -2.0372235774993896, + "logits/rejected": -2.035982370376587, + "logps/chosen": -50.88326644897461, + "logps/rejected": -339.571044921875, + "loss": 0.4842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035616684705019, + "rewards/margins": 1.2590245008468628, + "rewards/rejected": -1.294641137123108, + "step": 2037 + }, + { + "epoch": 0.12, + "learning_rate": 9.795674884560279e-08, + "logits/chosen": -2.0995216369628906, + "logits/rejected": -2.0911779403686523, + "logps/chosen": -13.773385047912598, + "logps/rejected": -292.3702392578125, + "loss": 0.482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0755794569849968, + "rewards/margins": 1.0303035974502563, + "rewards/rejected": -0.954724133014679, + "step": 2038 + }, + { + "epoch": 0.12, + "learning_rate": 9.795408147628673e-08, + "logits/chosen": -1.912436842918396, + "logits/rejected": -1.9158927202224731, + "logps/chosen": -21.45140838623047, + "logps/rejected": -228.06192016601562, + "loss": 0.57, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05189171060919762, + "rewards/margins": 0.6239383816719055, + "rewards/rejected": -0.675830066204071, + "step": 2039 + }, + { + "epoch": 0.12, + "learning_rate": 9.795141240340713e-08, + "logits/chosen": -1.9725784063339233, + "logits/rejected": -1.9770148992538452, + "logps/chosen": -90.7525405883789, + "logps/rejected": -176.12242126464844, + "loss": 0.6376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048088837414979935, + "rewards/margins": 0.27341535687446594, + "rewards/rejected": -0.3215042054653168, + "step": 2040 + }, + { + "epoch": 0.12, + "learning_rate": 9.794874162705881e-08, + "logits/chosen": -2.1752195358276367, + "logits/rejected": -2.1648154258728027, + "logps/chosen": -0.21207541227340698, + "logps/rejected": -285.0360107421875, + "loss": 0.5167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0052729113958776, + "rewards/margins": 0.8870946168899536, + "rewards/rejected": -0.8923675417900085, + "step": 2041 + }, + { + "epoch": 0.12, + "learning_rate": 9.794606914733664e-08, + "logits/chosen": -1.992544174194336, + "logits/rejected": -1.9728789329528809, + "logps/chosen": -76.12894439697266, + "logps/rejected": -362.5643615722656, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23930588364601135, + "rewards/margins": 1.0171409845352173, + "rewards/rejected": -0.7778350710868835, + "step": 2042 + }, + { + "epoch": 0.12, + "learning_rate": 9.794339496433557e-08, + "logits/chosen": -1.946118712425232, + "logits/rejected": -1.9321149587631226, + "logps/chosen": -36.12513732910156, + "logps/rejected": -201.53050231933594, + "loss": 0.5503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14322014153003693, + "rewards/margins": 0.42987173795700073, + "rewards/rejected": -0.286651611328125, + "step": 2043 + }, + { + "epoch": 0.12, + "learning_rate": 9.794071907815058e-08, + "logits/chosen": -2.2597224712371826, + "logits/rejected": -2.2571589946746826, + "logps/chosen": -35.968509674072266, + "logps/rejected": -33.77312469482422, + "loss": 0.6907, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.028481675311923027, + "rewards/margins": -0.016833877190947533, + "rewards/rejected": 0.04531555250287056, + "step": 2044 + }, + { + "epoch": 0.12, + "learning_rate": 9.793804148887677e-08, + "logits/chosen": -1.9942666292190552, + "logits/rejected": -1.988229751586914, + "logps/chosen": -91.873291015625, + "logps/rejected": -228.20681762695312, + "loss": 0.632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16472168266773224, + "rewards/margins": 0.4648788571357727, + "rewards/rejected": -0.6296005249023438, + "step": 2045 + }, + { + "epoch": 0.12, + "learning_rate": 9.793536219660921e-08, + "logits/chosen": -2.1000678539276123, + "logits/rejected": -2.0815083980560303, + "logps/chosen": -117.42532348632812, + "logps/rejected": -218.13882446289062, + "loss": 0.6346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0430450439453125, + "rewards/margins": 0.16240844130516052, + "rewards/rejected": -0.11936340481042862, + "step": 2046 + }, + { + "epoch": 0.12, + "learning_rate": 9.793268120144311e-08, + "logits/chosen": -2.3574862480163574, + "logits/rejected": -2.332637071609497, + "logps/chosen": -25.734004974365234, + "logps/rejected": -349.5234069824219, + "loss": 0.5139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03704414516687393, + "rewards/margins": 0.9832988977432251, + "rewards/rejected": -1.0203430652618408, + "step": 2047 + }, + { + "epoch": 0.12, + "learning_rate": 9.792999850347372e-08, + "logits/chosen": -2.146261692047119, + "logits/rejected": -2.1477651596069336, + "logps/chosen": -0.03081178106367588, + "logps/rejected": -58.20142364501953, + "loss": 0.7064, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.11434268951416e-06, + "rewards/margins": -0.05697294697165489, + "rewards/rejected": 0.05696983262896538, + "step": 2048 + }, + { + "epoch": 0.12, + "learning_rate": 9.792731410279633e-08, + "logits/chosen": -2.0796332359313965, + "logits/rejected": -2.084193468093872, + "logps/chosen": -20.38649559020996, + "logps/rejected": -104.7964859008789, + "loss": 0.6426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07496757805347443, + "rewards/margins": 0.1436901092529297, + "rewards/rejected": -0.06872253865003586, + "step": 2049 + }, + { + "epoch": 0.12, + "learning_rate": 9.792462799950631e-08, + "logits/chosen": -2.1193950176239014, + "logits/rejected": -2.105299711227417, + "logps/chosen": -86.21781921386719, + "logps/rejected": -271.40521240234375, + "loss": 0.6193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030405426397919655, + "rewards/margins": 0.24589310586452484, + "rewards/rejected": -0.21548767387866974, + "step": 2050 + }, + { + "epoch": 0.12, + "learning_rate": 9.792194019369907e-08, + "logits/chosen": -2.1630098819732666, + "logits/rejected": -2.134012222290039, + "logps/chosen": -63.978153228759766, + "logps/rejected": -233.42974853515625, + "loss": 0.5947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04489097744226456, + "rewards/margins": 0.33712729811668396, + "rewards/rejected": -0.292236328125, + "step": 2051 + }, + { + "epoch": 0.12, + "learning_rate": 9.79192506854701e-08, + "logits/chosen": -2.171520233154297, + "logits/rejected": -2.1533353328704834, + "logps/chosen": -85.96800231933594, + "logps/rejected": -162.88818359375, + "loss": 0.6796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08978652954101562, + "rewards/margins": 0.08339309692382812, + "rewards/rejected": 0.0063934326171875, + "step": 2052 + }, + { + "epoch": 0.12, + "learning_rate": 9.791655947491497e-08, + "logits/chosen": -2.1457269191741943, + "logits/rejected": -2.151758909225464, + "logps/chosen": -265.3661804199219, + "logps/rejected": -375.2228088378906, + "loss": 0.4383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7309234738349915, + "rewards/margins": 0.32283326983451843, + "rewards/rejected": 0.408090204000473, + "step": 2053 + }, + { + "epoch": 0.12, + "learning_rate": 9.791386656212924e-08, + "logits/chosen": -2.267991542816162, + "logits/rejected": -2.2607104778289795, + "logps/chosen": -18.888322830200195, + "logps/rejected": -131.5111541748047, + "loss": 0.5965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035394858568906784, + "rewards/margins": 0.38322964310646057, + "rewards/rejected": -0.3478347957134247, + "step": 2054 + }, + { + "epoch": 0.12, + "learning_rate": 9.79111719472086e-08, + "logits/chosen": -2.333812713623047, + "logits/rejected": -2.3216419219970703, + "logps/chosen": -6.403527736663818, + "logps/rejected": -125.90794372558594, + "loss": 0.6073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04649315029382706, + "rewards/margins": 0.4362201690673828, + "rewards/rejected": -0.4827133119106293, + "step": 2055 + }, + { + "epoch": 0.12, + "learning_rate": 9.790847563024878e-08, + "logits/chosen": -2.152963638305664, + "logits/rejected": -2.15313982963562, + "logps/chosen": -0.43331587314605713, + "logps/rejected": -39.568153381347656, + "loss": 0.6841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011209514923393726, + "rewards/margins": 0.010415622033178806, + "rewards/rejected": -0.021625136956572533, + "step": 2056 + }, + { + "epoch": 0.12, + "learning_rate": 9.790577761134555e-08, + "logits/chosen": -2.2840988636016846, + "logits/rejected": -2.2800960540771484, + "logps/chosen": -43.509056091308594, + "logps/rejected": -120.72138977050781, + "loss": 0.748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31037941575050354, + "rewards/margins": 0.11272543668746948, + "rewards/rejected": -0.423104852437973, + "step": 2057 + }, + { + "epoch": 0.12, + "learning_rate": 9.790307789059478e-08, + "logits/chosen": -2.2934718132019043, + "logits/rejected": -2.2608354091644287, + "logps/chosen": -167.7454376220703, + "logps/rejected": -334.767578125, + "loss": 0.5374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16208648681640625, + "rewards/margins": 0.4832199215888977, + "rewards/rejected": -0.32113343477249146, + "step": 2058 + }, + { + "epoch": 0.12, + "learning_rate": 9.790037646809236e-08, + "logits/chosen": -2.06644344329834, + "logits/rejected": -2.0597305297851562, + "logps/chosen": -58.29124450683594, + "logps/rejected": -121.04712677001953, + "loss": 0.7581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17819367349147797, + "rewards/margins": 0.02083510160446167, + "rewards/rejected": -0.19902877509593964, + "step": 2059 + }, + { + "epoch": 0.12, + "learning_rate": 9.789767334393426e-08, + "logits/chosen": -2.100592851638794, + "logits/rejected": -2.092928171157837, + "logps/chosen": -15.151692390441895, + "logps/rejected": -122.12413787841797, + "loss": 0.5956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007280349964275956, + "rewards/margins": 0.436631977558136, + "rewards/rejected": -0.435903936624527, + "step": 2060 + }, + { + "epoch": 0.12, + "learning_rate": 9.78949685182165e-08, + "logits/chosen": -2.128270149230957, + "logits/rejected": -2.1193747520446777, + "logps/chosen": -273.0997314453125, + "logps/rejected": -335.62237548828125, + "loss": 0.5388, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.860089123249054, + "rewards/margins": -0.14279180765151978, + "rewards/rejected": 1.0028809309005737, + "step": 2061 + }, + { + "epoch": 0.12, + "learning_rate": 9.78922619910352e-08, + "logits/chosen": -1.7700228691101074, + "logits/rejected": -1.7901668548583984, + "logps/chosen": -231.78274536132812, + "logps/rejected": -254.2784881591797, + "loss": 0.6104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22124634683132172, + "rewards/margins": 0.06964416801929474, + "rewards/rejected": 0.15160217881202698, + "step": 2062 + }, + { + "epoch": 0.12, + "learning_rate": 9.788955376248648e-08, + "logits/chosen": -2.0428857803344727, + "logits/rejected": -1.9844183921813965, + "logps/chosen": -148.41607666015625, + "logps/rejected": -361.79364013671875, + "loss": 0.5026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6345108151435852, + "rewards/margins": 0.0813857913017273, + "rewards/rejected": 0.5531250238418579, + "step": 2063 + }, + { + "epoch": 0.12, + "learning_rate": 9.788684383266655e-08, + "logits/chosen": -2.2666714191436768, + "logits/rejected": -2.2532382011413574, + "logps/chosen": -4.516138076782227, + "logps/rejected": -93.6089096069336, + "loss": 0.709, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.5572051274357364e-05, + "rewards/margins": -0.03179826959967613, + "rewards/rejected": 0.03176269680261612, + "step": 2064 + }, + { + "epoch": 0.12, + "learning_rate": 9.788413220167168e-08, + "logits/chosen": -2.146080732345581, + "logits/rejected": -2.141995668411255, + "logps/chosen": -7.112820148468018, + "logps/rejected": -137.27120971679688, + "loss": 0.5563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12436041980981827, + "rewards/margins": 0.4889569878578186, + "rewards/rejected": -0.36459657549858093, + "step": 2065 + }, + { + "epoch": 0.12, + "learning_rate": 9.788141886959822e-08, + "logits/chosen": -2.0440146923065186, + "logits/rejected": -2.028721570968628, + "logps/chosen": -230.28054809570312, + "logps/rejected": -368.5123291015625, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0694961547851562, + "rewards/margins": 0.6914504766464233, + "rewards/rejected": 0.3780456483364105, + "step": 2066 + }, + { + "epoch": 0.12, + "learning_rate": 9.787870383654256e-08, + "logits/chosen": -2.114259958267212, + "logits/rejected": -2.128838539123535, + "logps/chosen": -238.098876953125, + "logps/rejected": -311.4184265136719, + "loss": 0.504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6276092529296875, + "rewards/margins": 0.116973876953125, + "rewards/rejected": 0.5106353759765625, + "step": 2067 + }, + { + "epoch": 0.12, + "learning_rate": 9.787598710260112e-08, + "logits/chosen": -2.0559051036834717, + "logits/rejected": -2.049929618835449, + "logps/chosen": -10.879302024841309, + "logps/rejected": -118.70314025878906, + "loss": 0.6331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02824096754193306, + "rewards/margins": 0.23799896240234375, + "rewards/rejected": -0.209757998585701, + "step": 2068 + }, + { + "epoch": 0.12, + "learning_rate": 9.787326866787044e-08, + "logits/chosen": -2.2588343620300293, + "logits/rejected": -2.2492053508758545, + "logps/chosen": -160.4955596923828, + "logps/rejected": -273.79522705078125, + "loss": 0.4984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6940506100654602, + "rewards/margins": 0.16093289852142334, + "rewards/rejected": 0.5331177115440369, + "step": 2069 + }, + { + "epoch": 0.12, + "learning_rate": 9.787054853244707e-08, + "logits/chosen": -2.073867082595825, + "logits/rejected": -2.054832696914673, + "logps/chosen": -27.35675048828125, + "logps/rejected": -89.65127563476562, + "loss": 0.6607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05624428018927574, + "rewards/margins": 0.20459556579589844, + "rewards/rejected": -0.2608398497104645, + "step": 2070 + }, + { + "epoch": 0.12, + "learning_rate": 9.786782669642767e-08, + "logits/chosen": -2.1999752521514893, + "logits/rejected": -2.193471670150757, + "logps/chosen": -39.55792999267578, + "logps/rejected": -139.89892578125, + "loss": 0.7097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11096420139074326, + "rewards/margins": 0.05990525335073471, + "rewards/rejected": -0.17086945474147797, + "step": 2071 + }, + { + "epoch": 0.12, + "learning_rate": 9.786510315990893e-08, + "logits/chosen": -2.0829007625579834, + "logits/rejected": -2.084240198135376, + "logps/chosen": -178.4227752685547, + "logps/rejected": -255.83706665039062, + "loss": 0.4316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6863357424736023, + "rewards/margins": 0.4233657717704773, + "rewards/rejected": 0.262969970703125, + "step": 2072 + }, + { + "epoch": 0.12, + "learning_rate": 9.786237792298757e-08, + "logits/chosen": -2.421424627304077, + "logits/rejected": -2.3606369495391846, + "logps/chosen": -49.2774543762207, + "logps/rejected": -272.94146728515625, + "loss": 0.516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1213001236319542, + "rewards/margins": 0.7887561917304993, + "rewards/rejected": -0.6674560904502869, + "step": 2073 + }, + { + "epoch": 0.12, + "learning_rate": 9.785965098576043e-08, + "logits/chosen": -2.2160871028900146, + "logits/rejected": -2.1997663974761963, + "logps/chosen": -10.062071800231934, + "logps/rejected": -178.69369506835938, + "loss": 0.5047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010603046976029873, + "rewards/margins": 0.9742993712425232, + "rewards/rejected": -0.963696300983429, + "step": 2074 + }, + { + "epoch": 0.12, + "learning_rate": 9.785692234832439e-08, + "logits/chosen": -2.1736435890197754, + "logits/rejected": -2.1718900203704834, + "logps/chosen": -27.149263381958008, + "logps/rejected": -165.00665283203125, + "loss": 0.6884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010797691531479359, + "rewards/margins": 0.022038841620087624, + "rewards/rejected": -0.011241150088608265, + "step": 2075 + }, + { + "epoch": 0.12, + "learning_rate": 9.785419201077637e-08, + "logits/chosen": -1.9581639766693115, + "logits/rejected": -2.0001206398010254, + "logps/chosen": -264.93328857421875, + "logps/rejected": -440.3455505371094, + "loss": 0.4497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5678070187568665, + "rewards/margins": 0.4147125482559204, + "rewards/rejected": 0.15309448540210724, + "step": 2076 + }, + { + "epoch": 0.12, + "learning_rate": 9.785145997321337e-08, + "logits/chosen": -2.023322105407715, + "logits/rejected": -2.02767014503479, + "logps/chosen": -23.52469825744629, + "logps/rejected": -167.14462280273438, + "loss": 0.5842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07434596866369247, + "rewards/margins": 0.48418790102005005, + "rewards/rejected": -0.409841924905777, + "step": 2077 + }, + { + "epoch": 0.12, + "learning_rate": 9.784872623573244e-08, + "logits/chosen": -2.2344963550567627, + "logits/rejected": -2.227166175842285, + "logps/chosen": -13.402501106262207, + "logps/rejected": -206.12509155273438, + "loss": 0.5733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02508230321109295, + "rewards/margins": 0.5220305323600769, + "rewards/rejected": -0.4969482421875, + "step": 2078 + }, + { + "epoch": 0.12, + "learning_rate": 9.78459907984307e-08, + "logits/chosen": -1.9390431642532349, + "logits/rejected": -1.9329674243927002, + "logps/chosen": -14.000093460083008, + "logps/rejected": -152.02725219726562, + "loss": 0.5265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025001144036650658, + "rewards/margins": 0.7962719202041626, + "rewards/rejected": -0.771270751953125, + "step": 2079 + }, + { + "epoch": 0.12, + "learning_rate": 9.784325366140534e-08, + "logits/chosen": -2.1147868633270264, + "logits/rejected": -2.1144721508026123, + "logps/chosen": -230.41770935058594, + "logps/rejected": -285.33502197265625, + "loss": 0.4738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7610641717910767, + "rewards/margins": 0.15889132022857666, + "rewards/rejected": 0.6021728515625, + "step": 2080 + }, + { + "epoch": 0.12, + "learning_rate": 9.784051482475358e-08, + "logits/chosen": -2.2428081035614014, + "logits/rejected": -2.232318639755249, + "logps/chosen": -0.000138036921271123, + "logps/rejected": -120.86064910888672, + "loss": 0.6213, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0992814004093816e-07, + "rewards/margins": 0.3133503794670105, + "rewards/rejected": -0.3133506774902344, + "step": 2081 + }, + { + "epoch": 0.12, + "learning_rate": 9.783777428857271e-08, + "logits/chosen": -2.1792714595794678, + "logits/rejected": -2.175119400024414, + "logps/chosen": -2.2209362983703613, + "logps/rejected": -131.73512268066406, + "loss": 0.6761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008900857530534267, + "rewards/margins": 0.08999522030353546, + "rewards/rejected": -0.08109436184167862, + "step": 2082 + }, + { + "epoch": 0.12, + "learning_rate": 9.78350320529601e-08, + "logits/chosen": -2.0294885635375977, + "logits/rejected": -2.0309088230133057, + "logps/chosen": -34.35188293457031, + "logps/rejected": -177.60824584960938, + "loss": 0.6314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06548309326171875, + "rewards/margins": 0.364410400390625, + "rewards/rejected": -0.42989349365234375, + "step": 2083 + }, + { + "epoch": 0.12, + "learning_rate": 9.783228811801317e-08, + "logits/chosen": -2.079258918762207, + "logits/rejected": -1.9915084838867188, + "logps/chosen": -184.42938232421875, + "logps/rejected": -357.0311279296875, + "loss": 0.5839, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.590496838092804, + "rewards/margins": -0.23025816679000854, + "rewards/rejected": 0.8207550048828125, + "step": 2084 + }, + { + "epoch": 0.12, + "learning_rate": 9.782954248382938e-08, + "logits/chosen": -1.9844169616699219, + "logits/rejected": -1.900333285331726, + "logps/chosen": -339.3206481933594, + "logps/rejected": -422.0116271972656, + "loss": 0.3703, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.029199242591858, + "rewards/margins": 0.500775158405304, + "rewards/rejected": 0.528424084186554, + "step": 2085 + }, + { + "epoch": 0.12, + "learning_rate": 9.78267951505063e-08, + "logits/chosen": -2.123389482498169, + "logits/rejected": -2.0842080116271973, + "logps/chosen": -259.7441101074219, + "logps/rejected": -423.537353515625, + "loss": 0.4857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7011657953262329, + "rewards/margins": 0.10178834199905396, + "rewards/rejected": 0.599377453327179, + "step": 2086 + }, + { + "epoch": 0.12, + "learning_rate": 9.78240461181415e-08, + "logits/chosen": -1.923097014427185, + "logits/rejected": -1.8719687461853027, + "logps/chosen": -170.07464599609375, + "logps/rejected": -295.11981201171875, + "loss": 0.5572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5255951285362244, + "rewards/margins": 0.024960339069366455, + "rewards/rejected": 0.5006347894668579, + "step": 2087 + }, + { + "epoch": 0.12, + "learning_rate": 9.782129538683265e-08, + "logits/chosen": -2.0253002643585205, + "logits/rejected": -2.0002355575561523, + "logps/chosen": -206.5802001953125, + "logps/rejected": -269.418701171875, + "loss": 0.5042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6890320181846619, + "rewards/margins": 0.14364933967590332, + "rewards/rejected": 0.5453826785087585, + "step": 2088 + }, + { + "epoch": 0.12, + "learning_rate": 9.781854295667746e-08, + "logits/chosen": -2.1403751373291016, + "logits/rejected": -2.174339532852173, + "logps/chosen": -248.826904296875, + "logps/rejected": -217.09588623046875, + "loss": 0.5024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5319747924804688, + "rewards/margins": 0.2367660403251648, + "rewards/rejected": 0.29520875215530396, + "step": 2089 + }, + { + "epoch": 0.12, + "learning_rate": 9.781578882777373e-08, + "logits/chosen": -2.3085453510284424, + "logits/rejected": -2.2860538959503174, + "logps/chosen": -76.8016357421875, + "logps/rejected": -258.0207214355469, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07334060966968536, + "rewards/margins": 0.8367042541503906, + "rewards/rejected": -0.9100448489189148, + "step": 2090 + }, + { + "epoch": 0.12, + "learning_rate": 9.781303300021929e-08, + "logits/chosen": -1.8968852758407593, + "logits/rejected": -1.888363242149353, + "logps/chosen": -224.6105194091797, + "logps/rejected": -313.368408203125, + "loss": 0.615, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.608538806438446, + "rewards/margins": -0.2211700677871704, + "rewards/rejected": 0.8297088742256165, + "step": 2091 + }, + { + "epoch": 0.12, + "learning_rate": 9.781027547411203e-08, + "logits/chosen": -2.226095199584961, + "logits/rejected": -2.2182271480560303, + "logps/chosen": -2.238438606262207, + "logps/rejected": -111.2114486694336, + "loss": 0.5836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.053647805005311966, + "rewards/margins": 0.5680235028266907, + "rewards/rejected": -0.6216713190078735, + "step": 2092 + }, + { + "epoch": 0.12, + "learning_rate": 9.780751624954992e-08, + "logits/chosen": -2.0942349433898926, + "logits/rejected": -2.0940301418304443, + "logps/chosen": -28.581514358520508, + "logps/rejected": -212.7474822998047, + "loss": 0.5448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12409725040197372, + "rewards/margins": 0.4440237283706665, + "rewards/rejected": -0.3199264705181122, + "step": 2093 + }, + { + "epoch": 0.12, + "learning_rate": 9.7804755326631e-08, + "logits/chosen": -2.2097136974334717, + "logits/rejected": -2.1904749870300293, + "logps/chosen": -8.181347846984863, + "logps/rejected": -169.05404663085938, + "loss": 0.6371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0038260461296886206, + "rewards/margins": 0.21036292612552643, + "rewards/rejected": -0.20653687417507172, + "step": 2094 + }, + { + "epoch": 0.12, + "learning_rate": 9.780199270545331e-08, + "logits/chosen": -2.2836949825286865, + "logits/rejected": -2.272749900817871, + "logps/chosen": -4.494148015510291e-05, + "logps/rejected": -181.52841186523438, + "loss": 0.5918, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.549702659531249e-07, + "rewards/margins": 0.4614393711090088, + "rewards/rejected": -0.4614395201206207, + "step": 2095 + }, + { + "epoch": 0.12, + "learning_rate": 9.779922838611503e-08, + "logits/chosen": -2.1507070064544678, + "logits/rejected": -2.1450917720794678, + "logps/chosen": -63.118309020996094, + "logps/rejected": -218.70477294921875, + "loss": 0.7047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3199501037597656, + "rewards/margins": 0.3312187194824219, + "rewards/rejected": -0.6511688232421875, + "step": 2096 + }, + { + "epoch": 0.12, + "learning_rate": 9.779646236871433e-08, + "logits/chosen": -2.1272246837615967, + "logits/rejected": -2.1371312141418457, + "logps/chosen": -217.83929443359375, + "logps/rejected": -327.53973388671875, + "loss": 0.4175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9298431277275085, + "rewards/margins": 0.2847259044647217, + "rewards/rejected": 0.6451172232627869, + "step": 2097 + }, + { + "epoch": 0.12, + "learning_rate": 9.77936946533495e-08, + "logits/chosen": -1.9950904846191406, + "logits/rejected": -2.009631395339966, + "logps/chosen": -288.1058044433594, + "logps/rejected": -358.28363037109375, + "loss": 0.2476, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0288056135177612, + "rewards/margins": 1.238937497138977, + "rewards/rejected": -0.21013183891773224, + "step": 2098 + }, + { + "epoch": 0.12, + "learning_rate": 9.779092524011884e-08, + "logits/chosen": -2.008007287979126, + "logits/rejected": -2.0077476501464844, + "logps/chosen": -314.68951416015625, + "logps/rejected": -485.7456359863281, + "loss": 0.4502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.008758544921875, + "rewards/margins": 0.13122254610061646, + "rewards/rejected": 0.8775359988212585, + "step": 2099 + }, + { + "epoch": 0.12, + "learning_rate": 9.778815412912077e-08, + "logits/chosen": -2.2165944576263428, + "logits/rejected": -2.206139326095581, + "logps/chosen": -36.01591491699219, + "logps/rejected": -247.98297119140625, + "loss": 0.6549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24059104919433594, + "rewards/margins": 0.4230533838272095, + "rewards/rejected": -0.6636444330215454, + "step": 2100 + }, + { + "epoch": 0.12, + "learning_rate": 9.778538132045369e-08, + "logits/chosen": -2.04565691947937, + "logits/rejected": -2.0372653007507324, + "logps/chosen": -206.51400756835938, + "logps/rejected": -287.1827392578125, + "loss": 0.4337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7561203241348267, + "rewards/margins": 0.3181656002998352, + "rewards/rejected": 0.43795472383499146, + "step": 2101 + }, + { + "epoch": 0.12, + "learning_rate": 9.778260681421613e-08, + "logits/chosen": -2.1803650856018066, + "logits/rejected": -2.1542398929595947, + "logps/chosen": -41.380126953125, + "logps/rejected": -243.47311401367188, + "loss": 0.5972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08236580342054367, + "rewards/margins": 0.309836208820343, + "rewards/rejected": -0.22747039794921875, + "step": 2102 + }, + { + "epoch": 0.12, + "learning_rate": 9.777983061050663e-08, + "logits/chosen": -2.080061197280884, + "logits/rejected": -2.0319089889526367, + "logps/chosen": -154.90985107421875, + "logps/rejected": -284.58038330078125, + "loss": 0.4838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5821442008018494, + "rewards/margins": 0.3394562005996704, + "rewards/rejected": 0.24268798530101776, + "step": 2103 + }, + { + "epoch": 0.12, + "learning_rate": 9.777705270942385e-08, + "logits/chosen": -2.2185425758361816, + "logits/rejected": -2.1885344982147217, + "logps/chosen": -3.5677857398986816, + "logps/rejected": -260.7847900390625, + "loss": 0.5103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010446811094880104, + "rewards/margins": 0.92698734998703, + "rewards/rejected": -0.9165405631065369, + "step": 2104 + }, + { + "epoch": 0.12, + "learning_rate": 9.777427311106645e-08, + "logits/chosen": -2.205876588821411, + "logits/rejected": -2.2167751789093018, + "logps/chosen": -172.25546264648438, + "logps/rejected": -348.83758544921875, + "loss": 0.4357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8175994753837585, + "rewards/margins": 0.2794158458709717, + "rewards/rejected": 0.5381836295127869, + "step": 2105 + }, + { + "epoch": 0.12, + "learning_rate": 9.777149181553317e-08, + "logits/chosen": -2.079103946685791, + "logits/rejected": -2.108635425567627, + "logps/chosen": -215.4017791748047, + "logps/rejected": -230.99368286132812, + "loss": 0.4061, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0503982305526733, + "rewards/margins": 0.2583877444267273, + "rewards/rejected": 0.792010486125946, + "step": 2106 + }, + { + "epoch": 0.12, + "learning_rate": 9.776870882292284e-08, + "logits/chosen": -2.0338680744171143, + "logits/rejected": -2.0054686069488525, + "logps/chosen": -172.947021484375, + "logps/rejected": -361.62445068359375, + "loss": 0.4458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5918914675712585, + "rewards/margins": 0.3211822211742401, + "rewards/rejected": 0.27070924639701843, + "step": 2107 + }, + { + "epoch": 0.12, + "learning_rate": 9.77659241333343e-08, + "logits/chosen": -2.0738706588745117, + "logits/rejected": -2.0420124530792236, + "logps/chosen": -181.50326538085938, + "logps/rejected": -403.79412841796875, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8598602414131165, + "rewards/margins": 1.1597747802734375, + "rewards/rejected": -0.29991456866264343, + "step": 2108 + }, + { + "epoch": 0.12, + "learning_rate": 9.776313774686649e-08, + "logits/chosen": -2.0622382164001465, + "logits/rejected": -2.0497233867645264, + "logps/chosen": -74.5934829711914, + "logps/rejected": -195.0906982421875, + "loss": 0.6962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2028953582048416, + "rewards/margins": 0.14626236259937286, + "rewards/rejected": -0.3491577208042145, + "step": 2109 + }, + { + "epoch": 0.12, + "learning_rate": 9.776034966361838e-08, + "logits/chosen": -2.086683750152588, + "logits/rejected": -2.0325872898101807, + "logps/chosen": -308.0230407714844, + "logps/rejected": -436.1051330566406, + "loss": 0.5487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16764222085475922, + "rewards/margins": 0.486053466796875, + "rewards/rejected": -0.318411260843277, + "step": 2110 + }, + { + "epoch": 0.12, + "learning_rate": 9.775755988368904e-08, + "logits/chosen": -2.0655548572540283, + "logits/rejected": -2.0667836666107178, + "logps/chosen": -1.8166546821594238, + "logps/rejected": -76.0759506225586, + "loss": 0.6872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08499877899885178, + "rewards/margins": 0.11821853369474411, + "rewards/rejected": -0.2032173126935959, + "step": 2111 + }, + { + "epoch": 0.12, + "learning_rate": 9.775476840717756e-08, + "logits/chosen": -2.2375736236572266, + "logits/rejected": -2.2335219383239746, + "logps/chosen": -219.2767333984375, + "logps/rejected": -361.5265197753906, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0645798444747925, + "rewards/margins": 0.47334450483322144, + "rewards/rejected": 0.591235339641571, + "step": 2112 + }, + { + "epoch": 0.12, + "learning_rate": 9.775197523418313e-08, + "logits/chosen": -2.2132203578948975, + "logits/rejected": -2.068208694458008, + "logps/chosen": -0.2527834177017212, + "logps/rejected": -358.64215087890625, + "loss": 0.4292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013499963097274303, + "rewards/margins": 1.6950674057006836, + "rewards/rejected": -1.6815674304962158, + "step": 2113 + }, + { + "epoch": 0.12, + "learning_rate": 9.774918036480494e-08, + "logits/chosen": -1.9991952180862427, + "logits/rejected": -1.9614441394805908, + "logps/chosen": -242.06796264648438, + "logps/rejected": -453.68231201171875, + "loss": 0.2539, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.030329942703247, + "rewards/margins": 1.1432113647460938, + "rewards/rejected": -0.11288147419691086, + "step": 2114 + }, + { + "epoch": 0.12, + "learning_rate": 9.774638379914231e-08, + "logits/chosen": -2.056497573852539, + "logits/rejected": -2.064976215362549, + "logps/chosen": -101.5156478881836, + "logps/rejected": -142.0581512451172, + "loss": 0.6822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016872406005859375, + "rewards/margins": 0.060018159449100494, + "rewards/rejected": -0.07689056545495987, + "step": 2115 + }, + { + "epoch": 0.12, + "learning_rate": 9.774358553729455e-08, + "logits/chosen": -1.9840691089630127, + "logits/rejected": -1.9841299057006836, + "logps/chosen": -31.742191314697266, + "logps/rejected": -197.10751342773438, + "loss": 0.6672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026835251599550247, + "rewards/margins": 0.0816722884774208, + "rewards/rejected": -0.05483703687787056, + "step": 2116 + }, + { + "epoch": 0.12, + "learning_rate": 9.774078557936112e-08, + "logits/chosen": -2.2519078254699707, + "logits/rejected": -2.25886607170105, + "logps/chosen": -5.655052661895752, + "logps/rejected": -55.879886627197266, + "loss": 0.6984, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02459707297384739, + "rewards/margins": -0.007916545495390892, + "rewards/rejected": -0.016680527478456497, + "step": 2117 + }, + { + "epoch": 0.12, + "learning_rate": 9.773798392544145e-08, + "logits/chosen": -2.124608039855957, + "logits/rejected": -2.1170260906219482, + "logps/chosen": -29.42229461669922, + "logps/rejected": -140.84768676757812, + "loss": 0.507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19897805154323578, + "rewards/margins": 0.5493869781494141, + "rewards/rejected": -0.3504089415073395, + "step": 2118 + }, + { + "epoch": 0.12, + "learning_rate": 9.773518057563508e-08, + "logits/chosen": -1.9698717594146729, + "logits/rejected": -1.9418622255325317, + "logps/chosen": -257.9691162109375, + "logps/rejected": -284.14996337890625, + "loss": 0.413, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1010010242462158, + "rewards/margins": 0.2491699457168579, + "rewards/rejected": 0.8518310785293579, + "step": 2119 + }, + { + "epoch": 0.12, + "learning_rate": 9.77323755300416e-08, + "logits/chosen": -2.1233885288238525, + "logits/rejected": -2.1500024795532227, + "logps/chosen": -184.6214141845703, + "logps/rejected": -366.9576721191406, + "loss": 0.4342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5093917846679688, + "rewards/margins": 0.6045120358467102, + "rewards/rejected": -0.09512024372816086, + "step": 2120 + }, + { + "epoch": 0.12, + "learning_rate": 9.772956878876066e-08, + "logits/chosen": -2.058605670928955, + "logits/rejected": -2.056412935256958, + "logps/chosen": -14.432124137878418, + "logps/rejected": -184.67889404296875, + "loss": 0.6475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010157966986298561, + "rewards/margins": 0.22055073082447052, + "rewards/rejected": -0.2103927582502365, + "step": 2121 + }, + { + "epoch": 0.12, + "learning_rate": 9.772676035189195e-08, + "logits/chosen": -2.145698070526123, + "logits/rejected": -2.136547803878784, + "logps/chosen": -33.10337448120117, + "logps/rejected": -262.4738464355469, + "loss": 0.5214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21629944443702698, + "rewards/margins": 0.494293212890625, + "rewards/rejected": -0.277993768453598, + "step": 2122 + }, + { + "epoch": 0.12, + "learning_rate": 9.772395021953528e-08, + "logits/chosen": -2.2092368602752686, + "logits/rejected": -2.2038657665252686, + "logps/chosen": -27.505441665649414, + "logps/rejected": -159.8099365234375, + "loss": 0.6214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009736252017319202, + "rewards/margins": 0.2840953767299652, + "rewards/rejected": -0.2743591368198395, + "step": 2123 + }, + { + "epoch": 0.12, + "learning_rate": 9.772113839179043e-08, + "logits/chosen": -1.9690147638320923, + "logits/rejected": -1.957385778427124, + "logps/chosen": -286.442626953125, + "logps/rejected": -392.902099609375, + "loss": 0.3854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0068024396896362, + "rewards/margins": 0.39839786291122437, + "rewards/rejected": 0.6084045767784119, + "step": 2124 + }, + { + "epoch": 0.12, + "learning_rate": 9.771832486875733e-08, + "logits/chosen": -2.1830267906188965, + "logits/rejected": -2.1592986583709717, + "logps/chosen": -104.06592559814453, + "logps/rejected": -274.60662841796875, + "loss": 0.4221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1356498748064041, + "rewards/margins": 1.2980095148086548, + "rewards/rejected": -1.162359595298767, + "step": 2125 + }, + { + "epoch": 0.12, + "learning_rate": 9.771550965053592e-08, + "logits/chosen": -1.9101711511611938, + "logits/rejected": -1.865282416343689, + "logps/chosen": -150.69468688964844, + "logps/rejected": -285.67254638671875, + "loss": 0.5587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3065231442451477, + "rewards/margins": 0.2235153317451477, + "rewards/rejected": 0.0830078125, + "step": 2126 + }, + { + "epoch": 0.12, + "learning_rate": 9.77126927372262e-08, + "logits/chosen": -2.3661017417907715, + "logits/rejected": -2.3500146865844727, + "logps/chosen": -18.89094352722168, + "logps/rejected": -203.6276397705078, + "loss": 0.507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021491050720214844, + "rewards/margins": 0.9104803204536438, + "rewards/rejected": -0.888989269733429, + "step": 2127 + }, + { + "epoch": 0.12, + "learning_rate": 9.770987412892823e-08, + "logits/chosen": -2.1685190200805664, + "logits/rejected": -2.1568210124969482, + "logps/chosen": -42.615638732910156, + "logps/rejected": -229.07794189453125, + "loss": 0.4529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3530406951904297, + "rewards/margins": 0.788712739944458, + "rewards/rejected": -0.43567201495170593, + "step": 2128 + }, + { + "epoch": 0.12, + "learning_rate": 9.770705382574217e-08, + "logits/chosen": -2.1257002353668213, + "logits/rejected": -2.1071953773498535, + "logps/chosen": -61.809356689453125, + "logps/rejected": -278.8100891113281, + "loss": 0.432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23047637939453125, + "rewards/margins": 1.047023057937622, + "rewards/rejected": -0.816546618938446, + "step": 2129 + }, + { + "epoch": 0.12, + "learning_rate": 9.770423182776822e-08, + "logits/chosen": -2.169273614883423, + "logits/rejected": -2.127624988555908, + "logps/chosen": -246.99269104003906, + "logps/rejected": -447.72454833984375, + "loss": 0.3617, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1208969354629517, + "rewards/margins": 0.48227691650390625, + "rewards/rejected": 0.6386200189590454, + "step": 2130 + }, + { + "epoch": 0.12, + "learning_rate": 9.770140813510657e-08, + "logits/chosen": -2.105844259262085, + "logits/rejected": -2.115722417831421, + "logps/chosen": -13.690221786499023, + "logps/rejected": -90.6331787109375, + "loss": 0.726, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04364175722002983, + "rewards/margins": -0.14791250228881836, + "rewards/rejected": 0.1915542632341385, + "step": 2131 + }, + { + "epoch": 0.12, + "learning_rate": 9.769858274785759e-08, + "logits/chosen": -2.20526385307312, + "logits/rejected": -2.1990528106689453, + "logps/chosen": -26.41240692138672, + "logps/rejected": -141.95050048828125, + "loss": 0.6601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06278210133314133, + "rewards/margins": 0.16200390458106995, + "rewards/rejected": -0.09922180324792862, + "step": 2132 + }, + { + "epoch": 0.12, + "learning_rate": 9.769575566612163e-08, + "logits/chosen": -1.7834819555282593, + "logits/rejected": -1.7405232191085815, + "logps/chosen": -338.7845458984375, + "logps/rejected": -403.2013244628906, + "loss": 0.5181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3559814393520355, + "rewards/margins": 0.3472961485385895, + "rewards/rejected": 0.008685302920639515, + "step": 2133 + }, + { + "epoch": 0.12, + "learning_rate": 9.769292688999913e-08, + "logits/chosen": -1.9323958158493042, + "logits/rejected": -1.9132142066955566, + "logps/chosen": -336.39166259765625, + "logps/rejected": -494.75970458984375, + "loss": 0.359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8539520502090454, + "rewards/margins": 0.7142913937568665, + "rewards/rejected": 0.13966064155101776, + "step": 2134 + }, + { + "epoch": 0.12, + "learning_rate": 9.769009641959056e-08, + "logits/chosen": -2.2161121368408203, + "logits/rejected": -2.213411331176758, + "logps/chosen": -7.746399402618408, + "logps/rejected": -190.31707763671875, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04991583898663521, + "rewards/margins": 0.7304506897926331, + "rewards/rejected": -0.7803665399551392, + "step": 2135 + }, + { + "epoch": 0.12, + "learning_rate": 9.768726425499649e-08, + "logits/chosen": -2.169193983078003, + "logits/rejected": -2.1512436866760254, + "logps/chosen": -46.58369445800781, + "logps/rejected": -139.8415985107422, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03490447998046875, + "rewards/margins": 0.583782970905304, + "rewards/rejected": -0.6186874508857727, + "step": 2136 + }, + { + "epoch": 0.12, + "learning_rate": 9.768443039631755e-08, + "logits/chosen": -2.01175856590271, + "logits/rejected": -2.0047237873077393, + "logps/chosen": -39.05773162841797, + "logps/rejected": -172.69964599609375, + "loss": 0.5677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024851227179169655, + "rewards/margins": 0.5557655096054077, + "rewards/rejected": -0.530914306640625, + "step": 2137 + }, + { + "epoch": 0.12, + "learning_rate": 9.768159484365437e-08, + "logits/chosen": -2.151832342147827, + "logits/rejected": -2.1510331630706787, + "logps/chosen": -21.88257598876953, + "logps/rejected": -52.64448547363281, + "loss": 0.6749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03898487240076065, + "rewards/margins": 0.12001170963048935, + "rewards/rejected": -0.15899658203125, + "step": 2138 + }, + { + "epoch": 0.12, + "learning_rate": 9.767875759710771e-08, + "logits/chosen": -2.2065794467926025, + "logits/rejected": -2.2041029930114746, + "logps/chosen": -38.45199966430664, + "logps/rejected": -47.83747863769531, + "loss": 0.746, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1562419980764389, + "rewards/margins": -0.11306916177272797, + "rewards/rejected": -0.04317283630371094, + "step": 2139 + }, + { + "epoch": 0.12, + "learning_rate": 9.767591865677836e-08, + "logits/chosen": -1.9854683876037598, + "logits/rejected": -1.9807870388031006, + "logps/chosen": -0.6099408864974976, + "logps/rejected": -45.097328186035156, + "loss": 0.7137, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.049398023635149, + "rewards/margins": -0.05394819378852844, + "rewards/rejected": 0.004550171084702015, + "step": 2140 + }, + { + "epoch": 0.12, + "learning_rate": 9.767307802276718e-08, + "logits/chosen": -2.078687906265259, + "logits/rejected": -2.0281155109405518, + "logps/chosen": -229.46768188476562, + "logps/rejected": -434.645751953125, + "loss": 0.3971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.999804675579071, + "rewards/margins": 0.3266357183456421, + "rewards/rejected": 0.673168957233429, + "step": 2141 + }, + { + "epoch": 0.12, + "learning_rate": 9.767023569517507e-08, + "logits/chosen": -2.24686336517334, + "logits/rejected": -2.2098515033721924, + "logps/chosen": -229.1636199951172, + "logps/rejected": -361.8404235839844, + "loss": 0.5113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4978195130825043, + "rewards/margins": 0.3133743405342102, + "rewards/rejected": 0.18444518744945526, + "step": 2142 + }, + { + "epoch": 0.12, + "learning_rate": 9.7667391674103e-08, + "logits/chosen": -1.9466116428375244, + "logits/rejected": -1.8636367321014404, + "logps/chosen": -404.47906494140625, + "logps/rejected": -627.080322265625, + "loss": 0.4007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.538464367389679, + "rewards/margins": 0.7584472894668579, + "rewards/rejected": -0.21998290717601776, + "step": 2143 + }, + { + "epoch": 0.12, + "learning_rate": 9.766454595965202e-08, + "logits/chosen": -1.8730357885360718, + "logits/rejected": -1.862385869026184, + "logps/chosen": -272.28057861328125, + "logps/rejected": -424.3254089355469, + "loss": 0.561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1932830810546875, + "rewards/margins": 0.23701171576976776, + "rewards/rejected": -0.04372863844037056, + "step": 2144 + }, + { + "epoch": 0.12, + "learning_rate": 9.766169855192321e-08, + "logits/chosen": -2.1151936054229736, + "logits/rejected": -2.1270859241485596, + "logps/chosen": -257.5657043457031, + "logps/rejected": -293.77227783203125, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9610199332237244, + "rewards/margins": 0.30034488439559937, + "rewards/rejected": 0.660675048828125, + "step": 2145 + }, + { + "epoch": 0.12, + "learning_rate": 9.765884945101774e-08, + "logits/chosen": -2.1743366718292236, + "logits/rejected": -2.164290428161621, + "logps/chosen": -39.84049987792969, + "logps/rejected": -161.53689575195312, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015153122134506702, + "rewards/margins": 0.021135330200195312, + "rewards/rejected": -0.03628845140337944, + "step": 2146 + }, + { + "epoch": 0.12, + "learning_rate": 9.76559986570368e-08, + "logits/chosen": -2.1036713123321533, + "logits/rejected": -2.091892957687378, + "logps/chosen": -18.7342529296875, + "logps/rejected": -119.36351776123047, + "loss": 0.5921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06301917880773544, + "rewards/margins": 0.545170247554779, + "rewards/rejected": -0.6081894040107727, + "step": 2147 + }, + { + "epoch": 0.12, + "learning_rate": 9.765314617008168e-08, + "logits/chosen": -2.0037479400634766, + "logits/rejected": -1.9551734924316406, + "logps/chosen": -200.10693359375, + "logps/rejected": -274.7104797363281, + "loss": 0.5771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3169616758823395, + "rewards/margins": 0.15606994926929474, + "rewards/rejected": 0.16089172661304474, + "step": 2148 + }, + { + "epoch": 0.13, + "learning_rate": 9.765029199025371e-08, + "logits/chosen": -2.2346932888031006, + "logits/rejected": -2.2312872409820557, + "logps/chosen": -52.185577392578125, + "logps/rejected": -140.6807861328125, + "loss": 0.6474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031074142083525658, + "rewards/margins": 0.22237052023410797, + "rewards/rejected": -0.19129638373851776, + "step": 2149 + }, + { + "epoch": 0.13, + "learning_rate": 9.764743611765429e-08, + "logits/chosen": -2.029775857925415, + "logits/rejected": -2.0265860557556152, + "logps/chosen": -24.06814956665039, + "logps/rejected": -120.79092407226562, + "loss": 0.6642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037381362169981, + "rewards/margins": 0.08340950310230255, + "rewards/rejected": -0.04602813720703125, + "step": 2150 + }, + { + "epoch": 0.13, + "learning_rate": 9.764457855238486e-08, + "logits/chosen": -2.093843460083008, + "logits/rejected": -2.0936026573181152, + "logps/chosen": -12.462553024291992, + "logps/rejected": -88.69773864746094, + "loss": 0.6272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06951236724853516, + "rewards/margins": 0.17709597945213318, + "rewards/rejected": -0.10758361965417862, + "step": 2151 + }, + { + "epoch": 0.13, + "learning_rate": 9.764171929454695e-08, + "logits/chosen": -2.1818978786468506, + "logits/rejected": -2.1760306358337402, + "logps/chosen": -47.63710021972656, + "logps/rejected": -354.58831787109375, + "loss": 0.5324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09881172329187393, + "rewards/margins": 0.5871357321739197, + "rewards/rejected": -0.48832398653030396, + "step": 2152 + }, + { + "epoch": 0.13, + "learning_rate": 9.763885834424214e-08, + "logits/chosen": -2.2120814323425293, + "logits/rejected": -2.1888058185577393, + "logps/chosen": -164.7864227294922, + "logps/rejected": -210.21847534179688, + "loss": 0.6502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13754425942897797, + "rewards/margins": 0.028218083083629608, + "rewards/rejected": 0.10932617634534836, + "step": 2153 + }, + { + "epoch": 0.13, + "learning_rate": 9.763599570157204e-08, + "logits/chosen": -1.9649361371994019, + "logits/rejected": -1.948520541191101, + "logps/chosen": -47.1224479675293, + "logps/rejected": -146.46929931640625, + "loss": 0.6327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04016990587115288, + "rewards/margins": 0.26657602190971375, + "rewards/rejected": -0.3067459166049957, + "step": 2154 + }, + { + "epoch": 0.13, + "learning_rate": 9.763313136663838e-08, + "logits/chosen": -2.09970760345459, + "logits/rejected": -2.1094741821289062, + "logps/chosen": -73.25421905517578, + "logps/rejected": -135.05894470214844, + "loss": 0.6488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048462677747011185, + "rewards/margins": 0.23752976953983307, + "rewards/rejected": -0.28599244356155396, + "step": 2155 + }, + { + "epoch": 0.13, + "learning_rate": 9.763026533954287e-08, + "logits/chosen": -2.2808306217193604, + "logits/rejected": -2.2733728885650635, + "logps/chosen": -143.0941925048828, + "logps/rejected": -287.0181579589844, + "loss": 0.5042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5329940915107727, + "rewards/margins": 0.16410064697265625, + "rewards/rejected": 0.36889344453811646, + "step": 2156 + }, + { + "epoch": 0.13, + "learning_rate": 9.762739762038737e-08, + "logits/chosen": -2.182251453399658, + "logits/rejected": -2.182345390319824, + "logps/chosen": -18.17007827758789, + "logps/rejected": -69.37516021728516, + "loss": 0.6599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011754989624023438, + "rewards/margins": 0.0626903548836708, + "rewards/rejected": -0.07444534450769424, + "step": 2157 + }, + { + "epoch": 0.13, + "learning_rate": 9.762452820927373e-08, + "logits/chosen": -2.2969696521759033, + "logits/rejected": -2.2896618843078613, + "logps/chosen": -6.139215838629752e-05, + "logps/rejected": -191.16836547851562, + "loss": 0.4756, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.741780917858705e-07, + "rewards/margins": 1.1947599649429321, + "rewards/rejected": -1.1947602033615112, + "step": 2158 + }, + { + "epoch": 0.13, + "learning_rate": 9.762165710630388e-08, + "logits/chosen": -1.8767890930175781, + "logits/rejected": -1.8768283128738403, + "logps/chosen": -179.04779052734375, + "logps/rejected": -294.707763671875, + "loss": 0.571, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5752105712890625, + "rewards/margins": -0.13227540254592896, + "rewards/rejected": 0.7074859738349915, + "step": 2159 + }, + { + "epoch": 0.13, + "learning_rate": 9.761878431157983e-08, + "logits/chosen": -2.1970748901367188, + "logits/rejected": -2.193162441253662, + "logps/chosen": -2.1174416542053223, + "logps/rejected": -63.16628646850586, + "loss": 0.6971, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.010473060421645641, + "rewards/margins": -0.02574291080236435, + "rewards/rejected": 0.036215972155332565, + "step": 2160 + }, + { + "epoch": 0.13, + "learning_rate": 9.761590982520364e-08, + "logits/chosen": -2.2448770999908447, + "logits/rejected": -2.194247007369995, + "logps/chosen": -185.03634643554688, + "logps/rejected": -452.0960388183594, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7258056998252869, + "rewards/margins": 0.7942230701446533, + "rewards/rejected": -0.06841736286878586, + "step": 2161 + }, + { + "epoch": 0.13, + "learning_rate": 9.761303364727742e-08, + "logits/chosen": -2.158560037612915, + "logits/rejected": -2.1581130027770996, + "logps/chosen": -45.429534912109375, + "logps/rejected": -112.1322021484375, + "loss": 0.6866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13502274453639984, + "rewards/margins": 0.1476287692785263, + "rewards/rejected": -0.28265151381492615, + "step": 2162 + }, + { + "epoch": 0.13, + "learning_rate": 9.761015577790335e-08, + "logits/chosen": -2.1029469966888428, + "logits/rejected": -2.108093500137329, + "logps/chosen": -194.29173278808594, + "logps/rejected": -367.3289794921875, + "loss": 0.4707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.658294677734375, + "rewards/margins": 0.30534055829048157, + "rewards/rejected": 0.35295411944389343, + "step": 2163 + }, + { + "epoch": 0.13, + "learning_rate": 9.760727621718366e-08, + "logits/chosen": -2.0576224327087402, + "logits/rejected": -2.0424132347106934, + "logps/chosen": -42.291831970214844, + "logps/rejected": -157.03211975097656, + "loss": 0.6062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03327636793255806, + "rewards/margins": 0.35360413789749146, + "rewards/rejected": -0.3203277587890625, + "step": 2164 + }, + { + "epoch": 0.13, + "learning_rate": 9.760439496522064e-08, + "logits/chosen": -2.162090539932251, + "logits/rejected": -2.0588531494140625, + "logps/chosen": -292.5924072265625, + "logps/rejected": -479.098876953125, + "loss": 0.2052, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.483435034751892, + "rewards/margins": 1.2263946533203125, + "rewards/rejected": 0.257040411233902, + "step": 2165 + }, + { + "epoch": 0.13, + "learning_rate": 9.760151202211665e-08, + "logits/chosen": -2.2480788230895996, + "logits/rejected": -2.2338054180145264, + "logps/chosen": -14.024493217468262, + "logps/rejected": -298.11407470703125, + "loss": 0.4671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027942944318056107, + "rewards/margins": 1.1463724374771118, + "rewards/rejected": -1.1184295415878296, + "step": 2166 + }, + { + "epoch": 0.13, + "learning_rate": 9.759862738797411e-08, + "logits/chosen": -2.0296177864074707, + "logits/rejected": -1.9802645444869995, + "logps/chosen": -194.10491943359375, + "logps/rejected": -269.45916748046875, + "loss": 0.5329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30991822481155396, + "rewards/margins": 0.29553529620170593, + "rewards/rejected": 0.014382935129106045, + "step": 2167 + }, + { + "epoch": 0.13, + "learning_rate": 9.759574106289549e-08, + "logits/chosen": -2.178300380706787, + "logits/rejected": -2.176612138748169, + "logps/chosen": -16.212810516357422, + "logps/rejected": -136.60736083984375, + "loss": 0.5722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09761982411146164, + "rewards/margins": 0.4003969430923462, + "rewards/rejected": -0.30277711153030396, + "step": 2168 + }, + { + "epoch": 0.13, + "learning_rate": 9.759285304698333e-08, + "logits/chosen": -2.2004709243774414, + "logits/rejected": -2.2103171348571777, + "logps/chosen": -125.97856903076172, + "logps/rejected": -201.52188110351562, + "loss": 0.5013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6431908011436462, + "rewards/margins": 0.04912036657333374, + "rewards/rejected": 0.5940704345703125, + "step": 2169 + }, + { + "epoch": 0.13, + "learning_rate": 9.758996334034025e-08, + "logits/chosen": -2.1997604370117188, + "logits/rejected": -2.169019937515259, + "logps/chosen": -61.616432189941406, + "logps/rejected": -265.38031005859375, + "loss": 0.4523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1295822113752365, + "rewards/margins": 1.042759656906128, + "rewards/rejected": -0.913177490234375, + "step": 2170 + }, + { + "epoch": 0.13, + "learning_rate": 9.758707194306886e-08, + "logits/chosen": -2.121931314468384, + "logits/rejected": -2.1045966148376465, + "logps/chosen": -46.218936920166016, + "logps/rejected": -128.5865936279297, + "loss": 0.5813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24070587754249573, + "rewards/margins": 0.211578369140625, + "rewards/rejected": 0.02912750281393528, + "step": 2171 + }, + { + "epoch": 0.13, + "learning_rate": 9.75841788552719e-08, + "logits/chosen": -2.134080171585083, + "logits/rejected": -2.142303705215454, + "logps/chosen": -22.36074447631836, + "logps/rejected": -58.986595153808594, + "loss": 0.6806, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0325469970703125, + "rewards/margins": -0.013603974133729935, + "rewards/rejected": 0.046150971204042435, + "step": 2172 + }, + { + "epoch": 0.13, + "learning_rate": 9.758128407705216e-08, + "logits/chosen": -2.2272205352783203, + "logits/rejected": -2.216078519821167, + "logps/chosen": -27.414844512939453, + "logps/rejected": -104.48908996582031, + "loss": 0.659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03211669996380806, + "rewards/margins": 0.0905914306640625, + "rewards/rejected": -0.05847473070025444, + "step": 2173 + }, + { + "epoch": 0.13, + "learning_rate": 9.757838760851247e-08, + "logits/chosen": -1.949357509613037, + "logits/rejected": -1.9291170835494995, + "logps/chosen": -325.7821044921875, + "logps/rejected": -458.972412109375, + "loss": 0.3571, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2338745594024658, + "rewards/margins": 0.3854004144668579, + "rewards/rejected": 0.8484741449356079, + "step": 2174 + }, + { + "epoch": 0.13, + "learning_rate": 9.757548944975571e-08, + "logits/chosen": -2.0710597038269043, + "logits/rejected": -2.0565545558929443, + "logps/chosen": -37.0538330078125, + "logps/rejected": -158.654541015625, + "loss": 0.5543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024700164794921875, + "rewards/margins": 0.6719657778739929, + "rewards/rejected": -0.6966659426689148, + "step": 2175 + }, + { + "epoch": 0.13, + "learning_rate": 9.757258960088484e-08, + "logits/chosen": -2.045055627822876, + "logits/rejected": -2.0496771335601807, + "logps/chosen": -4.141113758087158, + "logps/rejected": -111.46005249023438, + "loss": 0.5609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04937181621789932, + "rewards/margins": 0.5569180846214294, + "rewards/rejected": -0.5075462460517883, + "step": 2176 + }, + { + "epoch": 0.13, + "learning_rate": 9.75696880620029e-08, + "logits/chosen": -2.1554906368255615, + "logits/rejected": -2.12385892868042, + "logps/chosen": -139.02716064453125, + "logps/rejected": -330.55767822265625, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6588470339775085, + "rewards/margins": 0.6945831179618835, + "rewards/rejected": -0.035736083984375, + "step": 2177 + }, + { + "epoch": 0.13, + "learning_rate": 9.756678483321294e-08, + "logits/chosen": -2.009596824645996, + "logits/rejected": -2.0032951831817627, + "logps/chosen": -63.02417755126953, + "logps/rejected": -118.39015197753906, + "loss": 0.669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.055948641151189804, + "rewards/margins": 0.1558067351579666, + "rewards/rejected": -0.21175538003444672, + "step": 2178 + }, + { + "epoch": 0.13, + "learning_rate": 9.756387991461813e-08, + "logits/chosen": -2.1535773277282715, + "logits/rejected": -2.200967788696289, + "logps/chosen": -178.80816650390625, + "logps/rejected": -238.92129516601562, + "loss": 0.3599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6503036618232727, + "rewards/margins": 0.8096023797988892, + "rewards/rejected": -0.15929870307445526, + "step": 2179 + }, + { + "epoch": 0.13, + "learning_rate": 9.756097330632162e-08, + "logits/chosen": -2.193864583969116, + "logits/rejected": -2.203376531600952, + "logps/chosen": -0.00013219847460277379, + "logps/rejected": -126.78994750976562, + "loss": 0.5029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.740310835884884e-06, + "rewards/margins": 1.0024724006652832, + "rewards/rejected": -1.0024741888046265, + "step": 2180 + }, + { + "epoch": 0.13, + "learning_rate": 9.755806500842671e-08, + "logits/chosen": -1.981108546257019, + "logits/rejected": -1.9724456071853638, + "logps/chosen": -51.459774017333984, + "logps/rejected": -212.33139038085938, + "loss": 0.5748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025297928601503372, + "rewards/margins": 0.4861255884170532, + "rewards/rejected": -0.46082764863967896, + "step": 2181 + }, + { + "epoch": 0.13, + "learning_rate": 9.75551550210367e-08, + "logits/chosen": -2.170473098754883, + "logits/rejected": -2.0947697162628174, + "logps/chosen": -325.9017028808594, + "logps/rejected": -524.4468994140625, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5024170279502869, + "rewards/margins": 0.9311279654502869, + "rewards/rejected": -0.4287109375, + "step": 2182 + }, + { + "epoch": 0.13, + "learning_rate": 9.755224334425496e-08, + "logits/chosen": -2.1474523544311523, + "logits/rejected": -2.1438441276550293, + "logps/chosen": -0.00544316740706563, + "logps/rejected": -181.92945861816406, + "loss": 0.4929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016665831208229065, + "rewards/margins": 1.0622494220733643, + "rewards/rejected": -1.0624160766601562, + "step": 2183 + }, + { + "epoch": 0.13, + "learning_rate": 9.754932997818492e-08, + "logits/chosen": -2.071399450302124, + "logits/rejected": -2.0400314331054688, + "logps/chosen": -235.00637817382812, + "logps/rejected": -357.8589782714844, + "loss": 0.447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5650665163993835, + "rewards/margins": 0.560253918170929, + "rewards/rejected": 0.0048126219771802425, + "step": 2184 + }, + { + "epoch": 0.13, + "learning_rate": 9.754641492293012e-08, + "logits/chosen": -2.208073377609253, + "logits/rejected": -2.2122318744659424, + "logps/chosen": -27.27206039428711, + "logps/rejected": -159.5186309814453, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06619491428136826, + "rewards/margins": 0.5333579778671265, + "rewards/rejected": -0.4671630859375, + "step": 2185 + }, + { + "epoch": 0.13, + "learning_rate": 9.754349817859406e-08, + "logits/chosen": -2.0565147399902344, + "logits/rejected": -2.054011106491089, + "logps/chosen": -3.8861719076521695e-05, + "logps/rejected": -138.62319946289062, + "loss": 0.6252, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3112658336922323e-07, + "rewards/margins": 0.2933790981769562, + "rewards/rejected": -0.2933792173862457, + "step": 2186 + }, + { + "epoch": 0.13, + "learning_rate": 9.754057974528038e-08, + "logits/chosen": -2.1516366004943848, + "logits/rejected": -2.153447389602661, + "logps/chosen": -19.544721603393555, + "logps/rejected": -41.02499008178711, + "loss": 0.7107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06172695383429527, + "rewards/margins": 0.009006690233945847, + "rewards/rejected": -0.07073364406824112, + "step": 2187 + }, + { + "epoch": 0.13, + "learning_rate": 9.753765962309278e-08, + "logits/chosen": -2.134735584259033, + "logits/rejected": -2.136493444442749, + "logps/chosen": -21.724658966064453, + "logps/rejected": -21.01032829284668, + "loss": 0.7498, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1523231565952301, + "rewards/margins": -0.08663025498390198, + "rewards/rejected": -0.06569290161132812, + "step": 2188 + }, + { + "epoch": 0.13, + "learning_rate": 9.753473781213498e-08, + "logits/chosen": -2.0366790294647217, + "logits/rejected": -1.9611729383468628, + "logps/chosen": -186.41326904296875, + "logps/rejected": -276.27166748046875, + "loss": 0.4857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5922073721885681, + "rewards/margins": 0.3192337453365326, + "rewards/rejected": 0.2729736268520355, + "step": 2189 + }, + { + "epoch": 0.13, + "learning_rate": 9.753181431251076e-08, + "logits/chosen": -1.9772708415985107, + "logits/rejected": -1.8725022077560425, + "logps/chosen": -200.376708984375, + "logps/rejected": -334.190673828125, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8075928092002869, + "rewards/margins": 0.5311523675918579, + "rewards/rejected": 0.27644044160842896, + "step": 2190 + }, + { + "epoch": 0.13, + "learning_rate": 9.752888912432401e-08, + "logits/chosen": -2.0978524684906006, + "logits/rejected": -2.09482741355896, + "logps/chosen": -44.33042907714844, + "logps/rejected": -66.3814926147461, + "loss": 0.6782, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06602020561695099, + "rewards/margins": -0.05850142985582352, + "rewards/rejected": 0.1245216354727745, + "step": 2191 + }, + { + "epoch": 0.13, + "learning_rate": 9.752596224767862e-08, + "logits/chosen": -2.130748987197876, + "logits/rejected": -2.0994532108306885, + "logps/chosen": -154.104736328125, + "logps/rejected": -291.337890625, + "loss": 0.6391, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5077148675918579, + "rewards/margins": -0.38764649629592896, + "rewards/rejected": 0.8953613638877869, + "step": 2192 + }, + { + "epoch": 0.13, + "learning_rate": 9.752303368267858e-08, + "logits/chosen": -2.0110135078430176, + "logits/rejected": -1.984851360321045, + "logps/chosen": -333.73016357421875, + "logps/rejected": -482.5885925292969, + "loss": 0.3034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.90960693359375, + "rewards/margins": 0.9593719244003296, + "rewards/rejected": -0.04976501688361168, + "step": 2193 + }, + { + "epoch": 0.13, + "learning_rate": 9.752010342942791e-08, + "logits/chosen": -2.025620698928833, + "logits/rejected": -1.9788013696670532, + "logps/chosen": -214.56980895996094, + "logps/rejected": -353.16192626953125, + "loss": 0.5273, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7569748163223267, + "rewards/margins": -0.059376537799835205, + "rewards/rejected": 0.8163513541221619, + "step": 2194 + }, + { + "epoch": 0.13, + "learning_rate": 9.751717148803074e-08, + "logits/chosen": -2.1851325035095215, + "logits/rejected": -2.181244373321533, + "logps/chosen": -24.465635299682617, + "logps/rejected": -214.44735717773438, + "loss": 0.5655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0016256332164630294, + "rewards/margins": 0.5265570282936096, + "rewards/rejected": -0.5249313712120056, + "step": 2195 + }, + { + "epoch": 0.13, + "learning_rate": 9.75142378585912e-08, + "logits/chosen": -2.127580165863037, + "logits/rejected": -2.0828115940093994, + "logps/chosen": -164.45101928710938, + "logps/rejected": -496.23883056640625, + "loss": 0.3782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7044281363487244, + "rewards/margins": 0.5696685910224915, + "rewards/rejected": 0.13475953042507172, + "step": 2196 + }, + { + "epoch": 0.13, + "learning_rate": 9.751130254121352e-08, + "logits/chosen": -2.1845717430114746, + "logits/rejected": -2.1855528354644775, + "logps/chosen": -4.5576581954956055, + "logps/rejected": -34.30076217651367, + "loss": 0.6102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006699991412460804, + "rewards/margins": 0.35483649373054504, + "rewards/rejected": -0.3481365144252777, + "step": 2197 + }, + { + "epoch": 0.13, + "learning_rate": 9.750836553600195e-08, + "logits/chosen": -2.403796911239624, + "logits/rejected": -2.3937902450561523, + "logps/chosen": -34.48678207397461, + "logps/rejected": -108.79486083984375, + "loss": 0.6093, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10613518208265305, + "rewards/margins": 0.20224419236183167, + "rewards/rejected": -0.09610901027917862, + "step": 2198 + }, + { + "epoch": 0.13, + "learning_rate": 9.750542684306086e-08, + "logits/chosen": -2.0848639011383057, + "logits/rejected": -2.0740323066711426, + "logps/chosen": -235.2200927734375, + "logps/rejected": -324.9127197265625, + "loss": 0.4953, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9040588736534119, + "rewards/margins": -0.10993653535842896, + "rewards/rejected": 1.0139954090118408, + "step": 2199 + }, + { + "epoch": 0.13, + "learning_rate": 9.750248646249464e-08, + "logits/chosen": -1.9585729837417603, + "logits/rejected": -1.9593638181686401, + "logps/chosen": -275.5727844238281, + "logps/rejected": -483.6180419921875, + "loss": 0.4821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.531512439250946, + "rewards/margins": 0.15399473905563354, + "rewards/rejected": 0.3775177001953125, + "step": 2200 + }, + { + "epoch": 0.13, + "learning_rate": 9.749954439440773e-08, + "logits/chosen": -2.2589471340179443, + "logits/rejected": -2.2343342304229736, + "logps/chosen": -139.4348907470703, + "logps/rejected": -413.2684326171875, + "loss": 0.508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.155619814991951, + "rewards/margins": 0.5552597045898438, + "rewards/rejected": -0.39963990449905396, + "step": 2201 + }, + { + "epoch": 0.13, + "learning_rate": 9.749660063890466e-08, + "logits/chosen": -2.089776039123535, + "logits/rejected": -2.098808765411377, + "logps/chosen": -214.398193359375, + "logps/rejected": -314.03558349609375, + "loss": 0.4108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8101226687431335, + "rewards/margins": 0.3508453071117401, + "rewards/rejected": 0.45927736163139343, + "step": 2202 + }, + { + "epoch": 0.13, + "learning_rate": 9.749365519609001e-08, + "logits/chosen": -1.9009398221969604, + "logits/rejected": -1.8878002166748047, + "logps/chosen": -318.63616943359375, + "logps/rejected": -444.130615234375, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0065460205078125, + "rewards/margins": 1.3688385486602783, + "rewards/rejected": -0.36229249835014343, + "step": 2203 + }, + { + "epoch": 0.13, + "learning_rate": 9.74907080660684e-08, + "logits/chosen": -2.0647177696228027, + "logits/rejected": -2.0362093448638916, + "logps/chosen": -66.23135375976562, + "logps/rejected": -292.65435791015625, + "loss": 0.7668, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1944580078125, + "rewards/margins": -0.08166198432445526, + "rewards/rejected": -0.11279602348804474, + "step": 2204 + }, + { + "epoch": 0.13, + "learning_rate": 9.748775924894455e-08, + "logits/chosen": -2.0626189708709717, + "logits/rejected": -2.047330379486084, + "logps/chosen": -15.631034851074219, + "logps/rejected": -293.7705993652344, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009744644048623741, + "rewards/margins": 0.6879984140396118, + "rewards/rejected": -0.687023937702179, + "step": 2205 + }, + { + "epoch": 0.13, + "learning_rate": 9.748480874482321e-08, + "logits/chosen": -2.227214813232422, + "logits/rejected": -2.233128309249878, + "logps/chosen": -0.000557292194571346, + "logps/rejected": -267.63043212890625, + "loss": 0.4881, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.434937288053334e-06, + "rewards/margins": 1.1303820610046387, + "rewards/rejected": -1.1303894519805908, + "step": 2206 + }, + { + "epoch": 0.13, + "learning_rate": 9.748185655380917e-08, + "logits/chosen": -2.0961813926696777, + "logits/rejected": -2.0969839096069336, + "logps/chosen": -226.99578857421875, + "logps/rejected": -265.8751220703125, + "loss": 0.4258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8629913330078125, + "rewards/margins": 0.23906856775283813, + "rewards/rejected": 0.6239227652549744, + "step": 2207 + }, + { + "epoch": 0.13, + "learning_rate": 9.747890267600733e-08, + "logits/chosen": -2.200557231903076, + "logits/rejected": -2.1907424926757812, + "logps/chosen": -32.44148254394531, + "logps/rejected": -147.11622619628906, + "loss": 0.7112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1371173858642578, + "rewards/margins": 0.060970693826675415, + "rewards/rejected": -0.19808807969093323, + "step": 2208 + }, + { + "epoch": 0.13, + "learning_rate": 9.747594711152263e-08, + "logits/chosen": -2.145040988922119, + "logits/rejected": -2.039128303527832, + "logps/chosen": -191.5720672607422, + "logps/rejected": -423.48553466796875, + "loss": 0.4308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7006515860557556, + "rewards/margins": 0.37681734561920166, + "rewards/rejected": 0.32383424043655396, + "step": 2209 + }, + { + "epoch": 0.13, + "learning_rate": 9.747298986046006e-08, + "logits/chosen": -2.091374397277832, + "logits/rejected": -2.086638927459717, + "logps/chosen": -37.359249114990234, + "logps/rejected": -111.04109191894531, + "loss": 0.6177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11163749545812607, + "rewards/margins": 0.2210262268781662, + "rewards/rejected": -0.10938873142004013, + "step": 2210 + }, + { + "epoch": 0.13, + "learning_rate": 9.747003092292468e-08, + "logits/chosen": -2.1611437797546387, + "logits/rejected": -2.141038417816162, + "logps/chosen": -47.48814010620117, + "logps/rejected": -265.3873291015625, + "loss": 0.6321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1758441925048828, + "rewards/margins": 0.3622661828994751, + "rewards/rejected": -0.5381103754043579, + "step": 2211 + }, + { + "epoch": 0.13, + "learning_rate": 9.74670702990216e-08, + "logits/chosen": -2.1181936264038086, + "logits/rejected": -2.1167266368865967, + "logps/chosen": -0.40223175287246704, + "logps/rejected": -27.848041534423828, + "loss": 0.6998, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.013820362277328968, + "rewards/margins": -0.01402978878468275, + "rewards/rejected": 0.0002094268857035786, + "step": 2212 + }, + { + "epoch": 0.13, + "learning_rate": 9.746410798885598e-08, + "logits/chosen": -2.2209792137145996, + "logits/rejected": -2.219494342803955, + "logps/chosen": -13.899499893188477, + "logps/rejected": -203.63111877441406, + "loss": 0.4957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01253652572631836, + "rewards/margins": 0.9873518943786621, + "rewards/rejected": -0.9748153686523438, + "step": 2213 + }, + { + "epoch": 0.13, + "learning_rate": 9.746114399253308e-08, + "logits/chosen": -2.233839273452759, + "logits/rejected": -2.2179453372955322, + "logps/chosen": -180.59967041015625, + "logps/rejected": -322.950927734375, + "loss": 0.5138, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8924499750137329, + "rewards/margins": -0.052880823612213135, + "rewards/rejected": 0.945330798625946, + "step": 2214 + }, + { + "epoch": 0.13, + "learning_rate": 9.74581783101582e-08, + "logits/chosen": -2.0724105834960938, + "logits/rejected": -2.0841832160949707, + "logps/chosen": -191.39669799804688, + "logps/rejected": -289.2367248535156, + "loss": 0.4663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6679901480674744, + "rewards/margins": 0.24222415685653687, + "rewards/rejected": 0.4257659912109375, + "step": 2215 + }, + { + "epoch": 0.13, + "learning_rate": 9.745521094183666e-08, + "logits/chosen": -2.286651134490967, + "logits/rejected": -2.2803890705108643, + "logps/chosen": -56.276512145996094, + "logps/rejected": -232.85427856445312, + "loss": 0.5616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18453407287597656, + "rewards/margins": 0.274039089679718, + "rewards/rejected": -0.08950500935316086, + "step": 2216 + }, + { + "epoch": 0.13, + "learning_rate": 9.745224188767391e-08, + "logits/chosen": -2.1932787895202637, + "logits/rejected": -2.1956863403320312, + "logps/chosen": -71.53126525878906, + "logps/rejected": -135.812744140625, + "loss": 0.4847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11085968464612961, + "rewards/margins": 0.9369239807128906, + "rewards/rejected": -0.8260642886161804, + "step": 2217 + }, + { + "epoch": 0.13, + "learning_rate": 9.744927114777541e-08, + "logits/chosen": -2.07069993019104, + "logits/rejected": -2.069518566131592, + "logps/chosen": -2.5028183460235596, + "logps/rejected": -119.10364532470703, + "loss": 0.5236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02714862860739231, + "rewards/margins": 0.7884981036186218, + "rewards/rejected": -0.7613494992256165, + "step": 2218 + }, + { + "epoch": 0.13, + "learning_rate": 9.744629872224671e-08, + "logits/chosen": -2.0782740116119385, + "logits/rejected": -2.064061164855957, + "logps/chosen": -250.89938354492188, + "logps/rejected": -313.2102966308594, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6765899658203125, + "rewards/margins": 0.47551268339157104, + "rewards/rejected": 0.20107726752758026, + "step": 2219 + }, + { + "epoch": 0.13, + "learning_rate": 9.74433246111934e-08, + "logits/chosen": -2.3449981212615967, + "logits/rejected": -2.333021402359009, + "logps/chosen": -8.170162200927734, + "logps/rejected": -194.22903442382812, + "loss": 0.5238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004900074098259211, + "rewards/margins": 0.8821172118186951, + "rewards/rejected": -0.8772171139717102, + "step": 2220 + }, + { + "epoch": 0.13, + "learning_rate": 9.744034881472111e-08, + "logits/chosen": -2.0696346759796143, + "logits/rejected": -1.95947265625, + "logps/chosen": -239.07778930664062, + "logps/rejected": -489.87322998046875, + "loss": 0.4624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38768312335014343, + "rewards/margins": 0.8057922720909119, + "rewards/rejected": -0.41810914874076843, + "step": 2221 + }, + { + "epoch": 0.13, + "learning_rate": 9.743737133293557e-08, + "logits/chosen": -2.1952624320983887, + "logits/rejected": -2.1930038928985596, + "logps/chosen": -2.7784597873687744, + "logps/rejected": -185.69171142578125, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1375715285539627, + "rewards/margins": 0.49189627170562744, + "rewards/rejected": -0.629467785358429, + "step": 2222 + }, + { + "epoch": 0.13, + "learning_rate": 9.743439216594259e-08, + "logits/chosen": -2.090363025665283, + "logits/rejected": -2.08302903175354, + "logps/chosen": -0.0022480636835098267, + "logps/rejected": -126.7950439453125, + "loss": 0.6496, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.846974115935154e-05, + "rewards/margins": 0.18222233653068542, + "rewards/rejected": -0.182270810008049, + "step": 2223 + }, + { + "epoch": 0.13, + "learning_rate": 9.743141131384796e-08, + "logits/chosen": -2.146474599838257, + "logits/rejected": -2.1397900581359863, + "logps/chosen": -68.42781066894531, + "logps/rejected": -198.18682861328125, + "loss": 0.8567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.865002453327179, + "rewards/margins": 0.3192763924598694, + "rewards/rejected": -1.1842788457870483, + "step": 2224 + }, + { + "epoch": 0.13, + "learning_rate": 9.742842877675757e-08, + "logits/chosen": -2.1803627014160156, + "logits/rejected": -2.1220028400421143, + "logps/chosen": -169.35806274414062, + "logps/rejected": -308.06512451171875, + "loss": 0.4542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8315674066543579, + "rewards/margins": 0.12878113985061646, + "rewards/rejected": 0.7027862668037415, + "step": 2225 + }, + { + "epoch": 0.13, + "learning_rate": 9.742544455477742e-08, + "logits/chosen": -2.1209278106689453, + "logits/rejected": -2.1262447834014893, + "logps/chosen": -27.03446388244629, + "logps/rejected": -214.1581268310547, + "loss": 0.6315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08411484211683273, + "rewards/margins": 0.1678871214389801, + "rewards/rejected": -0.08377227932214737, + "step": 2226 + }, + { + "epoch": 0.13, + "learning_rate": 9.742245864801347e-08, + "logits/chosen": -2.1577308177948, + "logits/rejected": -2.1730988025665283, + "logps/chosen": -205.23704528808594, + "logps/rejected": -245.8421630859375, + "loss": 0.663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05321350321173668, + "rewards/margins": 0.07609406113624573, + "rewards/rejected": -0.02288055419921875, + "step": 2227 + }, + { + "epoch": 0.13, + "learning_rate": 9.741947105657183e-08, + "logits/chosen": -2.0524423122406006, + "logits/rejected": -2.0581729412078857, + "logps/chosen": -3.058565378189087, + "logps/rejected": -145.9149932861328, + "loss": 0.6264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023480582982301712, + "rewards/margins": 0.31741753220558167, + "rewards/rejected": -0.3408981263637543, + "step": 2228 + }, + { + "epoch": 0.13, + "learning_rate": 9.741648178055864e-08, + "logits/chosen": -2.143143653869629, + "logits/rejected": -2.1293840408325195, + "logps/chosen": -57.64556884765625, + "logps/rejected": -198.19947814941406, + "loss": 0.5562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09924354404211044, + "rewards/margins": 0.5070041418075562, + "rewards/rejected": -0.4077606201171875, + "step": 2229 + }, + { + "epoch": 0.13, + "learning_rate": 9.741349082008006e-08, + "logits/chosen": -2.1690354347229004, + "logits/rejected": -2.16951060295105, + "logps/chosen": -5.33920955657959, + "logps/rejected": -139.77517700195312, + "loss": 0.6427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05279717594385147, + "rewards/margins": 0.2424054741859436, + "rewards/rejected": -0.2952026426792145, + "step": 2230 + }, + { + "epoch": 0.13, + "learning_rate": 9.741049817524236e-08, + "logits/chosen": -2.0368869304656982, + "logits/rejected": -2.0177865028381348, + "logps/chosen": -41.64313507080078, + "logps/rejected": -364.9574279785156, + "loss": 0.5533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.146351620554924, + "rewards/margins": 0.5236953496932983, + "rewards/rejected": -0.3773437440395355, + "step": 2231 + }, + { + "epoch": 0.13, + "learning_rate": 9.740750384615187e-08, + "logits/chosen": -2.18293833732605, + "logits/rejected": -2.1768603324890137, + "logps/chosen": -35.67892074584961, + "logps/rejected": -162.27792358398438, + "loss": 0.7058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004727173130959272, + "rewards/margins": 0.01753387413918972, + "rewards/rejected": -0.02226104773581028, + "step": 2232 + }, + { + "epoch": 0.13, + "learning_rate": 9.740450783291494e-08, + "logits/chosen": -2.2443580627441406, + "logits/rejected": -2.2537899017333984, + "logps/chosen": -301.04132080078125, + "logps/rejected": -305.37603759765625, + "loss": 0.3845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.759967029094696, + "rewards/margins": 0.696441650390625, + "rewards/rejected": 0.06352539360523224, + "step": 2233 + }, + { + "epoch": 0.13, + "learning_rate": 9.7401510135638e-08, + "logits/chosen": -2.0404601097106934, + "logits/rejected": -1.9897462129592896, + "logps/chosen": -139.2840118408203, + "logps/rejected": -323.21270751953125, + "loss": 0.693, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4172042906284332, + "rewards/margins": -0.5084244012832642, + "rewards/rejected": 0.925628662109375, + "step": 2234 + }, + { + "epoch": 0.13, + "learning_rate": 9.739851075442757e-08, + "logits/chosen": -2.193072557449341, + "logits/rejected": -2.151709794998169, + "logps/chosen": -209.85536193847656, + "logps/rejected": -316.8985290527344, + "loss": 0.4905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3646896481513977, + "rewards/margins": 0.5663406848907471, + "rewards/rejected": -0.20165100693702698, + "step": 2235 + }, + { + "epoch": 0.13, + "learning_rate": 9.739550968939019e-08, + "logits/chosen": -2.131472587585449, + "logits/rejected": -2.1291537284851074, + "logps/chosen": -0.0007498249760828912, + "logps/rejected": -65.49830627441406, + "loss": 0.6452, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.308604838523024e-07, + "rewards/margins": 0.19933076202869415, + "rewards/rejected": -0.199330136179924, + "step": 2236 + }, + { + "epoch": 0.13, + "learning_rate": 9.739250694063246e-08, + "logits/chosen": -2.2288174629211426, + "logits/rejected": -2.226214647293091, + "logps/chosen": -0.0015716881025582552, + "logps/rejected": -120.10000610351562, + "loss": 0.5924, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.993674858473241e-05, + "rewards/margins": 0.4493938088417053, + "rewards/rejected": -0.449453741312027, + "step": 2237 + }, + { + "epoch": 0.13, + "learning_rate": 9.738950250826106e-08, + "logits/chosen": -1.9545730352401733, + "logits/rejected": -1.9366570711135864, + "logps/chosen": -252.22300720214844, + "logps/rejected": -469.71539306640625, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8757644891738892, + "rewards/margins": 0.7419509887695312, + "rewards/rejected": 0.13381348550319672, + "step": 2238 + }, + { + "epoch": 0.13, + "learning_rate": 9.738649639238273e-08, + "logits/chosen": -2.1792891025543213, + "logits/rejected": -2.184086322784424, + "logps/chosen": -7.632758617401123, + "logps/rejected": -72.74870300292969, + "loss": 0.6687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03735842928290367, + "rewards/margins": 0.06086611747741699, + "rewards/rejected": -0.02350769005715847, + "step": 2239 + }, + { + "epoch": 0.13, + "learning_rate": 9.738348859310425e-08, + "logits/chosen": -2.021036386489868, + "logits/rejected": -2.0007853507995605, + "logps/chosen": -222.20562744140625, + "logps/rejected": -304.0416564941406, + "loss": 0.5167, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.758190929889679, + "rewards/margins": -0.09921568632125854, + "rewards/rejected": 0.8574066162109375, + "step": 2240 + }, + { + "epoch": 0.13, + "learning_rate": 9.738047911053247e-08, + "logits/chosen": -2.1043641567230225, + "logits/rejected": -2.133577346801758, + "logps/chosen": -234.79421997070312, + "logps/rejected": -232.001708984375, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0142090320587158, + "rewards/margins": 0.3162079453468323, + "rewards/rejected": 0.6980010867118835, + "step": 2241 + }, + { + "epoch": 0.13, + "learning_rate": 9.737746794477433e-08, + "logits/chosen": -2.167430877685547, + "logits/rejected": -2.172456979751587, + "logps/chosen": -36.44146728515625, + "logps/rejected": -151.02003479003906, + "loss": 0.5177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049985505640506744, + "rewards/margins": 0.8134911060333252, + "rewards/rejected": -0.7635055780410767, + "step": 2242 + }, + { + "epoch": 0.13, + "learning_rate": 9.737445509593676e-08, + "logits/chosen": -2.1534504890441895, + "logits/rejected": -2.154291868209839, + "logps/chosen": -375.0076599121094, + "logps/rejected": -370.88946533203125, + "loss": 0.472, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0423187017440796, + "rewards/margins": -0.04720163345336914, + "rewards/rejected": 1.0895203351974487, + "step": 2243 + }, + { + "epoch": 0.13, + "learning_rate": 9.737144056412683e-08, + "logits/chosen": -2.269878625869751, + "logits/rejected": -2.240361452102661, + "logps/chosen": -32.029991149902344, + "logps/rejected": -319.44415283203125, + "loss": 0.4799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04897613450884819, + "rewards/margins": 1.3588454723358154, + "rewards/rejected": -1.4078216552734375, + "step": 2244 + }, + { + "epoch": 0.13, + "learning_rate": 9.73684243494516e-08, + "logits/chosen": -2.094409942626953, + "logits/rejected": -2.0957677364349365, + "logps/chosen": -26.570154190063477, + "logps/rejected": -96.43899536132812, + "loss": 0.5454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11550617218017578, + "rewards/margins": 0.5046297311782837, + "rewards/rejected": -0.3891235291957855, + "step": 2245 + }, + { + "epoch": 0.13, + "learning_rate": 9.736540645201823e-08, + "logits/chosen": -1.991494059562683, + "logits/rejected": -1.9708640575408936, + "logps/chosen": -75.76217651367188, + "logps/rejected": -235.54562377929688, + "loss": 0.4208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3676399290561676, + "rewards/margins": 0.7589362859725952, + "rewards/rejected": -0.39129638671875, + "step": 2246 + }, + { + "epoch": 0.13, + "learning_rate": 9.736238687193395e-08, + "logits/chosen": -2.1576483249664307, + "logits/rejected": -2.1766321659088135, + "logps/chosen": -196.05148315429688, + "logps/rejected": -230.17332458496094, + "loss": 0.5999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2983871400356293, + "rewards/margins": 0.11667479574680328, + "rewards/rejected": 0.181712344288826, + "step": 2247 + }, + { + "epoch": 0.13, + "learning_rate": 9.7359365609306e-08, + "logits/chosen": -2.0899605751037598, + "logits/rejected": -2.083911895751953, + "logps/chosen": -24.141393661499023, + "logps/rejected": -150.63851928710938, + "loss": 0.5576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12914447486400604, + "rewards/margins": 0.2916688919067383, + "rewards/rejected": -0.16252441704273224, + "step": 2248 + }, + { + "epoch": 0.13, + "learning_rate": 9.735634266424173e-08, + "logits/chosen": -2.058999538421631, + "logits/rejected": -2.0483713150024414, + "logps/chosen": -0.040629792958498, + "logps/rejected": -176.99545288085938, + "loss": 0.4863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007728975615464151, + "rewards/margins": 1.1334067583084106, + "rewards/rejected": -1.134179711341858, + "step": 2249 + }, + { + "epoch": 0.13, + "learning_rate": 9.735331803684851e-08, + "logits/chosen": -1.8598049879074097, + "logits/rejected": -1.8151774406433105, + "logps/chosen": -214.5087432861328, + "logps/rejected": -417.95330810546875, + "loss": 0.3614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9806594848632812, + "rewards/margins": 0.5196639895439148, + "rewards/rejected": 0.46099549531936646, + "step": 2250 + }, + { + "epoch": 0.13, + "learning_rate": 9.735029172723383e-08, + "logits/chosen": -1.9778308868408203, + "logits/rejected": -1.9674491882324219, + "logps/chosen": -124.51683044433594, + "logps/rejected": -231.28086853027344, + "loss": 0.6173, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3903183043003082, + "rewards/margins": -0.15938571095466614, + "rewards/rejected": 0.5497040152549744, + "step": 2251 + }, + { + "epoch": 0.13, + "learning_rate": 9.734726373550514e-08, + "logits/chosen": -2.112004280090332, + "logits/rejected": -2.1022732257843018, + "logps/chosen": -87.68463897705078, + "logps/rejected": -250.5548095703125, + "loss": 0.5179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27036818861961365, + "rewards/margins": 0.4801780581474304, + "rewards/rejected": -0.20980988442897797, + "step": 2252 + }, + { + "epoch": 0.13, + "learning_rate": 9.734423406177007e-08, + "logits/chosen": -2.330362319946289, + "logits/rejected": -2.3268344402313232, + "logps/chosen": -0.21738849580287933, + "logps/rejected": -128.9574432373047, + "loss": 0.638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004654129035770893, + "rewards/margins": 0.23591899871826172, + "rewards/rejected": -0.24057312309741974, + "step": 2253 + }, + { + "epoch": 0.13, + "learning_rate": 9.734120270613622e-08, + "logits/chosen": -2.1166131496429443, + "logits/rejected": -2.081974506378174, + "logps/chosen": -323.4383544921875, + "logps/rejected": -533.5134887695312, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.755969226360321, + "rewards/margins": 0.6018493175506592, + "rewards/rejected": 0.15411987900733948, + "step": 2254 + }, + { + "epoch": 0.13, + "learning_rate": 9.733816966871126e-08, + "logits/chosen": -1.979715347290039, + "logits/rejected": -1.9064643383026123, + "logps/chosen": -231.47897338867188, + "logps/rejected": -579.4083251953125, + "loss": 0.3058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9755798578262329, + "rewards/margins": 0.820098876953125, + "rewards/rejected": 0.15548096597194672, + "step": 2255 + }, + { + "epoch": 0.13, + "learning_rate": 9.733513494960297e-08, + "logits/chosen": -2.169729471206665, + "logits/rejected": -2.161292314529419, + "logps/chosen": -13.011974334716797, + "logps/rejected": -207.11065673828125, + "loss": 0.5945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03236217424273491, + "rewards/margins": 0.48248767852783203, + "rewards/rejected": -0.514849841594696, + "step": 2256 + }, + { + "epoch": 0.13, + "learning_rate": 9.733209854891913e-08, + "logits/chosen": -2.1777658462524414, + "logits/rejected": -2.131150484085083, + "logps/chosen": -214.26113891601562, + "logps/rejected": -457.51654052734375, + "loss": 0.3571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8175598382949829, + "rewards/margins": 0.7728638052940369, + "rewards/rejected": 0.04469604417681694, + "step": 2257 + }, + { + "epoch": 0.13, + "learning_rate": 9.732906046676763e-08, + "logits/chosen": -2.1692049503326416, + "logits/rejected": -2.1611547470092773, + "logps/chosen": -226.79251098632812, + "logps/rejected": -292.7987365722656, + "loss": 0.5147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10521240532398224, + "rewards/margins": 0.7666717767715454, + "rewards/rejected": -0.6614593863487244, + "step": 2258 + }, + { + "epoch": 0.13, + "learning_rate": 9.732602070325641e-08, + "logits/chosen": -2.074004650115967, + "logits/rejected": -2.1832172870635986, + "logps/chosen": -199.0258026123047, + "logps/rejected": -297.27801513671875, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6305496096611023, + "rewards/margins": 0.7513595223426819, + "rewards/rejected": -0.12080993503332138, + "step": 2259 + }, + { + "epoch": 0.13, + "learning_rate": 9.732297925849342e-08, + "logits/chosen": -2.282716751098633, + "logits/rejected": -2.2828760147094727, + "logps/chosen": -16.449609756469727, + "logps/rejected": -83.4928207397461, + "loss": 0.5826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034340858459472656, + "rewards/margins": 0.547529399394989, + "rewards/rejected": -0.5818702578544617, + "step": 2260 + }, + { + "epoch": 0.13, + "learning_rate": 9.731993613258674e-08, + "logits/chosen": -2.162498950958252, + "logits/rejected": -2.1465132236480713, + "logps/chosen": -165.91131591796875, + "logps/rejected": -232.421630859375, + "loss": 0.5029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4279937744140625, + "rewards/margins": 0.21539306640625, + "rewards/rejected": 0.2126007080078125, + "step": 2261 + }, + { + "epoch": 0.13, + "learning_rate": 9.731689132564444e-08, + "logits/chosen": -2.151078462600708, + "logits/rejected": -2.157078504562378, + "logps/chosen": -0.10367822647094727, + "logps/rejected": -85.24935150146484, + "loss": 0.6658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021896101534366608, + "rewards/margins": 0.1147865504026413, + "rewards/rejected": -0.11697616428136826, + "step": 2262 + }, + { + "epoch": 0.13, + "learning_rate": 9.731384483777472e-08, + "logits/chosen": -2.014774799346924, + "logits/rejected": -2.011524200439453, + "logps/chosen": -16.87432861328125, + "logps/rejected": -84.26304626464844, + "loss": 0.524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11694660037755966, + "rewards/margins": 0.5796670913696289, + "rewards/rejected": -0.46272048354148865, + "step": 2263 + }, + { + "epoch": 0.13, + "learning_rate": 9.73107966690858e-08, + "logits/chosen": -2.1712636947631836, + "logits/rejected": -2.1347782611846924, + "logps/chosen": -236.931396484375, + "logps/rejected": -380.1464538574219, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7424286007881165, + "rewards/margins": 1.0630279779434204, + "rewards/rejected": -0.32059937715530396, + "step": 2264 + }, + { + "epoch": 0.13, + "learning_rate": 9.730774681968594e-08, + "logits/chosen": -2.0366296768188477, + "logits/rejected": -2.0586047172546387, + "logps/chosen": -226.7386474609375, + "logps/rejected": -407.755615234375, + "loss": 0.5825, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6840500235557556, + "rewards/margins": -0.17439723014831543, + "rewards/rejected": 0.858447253704071, + "step": 2265 + }, + { + "epoch": 0.13, + "learning_rate": 9.730469528968351e-08, + "logits/chosen": -2.091071605682373, + "logits/rejected": -2.0703351497650146, + "logps/chosen": -97.38313293457031, + "logps/rejected": -303.3985290527344, + "loss": 0.524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1306358426809311, + "rewards/margins": 0.6344352960586548, + "rewards/rejected": -0.5037994384765625, + "step": 2266 + }, + { + "epoch": 0.13, + "learning_rate": 9.730164207918692e-08, + "logits/chosen": -2.2060513496398926, + "logits/rejected": -2.207770347595215, + "logps/chosen": -63.013885498046875, + "logps/rejected": -127.74393463134766, + "loss": 0.6069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01389083918184042, + "rewards/margins": 0.3133201599121094, + "rewards/rejected": -0.299429327249527, + "step": 2267 + }, + { + "epoch": 0.13, + "learning_rate": 9.729858718830464e-08, + "logits/chosen": -2.2800047397613525, + "logits/rejected": -2.2566945552825928, + "logps/chosen": -215.66586303710938, + "logps/rejected": -375.5052490234375, + "loss": 0.4395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7115722894668579, + "rewards/margins": 0.3114379942417145, + "rewards/rejected": 0.40013429522514343, + "step": 2268 + }, + { + "epoch": 0.13, + "learning_rate": 9.729553061714515e-08, + "logits/chosen": -1.8606568574905396, + "logits/rejected": -1.853562355041504, + "logps/chosen": -135.172607421875, + "logps/rejected": -381.8365783691406, + "loss": 0.4928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06078186258673668, + "rewards/margins": 1.0750153064727783, + "rewards/rejected": -1.135797142982483, + "step": 2269 + }, + { + "epoch": 0.13, + "learning_rate": 9.729247236581708e-08, + "logits/chosen": -2.20400071144104, + "logits/rejected": -2.16934871673584, + "logps/chosen": -230.77554321289062, + "logps/rejected": -304.3199462890625, + "loss": 0.4892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8002105951309204, + "rewards/margins": 0.01648867130279541, + "rewards/rejected": 0.783721923828125, + "step": 2270 + }, + { + "epoch": 0.13, + "learning_rate": 9.728941243442907e-08, + "logits/chosen": -2.142179012298584, + "logits/rejected": -2.1415557861328125, + "logps/chosen": -173.66128540039062, + "logps/rejected": -284.49267578125, + "loss": 0.412, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1522674560546875, + "rewards/margins": 0.2171783447265625, + "rewards/rejected": 0.935089111328125, + "step": 2271 + }, + { + "epoch": 0.13, + "learning_rate": 9.72863508230898e-08, + "logits/chosen": -2.0052623748779297, + "logits/rejected": -1.8399157524108887, + "logps/chosen": -208.60678100585938, + "logps/rejected": -786.6022338867188, + "loss": 0.5487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01694946363568306, + "rewards/margins": 0.8631531000137329, + "rewards/rejected": -0.8801025748252869, + "step": 2272 + }, + { + "epoch": 0.13, + "learning_rate": 9.728328753190804e-08, + "logits/chosen": -1.9342070817947388, + "logits/rejected": -1.90303635597229, + "logps/chosen": -336.56817626953125, + "logps/rejected": -459.49261474609375, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1196717023849487, + "rewards/margins": 0.5328735709190369, + "rewards/rejected": 0.5867981314659119, + "step": 2273 + }, + { + "epoch": 0.13, + "learning_rate": 9.728022256099264e-08, + "logits/chosen": -2.0507662296295166, + "logits/rejected": -2.036421775817871, + "logps/chosen": -178.11253356933594, + "logps/rejected": -336.9296569824219, + "loss": 0.494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5021499991416931, + "rewards/margins": 0.3024185299873352, + "rewards/rejected": 0.19973145425319672, + "step": 2274 + }, + { + "epoch": 0.13, + "learning_rate": 9.727715591045245e-08, + "logits/chosen": -2.084545612335205, + "logits/rejected": -2.064138174057007, + "logps/chosen": -129.44432067871094, + "logps/rejected": -220.483154296875, + "loss": 0.5365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3959854245185852, + "rewards/margins": 0.04044342041015625, + "rewards/rejected": 0.35554200410842896, + "step": 2275 + }, + { + "epoch": 0.13, + "learning_rate": 9.727408758039642e-08, + "logits/chosen": -2.097001075744629, + "logits/rejected": -2.0966360569000244, + "logps/chosen": -2.715344190597534, + "logps/rejected": -80.8998031616211, + "loss": 0.7053, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.022555304691195488, + "rewards/margins": -0.03380074352025986, + "rewards/rejected": 0.056356050074100494, + "step": 2276 + }, + { + "epoch": 0.13, + "learning_rate": 9.727101757093357e-08, + "logits/chosen": -2.1738014221191406, + "logits/rejected": -2.1486599445343018, + "logps/chosen": -152.6731719970703, + "logps/rejected": -352.5471496582031, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06083526834845543, + "rewards/margins": 1.2988388538360596, + "rewards/rejected": -1.2380036115646362, + "step": 2277 + }, + { + "epoch": 0.13, + "learning_rate": 9.726794588217294e-08, + "logits/chosen": -2.159097671508789, + "logits/rejected": -2.1478066444396973, + "logps/chosen": -22.11890411376953, + "logps/rejected": -187.64529418945312, + "loss": 0.5827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010462760925292969, + "rewards/margins": 0.5476946234703064, + "rewards/rejected": -0.5581573843955994, + "step": 2278 + }, + { + "epoch": 0.13, + "learning_rate": 9.726487251422366e-08, + "logits/chosen": -1.9378279447555542, + "logits/rejected": -1.900465726852417, + "logps/chosen": -332.6747741699219, + "logps/rejected": -483.1329650878906, + "loss": 0.4282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5091522336006165, + "rewards/margins": 0.508160412311554, + "rewards/rejected": 0.0009918212890625, + "step": 2279 + }, + { + "epoch": 0.13, + "learning_rate": 9.726179746719491e-08, + "logits/chosen": -2.200749158859253, + "logits/rejected": -2.2006969451904297, + "logps/chosen": -32.78472137451172, + "logps/rejected": -124.18423461914062, + "loss": 0.5973, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32155534625053406, + "rewards/margins": 0.030471056699752808, + "rewards/rejected": 0.29108428955078125, + "step": 2280 + }, + { + "epoch": 0.13, + "learning_rate": 9.725872074119592e-08, + "logits/chosen": -2.117523431777954, + "logits/rejected": -2.0443646907806396, + "logps/chosen": -215.50784301757812, + "logps/rejected": -511.4020690917969, + "loss": 0.395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6929962038993835, + "rewards/margins": 0.45158079266548157, + "rewards/rejected": 0.24141541123390198, + "step": 2281 + }, + { + "epoch": 0.13, + "learning_rate": 9.725564233633602e-08, + "logits/chosen": -1.8748598098754883, + "logits/rejected": -1.8764623403549194, + "logps/chosen": -133.20010375976562, + "logps/rejected": -146.5093231201172, + "loss": 0.52, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45502930879592896, + "rewards/margins": 0.16293641924858093, + "rewards/rejected": 0.292092889547348, + "step": 2282 + }, + { + "epoch": 0.13, + "learning_rate": 9.725256225272455e-08, + "logits/chosen": -1.980790615081787, + "logits/rejected": -1.9707149267196655, + "logps/chosen": -29.208877563476562, + "logps/rejected": -227.9222869873047, + "loss": 0.5003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06549473106861115, + "rewards/margins": 1.1453847885131836, + "rewards/rejected": -1.2108795642852783, + "step": 2283 + }, + { + "epoch": 0.13, + "learning_rate": 9.724948049047093e-08, + "logits/chosen": -2.310865879058838, + "logits/rejected": -2.2575113773345947, + "logps/chosen": -176.3270721435547, + "logps/rejected": -356.57879638671875, + "loss": 0.4302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.809918224811554, + "rewards/margins": 0.3625854551792145, + "rewards/rejected": 0.4473327696323395, + "step": 2284 + }, + { + "epoch": 0.13, + "learning_rate": 9.724639704968463e-08, + "logits/chosen": -2.128798484802246, + "logits/rejected": -2.1266324520111084, + "logps/chosen": -40.51276397705078, + "logps/rejected": -178.11849975585938, + "loss": 0.6225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005860519595444202, + "rewards/margins": 0.26575204730033875, + "rewards/rejected": -0.2716125547885895, + "step": 2285 + }, + { + "epoch": 0.13, + "learning_rate": 9.724331193047522e-08, + "logits/chosen": -2.203265905380249, + "logits/rejected": -2.2390851974487305, + "logps/chosen": -203.40081787109375, + "logps/rejected": -447.03564453125, + "loss": 0.2291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8031646609306335, + "rewards/margins": 1.5941040515899658, + "rewards/rejected": -0.7909393310546875, + "step": 2286 + }, + { + "epoch": 0.13, + "learning_rate": 9.724022513295226e-08, + "logits/chosen": -2.1653594970703125, + "logits/rejected": -2.1582181453704834, + "logps/chosen": -207.2247314453125, + "logps/rejected": -288.56890869140625, + "loss": 0.4878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36022645235061646, + "rewards/margins": 0.4166259765625, + "rewards/rejected": -0.05639953538775444, + "step": 2287 + }, + { + "epoch": 0.13, + "learning_rate": 9.723713665722543e-08, + "logits/chosen": -2.214707612991333, + "logits/rejected": -2.2022104263305664, + "logps/chosen": -118.70885467529297, + "logps/rejected": -265.639892578125, + "loss": 0.5965, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5196998715400696, + "rewards/margins": -0.19340741634368896, + "rewards/rejected": 0.7131072878837585, + "step": 2288 + }, + { + "epoch": 0.13, + "learning_rate": 9.723404650340444e-08, + "logits/chosen": -2.1170201301574707, + "logits/rejected": -2.0951013565063477, + "logps/chosen": -60.820213317871094, + "logps/rejected": -233.97579956054688, + "loss": 0.6073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1418815702199936, + "rewards/margins": 0.5652839541435242, + "rewards/rejected": -0.707165539264679, + "step": 2289 + }, + { + "epoch": 0.13, + "learning_rate": 9.723095467159908e-08, + "logits/chosen": -2.1651453971862793, + "logits/rejected": -2.1645796298980713, + "logps/chosen": -0.18035165965557098, + "logps/rejected": -149.7621612548828, + "loss": 0.5727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019408240914344788, + "rewards/margins": 0.5374115705490112, + "rewards/rejected": -0.5393524169921875, + "step": 2290 + }, + { + "epoch": 0.13, + "learning_rate": 9.722786116191918e-08, + "logits/chosen": -2.035238742828369, + "logits/rejected": -1.9886788129806519, + "logps/chosen": -269.6726379394531, + "logps/rejected": -526.02978515625, + "loss": 0.344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7474212646484375, + "rewards/margins": 0.9639984369277954, + "rewards/rejected": -0.21657715737819672, + "step": 2291 + }, + { + "epoch": 0.13, + "learning_rate": 9.722476597447463e-08, + "logits/chosen": -2.0497207641601562, + "logits/rejected": -2.0386178493499756, + "logps/chosen": -16.589998245239258, + "logps/rejected": -274.08642578125, + "loss": 0.5175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1371532529592514, + "rewards/margins": 0.6287547945976257, + "rewards/rejected": -0.4916015565395355, + "step": 2292 + }, + { + "epoch": 0.13, + "learning_rate": 9.722166910937539e-08, + "logits/chosen": -1.9701004028320312, + "logits/rejected": -1.9630311727523804, + "logps/chosen": -0.002476399764418602, + "logps/rejected": -85.21768188476562, + "loss": 0.6965, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.4981701396172866e-05, + "rewards/margins": -0.01312260888516903, + "rewards/rejected": 0.01306762732565403, + "step": 2293 + }, + { + "epoch": 0.13, + "learning_rate": 9.721857056673149e-08, + "logits/chosen": -1.974888563156128, + "logits/rejected": -1.956443428993225, + "logps/chosen": -54.68976593017578, + "logps/rejected": -284.20751953125, + "loss": 0.5676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08803367614746094, + "rewards/margins": 0.5893917083740234, + "rewards/rejected": -0.5013580322265625, + "step": 2294 + }, + { + "epoch": 0.13, + "learning_rate": 9.721547034665298e-08, + "logits/chosen": -2.1291329860687256, + "logits/rejected": -2.131772041320801, + "logps/chosen": -8.714028081158176e-05, + "logps/rejected": -123.87356567382812, + "loss": 0.5129, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.152492003115185e-07, + "rewards/margins": 0.929297685623169, + "rewards/rejected": -0.9292984008789062, + "step": 2295 + }, + { + "epoch": 0.13, + "learning_rate": 9.721236844925e-08, + "logits/chosen": -2.3065345287323, + "logits/rejected": -2.305475950241089, + "logps/chosen": -24.129066467285156, + "logps/rejected": -126.88138580322266, + "loss": 0.6196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0637691542506218, + "rewards/margins": 0.20297127962112427, + "rewards/rejected": -0.13920211791992188, + "step": 2296 + }, + { + "epoch": 0.13, + "learning_rate": 9.720926487463277e-08, + "logits/chosen": -2.0430526733398438, + "logits/rejected": -2.0442519187927246, + "logps/chosen": -7.325606822967529, + "logps/rejected": -145.33181762695312, + "loss": 0.6141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024829579517245293, + "rewards/margins": 0.3770731985569, + "rewards/rejected": -0.4019027650356293, + "step": 2297 + }, + { + "epoch": 0.13, + "learning_rate": 9.720615962291152e-08, + "logits/chosen": -2.0582284927368164, + "logits/rejected": -2.053927183151245, + "logps/chosen": -9.897392272949219, + "logps/rejected": -150.22195434570312, + "loss": 0.5608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011227608192712069, + "rewards/margins": 0.626745879650116, + "rewards/rejected": -0.62786865234375, + "step": 2298 + }, + { + "epoch": 0.13, + "learning_rate": 9.720305269419656e-08, + "logits/chosen": -2.102562427520752, + "logits/rejected": -2.080202341079712, + "logps/chosen": -25.704673767089844, + "logps/rejected": -172.27752685546875, + "loss": 0.6936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09536724537611008, + "rewards/margins": 0.11279468983411789, + "rewards/rejected": -0.20816193521022797, + "step": 2299 + }, + { + "epoch": 0.13, + "learning_rate": 9.719994408859829e-08, + "logits/chosen": -2.123465061187744, + "logits/rejected": -2.100759983062744, + "logps/chosen": -108.18193054199219, + "logps/rejected": -245.23533630371094, + "loss": 0.5174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1931602507829666, + "rewards/margins": 1.5518699884414673, + "rewards/rejected": -1.7450302839279175, + "step": 2300 + }, + { + "epoch": 0.13, + "learning_rate": 9.719683380622712e-08, + "logits/chosen": -2.1809253692626953, + "logits/rejected": -2.1591482162475586, + "logps/chosen": -19.819961547851562, + "logps/rejected": -177.11822509765625, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08136596530675888, + "rewards/margins": 0.6779342293739319, + "rewards/rejected": -0.5965682864189148, + "step": 2301 + }, + { + "epoch": 0.13, + "learning_rate": 9.719372184719355e-08, + "logits/chosen": -1.9646146297454834, + "logits/rejected": -1.908310055732727, + "logps/chosen": -266.41644287109375, + "logps/rejected": -336.8413391113281, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8295135498046875, + "rewards/margins": 0.8589569330215454, + "rewards/rejected": -0.02944335900247097, + "step": 2302 + }, + { + "epoch": 0.13, + "learning_rate": 9.719060821160812e-08, + "logits/chosen": -2.2882728576660156, + "logits/rejected": -2.279428005218506, + "logps/chosen": -46.79560089111328, + "logps/rejected": -207.5327606201172, + "loss": 0.4906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1551685333251953, + "rewards/margins": 0.8756168484687805, + "rewards/rejected": -0.7204483151435852, + "step": 2303 + }, + { + "epoch": 0.13, + "learning_rate": 9.718749289958146e-08, + "logits/chosen": -2.341599941253662, + "logits/rejected": -2.339292049407959, + "logps/chosen": -10.921008110046387, + "logps/rejected": -97.52449035644531, + "loss": 0.6237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04546642303466797, + "rewards/margins": 0.34809741377830505, + "rewards/rejected": -0.393563836812973, + "step": 2304 + }, + { + "epoch": 0.13, + "learning_rate": 9.718437591122423e-08, + "logits/chosen": -2.080526828765869, + "logits/rejected": -2.074324131011963, + "logps/chosen": -41.662906646728516, + "logps/rejected": -191.84046936035156, + "loss": 0.4926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16356392204761505, + "rewards/margins": 0.802267849445343, + "rewards/rejected": -0.6387039422988892, + "step": 2305 + }, + { + "epoch": 0.13, + "learning_rate": 9.718125724664716e-08, + "logits/chosen": -2.190808057785034, + "logits/rejected": -2.1870222091674805, + "logps/chosen": -1.7023881673812866, + "logps/rejected": -138.98826599121094, + "loss": 0.5659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020862115547060966, + "rewards/margins": 0.5547961592674255, + "rewards/rejected": -0.5339340567588806, + "step": 2306 + }, + { + "epoch": 0.13, + "learning_rate": 9.717813690596104e-08, + "logits/chosen": -2.2131247520446777, + "logits/rejected": -2.216092824935913, + "logps/chosen": -223.68939208984375, + "logps/rejected": -375.010009765625, + "loss": 0.4103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7719208002090454, + "rewards/margins": 0.35465699434280396, + "rewards/rejected": 0.41726380586624146, + "step": 2307 + }, + { + "epoch": 0.13, + "learning_rate": 9.717501488927674e-08, + "logits/chosen": -2.141890525817871, + "logits/rejected": -2.1381325721740723, + "logps/chosen": -184.21014404296875, + "logps/rejected": -298.33135986328125, + "loss": 0.512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3512512147426605, + "rewards/margins": 0.21465453505516052, + "rewards/rejected": 0.1365966796875, + "step": 2308 + }, + { + "epoch": 0.13, + "learning_rate": 9.717189119670514e-08, + "logits/chosen": -2.129018783569336, + "logits/rejected": -2.1264448165893555, + "logps/chosen": -66.3721694946289, + "logps/rejected": -303.15936279296875, + "loss": 0.5373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05086975172162056, + "rewards/margins": 0.7853118777275085, + "rewards/rejected": -0.836181640625, + "step": 2309 + }, + { + "epoch": 0.13, + "learning_rate": 9.716876582835724e-08, + "logits/chosen": -1.9724220037460327, + "logits/rejected": -1.9825369119644165, + "logps/chosen": -0.0015410765772685409, + "logps/rejected": -342.122314453125, + "loss": 0.4013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9806980705870956e-07, + "rewards/margins": 2.1273252964019775, + "rewards/rejected": -2.1273255348205566, + "step": 2310 + }, + { + "epoch": 0.13, + "learning_rate": 9.716563878434403e-08, + "logits/chosen": -2.0312647819519043, + "logits/rejected": -2.0261027812957764, + "logps/chosen": -231.84092712402344, + "logps/rejected": -331.401123046875, + "loss": 0.5035, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7774505615234375, + "rewards/margins": -0.0037445425987243652, + "rewards/rejected": 0.7811951041221619, + "step": 2311 + }, + { + "epoch": 0.13, + "learning_rate": 9.716251006477664e-08, + "logits/chosen": -2.246428966522217, + "logits/rejected": -2.2397632598876953, + "logps/chosen": -19.687124252319336, + "logps/rejected": -117.1401596069336, + "loss": 0.6644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0181440357118845, + "rewards/margins": 0.1200166717171669, + "rewards/rejected": -0.13816070556640625, + "step": 2312 + }, + { + "epoch": 0.13, + "learning_rate": 9.715937966976617e-08, + "logits/chosen": -2.100064277648926, + "logits/rejected": -2.0753612518310547, + "logps/chosen": -107.03764343261719, + "logps/rejected": -387.5841979980469, + "loss": 0.4788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06540298461914062, + "rewards/margins": 1.4543358087539673, + "rewards/rejected": -1.519738793373108, + "step": 2313 + }, + { + "epoch": 0.13, + "learning_rate": 9.715624759942386e-08, + "logits/chosen": -2.1896181106567383, + "logits/rejected": -2.188123941421509, + "logps/chosen": -11.87522029876709, + "logps/rejected": -180.55076599121094, + "loss": 0.5596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014140796847641468, + "rewards/margins": 0.5951945781707764, + "rewards/rejected": -0.6093353629112244, + "step": 2314 + }, + { + "epoch": 0.13, + "learning_rate": 9.7153113853861e-08, + "logits/chosen": -2.194247007369995, + "logits/rejected": -2.1930439472198486, + "logps/chosen": -12.974285125732422, + "logps/rejected": -62.25539016723633, + "loss": 0.6887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00036878586979582906, + "rewards/margins": 0.008534908294677734, + "rewards/rejected": -0.008166122250258923, + "step": 2315 + }, + { + "epoch": 0.13, + "learning_rate": 9.714997843318885e-08, + "logits/chosen": -1.988786220550537, + "logits/rejected": -1.993289828300476, + "logps/chosen": -0.09129474312067032, + "logps/rejected": -57.06220245361328, + "loss": 0.7077, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0020819143392145634, + "rewards/margins": -0.05532097443938255, + "rewards/rejected": 0.05323905870318413, + "step": 2316 + }, + { + "epoch": 0.13, + "learning_rate": 9.714684133751884e-08, + "logits/chosen": -2.277752637863159, + "logits/rejected": -2.2575364112854004, + "logps/chosen": -34.51506042480469, + "logps/rejected": -116.69132995605469, + "loss": 0.7184, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08607254177331924, + "rewards/margins": -0.016646578907966614, + "rewards/rejected": -0.06942596286535263, + "step": 2317 + }, + { + "epoch": 0.13, + "learning_rate": 9.714370256696241e-08, + "logits/chosen": -2.2064220905303955, + "logits/rejected": -2.1636359691619873, + "logps/chosen": -130.6711883544922, + "logps/rejected": -373.68359375, + "loss": 0.5917, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5666122436523438, + "rewards/margins": -0.20304107666015625, + "rewards/rejected": 0.7696533203125, + "step": 2318 + }, + { + "epoch": 0.13, + "learning_rate": 9.714056212163105e-08, + "logits/chosen": -1.9990646839141846, + "logits/rejected": -1.9765167236328125, + "logps/chosen": -90.27423858642578, + "logps/rejected": -407.5367431640625, + "loss": 0.4003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25761109590530396, + "rewards/margins": 1.0318634510040283, + "rewards/rejected": -0.7742523550987244, + "step": 2319 + }, + { + "epoch": 0.14, + "learning_rate": 9.713742000163634e-08, + "logits/chosen": -2.199617385864258, + "logits/rejected": -2.2136950492858887, + "logps/chosen": -217.30751037597656, + "logps/rejected": -310.5389099121094, + "loss": 0.4523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.894927978515625, + "rewards/margins": 0.0805816650390625, + "rewards/rejected": 0.8143463134765625, + "step": 2320 + }, + { + "epoch": 0.14, + "learning_rate": 9.71342762070899e-08, + "logits/chosen": -2.129136800765991, + "logits/rejected": -2.11362361907959, + "logps/chosen": -53.24266052246094, + "logps/rejected": -243.14334106445312, + "loss": 0.5835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05986633524298668, + "rewards/margins": 0.5220276117324829, + "rewards/rejected": -0.5818939208984375, + "step": 2321 + }, + { + "epoch": 0.14, + "learning_rate": 9.713113073810342e-08, + "logits/chosen": -2.113328218460083, + "logits/rejected": -2.0689404010772705, + "logps/chosen": -134.85150146484375, + "logps/rejected": -258.7683410644531, + "loss": 0.6064, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.46837159991264343, + "rewards/margins": -0.15237119793891907, + "rewards/rejected": 0.6207427978515625, + "step": 2322 + }, + { + "epoch": 0.14, + "learning_rate": 9.712798359478861e-08, + "logits/chosen": -1.9774869680404663, + "logits/rejected": -1.9879014492034912, + "logps/chosen": -372.03973388671875, + "logps/rejected": -484.3100280761719, + "loss": 0.5161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3077148497104645, + "rewards/margins": 0.5407196283340454, + "rewards/rejected": -0.23300476372241974, + "step": 2323 + }, + { + "epoch": 0.14, + "learning_rate": 9.712483477725731e-08, + "logits/chosen": -2.2014291286468506, + "logits/rejected": -2.1889281272888184, + "logps/chosen": -28.585641860961914, + "logps/rejected": -174.85040283203125, + "loss": 0.5964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.041178323328495026, + "rewards/margins": 0.5016973614692688, + "rewards/rejected": -0.5428757071495056, + "step": 2324 + }, + { + "epoch": 0.14, + "learning_rate": 9.712168428562136e-08, + "logits/chosen": -2.2470035552978516, + "logits/rejected": -2.2352726459503174, + "logps/chosen": -22.651037216186523, + "logps/rejected": -230.52012634277344, + "loss": 0.5859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05766410753130913, + "rewards/margins": 0.3987956941127777, + "rewards/rejected": -0.3411315977573395, + "step": 2325 + }, + { + "epoch": 0.14, + "learning_rate": 9.711853211999269e-08, + "logits/chosen": -2.1565539836883545, + "logits/rejected": -2.134122371673584, + "logps/chosen": -45.23910903930664, + "logps/rejected": -149.8673858642578, + "loss": 0.5949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0881778746843338, + "rewards/margins": 0.5030453205108643, + "rewards/rejected": -0.5912231802940369, + "step": 2326 + }, + { + "epoch": 0.14, + "learning_rate": 9.711537828048329e-08, + "logits/chosen": -2.1530497074127197, + "logits/rejected": -2.154899835586548, + "logps/chosen": -55.348697662353516, + "logps/rejected": -200.94775390625, + "loss": 0.4647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3094329833984375, + "rewards/margins": 0.6912704706192017, + "rewards/rejected": -0.3818374574184418, + "step": 2327 + }, + { + "epoch": 0.14, + "learning_rate": 9.711222276720516e-08, + "logits/chosen": -2.206345319747925, + "logits/rejected": -2.202423095703125, + "logps/chosen": -8.219551086425781, + "logps/rejected": -203.86622619628906, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07027711719274521, + "rewards/margins": 1.1010143756866455, + "rewards/rejected": -1.030737280845642, + "step": 2328 + }, + { + "epoch": 0.14, + "learning_rate": 9.710906558027044e-08, + "logits/chosen": -2.1221885681152344, + "logits/rejected": -2.125410556793213, + "logps/chosen": -26.421606063842773, + "logps/rejected": -86.75845336914062, + "loss": 0.6944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.196803480386734, + "rewards/margins": 0.12212657928466797, + "rewards/rejected": -0.318930059671402, + "step": 2329 + }, + { + "epoch": 0.14, + "learning_rate": 9.710590671979127e-08, + "logits/chosen": -1.9644365310668945, + "logits/rejected": -1.9603297710418701, + "logps/chosen": -19.887510299682617, + "logps/rejected": -127.80949401855469, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07715816795825958, + "rewards/margins": 0.5458379983901978, + "rewards/rejected": -0.468679815530777, + "step": 2330 + }, + { + "epoch": 0.14, + "learning_rate": 9.710274618587988e-08, + "logits/chosen": -2.244838237762451, + "logits/rejected": -2.2354536056518555, + "logps/chosen": -226.74703979492188, + "logps/rejected": -281.2633972167969, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9987854361534119, + "rewards/margins": 0.45204776525497437, + "rewards/rejected": 0.5467376708984375, + "step": 2331 + }, + { + "epoch": 0.14, + "learning_rate": 9.709958397864855e-08, + "logits/chosen": -1.9915968179702759, + "logits/rejected": -1.9061846733093262, + "logps/chosen": -258.28033447265625, + "logps/rejected": -439.8338623046875, + "loss": 0.1889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2618926763534546, + "rewards/margins": 1.7212860584259033, + "rewards/rejected": -0.45939332246780396, + "step": 2332 + }, + { + "epoch": 0.14, + "learning_rate": 9.709642009820959e-08, + "logits/chosen": -1.8644002676010132, + "logits/rejected": -1.855010986328125, + "logps/chosen": -1.60957932472229, + "logps/rejected": -250.68130493164062, + "loss": 0.487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05277109146118164, + "rewards/margins": 1.016977071762085, + "rewards/rejected": -0.9642059206962585, + "step": 2333 + }, + { + "epoch": 0.14, + "learning_rate": 9.709325454467543e-08, + "logits/chosen": -2.034071922302246, + "logits/rejected": -2.0331780910491943, + "logps/chosen": -42.119834899902344, + "logps/rejected": -261.715087890625, + "loss": 0.5228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1535266935825348, + "rewards/margins": 0.5091174840927124, + "rewards/rejected": -0.3555908203125, + "step": 2334 + }, + { + "epoch": 0.14, + "learning_rate": 9.709008731815851e-08, + "logits/chosen": -2.226963758468628, + "logits/rejected": -2.175123691558838, + "logps/chosen": -247.81808471679688, + "logps/rejected": -410.0920715332031, + "loss": 0.4996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.356985479593277, + "rewards/margins": 0.46345826983451843, + "rewards/rejected": -0.10647278279066086, + "step": 2335 + }, + { + "epoch": 0.14, + "learning_rate": 9.708691841877132e-08, + "logits/chosen": -2.2036519050598145, + "logits/rejected": -2.193272352218628, + "logps/chosen": -97.25289916992188, + "logps/rejected": -245.186279296875, + "loss": 0.4738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2792556881904602, + "rewards/margins": 0.7792419195175171, + "rewards/rejected": -0.4999862611293793, + "step": 2336 + }, + { + "epoch": 0.14, + "learning_rate": 9.70837478466265e-08, + "logits/chosen": -2.2805938720703125, + "logits/rejected": -2.274637222290039, + "logps/chosen": -5.655496597290039, + "logps/rejected": -105.29086303710938, + "loss": 0.6523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03796706348657608, + "rewards/margins": 0.12688690423965454, + "rewards/rejected": -0.08891983330249786, + "step": 2337 + }, + { + "epoch": 0.14, + "learning_rate": 9.708057560183662e-08, + "logits/chosen": -2.15993070602417, + "logits/rejected": -2.1446149349212646, + "logps/chosen": -93.16468048095703, + "logps/rejected": -258.9117431640625, + "loss": 0.4249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22142258286476135, + "rewards/margins": 1.0954490900039673, + "rewards/rejected": -0.8740264773368835, + "step": 2338 + }, + { + "epoch": 0.14, + "learning_rate": 9.707740168451439e-08, + "logits/chosen": -2.057887554168701, + "logits/rejected": -2.0462751388549805, + "logps/chosen": -4.159843444824219, + "logps/rejected": -188.33668518066406, + "loss": 0.5437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043826104956679046, + "rewards/margins": 0.7288082838058472, + "rewards/rejected": -0.7292465567588806, + "step": 2339 + }, + { + "epoch": 0.14, + "learning_rate": 9.70742260947726e-08, + "logits/chosen": -2.1388580799102783, + "logits/rejected": -2.1357038021087646, + "logps/chosen": -86.44683837890625, + "logps/rejected": -124.58641815185547, + "loss": 0.5918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15173493325710297, + "rewards/margins": 0.25808945298194885, + "rewards/rejected": -0.10635452717542648, + "step": 2340 + }, + { + "epoch": 0.14, + "learning_rate": 9.707104883272402e-08, + "logits/chosen": -2.1855740547180176, + "logits/rejected": -2.1649301052093506, + "logps/chosen": -178.70797729492188, + "logps/rejected": -251.3688507080078, + "loss": 0.5185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5519653558731079, + "rewards/margins": 0.036830127239227295, + "rewards/rejected": 0.5151352286338806, + "step": 2341 + }, + { + "epoch": 0.14, + "learning_rate": 9.706786989848152e-08, + "logits/chosen": -2.1394357681274414, + "logits/rejected": -2.1328070163726807, + "logps/chosen": -33.20350646972656, + "logps/rejected": -116.66482543945312, + "loss": 0.6242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03823700174689293, + "rewards/margins": 0.21377259492874146, + "rewards/rejected": -0.17553558945655823, + "step": 2342 + }, + { + "epoch": 0.14, + "learning_rate": 9.706468929215807e-08, + "logits/chosen": -1.9996806383132935, + "logits/rejected": -1.998295783996582, + "logps/chosen": -0.6831529140472412, + "logps/rejected": -47.07550048828125, + "loss": 0.6899, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010198998264968395, + "rewards/margins": 0.06227414682507515, + "rewards/rejected": -0.07247314602136612, + "step": 2343 + }, + { + "epoch": 0.14, + "learning_rate": 9.706150701386664e-08, + "logits/chosen": -1.9434068202972412, + "logits/rejected": -1.8911943435668945, + "logps/chosen": -175.5774383544922, + "logps/rejected": -340.955322265625, + "loss": 0.5014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7391464114189148, + "rewards/margins": 0.06992340087890625, + "rewards/rejected": 0.6692230105400085, + "step": 2344 + }, + { + "epoch": 0.14, + "learning_rate": 9.705832306372027e-08, + "logits/chosen": -1.9091193675994873, + "logits/rejected": -1.7765471935272217, + "logps/chosen": -232.930908203125, + "logps/rejected": -514.5346069335938, + "loss": 0.5958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02776184119284153, + "rewards/margins": 0.3419281244277954, + "rewards/rejected": -0.31416627764701843, + "step": 2345 + }, + { + "epoch": 0.14, + "learning_rate": 9.705513744183206e-08, + "logits/chosen": -2.255708694458008, + "logits/rejected": -2.2472569942474365, + "logps/chosen": -222.05303955078125, + "logps/rejected": -295.04833984375, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1032532453536987, + "rewards/margins": 0.6212616562843323, + "rewards/rejected": 0.48199158906936646, + "step": 2346 + }, + { + "epoch": 0.14, + "learning_rate": 9.705195014831522e-08, + "logits/chosen": -2.074347734451294, + "logits/rejected": -2.0493290424346924, + "logps/chosen": -304.15643310546875, + "logps/rejected": -416.5723876953125, + "loss": 0.5046, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8217193484306335, + "rewards/margins": -0.00422060489654541, + "rewards/rejected": 0.825939953327179, + "step": 2347 + }, + { + "epoch": 0.14, + "learning_rate": 9.704876118328295e-08, + "logits/chosen": -2.132190227508545, + "logits/rejected": -2.1276583671569824, + "logps/chosen": -12.23781967163086, + "logps/rejected": -192.4705810546875, + "loss": 0.4971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048291970044374466, + "rewards/margins": 0.9974496960639954, + "rewards/rejected": -0.94915771484375, + "step": 2348 + }, + { + "epoch": 0.14, + "learning_rate": 9.704557054684854e-08, + "logits/chosen": -1.926494836807251, + "logits/rejected": -1.9337470531463623, + "logps/chosen": -348.0794677734375, + "logps/rejected": -454.38409423828125, + "loss": 0.1739, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6901673078536987, + "rewards/margins": 1.359857201576233, + "rewards/rejected": 0.33031007647514343, + "step": 2349 + }, + { + "epoch": 0.14, + "learning_rate": 9.704237823912534e-08, + "logits/chosen": -2.1251041889190674, + "logits/rejected": -2.0767130851745605, + "logps/chosen": -233.07354736328125, + "logps/rejected": -464.27044677734375, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0009812116622925, + "rewards/margins": 1.1297532320022583, + "rewards/rejected": -0.12877197563648224, + "step": 2350 + }, + { + "epoch": 0.14, + "learning_rate": 9.703918426022676e-08, + "logits/chosen": -2.0708229541778564, + "logits/rejected": -2.0729470252990723, + "logps/chosen": -51.74793243408203, + "logps/rejected": -139.3271484375, + "loss": 0.5804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009049606509506702, + "rewards/margins": 0.388467013835907, + "rewards/rejected": -0.37941741943359375, + "step": 2351 + }, + { + "epoch": 0.14, + "learning_rate": 9.703598861026627e-08, + "logits/chosen": -1.940743088722229, + "logits/rejected": -1.9262866973876953, + "logps/chosen": -172.6546173095703, + "logps/rejected": -269.38287353515625, + "loss": 0.6663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0762481689453125, + "rewards/margins": 0.0039520263671875, + "rewards/rejected": 0.072296142578125, + "step": 2352 + }, + { + "epoch": 0.14, + "learning_rate": 9.703279128935736e-08, + "logits/chosen": -2.084182024002075, + "logits/rejected": -2.063141345977783, + "logps/chosen": -189.50650024414062, + "logps/rejected": -280.2296142578125, + "loss": 0.5256, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8360244631767273, + "rewards/margins": -0.08587193489074707, + "rewards/rejected": 0.9218963980674744, + "step": 2353 + }, + { + "epoch": 0.14, + "learning_rate": 9.702959229761366e-08, + "logits/chosen": -2.043870210647583, + "logits/rejected": -2.0216286182403564, + "logps/chosen": -186.74893188476562, + "logps/rejected": -264.1046142578125, + "loss": 0.4806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8261260986328125, + "rewards/margins": 0.055023193359375, + "rewards/rejected": 0.7711029052734375, + "step": 2354 + }, + { + "epoch": 0.14, + "learning_rate": 9.702639163514878e-08, + "logits/chosen": -2.2065322399139404, + "logits/rejected": -2.069634199142456, + "logps/chosen": -185.58447265625, + "logps/rejected": -464.97869873046875, + "loss": 0.6559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030990601517260075, + "rewards/margins": 0.1580459624528885, + "rewards/rejected": -0.16114501655101776, + "step": 2355 + }, + { + "epoch": 0.14, + "learning_rate": 9.702318930207645e-08, + "logits/chosen": -2.119781970977783, + "logits/rejected": -2.117144823074341, + "logps/chosen": -57.67154312133789, + "logps/rejected": -313.480224609375, + "loss": 0.3793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32782211899757385, + "rewards/margins": 1.2946434020996094, + "rewards/rejected": -0.9668213129043579, + "step": 2356 + }, + { + "epoch": 0.14, + "learning_rate": 9.701998529851043e-08, + "logits/chosen": -2.0535900592803955, + "logits/rejected": -2.0532233715057373, + "logps/chosen": -21.29254722595215, + "logps/rejected": -146.21978759765625, + "loss": 0.6896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15863895416259766, + "rewards/margins": 0.21357479691505432, + "rewards/rejected": -0.372213751077652, + "step": 2357 + }, + { + "epoch": 0.14, + "learning_rate": 9.701677962456451e-08, + "logits/chosen": -2.344866991043091, + "logits/rejected": -2.346696376800537, + "logps/chosen": -0.006772303022444248, + "logps/rejected": -149.47537231445312, + "loss": 0.6647, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.007193148136139e-05, + "rewards/margins": 0.10965266823768616, + "rewards/rejected": -0.10974273830652237, + "step": 2358 + }, + { + "epoch": 0.14, + "learning_rate": 9.701357228035261e-08, + "logits/chosen": -2.255509853363037, + "logits/rejected": -2.2322590351104736, + "logps/chosen": -64.39959716796875, + "logps/rejected": -275.438720703125, + "loss": 0.6283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3158020079135895, + "rewards/margins": 0.7413787841796875, + "rewards/rejected": -1.0571807622909546, + "step": 2359 + }, + { + "epoch": 0.14, + "learning_rate": 9.701036326598866e-08, + "logits/chosen": -2.0129737854003906, + "logits/rejected": -1.9919432401657104, + "logps/chosen": -337.058349609375, + "logps/rejected": -497.405517578125, + "loss": 0.2108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.127893090248108, + "rewards/margins": 1.5772217512130737, + "rewards/rejected": -0.44932863116264343, + "step": 2360 + }, + { + "epoch": 0.14, + "learning_rate": 9.700715258158664e-08, + "logits/chosen": -1.9974658489227295, + "logits/rejected": -1.9770824909210205, + "logps/chosen": -81.78996276855469, + "logps/rejected": -285.28558349609375, + "loss": 0.6257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01939086988568306, + "rewards/margins": 0.17250366508960724, + "rewards/rejected": -0.19189453125, + "step": 2361 + }, + { + "epoch": 0.14, + "learning_rate": 9.700394022726063e-08, + "logits/chosen": -2.1516098976135254, + "logits/rejected": -2.110128164291382, + "logps/chosen": -160.4197998046875, + "logps/rejected": -278.8691711425781, + "loss": 0.5996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2394454926252365, + "rewards/margins": 0.145771786570549, + "rewards/rejected": 0.0936737060546875, + "step": 2362 + }, + { + "epoch": 0.14, + "learning_rate": 9.700072620312474e-08, + "logits/chosen": -2.084554433822632, + "logits/rejected": -2.0855979919433594, + "logps/chosen": -12.641799926757812, + "logps/rejected": -177.66177368164062, + "loss": 0.5015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012055397033691406, + "rewards/margins": 0.9589594006538391, + "rewards/rejected": -0.9469040036201477, + "step": 2363 + }, + { + "epoch": 0.14, + "learning_rate": 9.699751050929315e-08, + "logits/chosen": -2.111133575439453, + "logits/rejected": -2.110833168029785, + "logps/chosen": -1.7566583156585693, + "logps/rejected": -271.952392578125, + "loss": 0.4819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0037909268867224455, + "rewards/margins": 1.1584784984588623, + "rewards/rejected": -1.154687523841858, + "step": 2364 + }, + { + "epoch": 0.14, + "learning_rate": 9.69942931458801e-08, + "logits/chosen": -2.221667528152466, + "logits/rejected": -2.219191312789917, + "logps/chosen": -23.763263702392578, + "logps/rejected": -120.90164184570312, + "loss": 0.5611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015426444821059704, + "rewards/margins": 0.6042448282241821, + "rewards/rejected": -0.588818371295929, + "step": 2365 + }, + { + "epoch": 0.14, + "learning_rate": 9.699107411299987e-08, + "logits/chosen": -1.9664942026138306, + "logits/rejected": -1.9612140655517578, + "logps/chosen": -21.988309860229492, + "logps/rejected": -133.62783813476562, + "loss": 0.5034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09224548190832138, + "rewards/margins": 0.8699020147323608, + "rewards/rejected": -0.7776565551757812, + "step": 2366 + }, + { + "epoch": 0.14, + "learning_rate": 9.698785341076685e-08, + "logits/chosen": -2.1544864177703857, + "logits/rejected": -2.138920783996582, + "logps/chosen": -17.152206420898438, + "logps/rejected": -146.88507080078125, + "loss": 0.506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012803459540009499, + "rewards/margins": 1.018010139465332, + "rewards/rejected": -1.0308135747909546, + "step": 2367 + }, + { + "epoch": 0.14, + "learning_rate": 9.698463103929542e-08, + "logits/chosen": -2.1641976833343506, + "logits/rejected": -2.168281078338623, + "logps/chosen": -220.1727752685547, + "logps/rejected": -372.4895324707031, + "loss": 0.4393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4841140806674957, + "rewards/margins": 0.4936630427837372, + "rewards/rejected": -0.009548950009047985, + "step": 2368 + }, + { + "epoch": 0.14, + "learning_rate": 9.698140699870006e-08, + "logits/chosen": -2.007168769836426, + "logits/rejected": -1.956438422203064, + "logps/chosen": -235.05999755859375, + "logps/rejected": -341.0372009277344, + "loss": 0.2227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9814743399620056, + "rewards/margins": 1.728907823562622, + "rewards/rejected": -0.7474334836006165, + "step": 2369 + }, + { + "epoch": 0.14, + "learning_rate": 9.697818128909534e-08, + "logits/chosen": -2.075355052947998, + "logits/rejected": -2.0569467544555664, + "logps/chosen": -161.57369995117188, + "logps/rejected": -329.7001953125, + "loss": 0.4534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.505645751953125, + "rewards/margins": 0.3736327886581421, + "rewards/rejected": 0.13201294839382172, + "step": 2370 + }, + { + "epoch": 0.14, + "learning_rate": 9.69749539105958e-08, + "logits/chosen": -2.2655160427093506, + "logits/rejected": -2.261814832687378, + "logps/chosen": -17.448047637939453, + "logps/rejected": -139.24197387695312, + "loss": 0.5394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05953521654009819, + "rewards/margins": 0.6066513061523438, + "rewards/rejected": -0.5471161007881165, + "step": 2371 + }, + { + "epoch": 0.14, + "learning_rate": 9.697172486331613e-08, + "logits/chosen": -1.9467370510101318, + "logits/rejected": -1.977972388267517, + "logps/chosen": -199.87078857421875, + "logps/rejected": -215.09613037109375, + "loss": 0.6236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01324462890625, + "rewards/margins": 0.2687667906284332, + "rewards/rejected": -0.2555221617221832, + "step": 2372 + }, + { + "epoch": 0.14, + "learning_rate": 9.696849414737103e-08, + "logits/chosen": -2.1145095825195312, + "logits/rejected": -2.1146671772003174, + "logps/chosen": -20.170875549316406, + "logps/rejected": -85.21585845947266, + "loss": 0.7421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21239987015724182, + "rewards/margins": 0.062262147665023804, + "rewards/rejected": -0.2746620178222656, + "step": 2373 + }, + { + "epoch": 0.14, + "learning_rate": 9.696526176287528e-08, + "logits/chosen": -2.088484048843384, + "logits/rejected": -2.0898282527923584, + "logps/chosen": -232.37973022460938, + "logps/rejected": -336.13824462890625, + "loss": 0.4226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5757110714912415, + "rewards/margins": 0.5040435791015625, + "rewards/rejected": 0.07166748493909836, + "step": 2374 + }, + { + "epoch": 0.14, + "learning_rate": 9.696202770994368e-08, + "logits/chosen": -2.2708141803741455, + "logits/rejected": -2.252664566040039, + "logps/chosen": -116.83450317382812, + "logps/rejected": -277.4216003417969, + "loss": 0.6261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0959571823477745, + "rewards/margins": 0.3236351013183594, + "rewards/rejected": -0.4195922911167145, + "step": 2375 + }, + { + "epoch": 0.14, + "learning_rate": 9.695879198869115e-08, + "logits/chosen": -2.094774007797241, + "logits/rejected": -2.039400815963745, + "logps/chosen": -234.96746826171875, + "logps/rejected": -482.2843933105469, + "loss": 0.3653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.708905041217804, + "rewards/margins": 0.691021740436554, + "rewards/rejected": 0.01788330078125, + "step": 2376 + }, + { + "epoch": 0.14, + "learning_rate": 9.695555459923262e-08, + "logits/chosen": -2.1491990089416504, + "logits/rejected": -2.142357349395752, + "logps/chosen": -2.588231086730957, + "logps/rejected": -195.62631225585938, + "loss": 0.5245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010293317027390003, + "rewards/margins": 0.8084744811058044, + "rewards/rejected": -0.7981811761856079, + "step": 2377 + }, + { + "epoch": 0.14, + "learning_rate": 9.69523155416831e-08, + "logits/chosen": -2.0108869075775146, + "logits/rejected": -2.008748769760132, + "logps/chosen": -35.39936065673828, + "logps/rejected": -191.72325134277344, + "loss": 0.4962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14120064675807953, + "rewards/margins": 0.6939380764961243, + "rewards/rejected": -0.5527374148368835, + "step": 2378 + }, + { + "epoch": 0.14, + "learning_rate": 9.694907481615768e-08, + "logits/chosen": -2.1840014457702637, + "logits/rejected": -2.179593563079834, + "logps/chosen": -30.049036026000977, + "logps/rejected": -65.68148803710938, + "loss": 0.7156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20306873321533203, + "rewards/margins": 0.052612870931625366, + "rewards/rejected": -0.2556816041469574, + "step": 2379 + }, + { + "epoch": 0.14, + "learning_rate": 9.694583242277148e-08, + "logits/chosen": -2.2942373752593994, + "logits/rejected": -2.2921481132507324, + "logps/chosen": -6.651821604464203e-05, + "logps/rejected": -142.81198120117188, + "loss": 0.5785, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.821334631647915e-07, + "rewards/margins": 0.53355473279953, + "rewards/rejected": -0.5335556268692017, + "step": 2380 + }, + { + "epoch": 0.14, + "learning_rate": 9.694258836163966e-08, + "logits/chosen": -2.1848714351654053, + "logits/rejected": -2.192866325378418, + "logps/chosen": -38.30740737915039, + "logps/rejected": -160.12164306640625, + "loss": 0.543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11404228210449219, + "rewards/margins": 0.5649960041046143, + "rewards/rejected": -0.4509536921977997, + "step": 2381 + }, + { + "epoch": 0.14, + "learning_rate": 9.693934263287748e-08, + "logits/chosen": -2.012716293334961, + "logits/rejected": -2.01054310798645, + "logps/chosen": -0.00023470690939575434, + "logps/rejected": -132.26779174804688, + "loss": 0.5777, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.808460734144319e-06, + "rewards/margins": 0.5275477766990662, + "rewards/rejected": -0.5275565981864929, + "step": 2382 + }, + { + "epoch": 0.14, + "learning_rate": 9.693609523660025e-08, + "logits/chosen": -2.2815542221069336, + "logits/rejected": -2.2788920402526855, + "logps/chosen": -17.267505645751953, + "logps/rejected": -215.35421752929688, + "loss": 0.6098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19631080329418182, + "rewards/margins": 0.594887912273407, + "rewards/rejected": -0.79119873046875, + "step": 2383 + }, + { + "epoch": 0.14, + "learning_rate": 9.693284617292332e-08, + "logits/chosen": -2.247067928314209, + "logits/rejected": -2.176527500152588, + "logps/chosen": -220.83795166015625, + "logps/rejected": -479.7902526855469, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1576385498046875, + "rewards/margins": 0.9346679449081421, + "rewards/rejected": 0.22297058999538422, + "step": 2384 + }, + { + "epoch": 0.14, + "learning_rate": 9.692959544196212e-08, + "logits/chosen": -2.0776710510253906, + "logits/rejected": -2.094304323196411, + "logps/chosen": -162.80270385742188, + "logps/rejected": -231.35540771484375, + "loss": 0.5216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3944763243198395, + "rewards/margins": 0.33397218585014343, + "rewards/rejected": 0.06050414964556694, + "step": 2385 + }, + { + "epoch": 0.14, + "learning_rate": 9.692634304383213e-08, + "logits/chosen": -1.983382225036621, + "logits/rejected": -1.9862515926361084, + "logps/chosen": -137.7716064453125, + "logps/rejected": -265.1414489746094, + "loss": 0.6276, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.34495240449905396, + "rewards/margins": -0.18981629610061646, + "rewards/rejected": 0.5347687005996704, + "step": 2386 + }, + { + "epoch": 0.14, + "learning_rate": 9.69230889786489e-08, + "logits/chosen": -2.0833282470703125, + "logits/rejected": -2.0833349227905273, + "logps/chosen": -1.8475444316864014, + "logps/rejected": -67.1526870727539, + "loss": 0.5962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11858637630939484, + "rewards/margins": 0.3632732629776001, + "rewards/rejected": -0.24468688666820526, + "step": 2387 + }, + { + "epoch": 0.14, + "learning_rate": 9.691983324652803e-08, + "logits/chosen": -2.177020788192749, + "logits/rejected": -2.16371488571167, + "logps/chosen": -69.63921356201172, + "logps/rejected": -182.51080322265625, + "loss": 0.4527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38754960894584656, + "rewards/margins": 0.5777854919433594, + "rewards/rejected": -0.190235897898674, + "step": 2388 + }, + { + "epoch": 0.14, + "learning_rate": 9.691657584758517e-08, + "logits/chosen": -2.0974297523498535, + "logits/rejected": -2.0913589000701904, + "logps/chosen": -222.54660034179688, + "logps/rejected": -309.562744140625, + "loss": 0.4434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.755871593952179, + "rewards/margins": 0.2425537109375, + "rewards/rejected": 0.513317883014679, + "step": 2389 + }, + { + "epoch": 0.14, + "learning_rate": 9.691331678193604e-08, + "logits/chosen": -2.009679079055786, + "logits/rejected": -1.9885305166244507, + "logps/chosen": -159.43202209472656, + "logps/rejected": -360.69549560546875, + "loss": 0.4239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6030837893486023, + "rewards/margins": 0.3495284914970398, + "rewards/rejected": 0.2535552978515625, + "step": 2390 + }, + { + "epoch": 0.14, + "learning_rate": 9.691005604969642e-08, + "logits/chosen": -2.207044839859009, + "logits/rejected": -2.216580390930176, + "logps/chosen": -23.980436325073242, + "logps/rejected": -164.16055297851562, + "loss": 0.6152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09355106204748154, + "rewards/margins": 0.23292332887649536, + "rewards/rejected": -0.13937225937843323, + "step": 2391 + }, + { + "epoch": 0.14, + "learning_rate": 9.690679365098214e-08, + "logits/chosen": -2.0443451404571533, + "logits/rejected": -2.0207083225250244, + "logps/chosen": -244.1245880126953, + "logps/rejected": -398.982666015625, + "loss": 0.3881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8942520022392273, + "rewards/margins": 0.3382827639579773, + "rewards/rejected": 0.55596923828125, + "step": 2392 + }, + { + "epoch": 0.14, + "learning_rate": 9.690352958590912e-08, + "logits/chosen": -2.1917903423309326, + "logits/rejected": -2.0671849250793457, + "logps/chosen": -249.3354034423828, + "logps/rejected": -496.0509948730469, + "loss": 0.5396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19303588569164276, + "rewards/margins": 0.49655455350875854, + "rewards/rejected": -0.303518682718277, + "step": 2393 + }, + { + "epoch": 0.14, + "learning_rate": 9.690026385459329e-08, + "logits/chosen": -2.2563445568084717, + "logits/rejected": -2.2659764289855957, + "logps/chosen": -28.258405685424805, + "logps/rejected": -176.30392456054688, + "loss": 0.6711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07158470153808594, + "rewards/margins": 0.2138950526714325, + "rewards/rejected": -0.28547975420951843, + "step": 2394 + }, + { + "epoch": 0.14, + "learning_rate": 9.689699645715067e-08, + "logits/chosen": -2.092334270477295, + "logits/rejected": -2.0889055728912354, + "logps/chosen": -22.928104400634766, + "logps/rejected": -213.9520263671875, + "loss": 0.5711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09355030208826065, + "rewards/margins": 0.43237170577049255, + "rewards/rejected": -0.3388214111328125, + "step": 2395 + }, + { + "epoch": 0.14, + "learning_rate": 9.689372739369735e-08, + "logits/chosen": -2.2054123878479004, + "logits/rejected": -2.2023303508758545, + "logps/chosen": -17.8558406829834, + "logps/rejected": -111.08096313476562, + "loss": 0.5664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02698059193789959, + "rewards/margins": 0.5761321783065796, + "rewards/rejected": -0.549151599407196, + "step": 2396 + }, + { + "epoch": 0.14, + "learning_rate": 9.689045666434942e-08, + "logits/chosen": -2.275649309158325, + "logits/rejected": -2.277038335800171, + "logps/chosen": -66.49067687988281, + "logps/rejected": -192.4479217529297, + "loss": 0.5944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07355957478284836, + "rewards/margins": 0.2894439697265625, + "rewards/rejected": -0.21588440239429474, + "step": 2397 + }, + { + "epoch": 0.14, + "learning_rate": 9.688718426922313e-08, + "logits/chosen": -2.2505364418029785, + "logits/rejected": -2.243326187133789, + "logps/chosen": -0.0005679029272869229, + "logps/rejected": -197.238525390625, + "loss": 0.5753, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5383278878289275e-05, + "rewards/margins": 0.5391753315925598, + "rewards/rejected": -0.5391907095909119, + "step": 2398 + }, + { + "epoch": 0.14, + "learning_rate": 9.68839102084347e-08, + "logits/chosen": -2.0924909114837646, + "logits/rejected": -2.0883288383483887, + "logps/chosen": -25.022981643676758, + "logps/rejected": -202.84890747070312, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06867390125989914, + "rewards/margins": 0.334459125995636, + "rewards/rejected": -0.26578521728515625, + "step": 2399 + }, + { + "epoch": 0.14, + "learning_rate": 9.688063448210044e-08, + "logits/chosen": -2.0907280445098877, + "logits/rejected": -2.0857667922973633, + "logps/chosen": -58.51897430419922, + "logps/rejected": -262.8868408203125, + "loss": 0.4327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26038628816604614, + "rewards/margins": 0.9844417572021484, + "rewards/rejected": -0.7240554690361023, + "step": 2400 + }, + { + "epoch": 0.14, + "learning_rate": 9.687735709033674e-08, + "logits/chosen": -2.1313955783843994, + "logits/rejected": -2.121375560760498, + "logps/chosen": -161.7611083984375, + "logps/rejected": -357.1028137207031, + "loss": 0.3812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.762225329875946, + "rewards/margins": 0.5538909435272217, + "rewards/rejected": 0.20833435654640198, + "step": 2401 + }, + { + "epoch": 0.14, + "learning_rate": 9.687407803326001e-08, + "logits/chosen": -2.1370086669921875, + "logits/rejected": -2.137092351913452, + "logps/chosen": -10.888550758361816, + "logps/rejected": -72.8144760131836, + "loss": 0.6394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03746051713824272, + "rewards/margins": 0.1808893233537674, + "rewards/rejected": -0.14342880249023438, + "step": 2402 + }, + { + "epoch": 0.14, + "learning_rate": 9.687079731098673e-08, + "logits/chosen": -2.278411388397217, + "logits/rejected": -2.27340030670166, + "logps/chosen": -21.97048568725586, + "logps/rejected": -107.79669189453125, + "loss": 0.5292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0044235228560864925, + "rewards/margins": 0.8032951354980469, + "rewards/rejected": -0.798871636390686, + "step": 2403 + }, + { + "epoch": 0.14, + "learning_rate": 9.686751492363347e-08, + "logits/chosen": -1.9204862117767334, + "logits/rejected": -1.912225604057312, + "logps/chosen": -179.09210205078125, + "logps/rejected": -270.7108459472656, + "loss": 0.57, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35670167207717896, + "rewards/margins": 0.15469971299171448, + "rewards/rejected": 0.20200195908546448, + "step": 2404 + }, + { + "epoch": 0.14, + "learning_rate": 9.686423087131683e-08, + "logits/chosen": -2.1695897579193115, + "logits/rejected": -2.1448001861572266, + "logps/chosen": -166.41329956054688, + "logps/rejected": -232.87689208984375, + "loss": 0.6437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06268768757581711, + "rewards/margins": 0.05702972784638405, + "rewards/rejected": 0.005657959263771772, + "step": 2405 + }, + { + "epoch": 0.14, + "learning_rate": 9.686094515415347e-08, + "logits/chosen": -2.114933490753174, + "logits/rejected": -2.0916996002197266, + "logps/chosen": -192.503173828125, + "logps/rejected": -498.6507263183594, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6642913818359375, + "rewards/margins": 1.2263703346252441, + "rewards/rejected": -0.5620788931846619, + "step": 2406 + }, + { + "epoch": 0.14, + "learning_rate": 9.68576577722601e-08, + "logits/chosen": -2.0795018672943115, + "logits/rejected": -2.0380759239196777, + "logps/chosen": -290.7171936035156, + "logps/rejected": -552.3973999023438, + "loss": 0.3783, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.263665795326233, + "rewards/margins": 0.28941649198532104, + "rewards/rejected": 0.9742493033409119, + "step": 2407 + }, + { + "epoch": 0.14, + "learning_rate": 9.685436872575354e-08, + "logits/chosen": -1.9318674802780151, + "logits/rejected": -1.9203646183013916, + "logps/chosen": -62.621376037597656, + "logps/rejected": -302.7763366699219, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09093284606933594, + "rewards/margins": 1.6950680017471313, + "rewards/rejected": -1.6041351556777954, + "step": 2408 + }, + { + "epoch": 0.14, + "learning_rate": 9.685107801475059e-08, + "logits/chosen": -2.2509779930114746, + "logits/rejected": -2.259549856185913, + "logps/chosen": -0.013949551619589329, + "logps/rejected": -167.67079162597656, + "loss": 0.5197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003746291622519493, + "rewards/margins": 0.8833640813827515, + "rewards/rejected": -0.8837386965751648, + "step": 2409 + }, + { + "epoch": 0.14, + "learning_rate": 9.684778563936819e-08, + "logits/chosen": -2.2129602432250977, + "logits/rejected": -2.206969976425171, + "logps/chosen": -92.78741455078125, + "logps/rejected": -255.9294891357422, + "loss": 0.4517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14752426743507385, + "rewards/margins": 1.079515814781189, + "rewards/rejected": -0.9319915771484375, + "step": 2410 + }, + { + "epoch": 0.14, + "learning_rate": 9.684449159972329e-08, + "logits/chosen": -2.201596260070801, + "logits/rejected": -2.1919846534729004, + "logps/chosen": -0.0009514756384305656, + "logps/rejected": -105.60499572753906, + "loss": 0.5187, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.342495958553627e-05, + "rewards/margins": 0.884569525718689, + "rewards/rejected": -0.8845260739326477, + "step": 2411 + }, + { + "epoch": 0.14, + "learning_rate": 9.684119589593292e-08, + "logits/chosen": -2.178405523300171, + "logits/rejected": -2.1748507022857666, + "logps/chosen": -168.10360717773438, + "logps/rejected": -238.491455078125, + "loss": 0.6157, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.37360382080078125, + "rewards/margins": -0.06361541152000427, + "rewards/rejected": 0.4372192323207855, + "step": 2412 + }, + { + "epoch": 0.14, + "learning_rate": 9.683789852811412e-08, + "logits/chosen": -2.128243923187256, + "logits/rejected": -2.111654758453369, + "logps/chosen": -204.89796447753906, + "logps/rejected": -352.58380126953125, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8325439691543579, + "rewards/margins": 0.3493957817554474, + "rewards/rejected": 0.4831481873989105, + "step": 2413 + }, + { + "epoch": 0.14, + "learning_rate": 9.683459949638408e-08, + "logits/chosen": -2.194662570953369, + "logits/rejected": -2.1916184425354004, + "logps/chosen": -205.73521423339844, + "logps/rejected": -267.27471923828125, + "loss": 0.4706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.805419921875, + "rewards/margins": 0.14041441679000854, + "rewards/rejected": 0.6650055050849915, + "step": 2414 + }, + { + "epoch": 0.14, + "learning_rate": 9.683129880085995e-08, + "logits/chosen": -2.1658496856689453, + "logits/rejected": -2.1704819202423096, + "logps/chosen": -172.72152709960938, + "logps/rejected": -230.61700439453125, + "loss": 0.4122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45480042695999146, + "rewards/margins": 0.8112869262695312, + "rewards/rejected": -0.3564865291118622, + "step": 2415 + }, + { + "epoch": 0.14, + "learning_rate": 9.682799644165902e-08, + "logits/chosen": -2.025989532470703, + "logits/rejected": -2.0203444957733154, + "logps/chosen": -0.47773560881614685, + "logps/rejected": -172.982177734375, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027338212355971336, + "rewards/margins": 0.5150247812271118, + "rewards/rejected": -0.5423629879951477, + "step": 2416 + }, + { + "epoch": 0.14, + "learning_rate": 9.682469241889859e-08, + "logits/chosen": -2.160315752029419, + "logits/rejected": -2.148968458175659, + "logps/chosen": -0.03274531662464142, + "logps/rejected": -267.6896057128906, + "loss": 0.3888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016704703448340297, + "rewards/margins": 2.4342992305755615, + "rewards/rejected": -2.435969591140747, + "step": 2417 + }, + { + "epoch": 0.14, + "learning_rate": 9.682138673269605e-08, + "logits/chosen": -2.0177979469299316, + "logits/rejected": -2.0223870277404785, + "logps/chosen": -17.00145721435547, + "logps/rejected": -119.2430648803711, + "loss": 0.4908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04810504987835884, + "rewards/margins": 0.8668962717056274, + "rewards/rejected": -0.8187912106513977, + "step": 2418 + }, + { + "epoch": 0.14, + "learning_rate": 9.681807938316881e-08, + "logits/chosen": -2.0402631759643555, + "logits/rejected": -2.0385117530822754, + "logps/chosen": -21.264659881591797, + "logps/rejected": -195.8587646484375, + "loss": 0.6172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10705146938562393, + "rewards/margins": 0.23609352111816406, + "rewards/rejected": -0.12904205918312073, + "step": 2419 + }, + { + "epoch": 0.14, + "learning_rate": 9.68147703704344e-08, + "logits/chosen": -1.927072525024414, + "logits/rejected": -1.9851958751678467, + "logps/chosen": -236.85885620117188, + "logps/rejected": -205.57308959960938, + "loss": 0.6841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00618743896484375, + "rewards/margins": 0.05211639404296875, + "rewards/rejected": -0.045928955078125, + "step": 2420 + }, + { + "epoch": 0.14, + "learning_rate": 9.681145969461034e-08, + "logits/chosen": -2.09554123878479, + "logits/rejected": -2.098235607147217, + "logps/chosen": -18.004047393798828, + "logps/rejected": -100.98762512207031, + "loss": 0.6785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.053984642028808594, + "rewards/margins": 0.051420025527477264, + "rewards/rejected": -0.10540466755628586, + "step": 2421 + }, + { + "epoch": 0.14, + "learning_rate": 9.680814735581426e-08, + "logits/chosen": -2.3627328872680664, + "logits/rejected": -2.3617727756500244, + "logps/chosen": -41.262794494628906, + "logps/rejected": -206.5998077392578, + "loss": 0.5534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006500244140625, + "rewards/margins": 0.6684128046035767, + "rewards/rejected": -0.6619125604629517, + "step": 2422 + }, + { + "epoch": 0.14, + "learning_rate": 9.68048333541638e-08, + "logits/chosen": -2.0047123432159424, + "logits/rejected": -1.989382028579712, + "logps/chosen": -284.3214416503906, + "logps/rejected": -301.41534423828125, + "loss": 0.5927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2652740478515625, + "rewards/margins": 0.14292296767234802, + "rewards/rejected": 0.12235107272863388, + "step": 2423 + }, + { + "epoch": 0.14, + "learning_rate": 9.680151768977673e-08, + "logits/chosen": -2.1119415760040283, + "logits/rejected": -2.106886148452759, + "logps/chosen": -53.330955505371094, + "logps/rejected": -148.57522583007812, + "loss": 0.6027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1458740234375, + "rewards/margins": 0.22644653916358948, + "rewards/rejected": -0.08057250827550888, + "step": 2424 + }, + { + "epoch": 0.14, + "learning_rate": 9.679820036277082e-08, + "logits/chosen": -2.0337536334991455, + "logits/rejected": -2.037541151046753, + "logps/chosen": -269.4866027832031, + "logps/rejected": -491.7313232421875, + "loss": 0.3394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9341155886650085, + "rewards/margins": 0.793182373046875, + "rewards/rejected": 0.14093323051929474, + "step": 2425 + }, + { + "epoch": 0.14, + "learning_rate": 9.679488137326392e-08, + "logits/chosen": -2.08138370513916, + "logits/rejected": -2.0804460048675537, + "logps/chosen": -13.48814868927002, + "logps/rejected": -28.551816940307617, + "loss": 0.699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04268045350909233, + "rewards/margins": 0.02876310423016548, + "rewards/rejected": -0.07144355773925781, + "step": 2426 + }, + { + "epoch": 0.14, + "learning_rate": 9.679156072137393e-08, + "logits/chosen": -2.278533697128296, + "logits/rejected": -2.2699785232543945, + "logps/chosen": -22.514965057373047, + "logps/rejected": -215.03799438476562, + "loss": 0.6076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14025592803955078, + "rewards/margins": 0.5438017249107361, + "rewards/rejected": -0.6840576529502869, + "step": 2427 + }, + { + "epoch": 0.14, + "learning_rate": 9.678823840721884e-08, + "logits/chosen": -2.095263719558716, + "logits/rejected": -2.1305415630340576, + "logps/chosen": -275.6231384277344, + "logps/rejected": -652.166015625, + "loss": 0.1673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9047638177871704, + "rewards/margins": 2.4135589599609375, + "rewards/rejected": -1.508795142173767, + "step": 2428 + }, + { + "epoch": 0.14, + "learning_rate": 9.678491443091663e-08, + "logits/chosen": -1.9468152523040771, + "logits/rejected": -1.9447312355041504, + "logps/chosen": -165.24009704589844, + "logps/rejected": -209.40480041503906, + "loss": 0.4533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6262955069541931, + "rewards/margins": 0.34232792258262634, + "rewards/rejected": 0.2839675843715668, + "step": 2429 + }, + { + "epoch": 0.14, + "learning_rate": 9.67815887925854e-08, + "logits/chosen": -1.9973704814910889, + "logits/rejected": -1.9383034706115723, + "logps/chosen": -169.1264190673828, + "logps/rejected": -383.4856872558594, + "loss": 0.3564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6610733270645142, + "rewards/margins": 0.7976303100585938, + "rewards/rejected": -0.13655701279640198, + "step": 2430 + }, + { + "epoch": 0.14, + "learning_rate": 9.677826149234334e-08, + "logits/chosen": -1.9843376874923706, + "logits/rejected": -1.989198088645935, + "logps/chosen": -81.17737579345703, + "logps/rejected": -320.13189697265625, + "loss": 0.5271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08871841430664062, + "rewards/margins": 0.5911567807197571, + "rewards/rejected": -0.5024383664131165, + "step": 2431 + }, + { + "epoch": 0.14, + "learning_rate": 9.67749325303086e-08, + "logits/chosen": -2.0968379974365234, + "logits/rejected": -2.045731544494629, + "logps/chosen": -179.21173095703125, + "logps/rejected": -395.26702880859375, + "loss": 0.4035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8381118774414062, + "rewards/margins": 0.4210525453090668, + "rewards/rejected": 0.4170593321323395, + "step": 2432 + }, + { + "epoch": 0.14, + "learning_rate": 9.677160190659944e-08, + "logits/chosen": -2.186539649963379, + "logits/rejected": -2.179083824157715, + "logps/chosen": -4.592632293701172, + "logps/rejected": -177.3671875, + "loss": 0.7084, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.008348179049789906, + "rewards/margins": -0.05872659757733345, + "rewards/rejected": 0.05037841945886612, + "step": 2433 + }, + { + "epoch": 0.14, + "learning_rate": 9.676826962133421e-08, + "logits/chosen": -2.174659490585327, + "logits/rejected": -2.1644036769866943, + "logps/chosen": -0.00012003740266663954, + "logps/rejected": -97.5938949584961, + "loss": 0.6339, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9335355950242956e-07, + "rewards/margins": 0.2530040740966797, + "rewards/rejected": -0.2530044615268707, + "step": 2434 + }, + { + "epoch": 0.14, + "learning_rate": 9.676493567463127e-08, + "logits/chosen": -1.9827351570129395, + "logits/rejected": -1.9476929903030396, + "logps/chosen": -192.63421630859375, + "logps/rejected": -404.41741943359375, + "loss": 0.3526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5906646847724915, + "rewards/margins": 0.8961731195449829, + "rewards/rejected": -0.30550843477249146, + "step": 2435 + }, + { + "epoch": 0.14, + "learning_rate": 9.676160006660906e-08, + "logits/chosen": -2.213520050048828, + "logits/rejected": -2.2111451625823975, + "logps/chosen": -13.088974952697754, + "logps/rejected": -97.99597930908203, + "loss": 0.6098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04454917833209038, + "rewards/margins": 0.3413105010986328, + "rewards/rejected": -0.29676133394241333, + "step": 2436 + }, + { + "epoch": 0.14, + "learning_rate": 9.675826279738608e-08, + "logits/chosen": -2.0494470596313477, + "logits/rejected": -2.051237106323242, + "logps/chosen": -8.618254661560059, + "logps/rejected": -215.25213623046875, + "loss": 0.6066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12673740088939667, + "rewards/margins": 0.21219119429588318, + "rewards/rejected": -0.08545380085706711, + "step": 2437 + }, + { + "epoch": 0.14, + "learning_rate": 9.67549238670809e-08, + "logits/chosen": -2.014887809753418, + "logits/rejected": -2.017040967941284, + "logps/chosen": -3.004053360200487e-05, + "logps/rejected": -188.63235473632812, + "loss": 0.5465, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7881430380839447e-07, + "rewards/margins": 0.7067716717720032, + "rewards/rejected": -0.7067718505859375, + "step": 2438 + }, + { + "epoch": 0.14, + "learning_rate": 9.67515832758121e-08, + "logits/chosen": -2.1454696655273438, + "logits/rejected": -2.141963005065918, + "logps/chosen": -13.108322143554688, + "logps/rejected": -46.56486511230469, + "loss": 0.6405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030702782794833183, + "rewards/margins": 0.17532634735107422, + "rewards/rejected": -0.1446235626935959, + "step": 2439 + }, + { + "epoch": 0.14, + "learning_rate": 9.674824102369838e-08, + "logits/chosen": -2.1704864501953125, + "logits/rejected": -2.158775568008423, + "logps/chosen": -22.25497817993164, + "logps/rejected": -176.3355712890625, + "loss": 0.5617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05256977304816246, + "rewards/margins": 0.6248811483383179, + "rewards/rejected": -0.5723114013671875, + "step": 2440 + }, + { + "epoch": 0.14, + "learning_rate": 9.674489711085849e-08, + "logits/chosen": -2.1597352027893066, + "logits/rejected": -2.158952236175537, + "logps/chosen": -22.21209716796875, + "logps/rejected": -153.48876953125, + "loss": 0.5821, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020402146503329277, + "rewards/margins": 0.6563816070556641, + "rewards/rejected": -0.6767837405204773, + "step": 2441 + }, + { + "epoch": 0.14, + "learning_rate": 9.674155153741118e-08, + "logits/chosen": -2.1180405616760254, + "logits/rejected": -2.0786921977996826, + "logps/chosen": -125.43902587890625, + "logps/rejected": -244.4969482421875, + "loss": 0.3882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6494202017784119, + "rewards/margins": 0.588081419467926, + "rewards/rejected": 0.06133880838751793, + "step": 2442 + }, + { + "epoch": 0.14, + "learning_rate": 9.673820430347532e-08, + "logits/chosen": -2.406528949737549, + "logits/rejected": -2.400855302810669, + "logps/chosen": -35.663490295410156, + "logps/rejected": -178.1759796142578, + "loss": 0.5343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08684120327234268, + "rewards/margins": 0.49646490812301636, + "rewards/rejected": -0.4096237123012543, + "step": 2443 + }, + { + "epoch": 0.14, + "learning_rate": 9.673485540916983e-08, + "logits/chosen": -2.0534772872924805, + "logits/rejected": -2.108586072921753, + "logps/chosen": -228.0124969482422, + "logps/rejected": -180.2562255859375, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.047755479812622, + "rewards/margins": 0.6032485961914062, + "rewards/rejected": 0.44450685381889343, + "step": 2444 + }, + { + "epoch": 0.14, + "learning_rate": 9.673150485461368e-08, + "logits/chosen": -1.9873194694519043, + "logits/rejected": -1.9849992990493774, + "logps/chosen": -125.67420196533203, + "logps/rejected": -239.72836303710938, + "loss": 0.6232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01271133404225111, + "rewards/margins": 0.19264450669288635, + "rewards/rejected": -0.17993317544460297, + "step": 2445 + }, + { + "epoch": 0.14, + "learning_rate": 9.672815263992589e-08, + "logits/chosen": -2.126272201538086, + "logits/rejected": -2.130829095840454, + "logps/chosen": -29.710298538208008, + "logps/rejected": -144.38848876953125, + "loss": 0.5516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.053727149963378906, + "rewards/margins": 0.6167139410972595, + "rewards/rejected": -0.5629867911338806, + "step": 2446 + }, + { + "epoch": 0.14, + "learning_rate": 9.672479876522552e-08, + "logits/chosen": -2.174569606781006, + "logits/rejected": -2.1604995727539062, + "logps/chosen": -1.461667537689209, + "logps/rejected": -163.7894744873047, + "loss": 0.475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017964793369174004, + "rewards/margins": 1.2563683986663818, + "rewards/rejected": -1.2743332386016846, + "step": 2447 + }, + { + "epoch": 0.14, + "learning_rate": 9.672144323063176e-08, + "logits/chosen": -2.1677892208099365, + "logits/rejected": -2.139735460281372, + "logps/chosen": -193.841552734375, + "logps/rejected": -225.2548065185547, + "loss": 0.532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28319549560546875, + "rewards/margins": 0.38428038358688354, + "rewards/rejected": -0.10108490288257599, + "step": 2448 + }, + { + "epoch": 0.14, + "learning_rate": 9.67180860362638e-08, + "logits/chosen": -2.194092273712158, + "logits/rejected": -2.1857857704162598, + "logps/chosen": -27.000944137573242, + "logps/rejected": -177.608642578125, + "loss": 0.597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.041100312024354935, + "rewards/margins": 0.4364265501499176, + "rewards/rejected": -0.47752687335014343, + "step": 2449 + }, + { + "epoch": 0.14, + "learning_rate": 9.671472718224089e-08, + "logits/chosen": -2.12125563621521, + "logits/rejected": -2.122731924057007, + "logps/chosen": -38.396018981933594, + "logps/rejected": -205.51605224609375, + "loss": 0.4668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.235066220164299, + "rewards/margins": 0.6845016479492188, + "rewards/rejected": -0.44943544268608093, + "step": 2450 + }, + { + "epoch": 0.14, + "learning_rate": 9.671136666868237e-08, + "logits/chosen": -1.919946312904358, + "logits/rejected": -1.8798563480377197, + "logps/chosen": -196.5770263671875, + "logps/rejected": -430.06866455078125, + "loss": 0.5471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24799194931983948, + "rewards/margins": 0.26905518770217896, + "rewards/rejected": -0.02106323279440403, + "step": 2451 + }, + { + "epoch": 0.14, + "learning_rate": 9.670800449570762e-08, + "logits/chosen": -2.1279382705688477, + "logits/rejected": -2.118147611618042, + "logps/chosen": -199.18743896484375, + "logps/rejected": -348.17059326171875, + "loss": 0.3406, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0399048328399658, + "rewards/margins": 0.6880829334259033, + "rewards/rejected": 0.3518218994140625, + "step": 2452 + }, + { + "epoch": 0.14, + "learning_rate": 9.670464066343606e-08, + "logits/chosen": -2.041170597076416, + "logits/rejected": -2.0463571548461914, + "logps/chosen": -12.736546516418457, + "logps/rejected": -127.1826171875, + "loss": 0.6306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029965495690703392, + "rewards/margins": 0.2969624698162079, + "rewards/rejected": -0.32692795991897583, + "step": 2453 + }, + { + "epoch": 0.14, + "learning_rate": 9.670127517198723e-08, + "logits/chosen": -2.0127980709075928, + "logits/rejected": -1.9949342012405396, + "logps/chosen": -115.6225814819336, + "logps/rejected": -246.90484619140625, + "loss": 0.5645, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5613464713096619, + "rewards/margins": -0.15478515625, + "rewards/rejected": 0.7161316275596619, + "step": 2454 + }, + { + "epoch": 0.14, + "learning_rate": 9.669790802148066e-08, + "logits/chosen": -2.146578311920166, + "logits/rejected": -2.1330740451812744, + "logps/chosen": -227.76400756835938, + "logps/rejected": -352.643310546875, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4069366455078125, + "rewards/margins": 0.41415101289749146, + "rewards/rejected": 0.992785632610321, + "step": 2455 + }, + { + "epoch": 0.14, + "learning_rate": 9.669453921203596e-08, + "logits/chosen": -1.9715642929077148, + "logits/rejected": -1.9799777269363403, + "logps/chosen": -7.255500316619873, + "logps/rejected": -136.06419372558594, + "loss": 0.608, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.7697296142578125e-05, + "rewards/margins": 0.37935611605644226, + "rewards/rejected": -0.3792984187602997, + "step": 2456 + }, + { + "epoch": 0.14, + "learning_rate": 9.669116874377283e-08, + "logits/chosen": -2.0966553688049316, + "logits/rejected": -2.0679991245269775, + "logps/chosen": -24.158100128173828, + "logps/rejected": -207.49478149414062, + "loss": 0.5004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.060474395751953125, + "rewards/margins": 0.9752006530761719, + "rewards/rejected": -1.035675048828125, + "step": 2457 + }, + { + "epoch": 0.14, + "learning_rate": 9.6687796616811e-08, + "logits/chosen": -1.9642518758773804, + "logits/rejected": -2.0218660831451416, + "logps/chosen": -180.28842163085938, + "logps/rejected": -262.95721435546875, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6669601798057556, + "rewards/margins": 1.1160354614257812, + "rewards/rejected": -0.449075311422348, + "step": 2458 + }, + { + "epoch": 0.14, + "learning_rate": 9.668442283127024e-08, + "logits/chosen": -2.1050493717193604, + "logits/rejected": -1.9943410158157349, + "logps/chosen": -275.40118408203125, + "logps/rejected": -428.296142578125, + "loss": 0.6747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09816284477710724, + "rewards/margins": 0.17946778237819672, + "rewards/rejected": -0.27763062715530396, + "step": 2459 + }, + { + "epoch": 0.14, + "learning_rate": 9.668104738727043e-08, + "logits/chosen": -2.0327796936035156, + "logits/rejected": -2.023362398147583, + "logps/chosen": -39.449058532714844, + "logps/rejected": -257.28271484375, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026222992688417435, + "rewards/margins": 0.8707160949707031, + "rewards/rejected": -0.8969390988349915, + "step": 2460 + }, + { + "epoch": 0.14, + "learning_rate": 9.667767028493148e-08, + "logits/chosen": -1.9044969081878662, + "logits/rejected": -1.8642380237579346, + "logps/chosen": -270.97332763671875, + "logps/rejected": -409.18072509765625, + "loss": 0.5422, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8703338503837585, + "rewards/margins": -0.16193240880966187, + "rewards/rejected": 1.0322662591934204, + "step": 2461 + }, + { + "epoch": 0.14, + "learning_rate": 9.667429152437337e-08, + "logits/chosen": -2.148432731628418, + "logits/rejected": -2.210674285888672, + "logps/chosen": -339.408447265625, + "logps/rejected": -480.3678283691406, + "loss": 0.2719, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.021612524986267, + "rewards/margins": 1.1378203630447388, + "rewards/rejected": -0.11620789021253586, + "step": 2462 + }, + { + "epoch": 0.14, + "learning_rate": 9.667091110571609e-08, + "logits/chosen": -2.0277857780456543, + "logits/rejected": -1.9991350173950195, + "logps/chosen": -226.87850952148438, + "logps/rejected": -318.470703125, + "loss": 0.579, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6701629757881165, + "rewards/margins": -0.17777711153030396, + "rewards/rejected": 0.8479400873184204, + "step": 2463 + }, + { + "epoch": 0.14, + "learning_rate": 9.666752902907978e-08, + "logits/chosen": -2.21486759185791, + "logits/rejected": -2.2132108211517334, + "logps/chosen": -0.06133776530623436, + "logps/rejected": -142.33021545410156, + "loss": 0.5497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003522298065945506, + "rewards/margins": 0.6885507702827454, + "rewards/rejected": -0.6920730471611023, + "step": 2464 + }, + { + "epoch": 0.14, + "learning_rate": 9.666414529458453e-08, + "logits/chosen": -2.232430934906006, + "logits/rejected": -2.223846912384033, + "logps/chosen": -35.8128547668457, + "logps/rejected": -193.77618408203125, + "loss": 0.5814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18199042975902557, + "rewards/margins": 0.29864540696144104, + "rewards/rejected": -0.11665496975183487, + "step": 2465 + }, + { + "epoch": 0.14, + "learning_rate": 9.666075990235061e-08, + "logits/chosen": -2.0405311584472656, + "logits/rejected": -2.0313808917999268, + "logps/chosen": -89.2467041015625, + "logps/rejected": -222.12313842773438, + "loss": 0.4904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21632690727710724, + "rewards/margins": 0.69512939453125, + "rewards/rejected": -0.47880250215530396, + "step": 2466 + }, + { + "epoch": 0.14, + "learning_rate": 9.665737285249824e-08, + "logits/chosen": -2.162748336791992, + "logits/rejected": -2.144080400466919, + "logps/chosen": -152.56436157226562, + "logps/rejected": -256.5226135253906, + "loss": 0.6532, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.430612176656723, + "rewards/margins": -0.21213379502296448, + "rewards/rejected": 0.6427459716796875, + "step": 2467 + }, + { + "epoch": 0.14, + "learning_rate": 9.665398414514777e-08, + "logits/chosen": -1.9225739240646362, + "logits/rejected": -1.9158669710159302, + "logps/chosen": -15.424860954284668, + "logps/rejected": -108.81690979003906, + "loss": 0.5889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017089843458961695, + "rewards/margins": 0.4842025935649872, + "rewards/rejected": -0.4843734800815582, + "step": 2468 + }, + { + "epoch": 0.14, + "learning_rate": 9.665059378041958e-08, + "logits/chosen": -2.1193854808807373, + "logits/rejected": -2.033580780029297, + "logps/chosen": -300.028076171875, + "logps/rejected": -487.3056640625, + "loss": 0.5357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18446044623851776, + "rewards/margins": 0.5089172124862671, + "rewards/rejected": -0.3244567811489105, + "step": 2469 + }, + { + "epoch": 0.14, + "learning_rate": 9.664720175843409e-08, + "logits/chosen": -1.9994640350341797, + "logits/rejected": -2.025693893432617, + "logps/chosen": -265.0967102050781, + "logps/rejected": -302.8725891113281, + "loss": 0.4199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7711029052734375, + "rewards/margins": 0.312753289937973, + "rewards/rejected": 0.4583496153354645, + "step": 2470 + }, + { + "epoch": 0.14, + "learning_rate": 9.664380807931183e-08, + "logits/chosen": -2.3545849323272705, + "logits/rejected": -2.35906720161438, + "logps/chosen": -16.29191017150879, + "logps/rejected": -149.30250549316406, + "loss": 0.5937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004424667451530695, + "rewards/margins": 0.42938730120658875, + "rewards/rejected": -0.4338119626045227, + "step": 2471 + }, + { + "epoch": 0.14, + "learning_rate": 9.664041274317334e-08, + "logits/chosen": -1.9908084869384766, + "logits/rejected": -1.9515118598937988, + "logps/chosen": -207.49038696289062, + "logps/rejected": -357.50152587890625, + "loss": 0.5322, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6438232660293579, + "rewards/margins": -0.044628918170928955, + "rewards/rejected": 0.6884521842002869, + "step": 2472 + }, + { + "epoch": 0.14, + "learning_rate": 9.663701575013926e-08, + "logits/chosen": -2.0687222480773926, + "logits/rejected": -2.0666985511779785, + "logps/chosen": -53.60176467895508, + "logps/rejected": -189.5458984375, + "loss": 0.6261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0633167251944542, + "rewards/margins": 0.30457574129104614, + "rewards/rejected": -0.36789247393608093, + "step": 2473 + }, + { + "epoch": 0.14, + "learning_rate": 9.663361710033022e-08, + "logits/chosen": -2.1552884578704834, + "logits/rejected": -2.1527419090270996, + "logps/chosen": -0.4061039984226227, + "logps/rejected": -122.6170654296875, + "loss": 0.5541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012192368507385254, + "rewards/margins": 0.682273268699646, + "rewards/rejected": -0.6944656372070312, + "step": 2474 + }, + { + "epoch": 0.14, + "learning_rate": 9.663021679386702e-08, + "logits/chosen": -2.1834335327148438, + "logits/rejected": -2.1830713748931885, + "logps/chosen": -10.474376678466797, + "logps/rejected": -152.46908569335938, + "loss": 0.6325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07190113514661789, + "rewards/margins": 0.350498765707016, + "rewards/rejected": -0.4223999083042145, + "step": 2475 + }, + { + "epoch": 0.14, + "learning_rate": 9.662681483087042e-08, + "logits/chosen": -2.123631715774536, + "logits/rejected": -2.121791362762451, + "logps/chosen": -31.064971923828125, + "logps/rejected": -195.85079956054688, + "loss": 0.6127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07119732350111008, + "rewards/margins": 0.510167121887207, + "rewards/rejected": -0.5813644528388977, + "step": 2476 + }, + { + "epoch": 0.14, + "learning_rate": 9.662341121146129e-08, + "logits/chosen": -2.209080219268799, + "logits/rejected": -2.196744918823242, + "logps/chosen": -138.68450927734375, + "logps/rejected": -430.77838134765625, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2970565855503082, + "rewards/margins": 2.607304573059082, + "rewards/rejected": -2.3102478981018066, + "step": 2477 + }, + { + "epoch": 0.14, + "learning_rate": 9.662000593576051e-08, + "logits/chosen": -2.251272439956665, + "logits/rejected": -2.2338240146636963, + "logps/chosen": -82.09959411621094, + "logps/rejected": -163.27767944335938, + "loss": 0.7081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13959503173828125, + "rewards/margins": 0.08625335991382599, + "rewards/rejected": -0.22584839165210724, + "step": 2478 + }, + { + "epoch": 0.14, + "learning_rate": 9.66165990038891e-08, + "logits/chosen": -2.154378890991211, + "logits/rejected": -2.163008451461792, + "logps/chosen": -7.320880889892578, + "logps/rejected": -130.65090942382812, + "loss": 0.6583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02149367332458496, + "rewards/margins": 0.169273242354393, + "rewards/rejected": -0.19076691567897797, + "step": 2479 + }, + { + "epoch": 0.14, + "learning_rate": 9.661319041596805e-08, + "logits/chosen": -1.9674659967422485, + "logits/rejected": -1.9512208700180054, + "logps/chosen": -186.5970458984375, + "logps/rejected": -262.21405029296875, + "loss": 0.5615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2125244140625, + "rewards/margins": 0.30421143770217896, + "rewards/rejected": -0.09168701618909836, + "step": 2480 + }, + { + "epoch": 0.14, + "learning_rate": 9.660978017211847e-08, + "logits/chosen": -2.027679204940796, + "logits/rejected": -2.0221855640411377, + "logps/chosen": -70.71094512939453, + "logps/rejected": -184.18202209472656, + "loss": 0.6891, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24191056191921234, + "rewards/margins": 0.23031844198703766, + "rewards/rejected": -0.47222900390625, + "step": 2481 + }, + { + "epoch": 0.14, + "learning_rate": 9.660636827246151e-08, + "logits/chosen": -2.262418508529663, + "logits/rejected": -2.2523834705352783, + "logps/chosen": -14.855879783630371, + "logps/rejected": -250.20248413085938, + "loss": 0.5939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.033418845385313034, + "rewards/margins": 0.3723485767841339, + "rewards/rejected": -0.3389297425746918, + "step": 2482 + }, + { + "epoch": 0.14, + "learning_rate": 9.660295471711838e-08, + "logits/chosen": -2.1849727630615234, + "logits/rejected": -2.177260160446167, + "logps/chosen": -40.497901916503906, + "logps/rejected": -202.98025512695312, + "loss": 0.5373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02952118031680584, + "rewards/margins": 0.8174819946289062, + "rewards/rejected": -0.7879608273506165, + "step": 2483 + }, + { + "epoch": 0.14, + "learning_rate": 9.659953950621034e-08, + "logits/chosen": -1.9102082252502441, + "logits/rejected": -1.9253772497177124, + "logps/chosen": -223.4234619140625, + "logps/rejected": -294.1611328125, + "loss": 0.4542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7551788687705994, + "rewards/margins": 0.23852849006652832, + "rewards/rejected": 0.516650378704071, + "step": 2484 + }, + { + "epoch": 0.14, + "learning_rate": 9.65961226398587e-08, + "logits/chosen": -2.2401950359344482, + "logits/rejected": -2.2205371856689453, + "logps/chosen": -227.41851806640625, + "logps/rejected": -250.91561889648438, + "loss": 0.4745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.874407947063446, + "rewards/margins": 0.00905454158782959, + "rewards/rejected": 0.8653534054756165, + "step": 2485 + }, + { + "epoch": 0.14, + "learning_rate": 9.659270411818485e-08, + "logits/chosen": -2.018000364303589, + "logits/rejected": -1.9638595581054688, + "logps/chosen": -308.8614807128906, + "logps/rejected": -449.51812744140625, + "loss": 0.2545, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.944927990436554, + "rewards/margins": 1.3457672595977783, + "rewards/rejected": -0.400839239358902, + "step": 2486 + }, + { + "epoch": 0.14, + "learning_rate": 9.658928394131027e-08, + "logits/chosen": -2.1313836574554443, + "logits/rejected": -2.1256351470947266, + "logps/chosen": -210.00900268554688, + "logps/rejected": -317.57470703125, + "loss": 0.2699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.848248302936554, + "rewards/margins": 1.0810546875, + "rewards/rejected": -0.23280639946460724, + "step": 2487 + }, + { + "epoch": 0.14, + "learning_rate": 9.65858621093564e-08, + "logits/chosen": -2.0283236503601074, + "logits/rejected": -2.0275588035583496, + "logps/chosen": -145.6424560546875, + "logps/rejected": -258.665771484375, + "loss": 0.5587, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4276672303676605, + "rewards/margins": -0.03357240557670593, + "rewards/rejected": 0.46123963594436646, + "step": 2488 + }, + { + "epoch": 0.14, + "learning_rate": 9.658243862244485e-08, + "logits/chosen": -2.235077381134033, + "logits/rejected": -2.227379322052002, + "logps/chosen": -0.20164741575717926, + "logps/rejected": -220.87060546875, + "loss": 0.496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011255228891968727, + "rewards/margins": 1.072018027305603, + "rewards/rejected": -1.0832732915878296, + "step": 2489 + }, + { + "epoch": 0.14, + "learning_rate": 9.657901348069721e-08, + "logits/chosen": -2.081707239151001, + "logits/rejected": -2.063676118850708, + "logps/chosen": -243.0775146484375, + "logps/rejected": -276.02587890625, + "loss": 0.4756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7727112174034119, + "rewards/margins": 0.050732433795928955, + "rewards/rejected": 0.7219787836074829, + "step": 2490 + }, + { + "epoch": 0.14, + "learning_rate": 9.657558668423517e-08, + "logits/chosen": -2.191107749938965, + "logits/rejected": -2.179701805114746, + "logps/chosen": -192.56875610351562, + "logps/rejected": -283.9880676269531, + "loss": 0.4886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7481293082237244, + "rewards/margins": 0.07462465763092041, + "rewards/rejected": 0.673504650592804, + "step": 2491 + }, + { + "epoch": 0.15, + "learning_rate": 9.657215823318048e-08, + "logits/chosen": -2.107506513595581, + "logits/rejected": -2.1093873977661133, + "logps/chosen": -10.154279708862305, + "logps/rejected": -257.87109375, + "loss": 0.4756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08231163024902344, + "rewards/margins": 1.0182262659072876, + "rewards/rejected": -0.9359146356582642, + "step": 2492 + }, + { + "epoch": 0.15, + "learning_rate": 9.656872812765491e-08, + "logits/chosen": -2.3421263694763184, + "logits/rejected": -2.354036331176758, + "logps/chosen": -161.42752075195312, + "logps/rejected": -253.98631286621094, + "loss": 0.7892, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.244964599609375, + "rewards/margins": -0.2297622710466385, + "rewards/rejected": -0.015202331356704235, + "step": 2493 + }, + { + "epoch": 0.15, + "learning_rate": 9.656529636778032e-08, + "logits/chosen": -2.153538465499878, + "logits/rejected": -2.150571823120117, + "logps/chosen": -0.0003223191015422344, + "logps/rejected": -126.35527038574219, + "loss": 0.6143, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2529800389747834e-06, + "rewards/margins": 0.35005953907966614, + "rewards/rejected": -0.3500618040561676, + "step": 2494 + }, + { + "epoch": 0.15, + "learning_rate": 9.656186295367865e-08, + "logits/chosen": -2.017867088317871, + "logits/rejected": -2.024451732635498, + "logps/chosen": -0.05730637162923813, + "logps/rejected": -76.57290649414062, + "loss": 0.6526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001735611236654222, + "rewards/margins": 0.18579262495040894, + "rewards/rejected": -0.18752823770046234, + "step": 2495 + }, + { + "epoch": 0.15, + "learning_rate": 9.655842788547183e-08, + "logits/chosen": -2.125702142715454, + "logits/rejected": -2.1117193698883057, + "logps/chosen": -176.6319122314453, + "logps/rejected": -296.4651794433594, + "loss": 0.5304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3175552487373352, + "rewards/margins": 0.5411484241485596, + "rewards/rejected": -0.22359314560890198, + "step": 2496 + }, + { + "epoch": 0.15, + "learning_rate": 9.65549911632819e-08, + "logits/chosen": -1.8837790489196777, + "logits/rejected": -1.8930771350860596, + "logps/chosen": -344.7711181640625, + "logps/rejected": -364.78692626953125, + "loss": 0.2643, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0013946294784546, + "rewards/margins": 1.0662689208984375, + "rewards/rejected": -0.06487426906824112, + "step": 2497 + }, + { + "epoch": 0.15, + "learning_rate": 9.655155278723097e-08, + "logits/chosen": -2.274599075317383, + "logits/rejected": -2.2708330154418945, + "logps/chosen": -6.219775199890137, + "logps/rejected": -150.95413208007812, + "loss": 0.5228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04413289949297905, + "rewards/margins": 0.9513322114944458, + "rewards/rejected": -0.995465099811554, + "step": 2498 + }, + { + "epoch": 0.15, + "learning_rate": 9.65481127574412e-08, + "logits/chosen": -2.041640281677246, + "logits/rejected": -2.0357868671417236, + "logps/chosen": -10.163285255432129, + "logps/rejected": -102.59681701660156, + "loss": 0.5157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2087501585483551, + "rewards/margins": 0.588752031326294, + "rewards/rejected": -0.38000184297561646, + "step": 2499 + }, + { + "epoch": 0.15, + "learning_rate": 9.654467107403475e-08, + "logits/chosen": -2.1660399436950684, + "logits/rejected": -2.117154598236084, + "logps/chosen": -209.55026245117188, + "logps/rejected": -360.7098388671875, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1683028936386108, + "rewards/margins": 0.1730758547782898, + "rewards/rejected": 0.995227038860321, + "step": 2500 + }, + { + "epoch": 0.15, + "learning_rate": 9.654122773713388e-08, + "logits/chosen": -1.9250171184539795, + "logits/rejected": -1.9012374877929688, + "logps/chosen": -198.91769409179688, + "logps/rejected": -294.30670166015625, + "loss": 0.5671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21493378281593323, + "rewards/margins": 0.24379120767116547, + "rewards/rejected": -0.02885742299258709, + "step": 2501 + }, + { + "epoch": 0.15, + "learning_rate": 9.6537782746861e-08, + "logits/chosen": -2.179860830307007, + "logits/rejected": -2.1393632888793945, + "logps/chosen": -101.66415405273438, + "logps/rejected": -471.75732421875, + "loss": 0.4223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0157928466796875, + "rewards/margins": 1.8593231439590454, + "rewards/rejected": -1.875115990638733, + "step": 2502 + }, + { + "epoch": 0.15, + "learning_rate": 9.65343361033384e-08, + "logits/chosen": -2.0141870975494385, + "logits/rejected": -2.0023345947265625, + "logps/chosen": -15.456987380981445, + "logps/rejected": -257.01416015625, + "loss": 0.4526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021936798468232155, + "rewards/margins": 1.3688361644744873, + "rewards/rejected": -1.346899390220642, + "step": 2503 + }, + { + "epoch": 0.15, + "learning_rate": 9.653088780668856e-08, + "logits/chosen": -2.025195360183716, + "logits/rejected": -2.0236458778381348, + "logps/chosen": -29.768081665039062, + "logps/rejected": -258.03155517578125, + "loss": 0.5536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021715164184570312, + "rewards/margins": 0.6964656710624695, + "rewards/rejected": -0.7181808352470398, + "step": 2504 + }, + { + "epoch": 0.15, + "learning_rate": 9.652743785703398e-08, + "logits/chosen": -2.211297035217285, + "logits/rejected": -2.2156248092651367, + "logps/chosen": -8.953117370605469, + "logps/rejected": -99.52099609375, + "loss": 0.6426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035437870770692825, + "rewards/margins": 0.23796837031841278, + "rewards/rejected": -0.2734062373638153, + "step": 2505 + }, + { + "epoch": 0.15, + "learning_rate": 9.652398625449723e-08, + "logits/chosen": -2.0614571571350098, + "logits/rejected": -2.0563929080963135, + "logps/chosen": -18.86658477783203, + "logps/rejected": -119.84129333496094, + "loss": 0.4195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11519107967615128, + "rewards/margins": 1.4589190483093262, + "rewards/rejected": -1.343727946281433, + "step": 2506 + }, + { + "epoch": 0.15, + "learning_rate": 9.652053299920091e-08, + "logits/chosen": -2.120072364807129, + "logits/rejected": -2.1075801849365234, + "logps/chosen": -175.08303833007812, + "logps/rejected": -263.47296142578125, + "loss": 0.4324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9616363644599915, + "rewards/margins": 0.20419007539749146, + "rewards/rejected": 0.7574462890625, + "step": 2507 + }, + { + "epoch": 0.15, + "learning_rate": 9.65170780912677e-08, + "logits/chosen": -2.189892292022705, + "logits/rejected": -2.1632885932922363, + "logps/chosen": -63.65402603149414, + "logps/rejected": -179.92852783203125, + "loss": 0.5339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09496879577636719, + "rewards/margins": 0.9460468292236328, + "rewards/rejected": -1.041015625, + "step": 2508 + }, + { + "epoch": 0.15, + "learning_rate": 9.651362153082032e-08, + "logits/chosen": -1.814245581626892, + "logits/rejected": -1.7770148515701294, + "logps/chosen": -145.41510009765625, + "logps/rejected": -344.9374694824219, + "loss": 0.485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06354065239429474, + "rewards/margins": 0.9915984869003296, + "rewards/rejected": -0.928057849407196, + "step": 2509 + }, + { + "epoch": 0.15, + "learning_rate": 9.651016331798161e-08, + "logits/chosen": -2.1144940853118896, + "logits/rejected": -2.106837034225464, + "logps/chosen": -3.9196925163269043, + "logps/rejected": -125.19851684570312, + "loss": 0.6809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018455887213349342, + "rewards/margins": 0.057336047291755676, + "rewards/rejected": -0.038880158215761185, + "step": 2510 + }, + { + "epoch": 0.15, + "learning_rate": 9.650670345287437e-08, + "logits/chosen": -2.074347734451294, + "logits/rejected": -2.0807816982269287, + "logps/chosen": -0.9833810925483704, + "logps/rejected": -55.478050231933594, + "loss": 0.6165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001882612705230713, + "rewards/margins": 0.23380833864212036, + "rewards/rejected": -0.2336200773715973, + "step": 2511 + }, + { + "epoch": 0.15, + "learning_rate": 9.650324193562154e-08, + "logits/chosen": -2.298733949661255, + "logits/rejected": -2.271881103515625, + "logps/chosen": -60.698097229003906, + "logps/rejected": -330.5611572265625, + "loss": 0.5476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2499343901872635, + "rewards/margins": 0.9154037833213806, + "rewards/rejected": -1.165338158607483, + "step": 2512 + }, + { + "epoch": 0.15, + "learning_rate": 9.649977876634608e-08, + "logits/chosen": -2.264867067337036, + "logits/rejected": -2.259643077850342, + "logps/chosen": -0.000681067758705467, + "logps/rejected": -202.2399139404297, + "loss": 0.4753, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.037131500605028e-05, + "rewards/margins": 1.2416490316390991, + "rewards/rejected": -1.2416794300079346, + "step": 2513 + }, + { + "epoch": 0.15, + "learning_rate": 9.649631394517103e-08, + "logits/chosen": -2.3655972480773926, + "logits/rejected": -2.3618242740631104, + "logps/chosen": -12.473186492919922, + "logps/rejected": -157.01315307617188, + "loss": 0.5291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.164284810423851, + "rewards/margins": 0.6089763045310974, + "rewards/rejected": -0.4446914792060852, + "step": 2514 + }, + { + "epoch": 0.15, + "learning_rate": 9.649284747221948e-08, + "logits/chosen": -2.150724411010742, + "logits/rejected": -2.1420929431915283, + "logps/chosen": -25.094890594482422, + "logps/rejected": -255.61102294921875, + "loss": 0.5131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16861553490161896, + "rewards/margins": 0.6417417526245117, + "rewards/rejected": -0.47312623262405396, + "step": 2515 + }, + { + "epoch": 0.15, + "learning_rate": 9.648937934761455e-08, + "logits/chosen": -2.207059144973755, + "logits/rejected": -2.194204568862915, + "logps/chosen": -0.004139372147619724, + "logps/rejected": -142.74386596679688, + "loss": 0.5807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001068287092493847, + "rewards/margins": 0.5155853629112244, + "rewards/rejected": -0.5156921744346619, + "step": 2516 + }, + { + "epoch": 0.15, + "learning_rate": 9.648590957147948e-08, + "logits/chosen": -2.163104295730591, + "logits/rejected": -2.162809133529663, + "logps/chosen": -7.126620292663574, + "logps/rejected": -111.7036361694336, + "loss": 0.6427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049941252917051315, + "rewards/margins": 0.1594291627407074, + "rewards/rejected": -0.10948791354894638, + "step": 2517 + }, + { + "epoch": 0.15, + "learning_rate": 9.648243814393749e-08, + "logits/chosen": -2.027848243713379, + "logits/rejected": -2.018284797668457, + "logps/chosen": -0.10316665470600128, + "logps/rejected": -125.36158752441406, + "loss": 0.6381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003909864462912083, + "rewards/margins": 0.23168888688087463, + "rewards/rejected": -0.23559875786304474, + "step": 2518 + }, + { + "epoch": 0.15, + "learning_rate": 9.647896506511194e-08, + "logits/chosen": -1.9185426235198975, + "logits/rejected": -1.92348313331604, + "logps/chosen": -211.04153442382812, + "logps/rejected": -401.6370849609375, + "loss": 0.249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9872711300849915, + "rewards/margins": 1.2416473627090454, + "rewards/rejected": -0.25437623262405396, + "step": 2519 + }, + { + "epoch": 0.15, + "learning_rate": 9.64754903351262e-08, + "logits/chosen": -2.1128716468811035, + "logits/rejected": -2.1202619075775146, + "logps/chosen": -239.68118286132812, + "logps/rejected": -357.7034912109375, + "loss": 0.3887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.931774914264679, + "rewards/margins": 0.2776336669921875, + "rewards/rejected": 0.6541412472724915, + "step": 2520 + }, + { + "epoch": 0.15, + "learning_rate": 9.647201395410372e-08, + "logits/chosen": -2.066056251525879, + "logits/rejected": -2.0651798248291016, + "logps/chosen": -0.002856483682990074, + "logps/rejected": -94.12041473388672, + "loss": 0.5842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001279945281567052, + "rewards/margins": 0.49929675459861755, + "rewards/rejected": -0.4994247555732727, + "step": 2521 + }, + { + "epoch": 0.15, + "learning_rate": 9.646853592216796e-08, + "logits/chosen": -2.0388219356536865, + "logits/rejected": -2.0341897010803223, + "logps/chosen": -34.11400604248047, + "logps/rejected": -152.0494384765625, + "loss": 0.6249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17299118638038635, + "rewards/margins": 0.5229194164276123, + "rewards/rejected": -0.695910632610321, + "step": 2522 + }, + { + "epoch": 0.15, + "learning_rate": 9.64650562394425e-08, + "logits/chosen": -1.9821159839630127, + "logits/rejected": -1.858355164527893, + "logps/chosen": -351.2818603515625, + "logps/rejected": -535.818359375, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35039064288139343, + "rewards/margins": 0.8699280023574829, + "rewards/rejected": -0.5195373892784119, + "step": 2523 + }, + { + "epoch": 0.15, + "learning_rate": 9.646157490605098e-08, + "logits/chosen": -2.0026051998138428, + "logits/rejected": -2.012566089630127, + "logps/chosen": -243.51278686523438, + "logps/rejected": -242.11944580078125, + "loss": 0.5562, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9537292718887329, + "rewards/margins": -0.29911351203918457, + "rewards/rejected": 1.2528427839279175, + "step": 2524 + }, + { + "epoch": 0.15, + "learning_rate": 9.645809192211705e-08, + "logits/chosen": -2.1028544902801514, + "logits/rejected": -2.086700201034546, + "logps/chosen": -20.53797721862793, + "logps/rejected": -207.36187744140625, + "loss": 0.4829, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01897735707461834, + "rewards/margins": 1.110670566558838, + "rewards/rejected": -1.0916931629180908, + "step": 2525 + }, + { + "epoch": 0.15, + "learning_rate": 9.645460728776443e-08, + "logits/chosen": -2.090822458267212, + "logits/rejected": -2.0418171882629395, + "logps/chosen": -269.37054443359375, + "logps/rejected": -318.9253845214844, + "loss": 0.4635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7340973019599915, + "rewards/margins": 0.344564825296402, + "rewards/rejected": 0.3895324766635895, + "step": 2526 + }, + { + "epoch": 0.15, + "learning_rate": 9.645112100311692e-08, + "logits/chosen": -2.1130144596099854, + "logits/rejected": -2.0899739265441895, + "logps/chosen": -188.30892944335938, + "logps/rejected": -373.4573974609375, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9906402826309204, + "rewards/margins": 0.8392670154571533, + "rewards/rejected": 0.15137329697608948, + "step": 2527 + }, + { + "epoch": 0.15, + "learning_rate": 9.64476330682984e-08, + "logits/chosen": -1.9627935886383057, + "logits/rejected": -1.958009958267212, + "logps/chosen": -36.727901458740234, + "logps/rejected": -105.46540832519531, + "loss": 0.6577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.041974641382694244, + "rewards/margins": 0.25177764892578125, + "rewards/rejected": -0.2937522828578949, + "step": 2528 + }, + { + "epoch": 0.15, + "learning_rate": 9.644414348343273e-08, + "logits/chosen": -2.127723455429077, + "logits/rejected": -2.127997875213623, + "logps/chosen": -1.7762133211363107e-05, + "logps/rejected": -84.55087280273438, + "loss": 0.6328, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.7683716530855236e-08, + "rewards/margins": 0.26292577385902405, + "rewards/rejected": -0.2629257142543793, + "step": 2529 + }, + { + "epoch": 0.15, + "learning_rate": 9.64406522486439e-08, + "logits/chosen": -2.0963635444641113, + "logits/rejected": -2.0870275497436523, + "logps/chosen": -0.004175184760242701, + "logps/rejected": -162.65817260742188, + "loss": 0.4906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001805078936740756, + "rewards/margins": 1.1178432703018188, + "rewards/rejected": -1.1180237531661987, + "step": 2530 + }, + { + "epoch": 0.15, + "learning_rate": 9.643715936405595e-08, + "logits/chosen": -2.1954541206359863, + "logits/rejected": -2.192328929901123, + "logps/chosen": -10.282669067382812, + "logps/rejected": -261.73455810546875, + "loss": 0.4906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023324966430664062, + "rewards/margins": 0.9777957797050476, + "rewards/rejected": -0.9544708132743835, + "step": 2531 + }, + { + "epoch": 0.15, + "learning_rate": 9.643366482979296e-08, + "logits/chosen": -2.1757395267486572, + "logits/rejected": -2.114563226699829, + "logps/chosen": -187.52725219726562, + "logps/rejected": -289.35040283203125, + "loss": 0.3504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37833863496780396, + "rewards/margins": 1.376220703125, + "rewards/rejected": -0.997882068157196, + "step": 2532 + }, + { + "epoch": 0.15, + "learning_rate": 9.643016864597903e-08, + "logits/chosen": -2.1936330795288086, + "logits/rejected": -2.183922052383423, + "logps/chosen": -32.351318359375, + "logps/rejected": -116.80703735351562, + "loss": 0.6194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04044055938720703, + "rewards/margins": 0.32089748978614807, + "rewards/rejected": -0.3613380491733551, + "step": 2533 + }, + { + "epoch": 0.15, + "learning_rate": 9.642667081273842e-08, + "logits/chosen": -2.0087757110595703, + "logits/rejected": -1.984508991241455, + "logps/chosen": -119.42583465576172, + "logps/rejected": -313.57452392578125, + "loss": 0.4965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06948547810316086, + "rewards/margins": 1.0985748767852783, + "rewards/rejected": -1.168060302734375, + "step": 2534 + }, + { + "epoch": 0.15, + "learning_rate": 9.642317133019536e-08, + "logits/chosen": -2.040231466293335, + "logits/rejected": -2.078740119934082, + "logps/chosen": -293.44219970703125, + "logps/rejected": -408.3987731933594, + "loss": 0.3808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8118896484375, + "rewards/margins": 0.5286407470703125, + "rewards/rejected": 0.2832489013671875, + "step": 2535 + }, + { + "epoch": 0.15, + "learning_rate": 9.641967019847417e-08, + "logits/chosen": -2.272261619567871, + "logits/rejected": -2.2815940380096436, + "logps/chosen": -70.7776870727539, + "logps/rejected": -194.9432830810547, + "loss": 0.4955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18212509155273438, + "rewards/margins": 0.7619606256484985, + "rewards/rejected": -0.5798355340957642, + "step": 2536 + }, + { + "epoch": 0.15, + "learning_rate": 9.641616741769924e-08, + "logits/chosen": -1.970179557800293, + "logits/rejected": -1.9659037590026855, + "logps/chosen": -267.9818420410156, + "logps/rejected": -334.048583984375, + "loss": 0.4412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5094574093818665, + "rewards/margins": 0.591723620891571, + "rewards/rejected": -0.08226623386144638, + "step": 2537 + }, + { + "epoch": 0.15, + "learning_rate": 9.6412662987995e-08, + "logits/chosen": -1.9878367185592651, + "logits/rejected": -1.9764896631240845, + "logps/chosen": -87.21550750732422, + "logps/rejected": -436.4510192871094, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3329582214355469, + "rewards/margins": 2.835768938064575, + "rewards/rejected": -2.5028107166290283, + "step": 2538 + }, + { + "epoch": 0.15, + "learning_rate": 9.640915690948593e-08, + "logits/chosen": -1.9461702108383179, + "logits/rejected": -1.9353251457214355, + "logps/chosen": -43.013587951660156, + "logps/rejected": -293.289306640625, + "loss": 0.539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16202469170093536, + "rewards/margins": 0.5648506283760071, + "rewards/rejected": -0.4028259217739105, + "step": 2539 + }, + { + "epoch": 0.15, + "learning_rate": 9.640564918229659e-08, + "logits/chosen": -2.1142494678497314, + "logits/rejected": -2.114182472229004, + "logps/chosen": -239.11148071289062, + "logps/rejected": -384.4753723144531, + "loss": 0.3841, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0310944318771362, + "rewards/margins": 0.3374115824699402, + "rewards/rejected": 0.693682849407196, + "step": 2540 + }, + { + "epoch": 0.15, + "learning_rate": 9.640213980655161e-08, + "logits/chosen": -1.7126117944717407, + "logits/rejected": -1.655805230140686, + "logps/chosen": -285.5841369628906, + "logps/rejected": -437.77813720703125, + "loss": 0.522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.265066534280777, + "rewards/margins": 0.482168585062027, + "rewards/rejected": -0.21710205078125, + "step": 2541 + }, + { + "epoch": 0.15, + "learning_rate": 9.639862878237564e-08, + "logits/chosen": -2.153406858444214, + "logits/rejected": -2.150573492050171, + "logps/chosen": -82.15953063964844, + "logps/rejected": -132.38131713867188, + "loss": 0.5989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02179565466940403, + "rewards/margins": 0.38325807452201843, + "rewards/rejected": -0.36146241426467896, + "step": 2542 + }, + { + "epoch": 0.15, + "learning_rate": 9.639511610989341e-08, + "logits/chosen": -2.025895595550537, + "logits/rejected": -2.0104384422302246, + "logps/chosen": -0.006782369688153267, + "logps/rejected": -441.8857421875, + "loss": 0.3737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012542912736535072, + "rewards/margins": 2.8770382404327393, + "rewards/rejected": -2.8771636486053467, + "step": 2543 + }, + { + "epoch": 0.15, + "learning_rate": 9.639160178922974e-08, + "logits/chosen": -2.1813504695892334, + "logits/rejected": -2.1704466342926025, + "logps/chosen": -14.346504211425781, + "logps/rejected": -176.0145721435547, + "loss": 0.6065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00471076974645257, + "rewards/margins": 0.394736111164093, + "rewards/rejected": -0.3900253474712372, + "step": 2544 + }, + { + "epoch": 0.15, + "learning_rate": 9.638808582050943e-08, + "logits/chosen": -1.7844828367233276, + "logits/rejected": -1.8183144330978394, + "logps/chosen": -339.8021240234375, + "logps/rejected": -475.02410888671875, + "loss": 0.2463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9765106439590454, + "rewards/margins": 1.3433411121368408, + "rewards/rejected": -0.366830438375473, + "step": 2545 + }, + { + "epoch": 0.15, + "learning_rate": 9.638456820385739e-08, + "logits/chosen": -1.9343496561050415, + "logits/rejected": -1.9238479137420654, + "logps/chosen": -42.949501037597656, + "logps/rejected": -82.88862609863281, + "loss": 0.7001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035684969276189804, + "rewards/margins": 0.06036758050322533, + "rewards/rejected": -0.09605254977941513, + "step": 2546 + }, + { + "epoch": 0.15, + "learning_rate": 9.638104893939862e-08, + "logits/chosen": -2.0798540115356445, + "logits/rejected": -2.0758843421936035, + "logps/chosen": -22.56476593017578, + "logps/rejected": -115.81268310546875, + "loss": 0.6339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03838043287396431, + "rewards/margins": 0.21755066514015198, + "rewards/rejected": -0.17917023599147797, + "step": 2547 + }, + { + "epoch": 0.15, + "learning_rate": 9.637752802725809e-08, + "logits/chosen": -2.200843572616577, + "logits/rejected": -2.1068990230560303, + "logps/chosen": -257.39324951171875, + "logps/rejected": -551.8248291015625, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0024536848068237, + "rewards/margins": 0.9160706400871277, + "rewards/rejected": 0.08638305962085724, + "step": 2548 + }, + { + "epoch": 0.15, + "learning_rate": 9.637400546756093e-08, + "logits/chosen": -2.0813188552856445, + "logits/rejected": -2.0443079471588135, + "logps/chosen": -160.29925537109375, + "logps/rejected": -240.54098510742188, + "loss": 0.6142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07645568996667862, + "rewards/margins": 0.13885650038719177, + "rewards/rejected": -0.06240081787109375, + "step": 2549 + }, + { + "epoch": 0.15, + "learning_rate": 9.637048126043225e-08, + "logits/chosen": -2.2238481044769287, + "logits/rejected": -2.203000068664551, + "logps/chosen": -32.89080047607422, + "logps/rejected": -267.7234802246094, + "loss": 0.4055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19740180671215057, + "rewards/margins": 1.481581449508667, + "rewards/rejected": -1.2841796875, + "step": 2550 + }, + { + "epoch": 0.15, + "learning_rate": 9.636695540599725e-08, + "logits/chosen": -1.9763643741607666, + "logits/rejected": -1.9722540378570557, + "logps/chosen": -33.11851119995117, + "logps/rejected": -255.12863159179688, + "loss": 0.4853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02340393140912056, + "rewards/margins": 1.005645751953125, + "rewards/rejected": -0.9822418093681335, + "step": 2551 + }, + { + "epoch": 0.15, + "learning_rate": 9.63634279043812e-08, + "logits/chosen": -1.8855468034744263, + "logits/rejected": -1.8634028434753418, + "logps/chosen": -249.92718505859375, + "logps/rejected": -532.9338989257812, + "loss": 0.2095, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.472747802734375, + "rewards/margins": 1.222741723060608, + "rewards/rejected": 0.2500061094760895, + "step": 2552 + }, + { + "epoch": 0.15, + "learning_rate": 9.635989875570937e-08, + "logits/chosen": -1.8819708824157715, + "logits/rejected": -1.8735318183898926, + "logps/chosen": -46.937618255615234, + "logps/rejected": -238.82351684570312, + "loss": 0.5564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00075531005859375, + "rewards/margins": 0.5698882937431335, + "rewards/rejected": -0.5706436038017273, + "step": 2553 + }, + { + "epoch": 0.15, + "learning_rate": 9.63563679601072e-08, + "logits/chosen": -2.0815300941467285, + "logits/rejected": -2.075861692428589, + "logps/chosen": -62.20184326171875, + "logps/rejected": -173.60910034179688, + "loss": 0.556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18881073594093323, + "rewards/margins": 0.3111984431743622, + "rewards/rejected": -0.12238769978284836, + "step": 2554 + }, + { + "epoch": 0.15, + "learning_rate": 9.635283551770008e-08, + "logits/chosen": -2.1947696208953857, + "logits/rejected": -2.197169780731201, + "logps/chosen": -33.8659553527832, + "logps/rejected": -109.162109375, + "loss": 0.5279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028711318969726562, + "rewards/margins": 0.8108081817626953, + "rewards/rejected": -0.7820968627929688, + "step": 2555 + }, + { + "epoch": 0.15, + "learning_rate": 9.63493014286135e-08, + "logits/chosen": -2.056574583053589, + "logits/rejected": -2.0529868602752686, + "logps/chosen": -0.043140310794115067, + "logps/rejected": -97.24725341796875, + "loss": 0.6085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002695908769965172, + "rewards/margins": 0.3698643445968628, + "rewards/rejected": -0.3671684265136719, + "step": 2556 + }, + { + "epoch": 0.15, + "learning_rate": 9.634576569297301e-08, + "logits/chosen": -2.0545589923858643, + "logits/rejected": -2.077242136001587, + "logps/chosen": -216.35537719726562, + "logps/rejected": -288.397705078125, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4848587214946747, + "rewards/margins": 0.7505050897598267, + "rewards/rejected": -0.265646368265152, + "step": 2557 + }, + { + "epoch": 0.15, + "learning_rate": 9.634222831090423e-08, + "logits/chosen": -2.0101563930511475, + "logits/rejected": -2.013416051864624, + "logps/chosen": -4.903677463531494, + "logps/rejected": -55.89071273803711, + "loss": 0.5445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0065683843567967415, + "rewards/margins": 0.7199811935424805, + "rewards/rejected": -0.7265495657920837, + "step": 2558 + }, + { + "epoch": 0.15, + "learning_rate": 9.633868928253281e-08, + "logits/chosen": -2.160123586654663, + "logits/rejected": -2.145681858062744, + "logps/chosen": -274.086181640625, + "logps/rejected": -426.813720703125, + "loss": 0.3936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2160247564315796, + "rewards/margins": 0.16739189624786377, + "rewards/rejected": 1.0486328601837158, + "step": 2559 + }, + { + "epoch": 0.15, + "learning_rate": 9.633514860798449e-08, + "logits/chosen": -2.035942554473877, + "logits/rejected": -2.031956911087036, + "logps/chosen": -124.70518493652344, + "logps/rejected": -176.20143127441406, + "loss": 0.5979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.264883428812027, + "rewards/margins": 0.04717865586280823, + "rewards/rejected": 0.21770477294921875, + "step": 2560 + }, + { + "epoch": 0.15, + "learning_rate": 9.633160628738504e-08, + "logits/chosen": -2.233769178390503, + "logits/rejected": -2.2333412170410156, + "logps/chosen": -35.750511169433594, + "logps/rejected": -132.8280029296875, + "loss": 0.6898, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.017280960455536842, + "rewards/margins": -0.006914902478456497, + "rewards/rejected": 0.02419586293399334, + "step": 2561 + }, + { + "epoch": 0.15, + "learning_rate": 9.632806232086029e-08, + "logits/chosen": -2.050628423690796, + "logits/rejected": -2.0447275638580322, + "logps/chosen": -0.023390423506498337, + "logps/rejected": -200.30584716796875, + "loss": 0.4716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010783594334498048, + "rewards/margins": 1.2066060304641724, + "rewards/rejected": -1.2076843976974487, + "step": 2562 + }, + { + "epoch": 0.15, + "learning_rate": 9.632451670853617e-08, + "logits/chosen": -1.9518771171569824, + "logits/rejected": -1.948723316192627, + "logps/chosen": -30.359172821044922, + "logps/rejected": -150.29852294921875, + "loss": 0.5006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04429512098431587, + "rewards/margins": 0.949917197227478, + "rewards/rejected": -0.9942123293876648, + "step": 2563 + }, + { + "epoch": 0.15, + "learning_rate": 9.63209694505386e-08, + "logits/chosen": -2.0036518573760986, + "logits/rejected": -1.9966284036636353, + "logps/chosen": -0.00040169525891542435, + "logps/rejected": -148.0445098876953, + "loss": 0.5135, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.332579793408513e-06, + "rewards/margins": 0.9187086224555969, + "rewards/rejected": -0.9187179803848267, + "step": 2564 + }, + { + "epoch": 0.15, + "learning_rate": 9.63174205469936e-08, + "logits/chosen": -2.200533866882324, + "logits/rejected": -2.197258472442627, + "logps/chosen": -210.6492919921875, + "logps/rejected": -464.18060302734375, + "loss": 0.2312, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0173492431640625, + "rewards/margins": 1.2739349603652954, + "rewards/rejected": -0.2565856873989105, + "step": 2565 + }, + { + "epoch": 0.15, + "learning_rate": 9.63138699980273e-08, + "logits/chosen": -2.2058815956115723, + "logits/rejected": -2.2017300128936768, + "logps/chosen": -46.31536102294922, + "logps/rejected": -162.7254638671875, + "loss": 0.5077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04600830003619194, + "rewards/margins": 0.7504333257675171, + "rewards/rejected": -0.704425036907196, + "step": 2566 + }, + { + "epoch": 0.15, + "learning_rate": 9.631031780376577e-08, + "logits/chosen": -2.188997983932495, + "logits/rejected": -2.1673717498779297, + "logps/chosen": -16.272109985351562, + "logps/rejected": -212.18167114257812, + "loss": 0.4133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03925829008221626, + "rewards/margins": 1.5122157335281372, + "rewards/rejected": -1.5514739751815796, + "step": 2567 + }, + { + "epoch": 0.15, + "learning_rate": 9.630676396433522e-08, + "logits/chosen": -2.2546520233154297, + "logits/rejected": -2.2458925247192383, + "logps/chosen": -6.245966911315918, + "logps/rejected": -255.7464141845703, + "loss": 0.3863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01752610318362713, + "rewards/margins": 2.355046272277832, + "rewards/rejected": -2.372572422027588, + "step": 2568 + }, + { + "epoch": 0.15, + "learning_rate": 9.630320847986191e-08, + "logits/chosen": -2.025283098220825, + "logits/rejected": -2.0127806663513184, + "logps/chosen": -319.2024230957031, + "logps/rejected": -371.90985107421875, + "loss": 0.5257, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1348663568496704, + "rewards/margins": -0.3263213634490967, + "rewards/rejected": 1.461187720298767, + "step": 2569 + }, + { + "epoch": 0.15, + "learning_rate": 9.629965135047215e-08, + "logits/chosen": -2.1011416912078857, + "logits/rejected": -2.0986409187316895, + "logps/chosen": -49.637901306152344, + "logps/rejected": -205.042236328125, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026070786640048027, + "rewards/margins": 0.8772037625312805, + "rewards/rejected": -0.9032745361328125, + "step": 2570 + }, + { + "epoch": 0.15, + "learning_rate": 9.629609257629229e-08, + "logits/chosen": -2.06360125541687, + "logits/rejected": -2.0371170043945312, + "logps/chosen": -283.2550964355469, + "logps/rejected": -578.0892944335938, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0183624029159546, + "rewards/margins": 0.8848114013671875, + "rewards/rejected": 0.13355103135108948, + "step": 2571 + }, + { + "epoch": 0.15, + "learning_rate": 9.629253215744876e-08, + "logits/chosen": -1.9967635869979858, + "logits/rejected": -2.010910987854004, + "logps/chosen": -42.70718002319336, + "logps/rejected": -287.6787414550781, + "loss": 0.4288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13860054314136505, + "rewards/margins": 1.437065601348877, + "rewards/rejected": -1.2984650135040283, + "step": 2572 + }, + { + "epoch": 0.15, + "learning_rate": 9.628897009406804e-08, + "logits/chosen": -2.2118866443634033, + "logits/rejected": -2.2111587524414062, + "logps/chosen": -36.080841064453125, + "logps/rejected": -140.23690795898438, + "loss": 0.5988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14767494797706604, + "rewards/margins": 0.42558708786964417, + "rewards/rejected": -0.5732620358467102, + "step": 2573 + }, + { + "epoch": 0.15, + "learning_rate": 9.628540638627669e-08, + "logits/chosen": -2.1637725830078125, + "logits/rejected": -2.155716896057129, + "logps/chosen": -45.262908935546875, + "logps/rejected": -115.34297180175781, + "loss": 0.708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00972518976777792, + "rewards/margins": 0.09824752807617188, + "rewards/rejected": -0.10797271877527237, + "step": 2574 + }, + { + "epoch": 0.15, + "learning_rate": 9.628184103420129e-08, + "logits/chosen": -2.1445512771606445, + "logits/rejected": -2.12679123878479, + "logps/chosen": -93.8763656616211, + "logps/rejected": -211.01425170898438, + "loss": 0.5804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1895149201154709, + "rewards/margins": 0.16175155341625214, + "rewards/rejected": 0.02776336669921875, + "step": 2575 + }, + { + "epoch": 0.15, + "learning_rate": 9.627827403796851e-08, + "logits/chosen": -2.1613025665283203, + "logits/rejected": -2.1641504764556885, + "logps/chosen": -121.09449768066406, + "logps/rejected": -138.1572265625, + "loss": 0.5713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12115478515625, + "rewards/margins": 0.282095342874527, + "rewards/rejected": -0.16094055771827698, + "step": 2576 + }, + { + "epoch": 0.15, + "learning_rate": 9.627470539770509e-08, + "logits/chosen": -1.9902338981628418, + "logits/rejected": -2.0010905265808105, + "logps/chosen": -10.25144100189209, + "logps/rejected": -69.27680206298828, + "loss": 0.5921, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9468536013155244e-05, + "rewards/margins": 0.45577573776245117, + "rewards/rejected": -0.4558052122592926, + "step": 2577 + }, + { + "epoch": 0.15, + "learning_rate": 9.627113511353775e-08, + "logits/chosen": -2.147926092147827, + "logits/rejected": -2.1318862438201904, + "logps/chosen": -96.74131774902344, + "logps/rejected": -323.54681396484375, + "loss": 0.5034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15517883002758026, + "rewards/margins": 1.4207123517990112, + "rewards/rejected": -1.575891137123108, + "step": 2578 + }, + { + "epoch": 0.15, + "learning_rate": 9.626756318559335e-08, + "logits/chosen": -2.164823532104492, + "logits/rejected": -2.149674415588379, + "logps/chosen": -5.766637325286865, + "logps/rejected": -214.3753662109375, + "loss": 0.5844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01696038246154785, + "rewards/margins": 0.4998059868812561, + "rewards/rejected": -0.516766369342804, + "step": 2579 + }, + { + "epoch": 0.15, + "learning_rate": 9.62639896139988e-08, + "logits/chosen": -2.160989761352539, + "logits/rejected": -2.170513391494751, + "logps/chosen": -143.0985107421875, + "logps/rejected": -336.6815490722656, + "loss": 0.4442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.625903308391571, + "rewards/margins": 0.2966308295726776, + "rewards/rejected": 0.32927247881889343, + "step": 2580 + }, + { + "epoch": 0.15, + "learning_rate": 9.626041439888105e-08, + "logits/chosen": -2.119011402130127, + "logits/rejected": -2.1174252033233643, + "logps/chosen": -68.07267761230469, + "logps/rejected": -166.19772338867188, + "loss": 0.7043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2101448029279709, + "rewards/margins": 0.1928703337907791, + "rewards/rejected": -0.40301513671875, + "step": 2581 + }, + { + "epoch": 0.15, + "learning_rate": 9.625683754036707e-08, + "logits/chosen": -2.17128586769104, + "logits/rejected": -2.150031089782715, + "logps/chosen": -204.6990203857422, + "logps/rejected": -243.376220703125, + "loss": 0.5845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010371399112045765, + "rewards/margins": 0.37041017413139343, + "rewards/rejected": -0.3807815611362457, + "step": 2582 + }, + { + "epoch": 0.15, + "learning_rate": 9.625325903858397e-08, + "logits/chosen": -2.146690607070923, + "logits/rejected": -2.143646240234375, + "logps/chosen": -202.38040161132812, + "logps/rejected": -306.2351989746094, + "loss": 0.5506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38959047198295593, + "rewards/margins": 0.17168579995632172, + "rewards/rejected": 0.21790467202663422, + "step": 2583 + }, + { + "epoch": 0.15, + "learning_rate": 9.624967889365884e-08, + "logits/chosen": -2.3075814247131348, + "logits/rejected": -2.2734973430633545, + "logps/chosen": -80.4873046875, + "logps/rejected": -256.9965515136719, + "loss": 0.4476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2686019837856293, + "rewards/margins": 0.9706604480743408, + "rewards/rejected": -0.7020584344863892, + "step": 2584 + }, + { + "epoch": 0.15, + "learning_rate": 9.624609710571892e-08, + "logits/chosen": -2.2862839698791504, + "logits/rejected": -2.277169704437256, + "logps/chosen": -14.625521659851074, + "logps/rejected": -172.56735229492188, + "loss": 0.5181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02497425116598606, + "rewards/margins": 0.8515825271606445, + "rewards/rejected": -0.8266083002090454, + "step": 2585 + }, + { + "epoch": 0.15, + "learning_rate": 9.624251367489138e-08, + "logits/chosen": -2.085972547531128, + "logits/rejected": -2.0717456340789795, + "logps/chosen": -134.19049072265625, + "logps/rejected": -179.86691284179688, + "loss": 0.6548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03704528883099556, + "rewards/margins": 0.121612548828125, + "rewards/rejected": -0.08456726372241974, + "step": 2586 + }, + { + "epoch": 0.15, + "learning_rate": 9.623892860130358e-08, + "logits/chosen": -2.2406883239746094, + "logits/rejected": -2.235581874847412, + "logps/chosen": -0.0008882437832653522, + "logps/rejected": -272.0000915527344, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4909060812206008e-05, + "rewards/margins": 2.70910906791687, + "rewards/rejected": -2.7091338634490967, + "step": 2587 + }, + { + "epoch": 0.15, + "learning_rate": 9.623534188508285e-08, + "logits/chosen": -2.1744589805603027, + "logits/rejected": -2.194397449493408, + "logps/chosen": -282.7133483886719, + "logps/rejected": -428.6333923339844, + "loss": 0.326, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3780182600021362, + "rewards/margins": 0.4604889750480652, + "rewards/rejected": 0.917529284954071, + "step": 2588 + }, + { + "epoch": 0.15, + "learning_rate": 9.623175352635662e-08, + "logits/chosen": -2.057668924331665, + "logits/rejected": -2.0643744468688965, + "logps/chosen": -38.716392517089844, + "logps/rejected": -178.363037109375, + "loss": 0.6001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039258576929569244, + "rewards/margins": 0.35935133695602417, + "rewards/rejected": -0.3200927674770355, + "step": 2589 + }, + { + "epoch": 0.15, + "learning_rate": 9.622816352525236e-08, + "logits/chosen": -2.1885464191436768, + "logits/rejected": -2.19529128074646, + "logps/chosen": -245.67788696289062, + "logps/rejected": -304.4153747558594, + "loss": 0.3237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8137451410293579, + "rewards/margins": 0.7766205072402954, + "rewards/rejected": 0.0371246337890625, + "step": 2590 + }, + { + "epoch": 0.15, + "learning_rate": 9.62245718818976e-08, + "logits/chosen": -2.1407766342163086, + "logits/rejected": -2.131558656692505, + "logps/chosen": -33.233001708984375, + "logps/rejected": -232.18707275390625, + "loss": 0.4753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04896507412195206, + "rewards/margins": 1.2155605554580688, + "rewards/rejected": -1.166595458984375, + "step": 2591 + }, + { + "epoch": 0.15, + "learning_rate": 9.622097859641993e-08, + "logits/chosen": -2.288818359375, + "logits/rejected": -2.2912774085998535, + "logps/chosen": -20.350446701049805, + "logps/rejected": -142.72195434570312, + "loss": 0.4307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0051212310791015625, + "rewards/margins": 1.6327327489852905, + "rewards/rejected": -1.627611517906189, + "step": 2592 + }, + { + "epoch": 0.15, + "learning_rate": 9.621738366894703e-08, + "logits/chosen": -2.156071424484253, + "logits/rejected": -2.1179146766662598, + "logps/chosen": -185.63558959960938, + "logps/rejected": -377.35357666015625, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0540679693222046, + "rewards/margins": 0.6264159679412842, + "rewards/rejected": 0.427651971578598, + "step": 2593 + }, + { + "epoch": 0.15, + "learning_rate": 9.621378709960657e-08, + "logits/chosen": -1.9598842859268188, + "logits/rejected": -2.016911029815674, + "logps/chosen": -282.62017822265625, + "logps/rejected": -261.15338134765625, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0991302728652954, + "rewards/margins": 0.4515380859375, + "rewards/rejected": 0.6475921869277954, + "step": 2594 + }, + { + "epoch": 0.15, + "learning_rate": 9.621018888852634e-08, + "logits/chosen": -2.2907330989837646, + "logits/rejected": -2.2726826667785645, + "logps/chosen": -90.19032287597656, + "logps/rejected": -259.2243347167969, + "loss": 0.4395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32958221435546875, + "rewards/margins": 0.7498520016670227, + "rewards/rejected": -0.42026978731155396, + "step": 2595 + }, + { + "epoch": 0.15, + "learning_rate": 9.620658903583418e-08, + "logits/chosen": -2.3490242958068848, + "logits/rejected": -2.344956398010254, + "logps/chosen": -8.520066261291504, + "logps/rejected": -133.09645080566406, + "loss": 0.5237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007906914106570184, + "rewards/margins": 0.8002522587776184, + "rewards/rejected": -0.8010429739952087, + "step": 2596 + }, + { + "epoch": 0.15, + "learning_rate": 9.620298754165794e-08, + "logits/chosen": -1.7876707315444946, + "logits/rejected": -1.7355951070785522, + "logps/chosen": -179.23516845703125, + "logps/rejected": -439.4775390625, + "loss": 0.2856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.615246593952179, + "rewards/margins": 1.3535065650939941, + "rewards/rejected": -0.7382599115371704, + "step": 2597 + }, + { + "epoch": 0.15, + "learning_rate": 9.619938440612558e-08, + "logits/chosen": -2.1384835243225098, + "logits/rejected": -2.1239876747131348, + "logps/chosen": -59.03478240966797, + "logps/rejected": -282.6961364746094, + "loss": 0.5253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1643756926059723, + "rewards/margins": 0.46377450227737427, + "rewards/rejected": -0.299398809671402, + "step": 2598 + }, + { + "epoch": 0.15, + "learning_rate": 9.619577962936511e-08, + "logits/chosen": -2.213796377182007, + "logits/rejected": -2.209531307220459, + "logps/chosen": -0.00021158522577024996, + "logps/rejected": -107.06037902832031, + "loss": 0.661, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7572383411752526e-06, + "rewards/margins": 0.13286854326725006, + "rewards/rejected": -0.1328742951154709, + "step": 2599 + }, + { + "epoch": 0.15, + "learning_rate": 9.619217321150457e-08, + "logits/chosen": -2.170206069946289, + "logits/rejected": -2.1202642917633057, + "logps/chosen": -148.47975158691406, + "logps/rejected": -297.11358642578125, + "loss": 0.5104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7578048706054688, + "rewards/margins": 0.003123462200164795, + "rewards/rejected": 0.754681408405304, + "step": 2600 + }, + { + "epoch": 0.15, + "learning_rate": 9.618856515267209e-08, + "logits/chosen": -2.145606756210327, + "logits/rejected": -2.138850450515747, + "logps/chosen": -59.44713592529297, + "logps/rejected": -230.2928924560547, + "loss": 0.5305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08787422627210617, + "rewards/margins": 1.0267666578292847, + "rewards/rejected": -1.1146408319473267, + "step": 2601 + }, + { + "epoch": 0.15, + "learning_rate": 9.618495545299583e-08, + "logits/chosen": -2.0075113773345947, + "logits/rejected": -2.009063720703125, + "logps/chosen": -195.6414337158203, + "logps/rejected": -370.36492919921875, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6852218508720398, + "rewards/margins": 0.9775344729423523, + "rewards/rejected": -0.2923126220703125, + "step": 2602 + }, + { + "epoch": 0.15, + "learning_rate": 9.618134411260405e-08, + "logits/chosen": -2.1160929203033447, + "logits/rejected": -2.1085619926452637, + "logps/chosen": -2.040870428085327, + "logps/rejected": -203.26760864257812, + "loss": 0.4748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05289297178387642, + "rewards/margins": 1.342137336730957, + "rewards/rejected": -1.3950302600860596, + "step": 2603 + }, + { + "epoch": 0.15, + "learning_rate": 9.617773113162504e-08, + "logits/chosen": -2.2951488494873047, + "logits/rejected": -2.289858341217041, + "logps/chosen": -36.86155700683594, + "logps/rejected": -217.86288452148438, + "loss": 0.3953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14379386603832245, + "rewards/margins": 1.7261089086532593, + "rewards/rejected": -1.5823150873184204, + "step": 2604 + }, + { + "epoch": 0.15, + "learning_rate": 9.617411651018712e-08, + "logits/chosen": -2.081059455871582, + "logits/rejected": -2.0823395252227783, + "logps/chosen": -187.18161010742188, + "logps/rejected": -299.7519836425781, + "loss": 0.3927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0376633405685425, + "rewards/margins": 0.3506912589073181, + "rewards/rejected": 0.6869720816612244, + "step": 2605 + }, + { + "epoch": 0.15, + "learning_rate": 9.617050024841873e-08, + "logits/chosen": -1.8564804792404175, + "logits/rejected": -1.912219762802124, + "logps/chosen": -229.96875, + "logps/rejected": -234.9417724609375, + "loss": 0.5443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24743042886257172, + "rewards/margins": 0.39318543672561646, + "rewards/rejected": -0.14575500786304474, + "step": 2606 + }, + { + "epoch": 0.15, + "learning_rate": 9.616688234644832e-08, + "logits/chosen": -2.1304304599761963, + "logits/rejected": -2.114333152770996, + "logps/chosen": -0.48766452074050903, + "logps/rejected": -184.34228515625, + "loss": 0.5714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01666480116546154, + "rewards/margins": 0.5842019319534302, + "rewards/rejected": -0.6008667349815369, + "step": 2607 + }, + { + "epoch": 0.15, + "learning_rate": 9.616326280440444e-08, + "logits/chosen": -2.1048803329467773, + "logits/rejected": -1.9312044382095337, + "logps/chosen": -319.07135009765625, + "logps/rejected": -503.490478515625, + "loss": 0.3955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45480653643608093, + "rewards/margins": 0.8540558218955994, + "rewards/rejected": -0.39924928545951843, + "step": 2608 + }, + { + "epoch": 0.15, + "learning_rate": 9.615964162241563e-08, + "logits/chosen": -2.194920778274536, + "logits/rejected": -2.1793386936187744, + "logps/chosen": -170.12142944335938, + "logps/rejected": -230.66053771972656, + "loss": 0.4437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4708190858364105, + "rewards/margins": 0.6315414309501648, + "rewards/rejected": -0.16072236001491547, + "step": 2609 + }, + { + "epoch": 0.15, + "learning_rate": 9.615601880061057e-08, + "logits/chosen": -2.050450086593628, + "logits/rejected": -2.0500500202178955, + "logps/chosen": -3.3416342735290527, + "logps/rejected": -161.58074951171875, + "loss": 0.5858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011606168933212757, + "rewards/margins": 0.49390292167663574, + "rewards/rejected": -0.48229676485061646, + "step": 2610 + }, + { + "epoch": 0.15, + "learning_rate": 9.615239433911796e-08, + "logits/chosen": -1.849342703819275, + "logits/rejected": -1.8425159454345703, + "logps/chosen": -169.804443359375, + "logps/rejected": -275.3672790527344, + "loss": 0.4652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5242096185684204, + "rewards/margins": 0.5720245838165283, + "rewards/rejected": -0.04781494289636612, + "step": 2611 + }, + { + "epoch": 0.15, + "learning_rate": 9.614876823806655e-08, + "logits/chosen": -1.9317551851272583, + "logits/rejected": -1.9884752035140991, + "logps/chosen": -194.73098754882812, + "logps/rejected": -435.9889831542969, + "loss": 0.5416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5069519281387329, + "rewards/margins": 0.05414429306983948, + "rewards/rejected": 0.45280763506889343, + "step": 2612 + }, + { + "epoch": 0.15, + "learning_rate": 9.614514049758513e-08, + "logits/chosen": -2.23700213432312, + "logits/rejected": -2.236063003540039, + "logps/chosen": -1.3776369094848633, + "logps/rejected": -37.13919448852539, + "loss": 0.6904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02152121067047119, + "rewards/margins": 0.0027086492627859116, + "rewards/rejected": 0.01881256140768528, + "step": 2613 + }, + { + "epoch": 0.15, + "learning_rate": 9.614151111780262e-08, + "logits/chosen": -2.0284759998321533, + "logits/rejected": -2.011406898498535, + "logps/chosen": -81.60319519042969, + "logps/rejected": -374.36199951171875, + "loss": 0.4306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06551285088062286, + "rewards/margins": 1.9177634716033936, + "rewards/rejected": -1.9832763671875, + "step": 2614 + }, + { + "epoch": 0.15, + "learning_rate": 9.613788009884791e-08, + "logits/chosen": -2.1380879878997803, + "logits/rejected": -2.1441750526428223, + "logps/chosen": -3.612014188547619e-05, + "logps/rejected": -93.93697357177734, + "loss": 0.5949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7881066582958738e-07, + "rewards/margins": 0.437434583902359, + "rewards/rejected": -0.4374344050884247, + "step": 2615 + }, + { + "epoch": 0.15, + "learning_rate": 9.613424744085004e-08, + "logits/chosen": -2.0645556449890137, + "logits/rejected": -2.082677125930786, + "logps/chosen": -182.65089416503906, + "logps/rejected": -324.00927734375, + "loss": 0.4456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.310690313577652, + "rewards/margins": 0.7719573974609375, + "rewards/rejected": -0.4612670838832855, + "step": 2616 + }, + { + "epoch": 0.15, + "learning_rate": 9.613061314393802e-08, + "logits/chosen": -2.1517128944396973, + "logits/rejected": -2.14547061920166, + "logps/chosen": -21.58753776550293, + "logps/rejected": -232.16763305664062, + "loss": 0.584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015627671033143997, + "rewards/margins": 0.5328655242919922, + "rewards/rejected": -0.5172378420829773, + "step": 2617 + }, + { + "epoch": 0.15, + "learning_rate": 9.612697720824097e-08, + "logits/chosen": -2.2095184326171875, + "logits/rejected": -2.194314479827881, + "logps/chosen": -46.63018798828125, + "logps/rejected": -197.54574584960938, + "loss": 0.47, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1589759886264801, + "rewards/margins": 0.9446327686309814, + "rewards/rejected": -0.785656750202179, + "step": 2618 + }, + { + "epoch": 0.15, + "learning_rate": 9.612333963388807e-08, + "logits/chosen": -1.9728513956069946, + "logits/rejected": -1.965499997138977, + "logps/chosen": -235.31350708007812, + "logps/rejected": -442.3343505859375, + "loss": 0.4765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3301346004009247, + "rewards/margins": 0.5660018920898438, + "rewards/rejected": -0.23586730659008026, + "step": 2619 + }, + { + "epoch": 0.15, + "learning_rate": 9.611970042100852e-08, + "logits/chosen": -2.2609777450561523, + "logits/rejected": -2.2538082599639893, + "logps/chosen": -4.381547927856445, + "logps/rejected": -57.39102554321289, + "loss": 0.6681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031340599060058594, + "rewards/margins": 0.04328937456011772, + "rewards/rejected": -0.011948776431381702, + "step": 2620 + }, + { + "epoch": 0.15, + "learning_rate": 9.611605956973162e-08, + "logits/chosen": -2.152866840362549, + "logits/rejected": -2.1574113368988037, + "logps/chosen": -218.10980224609375, + "logps/rejected": -471.78515625, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6303436160087585, + "rewards/margins": 0.42384031414985657, + "rewards/rejected": 0.20650330185890198, + "step": 2621 + }, + { + "epoch": 0.15, + "learning_rate": 9.611241708018671e-08, + "logits/chosen": -1.8482892513275146, + "logits/rejected": -1.8198941946029663, + "logps/chosen": -119.14520263671875, + "logps/rejected": -270.5129089355469, + "loss": 0.5453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23267364501953125, + "rewards/margins": 0.3771347105503082, + "rewards/rejected": -0.14446106553077698, + "step": 2622 + }, + { + "epoch": 0.15, + "learning_rate": 9.610877295250318e-08, + "logits/chosen": -2.1069204807281494, + "logits/rejected": -2.0763235092163086, + "logps/chosen": -209.82723999023438, + "logps/rejected": -247.96243286132812, + "loss": 0.3892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1792999505996704, + "rewards/margins": 0.2502990961074829, + "rewards/rejected": 0.9290008544921875, + "step": 2623 + }, + { + "epoch": 0.15, + "learning_rate": 9.610512718681049e-08, + "logits/chosen": -2.0602033138275146, + "logits/rejected": -2.0458033084869385, + "logps/chosen": -88.93282318115234, + "logps/rejected": -228.774658203125, + "loss": 0.6324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15074387192726135, + "rewards/margins": 0.11517563462257385, + "rewards/rejected": 0.0355682373046875, + "step": 2624 + }, + { + "epoch": 0.15, + "learning_rate": 9.610147978323817e-08, + "logits/chosen": -1.9128940105438232, + "logits/rejected": -1.789537787437439, + "logps/chosen": -207.449951171875, + "logps/rejected": -352.98394775390625, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8077301383018494, + "rewards/margins": 0.8994964957237244, + "rewards/rejected": -0.091766357421875, + "step": 2625 + }, + { + "epoch": 0.15, + "learning_rate": 9.609783074191577e-08, + "logits/chosen": -2.046614170074463, + "logits/rejected": -2.0576682090759277, + "logps/chosen": -290.1481018066406, + "logps/rejected": -440.41485595703125, + "loss": 0.4744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37277528643608093, + "rewards/margins": 0.527478039264679, + "rewards/rejected": -0.15470276772975922, + "step": 2626 + }, + { + "epoch": 0.15, + "learning_rate": 9.609418006297295e-08, + "logits/chosen": -1.8892823457717896, + "logits/rejected": -1.8845287561416626, + "logps/chosen": -137.30599975585938, + "logps/rejected": -233.0558319091797, + "loss": 0.5747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11403351277112961, + "rewards/margins": 0.3209686279296875, + "rewards/rejected": -0.2069351226091385, + "step": 2627 + }, + { + "epoch": 0.15, + "learning_rate": 9.609052774653936e-08, + "logits/chosen": -2.2283265590667725, + "logits/rejected": -2.2180957794189453, + "logps/chosen": -14.62053394317627, + "logps/rejected": -127.41490173339844, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07178783416748047, + "rewards/margins": 0.8435285687446594, + "rewards/rejected": -0.771740734577179, + "step": 2628 + }, + { + "epoch": 0.15, + "learning_rate": 9.608687379274479e-08, + "logits/chosen": -2.3022818565368652, + "logits/rejected": -2.296185255050659, + "logps/chosen": -0.0009846203029155731, + "logps/rejected": -104.86495971679688, + "loss": 0.6552, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7906557079404593e-05, + "rewards/margins": 0.14521363377571106, + "rewards/rejected": -0.14524154365062714, + "step": 2629 + }, + { + "epoch": 0.15, + "learning_rate": 9.6083218201719e-08, + "logits/chosen": -2.0644302368164062, + "logits/rejected": -2.0390982627868652, + "logps/chosen": -215.72073364257812, + "logps/rejected": -302.94964599609375, + "loss": 0.5436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3379974365234375, + "rewards/margins": 0.3086608946323395, + "rewards/rejected": 0.02933654747903347, + "step": 2630 + }, + { + "epoch": 0.15, + "learning_rate": 9.607956097359191e-08, + "logits/chosen": -1.944090485572815, + "logits/rejected": -1.947451114654541, + "logps/chosen": -338.437255859375, + "logps/rejected": -497.0881652832031, + "loss": 0.1789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0368164777755737, + "rewards/margins": 1.9657471179962158, + "rewards/rejected": -0.9289306998252869, + "step": 2631 + }, + { + "epoch": 0.15, + "learning_rate": 9.60759021084934e-08, + "logits/chosen": -2.069819211959839, + "logits/rejected": -2.0862879753112793, + "logps/chosen": -264.1516418457031, + "logps/rejected": -349.709716796875, + "loss": 0.3972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0019501447677612, + "rewards/margins": 0.26900947093963623, + "rewards/rejected": 0.732940673828125, + "step": 2632 + }, + { + "epoch": 0.15, + "learning_rate": 9.607224160655347e-08, + "logits/chosen": -2.062889575958252, + "logits/rejected": -2.0601389408111572, + "logps/chosen": -14.870676040649414, + "logps/rejected": -149.30545043945312, + "loss": 0.5775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01272974070161581, + "rewards/margins": 0.5364065766334534, + "rewards/rejected": -0.5491363406181335, + "step": 2633 + }, + { + "epoch": 0.15, + "learning_rate": 9.606857946790216e-08, + "logits/chosen": -2.333385705947876, + "logits/rejected": -2.325127601623535, + "logps/chosen": -8.756170272827148, + "logps/rejected": -154.14315795898438, + "loss": 0.4708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012686729431152344, + "rewards/margins": 1.3379968404769897, + "rewards/rejected": -1.350683569908142, + "step": 2634 + }, + { + "epoch": 0.15, + "learning_rate": 9.606491569266955e-08, + "logits/chosen": -2.06426739692688, + "logits/rejected": -2.049999237060547, + "logps/chosen": -53.84930419921875, + "logps/rejected": -221.83566284179688, + "loss": 0.579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26213112473487854, + "rewards/margins": 0.382358193397522, + "rewards/rejected": -0.12022705376148224, + "step": 2635 + }, + { + "epoch": 0.15, + "learning_rate": 9.606125028098581e-08, + "logits/chosen": -1.8276833295822144, + "logits/rejected": -1.8390840291976929, + "logps/chosen": -323.2966003417969, + "logps/rejected": -439.91973876953125, + "loss": 0.4248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.847760021686554, + "rewards/margins": 0.44557496905326843, + "rewards/rejected": 0.4021850526332855, + "step": 2636 + }, + { + "epoch": 0.15, + "learning_rate": 9.605758323298117e-08, + "logits/chosen": -2.12954044342041, + "logits/rejected": -2.120401620864868, + "logps/chosen": -62.61784744262695, + "logps/rejected": -218.9853057861328, + "loss": 0.5893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03580513224005699, + "rewards/margins": 0.43387413024902344, + "rewards/rejected": -0.4696792662143707, + "step": 2637 + }, + { + "epoch": 0.15, + "learning_rate": 9.605391454878586e-08, + "logits/chosen": -2.2478671073913574, + "logits/rejected": -2.244565486907959, + "logps/chosen": -28.961095809936523, + "logps/rejected": -143.21945190429688, + "loss": 0.5226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14823131263256073, + "rewards/margins": 0.5509275197982788, + "rewards/rejected": -0.4026962220668793, + "step": 2638 + }, + { + "epoch": 0.15, + "learning_rate": 9.605024422853023e-08, + "logits/chosen": -1.9381999969482422, + "logits/rejected": -1.9323179721832275, + "logps/chosen": -11.390843391418457, + "logps/rejected": -168.5269775390625, + "loss": 0.5059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002032756805419922, + "rewards/margins": 0.9841006398200989, + "rewards/rejected": -0.982067883014679, + "step": 2639 + }, + { + "epoch": 0.15, + "learning_rate": 9.604657227234467e-08, + "logits/chosen": -2.117262125015259, + "logits/rejected": -2.0896453857421875, + "logps/chosen": -168.29295349121094, + "logps/rejected": -402.2987060546875, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4345047175884247, + "rewards/margins": 2.0168471336364746, + "rewards/rejected": -1.582342505455017, + "step": 2640 + }, + { + "epoch": 0.15, + "learning_rate": 9.604289868035964e-08, + "logits/chosen": -2.106201171875, + "logits/rejected": -2.119346857070923, + "logps/chosen": -195.986083984375, + "logps/rejected": -232.9290313720703, + "loss": 0.6422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1676986664533615, + "rewards/margins": 0.12773284316062927, + "rewards/rejected": 0.03996581956744194, + "step": 2641 + }, + { + "epoch": 0.15, + "learning_rate": 9.60392234527056e-08, + "logits/chosen": -2.132338762283325, + "logits/rejected": -2.085266351699829, + "logps/chosen": -251.09420776367188, + "logps/rejected": -425.237060546875, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6529785394668579, + "rewards/margins": 1.1812835931777954, + "rewards/rejected": -0.5283050537109375, + "step": 2642 + }, + { + "epoch": 0.15, + "learning_rate": 9.603554658951318e-08, + "logits/chosen": -2.1512374877929688, + "logits/rejected": -2.044288396835327, + "logps/chosen": -286.5601806640625, + "logps/rejected": -659.035888671875, + "loss": 0.3319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.952239990234375, + "rewards/margins": 0.72589111328125, + "rewards/rejected": 0.226348876953125, + "step": 2643 + }, + { + "epoch": 0.15, + "learning_rate": 9.603186809091294e-08, + "logits/chosen": -2.110651969909668, + "logits/rejected": -2.071103096008301, + "logps/chosen": -59.1065788269043, + "logps/rejected": -227.59449768066406, + "loss": 0.5335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08982467651367188, + "rewards/margins": 0.5646584033966064, + "rewards/rejected": -0.4748336970806122, + "step": 2644 + }, + { + "epoch": 0.15, + "learning_rate": 9.602818795703558e-08, + "logits/chosen": -1.8762644529342651, + "logits/rejected": -1.8696386814117432, + "logps/chosen": -298.90069580078125, + "logps/rejected": -418.019775390625, + "loss": 0.4405, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3981598615646362, + "rewards/margins": 0.00961923599243164, + "rewards/rejected": 1.3885406255722046, + "step": 2645 + }, + { + "epoch": 0.15, + "learning_rate": 9.602450618801184e-08, + "logits/chosen": -2.15987229347229, + "logits/rejected": -2.1404008865356445, + "logps/chosen": -0.008259149268269539, + "logps/rejected": -169.18374633789062, + "loss": 0.4992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018118620209861547, + "rewards/margins": 1.0301532745361328, + "rewards/rejected": -1.03033447265625, + "step": 2646 + }, + { + "epoch": 0.15, + "learning_rate": 9.60208227839725e-08, + "logits/chosen": -1.9380698204040527, + "logits/rejected": -1.9297316074371338, + "logps/chosen": -12.501602172851562, + "logps/rejected": -173.4755859375, + "loss": 0.5313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0499577522277832, + "rewards/margins": 0.7272771000862122, + "rewards/rejected": -0.677319347858429, + "step": 2647 + }, + { + "epoch": 0.15, + "learning_rate": 9.601713774504843e-08, + "logits/chosen": -2.193423271179199, + "logits/rejected": -2.1877388954162598, + "logps/chosen": -0.160963773727417, + "logps/rejected": -156.13922119140625, + "loss": 0.5843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019424021011218429, + "rewards/margins": 0.4922958016395569, + "rewards/rejected": -0.49035340547561646, + "step": 2648 + }, + { + "epoch": 0.15, + "learning_rate": 9.601345107137052e-08, + "logits/chosen": -2.020643472671509, + "logits/rejected": -2.012024164199829, + "logps/chosen": -198.795166015625, + "logps/rejected": -269.0531005859375, + "loss": 0.4297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48412781953811646, + "rewards/margins": 0.7284576892852783, + "rewards/rejected": -0.24432983994483948, + "step": 2649 + }, + { + "epoch": 0.15, + "learning_rate": 9.600976276306977e-08, + "logits/chosen": -1.9224358797073364, + "logits/rejected": -1.8512272834777832, + "logps/chosen": -222.08607482910156, + "logps/rejected": -526.6354370117188, + "loss": 0.1191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7077773809432983, + "rewards/margins": 2.304548740386963, + "rewards/rejected": -0.596771240234375, + "step": 2650 + }, + { + "epoch": 0.15, + "learning_rate": 9.600607282027718e-08, + "logits/chosen": -2.107954263687134, + "logits/rejected": -2.0748519897460938, + "logps/chosen": -212.24220275878906, + "logps/rejected": -269.7169189453125, + "loss": 0.4319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4093780517578125, + "rewards/margins": 0.7496490478515625, + "rewards/rejected": -0.34027099609375, + "step": 2651 + }, + { + "epoch": 0.15, + "learning_rate": 9.600238124312385e-08, + "logits/chosen": -2.2334482669830322, + "logits/rejected": -2.2290914058685303, + "logps/chosen": -47.2564811706543, + "logps/rejected": -189.54421997070312, + "loss": 0.6346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0531490333378315, + "rewards/margins": 0.1257549375295639, + "rewards/rejected": -0.07260590046644211, + "step": 2652 + }, + { + "epoch": 0.15, + "learning_rate": 9.599868803174091e-08, + "logits/chosen": -2.017878532409668, + "logits/rejected": -2.0135180950164795, + "logps/chosen": -54.40074920654297, + "logps/rejected": -263.04754638671875, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04815330728888512, + "rewards/margins": 1.670559287071228, + "rewards/rejected": -1.622406005859375, + "step": 2653 + }, + { + "epoch": 0.15, + "learning_rate": 9.599499318625956e-08, + "logits/chosen": -2.196230411529541, + "logits/rejected": -2.191598415374756, + "logps/chosen": -2.201265573501587, + "logps/rejected": -100.00990295410156, + "loss": 0.7329, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.023054981604218483, + "rewards/margins": -0.13237504661083221, + "rewards/rejected": 0.10932006686925888, + "step": 2654 + }, + { + "epoch": 0.15, + "learning_rate": 9.599129670681108e-08, + "logits/chosen": -2.063751459121704, + "logits/rejected": -2.0636792182922363, + "logps/chosen": -24.607126235961914, + "logps/rejected": -98.76367950439453, + "loss": 0.528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10403233021497726, + "rewards/margins": 0.6806657910346985, + "rewards/rejected": -0.5766334533691406, + "step": 2655 + }, + { + "epoch": 0.15, + "learning_rate": 9.598759859352677e-08, + "logits/chosen": -2.0046470165252686, + "logits/rejected": -1.959209680557251, + "logps/chosen": -273.8176574707031, + "logps/rejected": -395.959228515625, + "loss": 0.5199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4025115966796875, + "rewards/margins": 0.3850769102573395, + "rewards/rejected": 0.01743469201028347, + "step": 2656 + }, + { + "epoch": 0.15, + "learning_rate": 9.5983898846538e-08, + "logits/chosen": -1.9448007345199585, + "logits/rejected": -1.9452491998672485, + "logps/chosen": -9.915360450744629, + "logps/rejected": -235.17527770996094, + "loss": 0.4977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15944452583789825, + "rewards/margins": 1.3885122537612915, + "rewards/rejected": -1.5479568243026733, + "step": 2657 + }, + { + "epoch": 0.15, + "learning_rate": 9.598019746597621e-08, + "logits/chosen": -2.1315758228302, + "logits/rejected": -2.126053810119629, + "logps/chosen": -1.5456141233444214, + "logps/rejected": -70.98104095458984, + "loss": 0.6125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010319280438125134, + "rewards/margins": 0.28937584161758423, + "rewards/rejected": -0.2790565490722656, + "step": 2658 + }, + { + "epoch": 0.15, + "learning_rate": 9.597649445197291e-08, + "logits/chosen": -2.123715400695801, + "logits/rejected": -2.122180938720703, + "logps/chosen": -3.7635531425476074, + "logps/rejected": -55.088462829589844, + "loss": 0.5373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014768672175705433, + "rewards/margins": 0.7438488602638245, + "rewards/rejected": -0.7290802001953125, + "step": 2659 + }, + { + "epoch": 0.15, + "learning_rate": 9.597278980465962e-08, + "logits/chosen": -2.2173633575439453, + "logits/rejected": -2.2126665115356445, + "logps/chosen": -1.0555051565170288, + "logps/rejected": -123.7013931274414, + "loss": 0.5173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007979053072631359, + "rewards/margins": 0.9270215630531311, + "rewards/rejected": -0.9350005984306335, + "step": 2660 + }, + { + "epoch": 0.15, + "learning_rate": 9.596908352416796e-08, + "logits/chosen": -2.077711582183838, + "logits/rejected": -2.067776918411255, + "logps/chosen": -216.37060546875, + "logps/rejected": -251.5089569091797, + "loss": 0.5627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2186233550310135, + "rewards/margins": 0.351797491312027, + "rewards/rejected": -0.1331741362810135, + "step": 2661 + }, + { + "epoch": 0.15, + "learning_rate": 9.596537561062958e-08, + "logits/chosen": -2.009054660797119, + "logits/rejected": -1.9989230632781982, + "logps/chosen": -300.91485595703125, + "logps/rejected": -481.497314453125, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0350220203399658, + "rewards/margins": 0.668670654296875, + "rewards/rejected": 0.36635133624076843, + "step": 2662 + }, + { + "epoch": 0.15, + "learning_rate": 9.596166606417623e-08, + "logits/chosen": -1.987087607383728, + "logits/rejected": -1.950744867324829, + "logps/chosen": -282.0500793457031, + "logps/rejected": -489.98516845703125, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9562622308731079, + "rewards/margins": 0.8634399771690369, + "rewards/rejected": 0.09282226860523224, + "step": 2663 + }, + { + "epoch": 0.16, + "learning_rate": 9.595795488493969e-08, + "logits/chosen": -1.9936280250549316, + "logits/rejected": -1.9913067817687988, + "logps/chosen": -155.96282958984375, + "logps/rejected": -168.29354858398438, + "loss": 0.5849, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5064148306846619, + "rewards/margins": -0.01075434684753418, + "rewards/rejected": 0.517169177532196, + "step": 2664 + }, + { + "epoch": 0.16, + "learning_rate": 9.595424207305178e-08, + "logits/chosen": -2.0623998641967773, + "logits/rejected": -2.059807062149048, + "logps/chosen": -48.566375732421875, + "logps/rejected": -133.9674835205078, + "loss": 0.4803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13330383598804474, + "rewards/margins": 0.7972427606582642, + "rewards/rejected": -0.6639389395713806, + "step": 2665 + }, + { + "epoch": 0.16, + "learning_rate": 9.59505276286444e-08, + "logits/chosen": -2.061434745788574, + "logits/rejected": -2.065291404724121, + "logps/chosen": -299.5179443359375, + "logps/rejected": -393.77154541015625, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.810894787311554, + "rewards/margins": 0.5957580804824829, + "rewards/rejected": 0.21513672173023224, + "step": 2666 + }, + { + "epoch": 0.16, + "learning_rate": 9.594681155184952e-08, + "logits/chosen": -2.058389902114868, + "logits/rejected": -2.044593095779419, + "logps/chosen": -55.5906982421875, + "logps/rejected": -246.6258544921875, + "loss": 0.6364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056073762476444244, + "rewards/margins": 0.1938072144985199, + "rewards/rejected": -0.13773345947265625, + "step": 2667 + }, + { + "epoch": 0.16, + "learning_rate": 9.594309384279914e-08, + "logits/chosen": -2.1880385875701904, + "logits/rejected": -2.168353796005249, + "logps/chosen": -250.20204162597656, + "logps/rejected": -373.8704833984375, + "loss": 0.175, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1941025257110596, + "rewards/margins": 1.8677659034729004, + "rewards/rejected": -0.673663318157196, + "step": 2668 + }, + { + "epoch": 0.16, + "learning_rate": 9.593937450162533e-08, + "logits/chosen": -2.161545753479004, + "logits/rejected": -2.1668872833251953, + "logps/chosen": -2.697225570678711, + "logps/rejected": -89.15084838867188, + "loss": 0.5884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045726776123046875, + "rewards/margins": 0.5348610281944275, + "rewards/rejected": -0.5805878043174744, + "step": 2669 + }, + { + "epoch": 0.16, + "learning_rate": 9.593565352846022e-08, + "logits/chosen": -2.0624454021453857, + "logits/rejected": -2.068115711212158, + "logps/chosen": -0.010025691241025925, + "logps/rejected": -143.49761962890625, + "loss": 0.5299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005213288241066039, + "rewards/margins": 0.8155050277709961, + "rewards/rejected": -0.8160263299942017, + "step": 2670 + }, + { + "epoch": 0.16, + "learning_rate": 9.5931930923436e-08, + "logits/chosen": -2.2927229404449463, + "logits/rejected": -2.2943766117095947, + "logps/chosen": -7.793780326843262, + "logps/rejected": -122.57878112792969, + "loss": 0.6946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08926206082105637, + "rewards/margins": 0.06332201510667801, + "rewards/rejected": -0.15258407592773438, + "step": 2671 + }, + { + "epoch": 0.16, + "learning_rate": 9.592820668668493e-08, + "logits/chosen": -1.9672366380691528, + "logits/rejected": -2.01419997215271, + "logps/chosen": -266.81610107421875, + "logps/rejected": -230.4088897705078, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7053284049034119, + "rewards/margins": 0.761665403842926, + "rewards/rejected": -0.05633697658777237, + "step": 2672 + }, + { + "epoch": 0.16, + "learning_rate": 9.592448081833931e-08, + "logits/chosen": -1.8297961950302124, + "logits/rejected": -1.8420716524124146, + "logps/chosen": -179.881591796875, + "logps/rejected": -259.67987060546875, + "loss": 0.4521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.092626929283142, + "rewards/margins": 0.08484184741973877, + "rewards/rejected": 1.0077850818634033, + "step": 2673 + }, + { + "epoch": 0.16, + "learning_rate": 9.592075331853147e-08, + "logits/chosen": -2.1411874294281006, + "logits/rejected": -2.153409242630005, + "logps/chosen": -231.50010681152344, + "logps/rejected": -462.9347839355469, + "loss": 0.2277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.857867419719696, + "rewards/margins": 1.4651641845703125, + "rewards/rejected": -0.6072967648506165, + "step": 2674 + }, + { + "epoch": 0.16, + "learning_rate": 9.591702418739384e-08, + "logits/chosen": -2.2567012310028076, + "logits/rejected": -2.249617099761963, + "logps/chosen": -15.134735107421875, + "logps/rejected": -184.58627319335938, + "loss": 0.5344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006304645445197821, + "rewards/margins": 0.8384549021720886, + "rewards/rejected": -0.8321502804756165, + "step": 2675 + }, + { + "epoch": 0.16, + "learning_rate": 9.591329342505892e-08, + "logits/chosen": -2.0474629402160645, + "logits/rejected": -1.9481134414672852, + "logps/chosen": -201.59005737304688, + "logps/rejected": -373.34210205078125, + "loss": 0.4626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5770172476768494, + "rewards/margins": 0.27957460284233093, + "rewards/rejected": 0.29744264483451843, + "step": 2676 + }, + { + "epoch": 0.16, + "learning_rate": 9.590956103165923e-08, + "logits/chosen": -2.1813697814941406, + "logits/rejected": -2.1837539672851562, + "logps/chosen": -227.591552734375, + "logps/rejected": -292.6116638183594, + "loss": 0.4698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.873309314250946, + "rewards/margins": 0.08161312341690063, + "rewards/rejected": 0.7916961908340454, + "step": 2677 + }, + { + "epoch": 0.16, + "learning_rate": 9.590582700732737e-08, + "logits/chosen": -2.2917468547821045, + "logits/rejected": -2.2955799102783203, + "logps/chosen": -4.11440372467041, + "logps/rejected": -45.00533676147461, + "loss": 0.6747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0977487787604332, + "rewards/margins": 0.11387193948030472, + "rewards/rejected": -0.21162071824073792, + "step": 2678 + }, + { + "epoch": 0.16, + "learning_rate": 9.590209135219598e-08, + "logits/chosen": -1.9452176094055176, + "logits/rejected": -1.922989010810852, + "logps/chosen": -188.59217834472656, + "logps/rejected": -310.61322021484375, + "loss": 0.4842, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8545913696289062, + "rewards/margins": -0.038767993450164795, + "rewards/rejected": 0.893359363079071, + "step": 2679 + }, + { + "epoch": 0.16, + "learning_rate": 9.589835406639776e-08, + "logits/chosen": -2.2199318408966064, + "logits/rejected": -2.2232789993286133, + "logps/chosen": -0.002680376637727022, + "logps/rejected": -149.18247985839844, + "loss": 0.5825, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.796011464553885e-05, + "rewards/margins": 0.5671036243438721, + "rewards/rejected": -0.5671615600585938, + "step": 2680 + }, + { + "epoch": 0.16, + "learning_rate": 9.589461515006551e-08, + "logits/chosen": -2.1445913314819336, + "logits/rejected": -2.119988203048706, + "logps/chosen": -256.7611999511719, + "logps/rejected": -420.6476745605469, + "loss": 0.158, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1114044189453125, + "rewards/margins": 2.167581081390381, + "rewards/rejected": -1.056176781654358, + "step": 2681 + }, + { + "epoch": 0.16, + "learning_rate": 9.589087460333203e-08, + "logits/chosen": -2.1441683769226074, + "logits/rejected": -2.1394803524017334, + "logps/chosen": -207.3262939453125, + "logps/rejected": -300.1536865234375, + "loss": 0.4454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7690979242324829, + "rewards/margins": 0.35081177949905396, + "rewards/rejected": 0.41828614473342896, + "step": 2682 + }, + { + "epoch": 0.16, + "learning_rate": 9.58871324263302e-08, + "logits/chosen": -2.0524933338165283, + "logits/rejected": -1.9738699197769165, + "logps/chosen": -155.81381225585938, + "logps/rejected": -295.1967468261719, + "loss": 0.5508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2502914369106293, + "rewards/margins": 0.3433181643486023, + "rewards/rejected": -0.09302673488855362, + "step": 2683 + }, + { + "epoch": 0.16, + "learning_rate": 9.588338861919299e-08, + "logits/chosen": -1.9639313220977783, + "logits/rejected": -1.9612172842025757, + "logps/chosen": -65.87908935546875, + "logps/rejected": -208.4365997314453, + "loss": 0.6313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03800811991095543, + "rewards/margins": 0.2137909084558487, + "rewards/rejected": -0.17578278481960297, + "step": 2684 + }, + { + "epoch": 0.16, + "learning_rate": 9.587964318205334e-08, + "logits/chosen": -2.0822043418884277, + "logits/rejected": -2.084477663040161, + "logps/chosen": -20.293792724609375, + "logps/rejected": -136.67977905273438, + "loss": 0.5216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04502449184656143, + "rewards/margins": 0.8005756735801697, + "rewards/rejected": -0.7555511593818665, + "step": 2685 + }, + { + "epoch": 0.16, + "learning_rate": 9.587589611504437e-08, + "logits/chosen": -2.1963937282562256, + "logits/rejected": -2.148818254470825, + "logps/chosen": -363.728759765625, + "logps/rejected": -560.697265625, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7445648908615112, + "rewards/margins": 0.06640326976776123, + "rewards/rejected": 1.67816162109375, + "step": 2686 + }, + { + "epoch": 0.16, + "learning_rate": 9.587214741829916e-08, + "logits/chosen": -2.056690216064453, + "logits/rejected": -2.1303701400756836, + "logps/chosen": -281.01507568359375, + "logps/rejected": -295.7961120605469, + "loss": 0.3542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.974896252155304, + "rewards/margins": 0.5390655994415283, + "rewards/rejected": 0.435830682516098, + "step": 2687 + }, + { + "epoch": 0.16, + "learning_rate": 9.58683970919509e-08, + "logits/chosen": -2.0261263847351074, + "logits/rejected": -2.020545482635498, + "logps/chosen": -37.09693145751953, + "logps/rejected": -173.18255615234375, + "loss": 0.3805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2623329162597656, + "rewards/margins": 1.4810631275177002, + "rewards/rejected": -1.2187302112579346, + "step": 2688 + }, + { + "epoch": 0.16, + "learning_rate": 9.58646451361328e-08, + "logits/chosen": -2.039055585861206, + "logits/rejected": -1.980988621711731, + "logps/chosen": -247.7066650390625, + "logps/rejected": -348.9000244140625, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3447403013706207, + "rewards/margins": 0.31882476806640625, + "rewards/rejected": 0.02591552771627903, + "step": 2689 + }, + { + "epoch": 0.16, + "learning_rate": 9.586089155097814e-08, + "logits/chosen": -2.128859043121338, + "logits/rejected": -2.1238887310028076, + "logps/chosen": -73.02938842773438, + "logps/rejected": -285.9283447265625, + "loss": 0.5328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07762451469898224, + "rewards/margins": 0.8783844113349915, + "rewards/rejected": -0.9560089111328125, + "step": 2690 + }, + { + "epoch": 0.16, + "learning_rate": 9.58571363366203e-08, + "logits/chosen": -1.999333381652832, + "logits/rejected": -1.9892035722732544, + "logps/chosen": -166.6220245361328, + "logps/rejected": -262.95623779296875, + "loss": 0.5187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2534011900424957, + "rewards/margins": 0.4656783938407898, + "rewards/rejected": -0.21227721869945526, + "step": 2691 + }, + { + "epoch": 0.16, + "learning_rate": 9.585337949319267e-08, + "logits/chosen": -2.2224040031433105, + "logits/rejected": -2.2248551845550537, + "logps/chosen": -0.059595368802547455, + "logps/rejected": -124.58438873291016, + "loss": 0.4574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003756435588002205, + "rewards/margins": 1.4156962633132935, + "rewards/rejected": -1.4194526672363281, + "step": 2692 + }, + { + "epoch": 0.16, + "learning_rate": 9.58496210208287e-08, + "logits/chosen": -1.8806334733963013, + "logits/rejected": -1.888418197631836, + "logps/chosen": -1.4296549558639526, + "logps/rejected": -93.13651275634766, + "loss": 0.5513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00929948128759861, + "rewards/margins": 0.6637107729911804, + "rewards/rejected": -0.6544113159179688, + "step": 2693 + }, + { + "epoch": 0.16, + "learning_rate": 9.584586091966191e-08, + "logits/chosen": -2.1018800735473633, + "logits/rejected": -2.09584379196167, + "logps/chosen": -50.902931213378906, + "logps/rejected": -162.1957244873047, + "loss": 0.5407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15747375786304474, + "rewards/margins": 0.5092697143554688, + "rewards/rejected": -0.3517959713935852, + "step": 2694 + }, + { + "epoch": 0.16, + "learning_rate": 9.584209918982589e-08, + "logits/chosen": -2.204770565032959, + "logits/rejected": -2.206982374191284, + "logps/chosen": -0.6571928262710571, + "logps/rejected": -73.27188873291016, + "loss": 0.6734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020879371091723442, + "rewards/margins": 0.14387130737304688, + "rewards/rejected": -0.16475068032741547, + "step": 2695 + }, + { + "epoch": 0.16, + "learning_rate": 9.583833583145426e-08, + "logits/chosen": -1.9336880445480347, + "logits/rejected": -1.8478621244430542, + "logps/chosen": -271.17681884765625, + "logps/rejected": -405.08575439453125, + "loss": 0.4855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5436310172080994, + "rewards/margins": 0.31578677892684937, + "rewards/rejected": 0.22784423828125, + "step": 2696 + }, + { + "epoch": 0.16, + "learning_rate": 9.583457084468074e-08, + "logits/chosen": -2.1866257190704346, + "logits/rejected": -2.166288375854492, + "logps/chosen": -196.21160888671875, + "logps/rejected": -370.21588134765625, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0079452991485596, + "rewards/margins": 0.6006729602813721, + "rewards/rejected": 0.4072723388671875, + "step": 2697 + }, + { + "epoch": 0.16, + "learning_rate": 9.583080422963905e-08, + "logits/chosen": -2.0469348430633545, + "logits/rejected": -2.0994343757629395, + "logps/chosen": -220.8285675048828, + "logps/rejected": -349.07086181640625, + "loss": 0.2345, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.19517982006073, + "rewards/margins": 1.1519242525100708, + "rewards/rejected": 0.04325561597943306, + "step": 2698 + }, + { + "epoch": 0.16, + "learning_rate": 9.582703598646301e-08, + "logits/chosen": -2.158521890640259, + "logits/rejected": -2.1745941638946533, + "logps/chosen": -249.14065551757812, + "logps/rejected": -289.9242248535156, + "loss": 0.5061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.686199963092804, + "rewards/margins": 0.1619110107421875, + "rewards/rejected": 0.5242889523506165, + "step": 2699 + }, + { + "epoch": 0.16, + "learning_rate": 9.58232661152865e-08, + "logits/chosen": -2.103645086288452, + "logits/rejected": -2.1209778785705566, + "logps/chosen": -218.5430450439453, + "logps/rejected": -322.06854248046875, + "loss": 0.2249, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2153748273849487, + "rewards/margins": 1.2592225074768066, + "rewards/rejected": -0.04384765774011612, + "step": 2700 + }, + { + "epoch": 0.16, + "learning_rate": 9.581949461624341e-08, + "logits/chosen": -1.9282317161560059, + "logits/rejected": -1.9307441711425781, + "logps/chosen": -36.10942459106445, + "logps/rejected": -173.21484375, + "loss": 0.7826, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.13773499429225922, + "rewards/margins": -0.21237489581108093, + "rewards/rejected": 0.07463989406824112, + "step": 2701 + }, + { + "epoch": 0.16, + "learning_rate": 9.581572148946777e-08, + "logits/chosen": -1.9967725276947021, + "logits/rejected": -1.9957988262176514, + "logps/chosen": -335.8577575683594, + "logps/rejected": -611.1111450195312, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3162444829940796, + "rewards/margins": 0.5441863536834717, + "rewards/rejected": 0.7720581293106079, + "step": 2702 + }, + { + "epoch": 0.16, + "learning_rate": 9.581194673509357e-08, + "logits/chosen": -2.1887366771698, + "logits/rejected": -2.1887619495391846, + "logps/chosen": -2.443774792482145e-05, + "logps/rejected": -122.85160064697266, + "loss": 0.4541, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9073449664119835e-07, + "rewards/margins": 1.4161993265151978, + "rewards/rejected": -1.4161995649337769, + "step": 2703 + }, + { + "epoch": 0.16, + "learning_rate": 9.580817035325495e-08, + "logits/chosen": -1.9142099618911743, + "logits/rejected": -1.9085158109664917, + "logps/chosen": -30.260921478271484, + "logps/rejected": -267.3471374511719, + "loss": 0.4998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06431808322668076, + "rewards/margins": 1.1591697931289673, + "rewards/rejected": -1.2234878540039062, + "step": 2704 + }, + { + "epoch": 0.16, + "learning_rate": 9.580439234408603e-08, + "logits/chosen": -2.2106552124023438, + "logits/rejected": -2.20984148979187, + "logps/chosen": -0.9051067233085632, + "logps/rejected": -53.690452575683594, + "loss": 0.6456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07592959702014923, + "rewards/margins": 0.28508496284484863, + "rewards/rejected": -0.36101457476615906, + "step": 2705 + }, + { + "epoch": 0.16, + "learning_rate": 9.580061270772106e-08, + "logits/chosen": -2.147470474243164, + "logits/rejected": -2.1328630447387695, + "logps/chosen": -32.695594787597656, + "logps/rejected": -187.02122497558594, + "loss": 0.6231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3236503601074219, + "rewards/margins": 0.0052131712436676025, + "rewards/rejected": 0.3184371888637543, + "step": 2706 + }, + { + "epoch": 0.16, + "learning_rate": 9.579683144429427e-08, + "logits/chosen": -2.1292855739593506, + "logits/rejected": -2.0570812225341797, + "logps/chosen": -157.0914306640625, + "logps/rejected": -386.3128967285156, + "loss": 0.3564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.316610723733902, + "rewards/margins": 1.338403344154358, + "rewards/rejected": -1.0217926502227783, + "step": 2707 + }, + { + "epoch": 0.16, + "learning_rate": 9.579304855394003e-08, + "logits/chosen": -2.175143241882324, + "logits/rejected": -2.1421964168548584, + "logps/chosen": -245.15652465820312, + "logps/rejected": -337.5331726074219, + "loss": 0.3093, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2830047607421875, + "rewards/margins": 0.6432891488075256, + "rewards/rejected": 0.6397156119346619, + "step": 2708 + }, + { + "epoch": 0.16, + "learning_rate": 9.57892640367927e-08, + "logits/chosen": -1.8143445253372192, + "logits/rejected": -1.7378677129745483, + "logps/chosen": -201.27992248535156, + "logps/rejected": -379.64227294921875, + "loss": 0.5077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5345627069473267, + "rewards/margins": 0.25978854298591614, + "rewards/rejected": 0.2747741639614105, + "step": 2709 + }, + { + "epoch": 0.16, + "learning_rate": 9.578547789298672e-08, + "logits/chosen": -2.160362958908081, + "logits/rejected": -2.150111198425293, + "logps/chosen": -7.438471948262304e-05, + "logps/rejected": -207.57933044433594, + "loss": 0.3924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8489664600783726e-06, + "rewards/margins": 2.287092924118042, + "rewards/rejected": -2.287095785140991, + "step": 2710 + }, + { + "epoch": 0.16, + "learning_rate": 9.578169012265662e-08, + "logits/chosen": -1.9629888534545898, + "logits/rejected": -1.9641720056533813, + "logps/chosen": -3.1524837017059326, + "logps/rejected": -29.516021728515625, + "loss": 0.6705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0376996286213398, + "rewards/margins": 0.11214607954025269, + "rewards/rejected": -0.14984570443630219, + "step": 2711 + }, + { + "epoch": 0.16, + "learning_rate": 9.577790072593694e-08, + "logits/chosen": -2.257692337036133, + "logits/rejected": -2.2423460483551025, + "logps/chosen": -0.00039298037881962955, + "logps/rejected": -160.24526977539062, + "loss": 0.51, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.144102407124592e-05, + "rewards/margins": 0.9505043029785156, + "rewards/rejected": -0.9505157470703125, + "step": 2712 + }, + { + "epoch": 0.16, + "learning_rate": 9.577410970296231e-08, + "logits/chosen": -1.976334571838379, + "logits/rejected": -1.987471103668213, + "logps/chosen": -268.872802734375, + "logps/rejected": -316.19439697265625, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.161163330078125, + "rewards/margins": 0.646453857421875, + "rewards/rejected": 0.51470947265625, + "step": 2713 + }, + { + "epoch": 0.16, + "learning_rate": 9.577031705386739e-08, + "logits/chosen": -2.131394863128662, + "logits/rejected": -2.0159130096435547, + "logps/chosen": -181.92408752441406, + "logps/rejected": -652.234130859375, + "loss": 0.2615, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0858322381973267, + "rewards/margins": 1.0096298456192017, + "rewards/rejected": 0.076202392578125, + "step": 2714 + }, + { + "epoch": 0.16, + "learning_rate": 9.576652277878692e-08, + "logits/chosen": -2.0820655822753906, + "logits/rejected": -2.0393261909484863, + "logps/chosen": -154.8016357421875, + "logps/rejected": -509.09979248046875, + "loss": 0.4506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.443014532327652, + "rewards/margins": 0.5138275027275085, + "rewards/rejected": -0.07081299275159836, + "step": 2715 + }, + { + "epoch": 0.16, + "learning_rate": 9.576272687785569e-08, + "logits/chosen": -2.131706476211548, + "logits/rejected": -2.052889347076416, + "logps/chosen": -51.49311447143555, + "logps/rejected": -384.375732421875, + "loss": 0.3972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04256324842572212, + "rewards/margins": 2.0607943534851074, + "rewards/rejected": -2.018231153488159, + "step": 2716 + }, + { + "epoch": 0.16, + "learning_rate": 9.575892935120855e-08, + "logits/chosen": -1.9378714561462402, + "logits/rejected": -1.9244356155395508, + "logps/chosen": -235.76541137695312, + "logps/rejected": -364.6685791015625, + "loss": 0.4353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5826263427734375, + "rewards/margins": 0.6470306515693665, + "rewards/rejected": -0.06440430134534836, + "step": 2717 + }, + { + "epoch": 0.16, + "learning_rate": 9.575513019898042e-08, + "logits/chosen": -2.0228488445281982, + "logits/rejected": -1.993096113204956, + "logps/chosen": -242.9678497314453, + "logps/rejected": -440.49066162109375, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2617905139923096, + "rewards/margins": 0.7145065665245056, + "rewards/rejected": 0.547283947467804, + "step": 2718 + }, + { + "epoch": 0.16, + "learning_rate": 9.575132942130624e-08, + "logits/chosen": -2.018510103225708, + "logits/rejected": -2.008556604385376, + "logps/chosen": -15.322141647338867, + "logps/rejected": -265.05841064453125, + "loss": 0.418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.033243563026189804, + "rewards/margins": 1.940473198890686, + "rewards/rejected": -1.9072296619415283, + "step": 2719 + }, + { + "epoch": 0.16, + "learning_rate": 9.574752701832107e-08, + "logits/chosen": -2.154667854309082, + "logits/rejected": -2.142906904220581, + "logps/chosen": -4.899439954897389e-05, + "logps/rejected": -277.47772216796875, + "loss": 0.4324, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.125802999828011e-07, + "rewards/margins": 1.698163390159607, + "rewards/rejected": -1.6981629133224487, + "step": 2720 + }, + { + "epoch": 0.16, + "learning_rate": 9.574372299015994e-08, + "logits/chosen": -2.2044284343719482, + "logits/rejected": -2.200895309448242, + "logps/chosen": -93.20796203613281, + "logps/rejected": -210.7115478515625, + "loss": 0.484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17737655341625214, + "rewards/margins": 0.7011192440986633, + "rewards/rejected": -0.52374267578125, + "step": 2721 + }, + { + "epoch": 0.16, + "learning_rate": 9.5739917336958e-08, + "logits/chosen": -2.194488048553467, + "logits/rejected": -2.1908679008483887, + "logps/chosen": -29.96550178527832, + "logps/rejected": -106.94564056396484, + "loss": 0.5749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05307731777429581, + "rewards/margins": 0.6182720065116882, + "rewards/rejected": -0.6713493466377258, + "step": 2722 + }, + { + "epoch": 0.16, + "learning_rate": 9.573611005885049e-08, + "logits/chosen": -1.970109224319458, + "logits/rejected": -1.9859042167663574, + "logps/chosen": -211.90621948242188, + "logps/rejected": -520.6337280273438, + "loss": 0.2584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7976471185684204, + "rewards/margins": 1.3844878673553467, + "rewards/rejected": -0.586840808391571, + "step": 2723 + }, + { + "epoch": 0.16, + "learning_rate": 9.573230115597261e-08, + "logits/chosen": -1.8787822723388672, + "logits/rejected": -1.855183482170105, + "logps/chosen": -212.77182006835938, + "logps/rejected": -402.3545837402344, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0488067865371704, + "rewards/margins": 0.8779571652412415, + "rewards/rejected": 0.17084960639476776, + "step": 2724 + }, + { + "epoch": 0.16, + "learning_rate": 9.572849062845971e-08, + "logits/chosen": -2.0507566928863525, + "logits/rejected": -2.0467817783355713, + "logps/chosen": -9.74740982055664, + "logps/rejected": -113.88496398925781, + "loss": 0.5365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055633544921875, + "rewards/margins": 0.6412292718887329, + "rewards/rejected": -0.5855957269668579, + "step": 2725 + }, + { + "epoch": 0.16, + "learning_rate": 9.572467847644714e-08, + "logits/chosen": -2.011176109313965, + "logits/rejected": -1.9948718547821045, + "logps/chosen": -45.920806884765625, + "logps/rejected": -280.9830017089844, + "loss": 0.4483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24216651916503906, + "rewards/margins": 0.8199467062950134, + "rewards/rejected": -0.5777801871299744, + "step": 2726 + }, + { + "epoch": 0.16, + "learning_rate": 9.572086470007032e-08, + "logits/chosen": -2.2074496746063232, + "logits/rejected": -2.200305461883545, + "logps/chosen": -6.6531195640563965, + "logps/rejected": -87.1362075805664, + "loss": 0.6673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01653294637799263, + "rewards/margins": 0.028635455295443535, + "rewards/rejected": -0.012102508917450905, + "step": 2727 + }, + { + "epoch": 0.16, + "learning_rate": 9.571704929946474e-08, + "logits/chosen": -2.061635732650757, + "logits/rejected": -2.058332681655884, + "logps/chosen": -0.6242548823356628, + "logps/rejected": -201.76625061035156, + "loss": 0.4761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022374678403139114, + "rewards/margins": 1.179929494857788, + "rewards/rejected": -1.2023041248321533, + "step": 2728 + }, + { + "epoch": 0.16, + "learning_rate": 9.571323227476593e-08, + "logits/chosen": -2.08404803276062, + "logits/rejected": -2.0662336349487305, + "logps/chosen": -148.67242431640625, + "logps/rejected": -234.86619567871094, + "loss": 0.5345, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8319458365440369, + "rewards/margins": -0.14321744441986084, + "rewards/rejected": 0.9751632809638977, + "step": 2729 + }, + { + "epoch": 0.16, + "learning_rate": 9.570941362610951e-08, + "logits/chosen": -2.0380496978759766, + "logits/rejected": -1.948154091835022, + "logps/chosen": -159.89015197753906, + "logps/rejected": -461.553466796875, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3958999812602997, + "rewards/margins": 2.3142197132110596, + "rewards/rejected": -1.9183197021484375, + "step": 2730 + }, + { + "epoch": 0.16, + "learning_rate": 9.570559335363114e-08, + "logits/chosen": -2.2749176025390625, + "logits/rejected": -2.2567970752716064, + "logps/chosen": -219.36834716796875, + "logps/rejected": -373.3603820800781, + "loss": 0.2151, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2280105352401733, + "rewards/margins": 1.1405258178710938, + "rewards/rejected": 0.08748473972082138, + "step": 2731 + }, + { + "epoch": 0.16, + "learning_rate": 9.570177145746652e-08, + "logits/chosen": -1.9432103633880615, + "logits/rejected": -1.9511357545852661, + "logps/chosen": -290.8797607421875, + "logps/rejected": -414.6989440917969, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5380890369415283, + "rewards/margins": 0.5381836295127869, + "rewards/rejected": 0.9999054074287415, + "step": 2732 + }, + { + "epoch": 0.16, + "learning_rate": 9.569794793775143e-08, + "logits/chosen": -2.246492862701416, + "logits/rejected": -2.237973213195801, + "logps/chosen": -142.40908813476562, + "logps/rejected": -165.28640747070312, + "loss": 0.6262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18571166694164276, + "rewards/margins": 0.17404480278491974, + "rewards/rejected": 0.011666870675981045, + "step": 2733 + }, + { + "epoch": 0.16, + "learning_rate": 9.569412279462167e-08, + "logits/chosen": -2.073111057281494, + "logits/rejected": -2.0690903663635254, + "logps/chosen": -8.723973274230957, + "logps/rejected": -88.78781127929688, + "loss": 0.5802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02805204503238201, + "rewards/margins": 0.5611417293548584, + "rewards/rejected": -0.5891937613487244, + "step": 2734 + }, + { + "epoch": 0.16, + "learning_rate": 9.569029602821319e-08, + "logits/chosen": -2.067700147628784, + "logits/rejected": -2.055241107940674, + "logps/chosen": -138.06060791015625, + "logps/rejected": -223.83004760742188, + "loss": 0.5762, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.57672119140625, + "rewards/margins": -0.12548065185546875, + "rewards/rejected": 0.7022018432617188, + "step": 2735 + }, + { + "epoch": 0.16, + "learning_rate": 9.568646763866188e-08, + "logits/chosen": -2.1453630924224854, + "logits/rejected": -2.1440820693969727, + "logps/chosen": -193.169921875, + "logps/rejected": -307.3046875, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0677261352539062, + "rewards/margins": 0.5118362307548523, + "rewards/rejected": 0.555889904499054, + "step": 2736 + }, + { + "epoch": 0.16, + "learning_rate": 9.568263762610376e-08, + "logits/chosen": -2.01956844329834, + "logits/rejected": -2.0317928791046143, + "logps/chosen": -124.52127838134766, + "logps/rejected": -258.7477111816406, + "loss": 0.505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10953140258789062, + "rewards/margins": 0.8387153744697571, + "rewards/rejected": -0.7291839718818665, + "step": 2737 + }, + { + "epoch": 0.16, + "learning_rate": 9.56788059906749e-08, + "logits/chosen": -2.2317252159118652, + "logits/rejected": -2.2166473865509033, + "logps/chosen": -91.8778305053711, + "logps/rejected": -235.77059936523438, + "loss": 0.4299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4962402284145355, + "rewards/margins": 0.7542937994003296, + "rewards/rejected": -0.25805360078811646, + "step": 2738 + }, + { + "epoch": 0.16, + "learning_rate": 9.567497273251141e-08, + "logits/chosen": -2.128364086151123, + "logits/rejected": -2.123542070388794, + "logps/chosen": -40.76290512084961, + "logps/rejected": -131.31390380859375, + "loss": 0.515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006295395083725452, + "rewards/margins": 0.7908619046211243, + "rewards/rejected": -0.7845665216445923, + "step": 2739 + }, + { + "epoch": 0.16, + "learning_rate": 9.567113785174947e-08, + "logits/chosen": -2.0315659046173096, + "logits/rejected": -2.0352184772491455, + "logps/chosen": -0.11556509137153625, + "logps/rejected": -48.299564361572266, + "loss": 0.6851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016764715546742082, + "rewards/margins": 0.07811860740184784, + "rewards/rejected": -0.07979507744312286, + "step": 2740 + }, + { + "epoch": 0.16, + "learning_rate": 9.566730134852532e-08, + "logits/chosen": -2.2464871406555176, + "logits/rejected": -2.234128713607788, + "logps/chosen": -50.966033935546875, + "logps/rejected": -239.71844482421875, + "loss": 0.4787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23241043090820312, + "rewards/margins": 0.8250603079795837, + "rewards/rejected": -0.5926498770713806, + "step": 2741 + }, + { + "epoch": 0.16, + "learning_rate": 9.566346322297523e-08, + "logits/chosen": -2.021873950958252, + "logits/rejected": -2.0133700370788574, + "logps/chosen": -42.093631744384766, + "logps/rejected": -110.6890640258789, + "loss": 0.6087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020814895629882812, + "rewards/margins": 0.35557517409324646, + "rewards/rejected": -0.3763900697231293, + "step": 2742 + }, + { + "epoch": 0.16, + "learning_rate": 9.565962347523557e-08, + "logits/chosen": -1.821864366531372, + "logits/rejected": -1.7613694667816162, + "logps/chosen": -230.28717041015625, + "logps/rejected": -458.9320373535156, + "loss": 0.6057, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9067749381065369, + "rewards/margins": -0.4841460585594177, + "rewards/rejected": 1.3909209966659546, + "step": 2743 + }, + { + "epoch": 0.16, + "learning_rate": 9.565578210544273e-08, + "logits/chosen": -2.148955821990967, + "logits/rejected": -2.145311117172241, + "logps/chosen": -6.546578884124756, + "logps/rejected": -97.65599822998047, + "loss": 0.5949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008533335290849209, + "rewards/margins": 0.45646682381629944, + "rewards/rejected": -0.4650001525878906, + "step": 2744 + }, + { + "epoch": 0.16, + "learning_rate": 9.56519391137332e-08, + "logits/chosen": -2.1396522521972656, + "logits/rejected": -2.1049861907958984, + "logps/chosen": -95.43550872802734, + "logps/rejected": -246.59829711914062, + "loss": 0.4653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29488906264305115, + "rewards/margins": 0.8660743236541748, + "rewards/rejected": -0.571185290813446, + "step": 2745 + }, + { + "epoch": 0.16, + "learning_rate": 9.564809450024346e-08, + "logits/chosen": -2.131338596343994, + "logits/rejected": -2.1307291984558105, + "logps/chosen": -23.583158493041992, + "logps/rejected": -147.5456085205078, + "loss": 0.5684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016147995367646217, + "rewards/margins": 0.5869724750518799, + "rewards/rejected": -0.6031204462051392, + "step": 2746 + }, + { + "epoch": 0.16, + "learning_rate": 9.564424826511013e-08, + "logits/chosen": -2.0653748512268066, + "logits/rejected": -2.0696475505828857, + "logps/chosen": -32.45637512207031, + "logps/rejected": -231.61981201171875, + "loss": 0.4496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08860474079847336, + "rewards/margins": 1.279486060142517, + "rewards/rejected": -1.190881371498108, + "step": 2747 + }, + { + "epoch": 0.16, + "learning_rate": 9.564040040846983e-08, + "logits/chosen": -2.1072590351104736, + "logits/rejected": -2.1098244190216064, + "logps/chosen": -0.0047018323093652725, + "logps/rejected": -109.46830749511719, + "loss": 0.6866, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.236283712089062e-05, + "rewards/margins": 0.0131610669195652, + "rewards/rejected": -0.013203429989516735, + "step": 2748 + }, + { + "epoch": 0.16, + "learning_rate": 9.563655093045925e-08, + "logits/chosen": -2.1673948764801025, + "logits/rejected": -2.157484531402588, + "logps/chosen": -0.013188387267291546, + "logps/rejected": -194.69265747070312, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017524474242236465, + "rewards/margins": 2.319179058074951, + "rewards/rejected": -2.319354295730591, + "step": 2749 + }, + { + "epoch": 0.16, + "learning_rate": 9.563269983121516e-08, + "logits/chosen": -2.1520097255706787, + "logits/rejected": -2.1501760482788086, + "logps/chosen": -6.010630130767822, + "logps/rejected": -115.60884094238281, + "loss": 0.7025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11189021915197372, + "rewards/margins": 0.05256291478872299, + "rewards/rejected": -0.16445313394069672, + "step": 2750 + }, + { + "epoch": 0.16, + "learning_rate": 9.562884711087435e-08, + "logits/chosen": -2.0372979640960693, + "logits/rejected": -2.0282111167907715, + "logps/chosen": -41.999271392822266, + "logps/rejected": -231.6233673095703, + "loss": 0.5479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09272613376379013, + "rewards/margins": 0.49888917803764343, + "rewards/rejected": -0.4061630368232727, + "step": 2751 + }, + { + "epoch": 0.16, + "learning_rate": 9.562499276957371e-08, + "logits/chosen": -1.9822802543640137, + "logits/rejected": -1.9788538217544556, + "logps/chosen": -0.0007023377693258226, + "logps/rejected": -132.6405487060547, + "loss": 0.5412, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.061261304537766e-05, + "rewards/margins": 0.7390151023864746, + "rewards/rejected": -0.7390457391738892, + "step": 2752 + }, + { + "epoch": 0.16, + "learning_rate": 9.562113680745014e-08, + "logits/chosen": -1.9575222730636597, + "logits/rejected": -1.9578334093093872, + "logps/chosen": -73.75312042236328, + "logps/rejected": -229.94677734375, + "loss": 0.6984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3575187623500824, + "rewards/margins": 0.25045856833457947, + "rewards/rejected": -0.6079773306846619, + "step": 2753 + }, + { + "epoch": 0.16, + "learning_rate": 9.561727922464064e-08, + "logits/chosen": -2.070051431655884, + "logits/rejected": -1.9096964597702026, + "logps/chosen": -230.46826171875, + "logps/rejected": -389.69818115234375, + "loss": 0.6545, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12090911716222763, + "rewards/margins": -0.10880585759878159, + "rewards/rejected": 0.22971497476100922, + "step": 2754 + }, + { + "epoch": 0.16, + "learning_rate": 9.561342002128223e-08, + "logits/chosen": -1.8553069829940796, + "logits/rejected": -1.7748581171035767, + "logps/chosen": -257.4489440917969, + "logps/rejected": -526.431396484375, + "loss": 0.2384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8859039545059204, + "rewards/margins": 1.5851960182189941, + "rewards/rejected": -0.699292004108429, + "step": 2755 + }, + { + "epoch": 0.16, + "learning_rate": 9.560955919751204e-08, + "logits/chosen": -2.232630968093872, + "logits/rejected": -2.225762128829956, + "logps/chosen": -7.808064401615411e-05, + "logps/rejected": -172.08908081054688, + "loss": 0.4677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.251651042366575e-06, + "rewards/margins": 1.2798160314559937, + "rewards/rejected": -1.2798172235488892, + "step": 2756 + }, + { + "epoch": 0.16, + "learning_rate": 9.560569675346718e-08, + "logits/chosen": -2.0936856269836426, + "logits/rejected": -2.0933732986450195, + "logps/chosen": -0.030744116753339767, + "logps/rejected": -179.71878051757812, + "loss": 0.4084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006586929666809738, + "rewards/margins": 1.9398030042648315, + "rewards/rejected": -1.9404617547988892, + "step": 2757 + }, + { + "epoch": 0.16, + "learning_rate": 9.56018326892849e-08, + "logits/chosen": -2.1134159564971924, + "logits/rejected": -2.11295485496521, + "logps/chosen": -165.3036346435547, + "logps/rejected": -409.10491943359375, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6156356930732727, + "rewards/margins": 1.403090000152588, + "rewards/rejected": -0.7874542474746704, + "step": 2758 + }, + { + "epoch": 0.16, + "learning_rate": 9.559796700510248e-08, + "logits/chosen": -1.8094605207443237, + "logits/rejected": -1.8164666891098022, + "logps/chosen": -219.46603393554688, + "logps/rejected": -285.84814453125, + "loss": 0.5458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23328399658203125, + "rewards/margins": 0.4383804202079773, + "rewards/rejected": -0.20509643852710724, + "step": 2759 + }, + { + "epoch": 0.16, + "learning_rate": 9.559409970105721e-08, + "logits/chosen": -1.9903688430786133, + "logits/rejected": -1.9890533685684204, + "logps/chosen": -235.95254516601562, + "logps/rejected": -451.420166015625, + "loss": 0.1578, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4175338745117188, + "rewards/margins": 1.7245285511016846, + "rewards/rejected": -0.30699464678764343, + "step": 2760 + }, + { + "epoch": 0.16, + "learning_rate": 9.559023077728649e-08, + "logits/chosen": -2.0140902996063232, + "logits/rejected": -2.005310297012329, + "logps/chosen": -63.03701400756836, + "logps/rejected": -281.763427734375, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020420456305146217, + "rewards/margins": 0.8906719088554382, + "rewards/rejected": -0.870251476764679, + "step": 2761 + }, + { + "epoch": 0.16, + "learning_rate": 9.558636023392778e-08, + "logits/chosen": -1.9869955778121948, + "logits/rejected": -1.9640178680419922, + "logps/chosen": -171.5427703857422, + "logps/rejected": -295.8983154296875, + "loss": 0.5185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30023500323295593, + "rewards/margins": 0.22926941514015198, + "rewards/rejected": 0.07096558064222336, + "step": 2762 + }, + { + "epoch": 0.16, + "learning_rate": 9.558248807111855e-08, + "logits/chosen": -2.2644288539886475, + "logits/rejected": -2.268897771835327, + "logps/chosen": -32.762123107910156, + "logps/rejected": -292.01190185546875, + "loss": 0.4931, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025023270398378372, + "rewards/margins": 1.056471586227417, + "rewards/rejected": -1.0314483642578125, + "step": 2763 + }, + { + "epoch": 0.16, + "learning_rate": 9.557861428899639e-08, + "logits/chosen": -2.3276724815368652, + "logits/rejected": -2.308994770050049, + "logps/chosen": -14.046037673950195, + "logps/rejected": -158.33517456054688, + "loss": 0.5842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.039385031908750534, + "rewards/margins": 0.41988927125930786, + "rewards/rejected": -0.4592742919921875, + "step": 2764 + }, + { + "epoch": 0.16, + "learning_rate": 9.55747388876989e-08, + "logits/chosen": -2.0979764461517334, + "logits/rejected": -2.1210761070251465, + "logps/chosen": -186.49542236328125, + "logps/rejected": -318.4256591796875, + "loss": 0.2301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0792206525802612, + "rewards/margins": 1.1782532930374146, + "rewards/rejected": -0.09903259575366974, + "step": 2765 + }, + { + "epoch": 0.16, + "learning_rate": 9.557086186736376e-08, + "logits/chosen": -2.0852560997009277, + "logits/rejected": -2.0816473960876465, + "logps/chosen": -95.20921325683594, + "logps/rejected": -171.71437072753906, + "loss": 0.8596, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2945961058139801, + "rewards/margins": -0.41592180728912354, + "rewards/rejected": 0.12132568657398224, + "step": 2766 + }, + { + "epoch": 0.16, + "learning_rate": 9.556698322812869e-08, + "logits/chosen": -2.125602960586548, + "logits/rejected": -2.1154890060424805, + "logps/chosen": -230.989501953125, + "logps/rejected": -341.59649658203125, + "loss": 0.3482, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3231995105743408, + "rewards/margins": 0.3809998035430908, + "rewards/rejected": 0.94219970703125, + "step": 2767 + }, + { + "epoch": 0.16, + "learning_rate": 9.556310297013148e-08, + "logits/chosen": -2.2177398204803467, + "logits/rejected": -2.2167000770568848, + "logps/chosen": -1.7543859481811523, + "logps/rejected": -97.13530731201172, + "loss": 0.5472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030408669263124466, + "rewards/margins": 0.6990892887115479, + "rewards/rejected": -0.6686806082725525, + "step": 2768 + }, + { + "epoch": 0.16, + "learning_rate": 9.555922109350997e-08, + "logits/chosen": -2.2217628955841064, + "logits/rejected": -2.212629795074463, + "logps/chosen": -0.00032755540451034904, + "logps/rejected": -228.6035614013672, + "loss": 0.4742, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.761358342075255e-06, + "rewards/margins": 1.213303565979004, + "rewards/rejected": -1.213313341140747, + "step": 2769 + }, + { + "epoch": 0.16, + "learning_rate": 9.555533759840209e-08, + "logits/chosen": -2.2298262119293213, + "logits/rejected": -2.223649501800537, + "logps/chosen": -0.00012051698286086321, + "logps/rejected": -153.6416015625, + "loss": 0.5151, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.154158510047637e-08, + "rewards/margins": 0.9139007329940796, + "rewards/rejected": -0.9139007925987244, + "step": 2770 + }, + { + "epoch": 0.16, + "learning_rate": 9.555145248494578e-08, + "logits/chosen": -2.1975691318511963, + "logits/rejected": -2.175670623779297, + "logps/chosen": -163.19735717773438, + "logps/rejected": -245.26272583007812, + "loss": 0.5312, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.972460925579071, + "rewards/margins": -0.2844696640968323, + "rewards/rejected": 1.2569305896759033, + "step": 2771 + }, + { + "epoch": 0.16, + "learning_rate": 9.554756575327905e-08, + "logits/chosen": -2.2077696323394775, + "logits/rejected": -2.203476905822754, + "logps/chosen": -23.512508392333984, + "logps/rejected": -260.58221435546875, + "loss": 0.4331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15345554053783417, + "rewards/margins": 2.230741024017334, + "rewards/rejected": -2.3841965198516846, + "step": 2772 + }, + { + "epoch": 0.16, + "learning_rate": 9.554367740354e-08, + "logits/chosen": -2.2443809509277344, + "logits/rejected": -2.2454440593719482, + "logps/chosen": -26.899131774902344, + "logps/rejected": -167.32736206054688, + "loss": 0.4912, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.113011933863163, + "rewards/margins": 0.9672111868858337, + "rewards/rejected": -0.854199230670929, + "step": 2773 + }, + { + "epoch": 0.16, + "learning_rate": 9.553978743586675e-08, + "logits/chosen": -2.041740655899048, + "logits/rejected": -2.0408926010131836, + "logps/chosen": -2.4035136699676514, + "logps/rejected": -67.2884521484375, + "loss": 0.6414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.050213444977998734, + "rewards/margins": 0.27487581968307495, + "rewards/rejected": -0.3250892758369446, + "step": 2774 + }, + { + "epoch": 0.16, + "learning_rate": 9.553589585039749e-08, + "logits/chosen": -2.2785515785217285, + "logits/rejected": -2.249547243118286, + "logps/chosen": -64.7201919555664, + "logps/rejected": -241.52804565429688, + "loss": 0.3903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1761833280324936, + "rewards/margins": 1.6427620649337769, + "rewards/rejected": -1.466578722000122, + "step": 2775 + }, + { + "epoch": 0.16, + "learning_rate": 9.553200264727046e-08, + "logits/chosen": -2.0351035594940186, + "logits/rejected": -2.0297465324401855, + "logps/chosen": -19.896501541137695, + "logps/rejected": -147.61195373535156, + "loss": 0.5703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010011672973632812, + "rewards/margins": 0.591256320476532, + "rewards/rejected": -0.6012679934501648, + "step": 2776 + }, + { + "epoch": 0.16, + "learning_rate": 9.552810782662398e-08, + "logits/chosen": -2.189697504043579, + "logits/rejected": -2.164574384689331, + "logps/chosen": -263.9976501464844, + "logps/rejected": -385.1224365234375, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1079620122909546, + "rewards/margins": 0.5269927382469177, + "rewards/rejected": 0.5809692740440369, + "step": 2777 + }, + { + "epoch": 0.16, + "learning_rate": 9.552421138859642e-08, + "logits/chosen": -2.2318496704101562, + "logits/rejected": -2.2200751304626465, + "logps/chosen": -197.3743896484375, + "logps/rejected": -317.23272705078125, + "loss": 0.5217, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.585003674030304, + "rewards/margins": -0.04314577579498291, + "rewards/rejected": 0.6281494498252869, + "step": 2778 + }, + { + "epoch": 0.16, + "learning_rate": 9.552031333332617e-08, + "logits/chosen": -2.143439769744873, + "logits/rejected": -2.1268022060394287, + "logps/chosen": -164.54696655273438, + "logps/rejected": -252.75830078125, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.893267810344696, + "rewards/margins": 0.3493407964706421, + "rewards/rejected": 0.543927013874054, + "step": 2779 + }, + { + "epoch": 0.16, + "learning_rate": 9.551641366095173e-08, + "logits/chosen": -2.1175851821899414, + "logits/rejected": -2.0949273109436035, + "logps/chosen": -245.79061889648438, + "logps/rejected": -450.3445129394531, + "loss": 0.2167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9359771609306335, + "rewards/margins": 1.527868628501892, + "rewards/rejected": -0.5918914675712585, + "step": 2780 + }, + { + "epoch": 0.16, + "learning_rate": 9.551251237161164e-08, + "logits/chosen": -2.234114170074463, + "logits/rejected": -2.2337119579315186, + "logps/chosen": -26.74988555908203, + "logps/rejected": -40.60676574707031, + "loss": 0.7461, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.15152454376220703, + "rewards/margins": -0.04413394629955292, + "rewards/rejected": -0.10739059746265411, + "step": 2781 + }, + { + "epoch": 0.16, + "learning_rate": 9.550860946544448e-08, + "logits/chosen": -2.1267666816711426, + "logits/rejected": -2.1046347618103027, + "logps/chosen": -12.496081352233887, + "logps/rejected": -162.06951904296875, + "loss": 0.4691, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015706254169344902, + "rewards/margins": 1.299428939819336, + "rewards/rejected": -1.3151352405548096, + "step": 2782 + }, + { + "epoch": 0.16, + "learning_rate": 9.550470494258891e-08, + "logits/chosen": -2.1116621494293213, + "logits/rejected": -2.10915470123291, + "logps/chosen": -190.462890625, + "logps/rejected": -455.78485107421875, + "loss": 0.2137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9764953851699829, + "rewards/margins": 1.5438294410705566, + "rewards/rejected": -0.567333996295929, + "step": 2783 + }, + { + "epoch": 0.16, + "learning_rate": 9.550079880318362e-08, + "logits/chosen": -2.2437336444854736, + "logits/rejected": -2.2116565704345703, + "logps/chosen": -57.67067337036133, + "logps/rejected": -227.15597534179688, + "loss": 0.5607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0017108917236328125, + "rewards/margins": 0.5521808862686157, + "rewards/rejected": -0.5504699945449829, + "step": 2784 + }, + { + "epoch": 0.16, + "learning_rate": 9.54968910473674e-08, + "logits/chosen": -1.8224353790283203, + "logits/rejected": -1.8055025339126587, + "logps/chosen": -178.98837280273438, + "logps/rejected": -217.86944580078125, + "loss": 0.4507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.902386486530304, + "rewards/margins": 0.08004152774810791, + "rewards/rejected": 0.822344958782196, + "step": 2785 + }, + { + "epoch": 0.16, + "learning_rate": 9.549298167527906e-08, + "logits/chosen": -2.0713086128234863, + "logits/rejected": -2.0719492435455322, + "logps/chosen": -18.126541137695312, + "logps/rejected": -78.38607788085938, + "loss": 0.6905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14817295968532562, + "rewards/margins": 0.13065622746944427, + "rewards/rejected": -0.2788291871547699, + "step": 2786 + }, + { + "epoch": 0.16, + "learning_rate": 9.548907068705747e-08, + "logits/chosen": -2.2786285877227783, + "logits/rejected": -2.260286808013916, + "logps/chosen": -0.02161705680191517, + "logps/rejected": -277.2630310058594, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000837452185805887, + "rewards/margins": 1.9069933891296387, + "rewards/rejected": -1.907830834388733, + "step": 2787 + }, + { + "epoch": 0.16, + "learning_rate": 9.548515808284158e-08, + "logits/chosen": -2.1737632751464844, + "logits/rejected": -2.1792032718658447, + "logps/chosen": -18.752395629882812, + "logps/rejected": -125.35235595703125, + "loss": 0.5585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015437888912856579, + "rewards/margins": 0.658708393573761, + "rewards/rejected": -0.674146294593811, + "step": 2788 + }, + { + "epoch": 0.16, + "learning_rate": 9.54812438627704e-08, + "logits/chosen": -2.1923296451568604, + "logits/rejected": -2.189948320388794, + "logps/chosen": -8.5983304977417, + "logps/rejected": -82.80194854736328, + "loss": 0.5505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08947868645191193, + "rewards/margins": 0.5760288238525391, + "rewards/rejected": -0.48655015230178833, + "step": 2789 + }, + { + "epoch": 0.16, + "learning_rate": 9.547732802698296e-08, + "logits/chosen": -2.014566659927368, + "logits/rejected": -2.0179531574249268, + "logps/chosen": -52.374168395996094, + "logps/rejected": -213.38087463378906, + "loss": 0.6306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20695725083351135, + "rewards/margins": 0.5727317333221436, + "rewards/rejected": -0.7796890139579773, + "step": 2790 + }, + { + "epoch": 0.16, + "learning_rate": 9.547341057561837e-08, + "logits/chosen": -1.9727327823638916, + "logits/rejected": -1.9378318786621094, + "logps/chosen": -259.3572082519531, + "logps/rejected": -443.29931640625, + "loss": 0.2543, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.063104271888733, + "rewards/margins": 1.2389404773712158, + "rewards/rejected": -0.17583619058132172, + "step": 2791 + }, + { + "epoch": 0.16, + "learning_rate": 9.546949150881579e-08, + "logits/chosen": -2.146268367767334, + "logits/rejected": -2.139850378036499, + "logps/chosen": -72.62499237060547, + "logps/rejected": -209.31027221679688, + "loss": 0.5475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1585128754377365, + "rewards/margins": 0.4787856936454773, + "rewards/rejected": -0.320272833108902, + "step": 2792 + }, + { + "epoch": 0.16, + "learning_rate": 9.546557082671447e-08, + "logits/chosen": -2.142721652984619, + "logits/rejected": -2.1423161029815674, + "logps/chosen": -92.35987854003906, + "logps/rejected": -161.2505340576172, + "loss": 0.5239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09851150959730148, + "rewards/margins": 0.38281023502349854, + "rewards/rejected": -0.28429871797561646, + "step": 2793 + }, + { + "epoch": 0.16, + "learning_rate": 9.546164852945367e-08, + "logits/chosen": -2.1453633308410645, + "logits/rejected": -2.138453960418701, + "logps/chosen": -224.27322387695312, + "logps/rejected": -298.92242431640625, + "loss": 0.4427, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0694488286972046, + "rewards/margins": 0.05087578296661377, + "rewards/rejected": 1.0185730457305908, + "step": 2794 + }, + { + "epoch": 0.16, + "learning_rate": 9.545772461717275e-08, + "logits/chosen": -2.1523501873016357, + "logits/rejected": -2.1947247982025146, + "logps/chosen": -275.2982177734375, + "logps/rejected": -341.1942138671875, + "loss": 0.4221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.555645763874054, + "rewards/margins": 0.39570921659469604, + "rewards/rejected": 0.15993653237819672, + "step": 2795 + }, + { + "epoch": 0.16, + "learning_rate": 9.54537990900111e-08, + "logits/chosen": -2.118039608001709, + "logits/rejected": -2.106037139892578, + "logps/chosen": -14.389105796813965, + "logps/rejected": -230.5517578125, + "loss": 0.5104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06782598793506622, + "rewards/margins": 1.151924967765808, + "rewards/rejected": -1.219751000404358, + "step": 2796 + }, + { + "epoch": 0.16, + "learning_rate": 9.544987194810814e-08, + "logits/chosen": -2.2214274406433105, + "logits/rejected": -2.220465660095215, + "logps/chosen": -10.985032081604004, + "logps/rejected": -84.0333251953125, + "loss": 0.5626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03774576261639595, + "rewards/margins": 0.737699031829834, + "rewards/rejected": -0.7754448056221008, + "step": 2797 + }, + { + "epoch": 0.16, + "learning_rate": 9.544594319160342e-08, + "logits/chosen": -2.188410758972168, + "logits/rejected": -2.1722960472106934, + "logps/chosen": -26.084842681884766, + "logps/rejected": -274.9157409667969, + "loss": 0.5442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01592559926211834, + "rewards/margins": 0.6014602780342102, + "rewards/rejected": -0.5855346918106079, + "step": 2798 + }, + { + "epoch": 0.16, + "learning_rate": 9.544201282063651e-08, + "logits/chosen": -2.239137649536133, + "logits/rejected": -2.2146289348602295, + "logps/chosen": -13.062225341796875, + "logps/rejected": -214.26412963867188, + "loss": 0.723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22841282188892365, + "rewards/margins": 0.04397030174732208, + "rewards/rejected": -0.2723831236362457, + "step": 2799 + }, + { + "epoch": 0.16, + "learning_rate": 9.543808083534701e-08, + "logits/chosen": -2.218148708343506, + "logits/rejected": -2.192315101623535, + "logps/chosen": -38.1876220703125, + "logps/rejected": -105.2751693725586, + "loss": 0.5944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10237999260425568, + "rewards/margins": 0.2716945707798004, + "rewards/rejected": -0.16931457817554474, + "step": 2800 + }, + { + "epoch": 0.16, + "learning_rate": 9.543414723587461e-08, + "logits/chosen": -1.968985676765442, + "logits/rejected": -1.934265375137329, + "logps/chosen": -250.26580810546875, + "logps/rejected": -436.15216064453125, + "loss": 0.3141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.699591040611267, + "rewards/margins": 0.4200713634490967, + "rewards/rejected": 1.2795196771621704, + "step": 2801 + }, + { + "epoch": 0.16, + "learning_rate": 9.543021202235908e-08, + "logits/chosen": -2.2546167373657227, + "logits/rejected": -2.2431368827819824, + "logps/chosen": -14.88729190826416, + "logps/rejected": -156.1417999267578, + "loss": 0.5514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2102813720703125, + "rewards/margins": 0.9821014404296875, + "rewards/rejected": -1.1923828125, + "step": 2802 + }, + { + "epoch": 0.16, + "learning_rate": 9.542627519494019e-08, + "logits/chosen": -1.9381873607635498, + "logits/rejected": -1.9343847036361694, + "logps/chosen": -74.57550048828125, + "logps/rejected": -165.03643798828125, + "loss": 0.6111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0857696533203125, + "rewards/margins": 0.5516800284385681, + "rewards/rejected": -0.6374496817588806, + "step": 2803 + }, + { + "epoch": 0.16, + "learning_rate": 9.542233675375779e-08, + "logits/chosen": -2.0580170154571533, + "logits/rejected": -2.059088945388794, + "logps/chosen": -19.499202728271484, + "logps/rejected": -215.27871704101562, + "loss": 0.4179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1113048568367958, + "rewards/margins": 1.4647060632705688, + "rewards/rejected": -1.3534011840820312, + "step": 2804 + }, + { + "epoch": 0.16, + "learning_rate": 9.541839669895182e-08, + "logits/chosen": -1.8344987630844116, + "logits/rejected": -1.817571759223938, + "logps/chosen": -264.42742919921875, + "logps/rejected": -445.00146484375, + "loss": 0.2753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2134430408477783, + "rewards/margins": 1.0230255126953125, + "rewards/rejected": 0.19041748344898224, + "step": 2805 + }, + { + "epoch": 0.16, + "learning_rate": 9.541445503066221e-08, + "logits/chosen": -1.9645200967788696, + "logits/rejected": -1.9740091562271118, + "logps/chosen": -31.158931732177734, + "logps/rejected": -102.23748016357422, + "loss": 0.6064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07468586415052414, + "rewards/margins": 0.42825525999069214, + "rewards/rejected": -0.5029411315917969, + "step": 2806 + }, + { + "epoch": 0.16, + "learning_rate": 9.541051174902903e-08, + "logits/chosen": -2.1300339698791504, + "logits/rejected": -2.128767251968384, + "logps/chosen": -15.908801078796387, + "logps/rejected": -126.90230560302734, + "loss": 0.4447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012935543432831764, + "rewards/margins": 1.4859702587127686, + "rewards/rejected": -1.4730347394943237, + "step": 2807 + }, + { + "epoch": 0.16, + "learning_rate": 9.540656685419235e-08, + "logits/chosen": -2.102099657058716, + "logits/rejected": -2.0944554805755615, + "logps/chosen": -13.470985412597656, + "logps/rejected": -122.73832702636719, + "loss": 0.5093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01495828665792942, + "rewards/margins": 0.9559661149978638, + "rewards/rejected": -0.9709243774414062, + "step": 2808 + }, + { + "epoch": 0.16, + "learning_rate": 9.540262034629229e-08, + "logits/chosen": -2.2465732097625732, + "logits/rejected": -2.239132881164551, + "logps/chosen": -15.501830101013184, + "logps/rejected": -106.9319839477539, + "loss": 0.5363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2668723165988922, + "rewards/margins": 0.46290579438209534, + "rewards/rejected": -0.19603347778320312, + "step": 2809 + }, + { + "epoch": 0.16, + "learning_rate": 9.539867222546907e-08, + "logits/chosen": -2.247044563293457, + "logits/rejected": -2.198634147644043, + "logps/chosen": -200.71730041503906, + "logps/rejected": -402.0930480957031, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7736557126045227, + "rewards/margins": 1.026057481765747, + "rewards/rejected": -0.252401739358902, + "step": 2810 + }, + { + "epoch": 0.16, + "learning_rate": 9.539472249186295e-08, + "logits/chosen": -2.004396915435791, + "logits/rejected": -1.9970884323120117, + "logps/chosen": -69.95631408691406, + "logps/rejected": -148.32667541503906, + "loss": 0.8184, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.398123174905777, + "rewards/margins": -0.11803588271141052, + "rewards/rejected": -0.28008729219436646, + "step": 2811 + }, + { + "epoch": 0.16, + "learning_rate": 9.539077114561424e-08, + "logits/chosen": -2.1143336296081543, + "logits/rejected": -2.10908842086792, + "logps/chosen": -241.91749572753906, + "logps/rejected": -297.9881286621094, + "loss": 0.3985, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1433883905410767, + "rewards/margins": 0.22376865148544312, + "rewards/rejected": 0.9196197390556335, + "step": 2812 + }, + { + "epoch": 0.16, + "learning_rate": 9.538681818686331e-08, + "logits/chosen": -2.0298173427581787, + "logits/rejected": -2.0264892578125, + "logps/chosen": -39.540470123291016, + "logps/rejected": -215.6343231201172, + "loss": 0.515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018497466808184981, + "rewards/margins": 0.8624553680419922, + "rewards/rejected": -0.8643051385879517, + "step": 2813 + }, + { + "epoch": 0.16, + "learning_rate": 9.538286361575058e-08, + "logits/chosen": -2.20725154876709, + "logits/rejected": -2.1992123126983643, + "logps/chosen": -42.2804069519043, + "logps/rejected": -237.07461547851562, + "loss": 0.626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18204155564308167, + "rewards/margins": 0.5403003692626953, + "rewards/rejected": -0.7223419547080994, + "step": 2814 + }, + { + "epoch": 0.16, + "learning_rate": 9.537890743241654e-08, + "logits/chosen": -2.0899038314819336, + "logits/rejected": -2.085359811782837, + "logps/chosen": -25.259174346923828, + "logps/rejected": -220.281982421875, + "loss": 0.548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10061569511890411, + "rewards/margins": 0.5648781061172485, + "rewards/rejected": -0.4642623960971832, + "step": 2815 + }, + { + "epoch": 0.16, + "learning_rate": 9.537494963700175e-08, + "logits/chosen": -2.0667669773101807, + "logits/rejected": -2.0691444873809814, + "logps/chosen": -0.19865740835666656, + "logps/rejected": -129.4078369140625, + "loss": 0.5744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005685797426849604, + "rewards/margins": 0.5519534945487976, + "rewards/rejected": -0.5576393008232117, + "step": 2816 + }, + { + "epoch": 0.16, + "learning_rate": 9.537099022964678e-08, + "logits/chosen": -2.225391387939453, + "logits/rejected": -2.221500873565674, + "logps/chosen": -8.467601776123047, + "logps/rejected": -160.16204833984375, + "loss": 0.6262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031682015396654606, + "rewards/margins": 0.3091990649700165, + "rewards/rejected": -0.3123672604560852, + "step": 2817 + }, + { + "epoch": 0.16, + "learning_rate": 9.536702921049233e-08, + "logits/chosen": -2.013803005218506, + "logits/rejected": -2.0046393871307373, + "logps/chosen": -235.24501037597656, + "logps/rejected": -377.00152587890625, + "loss": 0.2275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0759124755859375, + "rewards/margins": 1.3233795166015625, + "rewards/rejected": -0.247467041015625, + "step": 2818 + }, + { + "epoch": 0.16, + "learning_rate": 9.536306657967905e-08, + "logits/chosen": -1.94035804271698, + "logits/rejected": -1.9533867835998535, + "logps/chosen": -232.24424743652344, + "logps/rejected": -264.463623046875, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3344238996505737, + "rewards/margins": 0.6664215922355652, + "rewards/rejected": 0.6680023074150085, + "step": 2819 + }, + { + "epoch": 0.16, + "learning_rate": 9.535910233734779e-08, + "logits/chosen": -2.0950443744659424, + "logits/rejected": -2.0901942253112793, + "logps/chosen": -176.44808959960938, + "logps/rejected": -281.177978515625, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9550995230674744, + "rewards/margins": 0.532958984375, + "rewards/rejected": 0.422140508890152, + "step": 2820 + }, + { + "epoch": 0.16, + "learning_rate": 9.535513648363931e-08, + "logits/chosen": -2.1077053546905518, + "logits/rejected": -2.0562233924865723, + "logps/chosen": -133.51644897460938, + "logps/rejected": -257.1626281738281, + "loss": 0.5433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21952514350414276, + "rewards/margins": 0.3823394775390625, + "rewards/rejected": -0.16281433403491974, + "step": 2821 + }, + { + "epoch": 0.16, + "learning_rate": 9.535116901869453e-08, + "logits/chosen": -2.018456220626831, + "logits/rejected": -2.016248941421509, + "logps/chosen": -15.938655853271484, + "logps/rejected": -216.79501342773438, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30745944380760193, + "rewards/margins": 1.8270654678344727, + "rewards/rejected": -1.5196059942245483, + "step": 2822 + }, + { + "epoch": 0.16, + "learning_rate": 9.53471999426544e-08, + "logits/chosen": -2.159128427505493, + "logits/rejected": -2.1559391021728516, + "logps/chosen": -0.787024736404419, + "logps/rejected": -99.10847473144531, + "loss": 0.5696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01797194592654705, + "rewards/margins": 0.6008627414703369, + "rewards/rejected": -0.6188346743583679, + "step": 2823 + }, + { + "epoch": 0.16, + "learning_rate": 9.53432292556599e-08, + "logits/chosen": -1.9080311059951782, + "logits/rejected": -1.8878731727600098, + "logps/chosen": -89.95183563232422, + "logps/rejected": -255.5301055908203, + "loss": 0.7, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5227012634277344, + "rewards/margins": 0.578203558921814, + "rewards/rejected": -1.1009048223495483, + "step": 2824 + }, + { + "epoch": 0.16, + "learning_rate": 9.533925695785211e-08, + "logits/chosen": -2.270935297012329, + "logits/rejected": -2.263455390930176, + "logps/chosen": -5.857910633087158, + "logps/rejected": -114.918701171875, + "loss": 0.6231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0036941529251635075, + "rewards/margins": 0.313558965921402, + "rewards/rejected": -0.3098648190498352, + "step": 2825 + }, + { + "epoch": 0.16, + "learning_rate": 9.533528304937213e-08, + "logits/chosen": -2.10422682762146, + "logits/rejected": -2.1411337852478027, + "logps/chosen": -283.0480651855469, + "logps/rejected": -370.86016845703125, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1650177240371704, + "rewards/margins": 0.6439270377159119, + "rewards/rejected": 0.5210906863212585, + "step": 2826 + }, + { + "epoch": 0.16, + "learning_rate": 9.533130753036114e-08, + "logits/chosen": -2.0234427452087402, + "logits/rejected": -1.9644231796264648, + "logps/chosen": -184.58224487304688, + "logps/rejected": -300.3048095703125, + "loss": 0.4426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5772308707237244, + "rewards/margins": 0.529797375202179, + "rewards/rejected": 0.04743347316980362, + "step": 2827 + }, + { + "epoch": 0.16, + "learning_rate": 9.532733040096037e-08, + "logits/chosen": -2.102050304412842, + "logits/rejected": -2.0215446949005127, + "logps/chosen": -436.8904113769531, + "logps/rejected": -702.870361328125, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7937836050987244, + "rewards/margins": 1.1666901111602783, + "rewards/rejected": -0.37290650606155396, + "step": 2828 + }, + { + "epoch": 0.16, + "learning_rate": 9.532335166131109e-08, + "logits/chosen": -2.056072235107422, + "logits/rejected": -2.0501046180725098, + "logps/chosen": -231.65676879882812, + "logps/rejected": -265.60931396484375, + "loss": 0.4688, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.440759301185608, + "rewards/margins": -0.14616703987121582, + "rewards/rejected": 1.5869263410568237, + "step": 2829 + }, + { + "epoch": 0.16, + "learning_rate": 9.531937131155467e-08, + "logits/chosen": -2.1689271926879883, + "logits/rejected": -2.1406214237213135, + "logps/chosen": -81.62118530273438, + "logps/rejected": -290.7734375, + "loss": 0.3945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08378372341394424, + "rewards/margins": 1.9115761518478394, + "rewards/rejected": -1.8277924060821533, + "step": 2830 + }, + { + "epoch": 0.16, + "learning_rate": 9.53153893518325e-08, + "logits/chosen": -1.9281665086746216, + "logits/rejected": -1.9324449300765991, + "logps/chosen": -192.58465576171875, + "logps/rejected": -295.92901611328125, + "loss": 0.4646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7358032464981079, + "rewards/margins": 0.177886962890625, + "rewards/rejected": 0.5579162836074829, + "step": 2831 + }, + { + "epoch": 0.16, + "learning_rate": 9.531140578228603e-08, + "logits/chosen": -2.015129327774048, + "logits/rejected": -2.0006117820739746, + "logps/chosen": -198.96994018554688, + "logps/rejected": -301.34588623046875, + "loss": 0.3589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7478485107421875, + "rewards/margins": 0.7269134521484375, + "rewards/rejected": 0.02093505859375, + "step": 2832 + }, + { + "epoch": 0.16, + "learning_rate": 9.530742060305679e-08, + "logits/chosen": -2.252920150756836, + "logits/rejected": -2.259082078933716, + "logps/chosen": -204.72589111328125, + "logps/rejected": -381.44183349609375, + "loss": 0.1759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1101120710372925, + "rewards/margins": 1.9628982543945312, + "rewards/rejected": -0.8527862429618835, + "step": 2833 + }, + { + "epoch": 0.16, + "learning_rate": 9.530343381428635e-08, + "logits/chosen": -2.2448933124542236, + "logits/rejected": -2.238609552383423, + "logps/chosen": -0.00015270222502294928, + "logps/rejected": -172.290771484375, + "loss": 0.4832, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4066492894926341e-06, + "rewards/margins": 1.1503266096115112, + "rewards/rejected": -1.1503280401229858, + "step": 2834 + }, + { + "epoch": 0.16, + "learning_rate": 9.529944541611634e-08, + "logits/chosen": -2.135704755783081, + "logits/rejected": -2.1411893367767334, + "logps/chosen": -237.63035583496094, + "logps/rejected": -285.0354309082031, + "loss": 0.2751, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3974014520645142, + "rewards/margins": 0.7473190426826477, + "rewards/rejected": 0.6500824093818665, + "step": 2835 + }, + { + "epoch": 0.17, + "learning_rate": 9.529545540868844e-08, + "logits/chosen": -1.9850050210952759, + "logits/rejected": -1.983670949935913, + "logps/chosen": -11.006478309631348, + "logps/rejected": -204.92398071289062, + "loss": 0.6041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031793784350156784, + "rewards/margins": 0.43037357926368713, + "rewards/rejected": -0.462167352437973, + "step": 2836 + }, + { + "epoch": 0.17, + "learning_rate": 9.52914637921444e-08, + "logits/chosen": -2.086012125015259, + "logits/rejected": -2.0817058086395264, + "logps/chosen": -31.377365112304688, + "logps/rejected": -220.9166259765625, + "loss": 0.4157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2461780607700348, + "rewards/margins": 1.1622791290283203, + "rewards/rejected": -0.9161010980606079, + "step": 2837 + }, + { + "epoch": 0.17, + "learning_rate": 9.528747056662602e-08, + "logits/chosen": -2.298424005508423, + "logits/rejected": -2.2890703678131104, + "logps/chosen": -0.27121150493621826, + "logps/rejected": -237.56271362304688, + "loss": 0.5209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001526030944660306, + "rewards/margins": 0.8867067098617554, + "rewards/rejected": -0.885180652141571, + "step": 2838 + }, + { + "epoch": 0.17, + "learning_rate": 9.528347573227516e-08, + "logits/chosen": -2.097188711166382, + "logits/rejected": -2.0791103839874268, + "logps/chosen": -43.28715896606445, + "logps/rejected": -317.7306823730469, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09260597079992294, + "rewards/margins": 1.7273434400558472, + "rewards/rejected": -1.8199493885040283, + "step": 2839 + }, + { + "epoch": 0.17, + "learning_rate": 9.527947928923374e-08, + "logits/chosen": -1.9518671035766602, + "logits/rejected": -1.96056067943573, + "logps/chosen": -275.6499328613281, + "logps/rejected": -338.03955078125, + "loss": 0.1424, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6652740240097046, + "rewards/margins": 1.5882354974746704, + "rewards/rejected": 0.07703857868909836, + "step": 2840 + }, + { + "epoch": 0.17, + "learning_rate": 9.527548123764373e-08, + "logits/chosen": -2.1797118186950684, + "logits/rejected": -2.170226573944092, + "logps/chosen": -28.546056747436523, + "logps/rejected": -77.67625427246094, + "loss": 0.6437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.032637786120176315, + "rewards/margins": 0.19136887788772583, + "rewards/rejected": -0.15873108804225922, + "step": 2841 + }, + { + "epoch": 0.17, + "learning_rate": 9.527148157764716e-08, + "logits/chosen": -2.10461163520813, + "logits/rejected": -2.0953190326690674, + "logps/chosen": -2.6745564937591553, + "logps/rejected": -74.85418701171875, + "loss": 0.6688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.059119679033756256, + "rewards/margins": 0.13521626591682434, + "rewards/rejected": -0.1943359375, + "step": 2842 + }, + { + "epoch": 0.17, + "learning_rate": 9.526748030938611e-08, + "logits/chosen": -2.325733184814453, + "logits/rejected": -2.3180043697357178, + "logps/chosen": -2.5749102860572748e-05, + "logps/rejected": -169.18862915039062, + "loss": 0.4526, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9073432611094177e-07, + "rewards/margins": 1.4749051332473755, + "rewards/rejected": -1.4749053716659546, + "step": 2843 + }, + { + "epoch": 0.17, + "learning_rate": 9.526347743300275e-08, + "logits/chosen": -2.1998257637023926, + "logits/rejected": -2.1951937675476074, + "logps/chosen": -3.659678259282373e-05, + "logps/rejected": -146.62608337402344, + "loss": 0.4444, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9338047486126015e-07, + "rewards/margins": 1.5280423164367676, + "rewards/rejected": -1.5280426740646362, + "step": 2844 + }, + { + "epoch": 0.17, + "learning_rate": 9.525947294863926e-08, + "logits/chosen": -2.211799383163452, + "logits/rejected": -2.2009639739990234, + "logps/chosen": -29.147594451904297, + "logps/rejected": -261.53912353515625, + "loss": 0.5081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03367958217859268, + "rewards/margins": 1.0190547704696655, + "rewards/rejected": -1.052734375, + "step": 2845 + }, + { + "epoch": 0.17, + "learning_rate": 9.525546685643788e-08, + "logits/chosen": -2.2690186500549316, + "logits/rejected": -2.2602591514587402, + "logps/chosen": -82.09590911865234, + "logps/rejected": -284.05963134765625, + "loss": 0.4631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14539261162281036, + "rewards/margins": 1.0182685852050781, + "rewards/rejected": -0.872875988483429, + "step": 2846 + }, + { + "epoch": 0.17, + "learning_rate": 9.525145915654099e-08, + "logits/chosen": -2.0681493282318115, + "logits/rejected": -2.0651564598083496, + "logps/chosen": -30.782724380493164, + "logps/rejected": -213.9972686767578, + "loss": 0.4637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21397839486598969, + "rewards/margins": 1.1226575374603271, + "rewards/rejected": -0.908679187297821, + "step": 2847 + }, + { + "epoch": 0.17, + "learning_rate": 9.524744984909087e-08, + "logits/chosen": -1.9130699634552002, + "logits/rejected": -1.9216086864471436, + "logps/chosen": -75.87942504882812, + "logps/rejected": -150.88198852539062, + "loss": 0.8482, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4872283935546875, + "rewards/margins": -0.05721282958984375, + "rewards/rejected": -0.43001556396484375, + "step": 2848 + }, + { + "epoch": 0.17, + "learning_rate": 9.524343893423004e-08, + "logits/chosen": -2.040358304977417, + "logits/rejected": -2.0117335319519043, + "logps/chosen": -244.81817626953125, + "logps/rejected": -376.96258544921875, + "loss": 0.4015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5587555170059204, + "rewards/margins": 0.9668823480606079, + "rewards/rejected": -0.4081268310546875, + "step": 2849 + }, + { + "epoch": 0.17, + "learning_rate": 9.523942641210094e-08, + "logits/chosen": -2.091050386428833, + "logits/rejected": -2.0838797092437744, + "logps/chosen": -61.668701171875, + "logps/rejected": -128.00424194335938, + "loss": 0.6237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003267669817432761, + "rewards/margins": 0.2553443908691406, + "rewards/rejected": -0.2586120665073395, + "step": 2850 + }, + { + "epoch": 0.17, + "learning_rate": 9.523541228284612e-08, + "logits/chosen": -2.0190210342407227, + "logits/rejected": -1.997961401939392, + "logps/chosen": -51.993865966796875, + "logps/rejected": -213.021484375, + "loss": 0.5537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06586380302906036, + "rewards/margins": 0.5389442443847656, + "rewards/rejected": -0.47308045625686646, + "step": 2851 + }, + { + "epoch": 0.17, + "learning_rate": 9.523139654660818e-08, + "logits/chosen": -2.0236470699310303, + "logits/rejected": -1.9946905374526978, + "logps/chosen": -161.2978973388672, + "logps/rejected": -414.59259033203125, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04828491434454918, + "rewards/margins": 2.233151435852051, + "rewards/rejected": -2.184866428375244, + "step": 2852 + }, + { + "epoch": 0.17, + "learning_rate": 9.52273792035298e-08, + "logits/chosen": -2.0712063312530518, + "logits/rejected": -2.0789833068847656, + "logps/chosen": -190.00872802734375, + "logps/rejected": -298.25732421875, + "loss": 0.4685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48298341035842896, + "rewards/margins": 0.46142274141311646, + "rewards/rejected": 0.0215606689453125, + "step": 2853 + }, + { + "epoch": 0.17, + "learning_rate": 9.522336025375367e-08, + "logits/chosen": -2.165205240249634, + "logits/rejected": -2.181365728378296, + "logps/chosen": -251.48629760742188, + "logps/rejected": -393.34588623046875, + "loss": 0.1556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.336950659751892, + "rewards/margins": 1.8314788341522217, + "rewards/rejected": -0.494528204202652, + "step": 2854 + }, + { + "epoch": 0.17, + "learning_rate": 9.521933969742258e-08, + "logits/chosen": -1.9591779708862305, + "logits/rejected": -1.9712591171264648, + "logps/chosen": -172.54052734375, + "logps/rejected": -378.1244201660156, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9111892580986023, + "rewards/margins": 0.9140273928642273, + "rewards/rejected": -0.002838134765625, + "step": 2855 + }, + { + "epoch": 0.17, + "learning_rate": 9.521531753467934e-08, + "logits/chosen": -2.1638100147247314, + "logits/rejected": -2.169783115386963, + "logps/chosen": -5.018494129180908, + "logps/rejected": -73.10760498046875, + "loss": 0.6347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.073159359395504, + "rewards/margins": 0.3021358847618103, + "rewards/rejected": -0.3752952516078949, + "step": 2856 + }, + { + "epoch": 0.17, + "learning_rate": 9.521129376566685e-08, + "logits/chosen": -2.187072515487671, + "logits/rejected": -2.190152645111084, + "logps/chosen": -12.169820785522461, + "logps/rejected": -273.2433166503906, + "loss": 0.4879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03540239483118057, + "rewards/margins": 1.0495585203170776, + "rewards/rejected": -1.0849609375, + "step": 2857 + }, + { + "epoch": 0.17, + "learning_rate": 9.520726839052807e-08, + "logits/chosen": -2.1366777420043945, + "logits/rejected": -2.1246893405914307, + "logps/chosen": -21.582963943481445, + "logps/rejected": -213.8984375, + "loss": 0.4672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015550613403320312, + "rewards/margins": 1.3288707733154297, + "rewards/rejected": -1.34442138671875, + "step": 2858 + }, + { + "epoch": 0.17, + "learning_rate": 9.520324140940596e-08, + "logits/chosen": -1.9454752206802368, + "logits/rejected": -1.9468637704849243, + "logps/chosen": -0.00014066310541238636, + "logps/rejected": -156.29769897460938, + "loss": 0.5223, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4092213354597334e-06, + "rewards/margins": 0.9016018509864807, + "rewards/rejected": -0.9016052484512329, + "step": 2859 + }, + { + "epoch": 0.17, + "learning_rate": 9.519921282244363e-08, + "logits/chosen": -2.112600326538086, + "logits/rejected": -2.1080996990203857, + "logps/chosen": -12.237022399902344, + "logps/rejected": -178.55726623535156, + "loss": 0.4233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07467641681432724, + "rewards/margins": 1.5470778942108154, + "rewards/rejected": -1.47240149974823, + "step": 2860 + }, + { + "epoch": 0.17, + "learning_rate": 9.519518262978415e-08, + "logits/chosen": -1.997746229171753, + "logits/rejected": -1.9964354038238525, + "logps/chosen": -287.51214599609375, + "logps/rejected": -319.25384521484375, + "loss": 0.4391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7155700922012329, + "rewards/margins": 0.2208252251148224, + "rewards/rejected": 0.4947448670864105, + "step": 2861 + }, + { + "epoch": 0.17, + "learning_rate": 9.51911508315707e-08, + "logits/chosen": -2.1705915927886963, + "logits/rejected": -2.149315595626831, + "logps/chosen": -314.458251953125, + "logps/rejected": -549.3217163085938, + "loss": 0.2347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2129944562911987, + "rewards/margins": 1.1758301258087158, + "rewards/rejected": 0.03716430813074112, + "step": 2862 + }, + { + "epoch": 0.17, + "learning_rate": 9.518711742794654e-08, + "logits/chosen": -2.0323326587677, + "logits/rejected": -2.047138214111328, + "logps/chosen": -219.19113159179688, + "logps/rejected": -230.7661590576172, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2676467895507812, + "rewards/margins": 0.576916515827179, + "rewards/rejected": 0.6907302737236023, + "step": 2863 + }, + { + "epoch": 0.17, + "learning_rate": 9.518308241905493e-08, + "logits/chosen": -2.1124427318573, + "logits/rejected": -2.1083927154541016, + "logps/chosen": -17.562868118286133, + "logps/rejected": -131.1878662109375, + "loss": 0.5603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006251526065170765, + "rewards/margins": 0.6594482660293579, + "rewards/rejected": -0.6656997799873352, + "step": 2864 + }, + { + "epoch": 0.17, + "learning_rate": 9.517904580503921e-08, + "logits/chosen": -2.253051280975342, + "logits/rejected": -2.2515461444854736, + "logps/chosen": -0.00017916486831381917, + "logps/rejected": -77.80065155029297, + "loss": 0.5229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2992589972782298e-06, + "rewards/margins": 0.8597314953804016, + "rewards/rejected": -0.8597328066825867, + "step": 2865 + }, + { + "epoch": 0.17, + "learning_rate": 9.517500758604279e-08, + "logits/chosen": -2.143278121948242, + "logits/rejected": -2.1428518295288086, + "logps/chosen": -33.753604888916016, + "logps/rejected": -176.3557586669922, + "loss": 0.4295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023682404309511185, + "rewards/margins": 1.1462516784667969, + "rewards/rejected": -1.1225693225860596, + "step": 2866 + }, + { + "epoch": 0.17, + "learning_rate": 9.517096776220912e-08, + "logits/chosen": -2.1451339721679688, + "logits/rejected": -2.126560688018799, + "logps/chosen": -176.41395568847656, + "logps/rejected": -354.74285888671875, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9103744626045227, + "rewards/margins": 0.7220138311386108, + "rewards/rejected": 0.18836060166358948, + "step": 2867 + }, + { + "epoch": 0.17, + "learning_rate": 9.516692633368172e-08, + "logits/chosen": -2.0290985107421875, + "logits/rejected": -2.0144522190093994, + "logps/chosen": -0.00012981485633645207, + "logps/rejected": -125.44501495361328, + "loss": 0.4993, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1449231724091078e-07, + "rewards/margins": 1.0305407047271729, + "rewards/rejected": -1.0305404663085938, + "step": 2868 + }, + { + "epoch": 0.17, + "learning_rate": 9.516288330060417e-08, + "logits/chosen": -1.997126579284668, + "logits/rejected": -1.9977120161056519, + "logps/chosen": -33.63981246948242, + "logps/rejected": -151.28176879882812, + "loss": 0.509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.047969818115234375, + "rewards/margins": 0.7501487731933594, + "rewards/rejected": -0.702178955078125, + "step": 2869 + }, + { + "epoch": 0.17, + "learning_rate": 9.515883866312009e-08, + "logits/chosen": -2.139312982559204, + "logits/rejected": -2.1418275833129883, + "logps/chosen": -19.477012634277344, + "logps/rejected": -66.94845581054688, + "loss": 0.6439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0511053092777729, + "rewards/margins": 0.14544963836669922, + "rewards/rejected": -0.09434433281421661, + "step": 2870 + }, + { + "epoch": 0.17, + "learning_rate": 9.515479242137317e-08, + "logits/chosen": -2.1871519088745117, + "logits/rejected": -2.209887742996216, + "logps/chosen": -219.87265014648438, + "logps/rejected": -294.8885498046875, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.965771496295929, + "rewards/margins": 0.8275512456893921, + "rewards/rejected": 0.13822022080421448, + "step": 2871 + }, + { + "epoch": 0.17, + "learning_rate": 9.515074457550714e-08, + "logits/chosen": -2.1875829696655273, + "logits/rejected": -2.1843881607055664, + "logps/chosen": -5.276768684387207, + "logps/rejected": -144.4088897705078, + "loss": 0.5623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03542137145996094, + "rewards/margins": 0.49407312273979187, + "rewards/rejected": -0.45865175127983093, + "step": 2872 + }, + { + "epoch": 0.17, + "learning_rate": 9.514669512566581e-08, + "logits/chosen": -2.0603010654449463, + "logits/rejected": -2.0723984241485596, + "logps/chosen": -268.0221252441406, + "logps/rejected": -468.84210205078125, + "loss": 0.1724, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3537079095840454, + "rewards/margins": 2.251077175140381, + "rewards/rejected": -0.897369384765625, + "step": 2873 + }, + { + "epoch": 0.17, + "learning_rate": 9.514264407199302e-08, + "logits/chosen": -2.205540657043457, + "logits/rejected": -2.200115919113159, + "logps/chosen": -7.767273426055908, + "logps/rejected": -147.22535705566406, + "loss": 0.5631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025656510144472122, + "rewards/margins": 0.6065990328788757, + "rewards/rejected": -0.6322555541992188, + "step": 2874 + }, + { + "epoch": 0.17, + "learning_rate": 9.513859141463271e-08, + "logits/chosen": -2.184972047805786, + "logits/rejected": -2.1865155696868896, + "logps/chosen": -34.681373596191406, + "logps/rejected": -201.3984375, + "loss": 0.5363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14283104240894318, + "rewards/margins": 0.5875530242919922, + "rewards/rejected": -0.4447219967842102, + "step": 2875 + }, + { + "epoch": 0.17, + "learning_rate": 9.513453715372883e-08, + "logits/chosen": -2.0471067428588867, + "logits/rejected": -2.0385074615478516, + "logps/chosen": -9.23860352486372e-05, + "logps/rejected": -186.53785705566406, + "loss": 0.4758, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.346105317968977e-08, + "rewards/margins": 1.2367002964019775, + "rewards/rejected": -1.236700415611267, + "step": 2876 + }, + { + "epoch": 0.17, + "learning_rate": 9.513048128942542e-08, + "logits/chosen": -1.897186040878296, + "logits/rejected": -1.896437168121338, + "logps/chosen": -73.30511474609375, + "logps/rejected": -378.9661865234375, + "loss": 0.4621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37083131074905396, + "rewards/margins": 2.44915771484375, + "rewards/rejected": -2.819988965988159, + "step": 2877 + }, + { + "epoch": 0.17, + "learning_rate": 9.512642382186655e-08, + "logits/chosen": -2.158515453338623, + "logits/rejected": -2.1423285007476807, + "logps/chosen": -0.00014197180280461907, + "logps/rejected": -130.44198608398438, + "loss": 0.6085, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3841712959438155e-07, + "rewards/margins": 0.3689664304256439, + "rewards/rejected": -0.368966668844223, + "step": 2878 + }, + { + "epoch": 0.17, + "learning_rate": 9.512236475119638e-08, + "logits/chosen": -1.9895422458648682, + "logits/rejected": -1.9651132822036743, + "logps/chosen": -190.5238494873047, + "logps/rejected": -424.52130126953125, + "loss": 0.433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9835419058799744, + "rewards/margins": 0.11838078498840332, + "rewards/rejected": 0.865161120891571, + "step": 2879 + }, + { + "epoch": 0.17, + "learning_rate": 9.511830407755907e-08, + "logits/chosen": -2.258911371231079, + "logits/rejected": -2.2608144283294678, + "logps/chosen": -20.084579467773438, + "logps/rejected": -132.83694458007812, + "loss": 0.686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3379964828491211, + "rewards/margins": 0.39320623874664307, + "rewards/rejected": -0.7312027215957642, + "step": 2880 + }, + { + "epoch": 0.17, + "learning_rate": 9.511424180109892e-08, + "logits/chosen": -2.11814546585083, + "logits/rejected": -2.1197617053985596, + "logps/chosen": -58.18082046508789, + "logps/rejected": -157.10638427734375, + "loss": 0.4739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26313096284866333, + "rewards/margins": 0.7168434262275696, + "rewards/rejected": -0.45371246337890625, + "step": 2881 + }, + { + "epoch": 0.17, + "learning_rate": 9.511017792196023e-08, + "logits/chosen": -1.9580837488174438, + "logits/rejected": -1.9411612749099731, + "logps/chosen": -273.4552917480469, + "logps/rejected": -414.21832275390625, + "loss": 0.2268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6193878650665283, + "rewards/margins": 0.9860382676124573, + "rewards/rejected": 0.633349597454071, + "step": 2882 + }, + { + "epoch": 0.17, + "learning_rate": 9.510611244028737e-08, + "logits/chosen": -1.8161969184875488, + "logits/rejected": -1.8089162111282349, + "logps/chosen": -48.96021270751953, + "logps/rejected": -172.45953369140625, + "loss": 0.6649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.050961684435606, + "rewards/margins": 0.11743584275245667, + "rewards/rejected": -0.16839753091335297, + "step": 2883 + }, + { + "epoch": 0.17, + "learning_rate": 9.510204535622474e-08, + "logits/chosen": -2.1824002265930176, + "logits/rejected": -2.1772525310516357, + "logps/chosen": -60.64925765991211, + "logps/rejected": -197.26687622070312, + "loss": 0.4453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07035255432128906, + "rewards/margins": 1.0444552898406982, + "rewards/rejected": -0.974102795124054, + "step": 2884 + }, + { + "epoch": 0.17, + "learning_rate": 9.509797666991684e-08, + "logits/chosen": -1.9164334535598755, + "logits/rejected": -1.955520749092102, + "logps/chosen": -309.68634033203125, + "logps/rejected": -342.97625732421875, + "loss": 0.1396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6013672351837158, + "rewards/margins": 1.6103577613830566, + "rewards/rejected": -0.008990478701889515, + "step": 2885 + }, + { + "epoch": 0.17, + "learning_rate": 9.509390638150823e-08, + "logits/chosen": -2.088893413543701, + "logits/rejected": -1.990678071975708, + "logps/chosen": -196.9296875, + "logps/rejected": -354.343017578125, + "loss": 0.2079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0437332391738892, + "rewards/margins": 1.8034530878067017, + "rewards/rejected": -0.7597198486328125, + "step": 2886 + }, + { + "epoch": 0.17, + "learning_rate": 9.508983449114346e-08, + "logits/chosen": -2.031067132949829, + "logits/rejected": -2.0155115127563477, + "logps/chosen": -72.24685668945312, + "logps/rejected": -214.62091064453125, + "loss": 0.5071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00952224712818861, + "rewards/margins": 1.1422767639160156, + "rewards/rejected": -1.1327545642852783, + "step": 2887 + }, + { + "epoch": 0.17, + "learning_rate": 9.508576099896723e-08, + "logits/chosen": -2.1056206226348877, + "logits/rejected": -2.1057755947113037, + "logps/chosen": -34.60710144042969, + "logps/rejected": -150.4024658203125, + "loss": 0.6677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40017566084861755, + "rewards/margins": 0.47628918290138245, + "rewards/rejected": -0.87646484375, + "step": 2888 + }, + { + "epoch": 0.17, + "learning_rate": 9.508168590512423e-08, + "logits/chosen": -2.0566422939300537, + "logits/rejected": -2.06490421295166, + "logps/chosen": -220.434814453125, + "logps/rejected": -429.38568115234375, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9992340207099915, + "rewards/margins": 0.9287781119346619, + "rewards/rejected": 0.07045593112707138, + "step": 2889 + }, + { + "epoch": 0.17, + "learning_rate": 9.507760920975921e-08, + "logits/chosen": -2.224111318588257, + "logits/rejected": -2.211991310119629, + "logps/chosen": -5.507232189178467, + "logps/rejected": -213.1439208984375, + "loss": 0.3783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06226940080523491, + "rewards/margins": 2.3787429332733154, + "rewards/rejected": -2.3164734840393066, + "step": 2890 + }, + { + "epoch": 0.17, + "learning_rate": 9.507353091301704e-08, + "logits/chosen": -2.2045891284942627, + "logits/rejected": -2.196254253387451, + "logps/chosen": -229.73745727539062, + "logps/rejected": -464.2891845703125, + "loss": 0.2901, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0895828008651733, + "rewards/margins": 0.9080032110214233, + "rewards/rejected": 0.18157958984375, + "step": 2891 + }, + { + "epoch": 0.17, + "learning_rate": 9.506945101504256e-08, + "logits/chosen": -2.0643465518951416, + "logits/rejected": -2.0462052822113037, + "logps/chosen": -260.34808349609375, + "logps/rejected": -370.61383056640625, + "loss": 0.4247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2155669927597046, + "rewards/margins": 0.07613217830657959, + "rewards/rejected": 1.139434814453125, + "step": 2892 + }, + { + "epoch": 0.17, + "learning_rate": 9.506536951598071e-08, + "logits/chosen": -2.2369496822357178, + "logits/rejected": -2.2283565998077393, + "logps/chosen": -3.2143895626068115, + "logps/rejected": -199.9424591064453, + "loss": 0.3984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012158441357314587, + "rewards/margins": 2.2386062145233154, + "rewards/rejected": -2.2507646083831787, + "step": 2893 + }, + { + "epoch": 0.17, + "learning_rate": 9.50612864159765e-08, + "logits/chosen": -2.2328736782073975, + "logits/rejected": -2.2289233207702637, + "logps/chosen": -220.03622436523438, + "logps/rejected": -381.3280334472656, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0457550287246704, + "rewards/margins": 0.45886844396591187, + "rewards/rejected": 0.5868865847587585, + "step": 2894 + }, + { + "epoch": 0.17, + "learning_rate": 9.5057201715175e-08, + "logits/chosen": -2.1601126194000244, + "logits/rejected": -2.1655712127685547, + "logps/chosen": -175.15057373046875, + "logps/rejected": -317.9127502441406, + "loss": 0.3448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9402832388877869, + "rewards/margins": 0.6311768293380737, + "rewards/rejected": 0.3091064393520355, + "step": 2895 + }, + { + "epoch": 0.17, + "learning_rate": 9.505311541372127e-08, + "logits/chosen": -2.3043408393859863, + "logits/rejected": -2.2991437911987305, + "logps/chosen": -6.937878060853109e-05, + "logps/rejected": -60.5205078125, + "loss": 0.5389, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6225242777400126e-07, + "rewards/margins": 0.7572015523910522, + "rewards/rejected": -0.7572017908096313, + "step": 2896 + }, + { + "epoch": 0.17, + "learning_rate": 9.504902751176051e-08, + "logits/chosen": -2.1427550315856934, + "logits/rejected": -2.1137044429779053, + "logps/chosen": -49.00323486328125, + "logps/rejected": -314.5622253417969, + "loss": 0.4231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13300323486328125, + "rewards/margins": 1.4005721807479858, + "rewards/rejected": -1.2675689458847046, + "step": 2897 + }, + { + "epoch": 0.17, + "learning_rate": 9.504493800943793e-08, + "logits/chosen": -2.0909903049468994, + "logits/rejected": -2.123957395553589, + "logps/chosen": -230.977294921875, + "logps/rejected": -363.422607421875, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1034988164901733, + "rewards/margins": 1.2361342906951904, + "rewards/rejected": -0.13263550400733948, + "step": 2898 + }, + { + "epoch": 0.17, + "learning_rate": 9.504084690689885e-08, + "logits/chosen": -2.1491129398345947, + "logits/rejected": -2.1509218215942383, + "logps/chosen": -8.780966758728027, + "logps/rejected": -176.86053466796875, + "loss": 0.586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03647727891802788, + "rewards/margins": 0.5423130393028259, + "rewards/rejected": -0.5787903070449829, + "step": 2899 + }, + { + "epoch": 0.17, + "learning_rate": 9.503675420428852e-08, + "logits/chosen": -2.1621346473693848, + "logits/rejected": -2.0963616371154785, + "logps/chosen": -211.03768920898438, + "logps/rejected": -497.29876708984375, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8686172366142273, + "rewards/margins": 0.8415969610214233, + "rewards/rejected": 0.02702026441693306, + "step": 2900 + }, + { + "epoch": 0.17, + "learning_rate": 9.50326599017524e-08, + "logits/chosen": -2.078089952468872, + "logits/rejected": -2.074061870574951, + "logps/chosen": -95.91218566894531, + "logps/rejected": -261.0155029296875, + "loss": 0.3936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14050598442554474, + "rewards/margins": 1.4472092390060425, + "rewards/rejected": -1.3067032098770142, + "step": 2901 + }, + { + "epoch": 0.17, + "learning_rate": 9.502856399943593e-08, + "logits/chosen": -2.122957944869995, + "logits/rejected": -2.1341893672943115, + "logps/chosen": -149.53387451171875, + "logps/rejected": -284.023681640625, + "loss": 0.4757, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8133010864257812, + "rewards/margins": -0.022764623165130615, + "rewards/rejected": 0.8360657095909119, + "step": 2902 + }, + { + "epoch": 0.17, + "learning_rate": 9.50244664974846e-08, + "logits/chosen": -2.180405855178833, + "logits/rejected": -2.17647123336792, + "logps/chosen": -2.9512672424316406, + "logps/rejected": -128.26470947265625, + "loss": 0.6133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00712010869756341, + "rewards/margins": 0.2952350378036499, + "rewards/rejected": -0.2881149351596832, + "step": 2903 + }, + { + "epoch": 0.17, + "learning_rate": 9.502036739604397e-08, + "logits/chosen": -2.115638494491577, + "logits/rejected": -2.118856430053711, + "logps/chosen": -2.227611541748047, + "logps/rejected": -79.20237731933594, + "loss": 0.6457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04348820447921753, + "rewards/margins": 0.24683010578155518, + "rewards/rejected": -0.2903183102607727, + "step": 2904 + }, + { + "epoch": 0.17, + "learning_rate": 9.501626669525971e-08, + "logits/chosen": -2.128896713256836, + "logits/rejected": -2.134068012237549, + "logps/chosen": -149.06167602539062, + "logps/rejected": -295.9695739746094, + "loss": 0.5091, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0330612659454346, + "rewards/margins": -0.14976954460144043, + "rewards/rejected": 1.182830810546875, + "step": 2905 + }, + { + "epoch": 0.17, + "learning_rate": 9.501216439527742e-08, + "logits/chosen": -2.121175527572632, + "logits/rejected": -2.1216509342193604, + "logps/chosen": -4.284618377685547, + "logps/rejected": -39.10964584350586, + "loss": 0.6601, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03815589100122452, + "rewards/margins": 0.17815843224525452, + "rewards/rejected": -0.21631431579589844, + "step": 2906 + }, + { + "epoch": 0.17, + "learning_rate": 9.500806049624289e-08, + "logits/chosen": -2.0313448905944824, + "logits/rejected": -1.9884260892868042, + "logps/chosen": -260.6619873046875, + "logps/rejected": -451.545654296875, + "loss": 0.5862, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0819885730743408, + "rewards/margins": -0.4455535411834717, + "rewards/rejected": 1.5275421142578125, + "step": 2907 + }, + { + "epoch": 0.17, + "learning_rate": 9.500395499830189e-08, + "logits/chosen": -2.091693639755249, + "logits/rejected": -2.082585096359253, + "logps/chosen": -142.18991088867188, + "logps/rejected": -229.403076171875, + "loss": 0.6528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06512909382581711, + "rewards/margins": 0.04213409870862961, + "rewards/rejected": 0.0229949951171875, + "step": 2908 + }, + { + "epoch": 0.17, + "learning_rate": 9.499984790160027e-08, + "logits/chosen": -2.169779062271118, + "logits/rejected": -2.0909409523010254, + "logps/chosen": -131.9927215576172, + "logps/rejected": -428.6597900390625, + "loss": 0.3926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.50787353515625, + "rewards/margins": 0.738903820514679, + "rewards/rejected": -0.23103027045726776, + "step": 2909 + }, + { + "epoch": 0.17, + "learning_rate": 9.499573920628393e-08, + "logits/chosen": -1.8733168840408325, + "logits/rejected": -1.8665963411331177, + "logps/chosen": -258.09100341796875, + "logps/rejected": -274.38623046875, + "loss": 0.623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19727478921413422, + "rewards/margins": 0.17660218477249146, + "rewards/rejected": 0.02067260816693306, + "step": 2910 + }, + { + "epoch": 0.17, + "learning_rate": 9.499162891249884e-08, + "logits/chosen": -1.9526993036270142, + "logits/rejected": -1.9977796077728271, + "logps/chosen": -283.0968017578125, + "logps/rejected": -495.39178466796875, + "loss": 0.6725, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06022338941693306, + "rewards/margins": -0.24648134410381317, + "rewards/rejected": 0.30670472979545593, + "step": 2911 + }, + { + "epoch": 0.17, + "learning_rate": 9.4987517020391e-08, + "logits/chosen": -2.02209210395813, + "logits/rejected": -2.0222392082214355, + "logps/chosen": -0.15193524956703186, + "logps/rejected": -57.656890869140625, + "loss": 0.5311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001758277416229248, + "rewards/margins": 0.802335262298584, + "rewards/rejected": -0.8040935397148132, + "step": 2912 + }, + { + "epoch": 0.17, + "learning_rate": 9.498340353010652e-08, + "logits/chosen": -2.1464743614196777, + "logits/rejected": -2.118711233139038, + "logps/chosen": -209.46463012695312, + "logps/rejected": -414.63250732421875, + "loss": 0.2328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0258514881134033, + "rewards/margins": 1.3690094947814941, + "rewards/rejected": -0.34315797686576843, + "step": 2913 + }, + { + "epoch": 0.17, + "learning_rate": 9.49792884417915e-08, + "logits/chosen": -2.201815605163574, + "logits/rejected": -2.204575777053833, + "logps/chosen": -12.312634468078613, + "logps/rejected": -173.2976837158203, + "loss": 0.5797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04613933712244034, + "rewards/margins": 0.4920957684516907, + "rewards/rejected": -0.44595643877983093, + "step": 2914 + }, + { + "epoch": 0.17, + "learning_rate": 9.497517175559213e-08, + "logits/chosen": -2.0079538822174072, + "logits/rejected": -2.0062382221221924, + "logps/chosen": -0.3176862895488739, + "logps/rejected": -128.420166015625, + "loss": 0.473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006245720665901899, + "rewards/margins": 1.2592167854309082, + "rewards/rejected": -1.2654625177383423, + "step": 2915 + }, + { + "epoch": 0.17, + "learning_rate": 9.497105347165468e-08, + "logits/chosen": -2.2117984294891357, + "logits/rejected": -2.193908452987671, + "logps/chosen": -205.77908325195312, + "logps/rejected": -383.5594482421875, + "loss": 0.3967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7135986685752869, + "rewards/margins": 0.41171571612358093, + "rewards/rejected": 0.30188295245170593, + "step": 2916 + }, + { + "epoch": 0.17, + "learning_rate": 9.496693359012542e-08, + "logits/chosen": -2.1197938919067383, + "logits/rejected": -2.118025302886963, + "logps/chosen": -3.7550522392848507e-05, + "logps/rejected": -93.92094421386719, + "loss": 0.5599, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.006663172935077e-07, + "rewards/margins": 0.6301860213279724, + "rewards/rejected": -0.6301864981651306, + "step": 2917 + }, + { + "epoch": 0.17, + "learning_rate": 9.496281211115072e-08, + "logits/chosen": -2.309999465942383, + "logits/rejected": -2.2935705184936523, + "logps/chosen": -201.23756408691406, + "logps/rejected": -295.4420471191406, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1121383905410767, + "rewards/margins": 0.3897842764854431, + "rewards/rejected": 0.7223541140556335, + "step": 2918 + }, + { + "epoch": 0.17, + "learning_rate": 9.4958689034877e-08, + "logits/chosen": -2.0723977088928223, + "logits/rejected": -2.0613021850585938, + "logps/chosen": -45.89704895019531, + "logps/rejected": -157.53619384765625, + "loss": 0.5835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.062284089624881744, + "rewards/margins": 0.5867988467216492, + "rewards/rejected": -0.6490829586982727, + "step": 2919 + }, + { + "epoch": 0.17, + "learning_rate": 9.495456436145073e-08, + "logits/chosen": -2.175058603286743, + "logits/rejected": -2.168618679046631, + "logps/chosen": -26.77489471435547, + "logps/rejected": -150.05072021484375, + "loss": 0.5725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09588432312011719, + "rewards/margins": 0.7987918853759766, + "rewards/rejected": -0.8946762084960938, + "step": 2920 + }, + { + "epoch": 0.17, + "learning_rate": 9.495043809101844e-08, + "logits/chosen": -2.0653131008148193, + "logits/rejected": -2.012378692626953, + "logps/chosen": -220.64080810546875, + "logps/rejected": -411.717041015625, + "loss": 0.469, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1744126081466675, + "rewards/margins": -0.12098836898803711, + "rewards/rejected": 1.2954009771347046, + "step": 2921 + }, + { + "epoch": 0.17, + "learning_rate": 9.49463102237267e-08, + "logits/chosen": -2.0819058418273926, + "logits/rejected": -2.0917530059814453, + "logps/chosen": -248.17247009277344, + "logps/rejected": -391.759521484375, + "loss": 0.2699, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.768489122390747, + "rewards/margins": 0.6191757917404175, + "rewards/rejected": 1.1493133306503296, + "step": 2922 + }, + { + "epoch": 0.17, + "learning_rate": 9.494218075972218e-08, + "logits/chosen": -2.0452816486358643, + "logits/rejected": -2.0557615756988525, + "logps/chosen": -232.42144775390625, + "logps/rejected": -297.24639892578125, + "loss": 0.5038, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3843719959259033, + "rewards/margins": -0.18917536735534668, + "rewards/rejected": 1.57354736328125, + "step": 2923 + }, + { + "epoch": 0.17, + "learning_rate": 9.493804969915156e-08, + "logits/chosen": -2.263739824295044, + "logits/rejected": -2.2584195137023926, + "logps/chosen": -7.674196243286133, + "logps/rejected": -58.55386734008789, + "loss": 0.5983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036085129249840975, + "rewards/margins": 0.4489111006259918, + "rewards/rejected": -0.45251962542533875, + "step": 2924 + }, + { + "epoch": 0.17, + "learning_rate": 9.493391704216159e-08, + "logits/chosen": -2.365419864654541, + "logits/rejected": -2.3632936477661133, + "logps/chosen": -10.067418098449707, + "logps/rejected": -100.41471862792969, + "loss": 0.4413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22208987176418304, + "rewards/margins": 0.9762181639671326, + "rewards/rejected": -0.7541282773017883, + "step": 2925 + }, + { + "epoch": 0.17, + "learning_rate": 9.49297827888991e-08, + "logits/chosen": -2.1435370445251465, + "logits/rejected": -2.0902023315429688, + "logps/chosen": -221.261962890625, + "logps/rejected": -460.59906005859375, + "loss": 0.3849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3681763410568237, + "rewards/margins": 0.2323700189590454, + "rewards/rejected": 1.1358063220977783, + "step": 2926 + }, + { + "epoch": 0.17, + "learning_rate": 9.492564693951094e-08, + "logits/chosen": -2.075185537338257, + "logits/rejected": -2.0684261322021484, + "logps/chosen": -161.36009216308594, + "logps/rejected": -369.50860595703125, + "loss": 0.4315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11342620849609375, + "rewards/margins": 1.2540329694747925, + "rewards/rejected": -1.1406067609786987, + "step": 2927 + }, + { + "epoch": 0.17, + "learning_rate": 9.492150949414405e-08, + "logits/chosen": -2.1700448989868164, + "logits/rejected": -2.1704392433166504, + "logps/chosen": -7.380795001983643, + "logps/rejected": -203.27023315429688, + "loss": 0.5497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08825469017028809, + "rewards/margins": 0.8446280360221863, + "rewards/rejected": -0.9328827261924744, + "step": 2928 + }, + { + "epoch": 0.17, + "learning_rate": 9.491737045294542e-08, + "logits/chosen": -1.9507465362548828, + "logits/rejected": -1.954983115196228, + "logps/chosen": -3.727043628692627, + "logps/rejected": -151.45761108398438, + "loss": 0.5692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01925048790872097, + "rewards/margins": 0.5481277704238892, + "rewards/rejected": -0.5288772583007812, + "step": 2929 + }, + { + "epoch": 0.17, + "learning_rate": 9.491322981606207e-08, + "logits/chosen": -2.1101877689361572, + "logits/rejected": -2.098414182662964, + "logps/chosen": -29.7330322265625, + "logps/rejected": -168.09396362304688, + "loss": 0.5643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055364418774843216, + "rewards/margins": 0.5103662610054016, + "rewards/rejected": -0.4550018310546875, + "step": 2930 + }, + { + "epoch": 0.17, + "learning_rate": 9.49090875836411e-08, + "logits/chosen": -2.1367239952087402, + "logits/rejected": -1.981758952140808, + "logps/chosen": -296.50677490234375, + "logps/rejected": -632.7218627929688, + "loss": 0.1392, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1330689191818237, + "rewards/margins": 2.244903564453125, + "rewards/rejected": -1.1118347644805908, + "step": 2931 + }, + { + "epoch": 0.17, + "learning_rate": 9.490494375582967e-08, + "logits/chosen": -2.3301703929901123, + "logits/rejected": -2.3256633281707764, + "logps/chosen": -40.63672637939453, + "logps/rejected": -138.03350830078125, + "loss": 0.3994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3224056363105774, + "rewards/margins": 1.1707546710968018, + "rewards/rejected": -0.8483490347862244, + "step": 2932 + }, + { + "epoch": 0.17, + "learning_rate": 9.490079833277498e-08, + "logits/chosen": -2.143111228942871, + "logits/rejected": -2.126361608505249, + "logps/chosen": -34.869285583496094, + "logps/rejected": -247.38983154296875, + "loss": 0.4626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09503173828125, + "rewards/margins": 1.4488343000411987, + "rewards/rejected": -1.5438660383224487, + "step": 2933 + }, + { + "epoch": 0.17, + "learning_rate": 9.48966513146243e-08, + "logits/chosen": -2.00506854057312, + "logits/rejected": -2.0187761783599854, + "logps/chosen": -181.7642822265625, + "logps/rejected": -222.44435119628906, + "loss": 0.563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17872925102710724, + "rewards/margins": 0.3021041750907898, + "rewards/rejected": -0.12337493896484375, + "step": 2934 + }, + { + "epoch": 0.17, + "learning_rate": 9.489250270152495e-08, + "logits/chosen": -2.058586359024048, + "logits/rejected": -2.0605721473693848, + "logps/chosen": -136.8822021484375, + "logps/rejected": -239.05677795410156, + "loss": 0.2526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8139480948448181, + "rewards/margins": 1.3002228736877441, + "rewards/rejected": -0.48627471923828125, + "step": 2935 + }, + { + "epoch": 0.17, + "learning_rate": 9.488835249362431e-08, + "logits/chosen": -2.075113296508789, + "logits/rejected": -2.0652899742126465, + "logps/chosen": -19.085613250732422, + "logps/rejected": -135.77484130859375, + "loss": 0.6487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20496559143066406, + "rewards/margins": 0.41066551208496094, + "rewards/rejected": -0.615631103515625, + "step": 2936 + }, + { + "epoch": 0.17, + "learning_rate": 9.488420069106982e-08, + "logits/chosen": -2.0040836334228516, + "logits/rejected": -1.955801010131836, + "logps/chosen": -230.74339294433594, + "logps/rejected": -371.7669677734375, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3162750005722046, + "rewards/margins": 0.8899291753768921, + "rewards/rejected": 0.4263458251953125, + "step": 2937 + }, + { + "epoch": 0.17, + "learning_rate": 9.488004729400896e-08, + "logits/chosen": -2.235658884048462, + "logits/rejected": -2.2207353115081787, + "logps/chosen": -23.08033561706543, + "logps/rejected": -192.39776611328125, + "loss": 0.4242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05376415327191353, + "rewards/margins": 1.613062858581543, + "rewards/rejected": -1.5592987537384033, + "step": 2938 + }, + { + "epoch": 0.17, + "learning_rate": 9.48758923025893e-08, + "logits/chosen": -1.9832757711410522, + "logits/rejected": -1.960158348083496, + "logps/chosen": -323.2370910644531, + "logps/rejected": -534.818359375, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1993865966796875, + "rewards/margins": 1.2402130365371704, + "rewards/rejected": -0.04082641750574112, + "step": 2939 + }, + { + "epoch": 0.17, + "learning_rate": 9.487173571695842e-08, + "logits/chosen": -1.880759596824646, + "logits/rejected": -1.885243535041809, + "logps/chosen": -266.4234619140625, + "logps/rejected": -521.2448120117188, + "loss": 0.4522, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.00872802734375, + "rewards/margins": 0.00249636173248291, + "rewards/rejected": 1.006231665611267, + "step": 2940 + }, + { + "epoch": 0.17, + "learning_rate": 9.486757753726401e-08, + "logits/chosen": -2.0896453857421875, + "logits/rejected": -2.0583364963531494, + "logps/chosen": -231.98748779296875, + "logps/rejected": -404.8424377441406, + "loss": 0.4157, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0137954950332642, + "rewards/margins": 0.24154818058013916, + "rewards/rejected": 0.772247314453125, + "step": 2941 + }, + { + "epoch": 0.17, + "learning_rate": 9.486341776365377e-08, + "logits/chosen": -1.9570125341415405, + "logits/rejected": -1.9370460510253906, + "logps/chosen": -213.85977172851562, + "logps/rejected": -306.5748291015625, + "loss": 0.5786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11842651665210724, + "rewards/margins": 0.3586670160293579, + "rewards/rejected": -0.24024048447608948, + "step": 2942 + }, + { + "epoch": 0.17, + "learning_rate": 9.485925639627548e-08, + "logits/chosen": -2.169887065887451, + "logits/rejected": -2.1619350910186768, + "logps/chosen": -219.22364807128906, + "logps/rejected": -388.502197265625, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0996917486190796, + "rewards/margins": 0.6487395763397217, + "rewards/rejected": 0.4509521424770355, + "step": 2943 + }, + { + "epoch": 0.17, + "learning_rate": 9.485509343527697e-08, + "logits/chosen": -2.089036703109741, + "logits/rejected": -2.071725606918335, + "logps/chosen": -254.61990356445312, + "logps/rejected": -467.916748046875, + "loss": 0.1815, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2268646955490112, + "rewards/margins": 1.6534088850021362, + "rewards/rejected": -0.426544189453125, + "step": 2944 + }, + { + "epoch": 0.17, + "learning_rate": 9.485092888080615e-08, + "logits/chosen": -1.9248746633529663, + "logits/rejected": -1.9091274738311768, + "logps/chosen": -153.88455200195312, + "logps/rejected": -234.84991455078125, + "loss": 0.5193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25168153643608093, + "rewards/margins": 0.48947450518608093, + "rewards/rejected": -0.23779296875, + "step": 2945 + }, + { + "epoch": 0.17, + "learning_rate": 9.484676273301092e-08, + "logits/chosen": -2.0057413578033447, + "logits/rejected": -1.976171612739563, + "logps/chosen": -223.72792053222656, + "logps/rejected": -432.9522399902344, + "loss": 0.1495, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2587738037109375, + "rewards/margins": 2.2863097190856934, + "rewards/rejected": -1.0275360345840454, + "step": 2946 + }, + { + "epoch": 0.17, + "learning_rate": 9.484259499203933e-08, + "logits/chosen": -2.0562777519226074, + "logits/rejected": -2.066202402114868, + "logps/chosen": -3.6954374081688e-05, + "logps/rejected": -149.57374572753906, + "loss": 0.5018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.536497600493021e-07, + "rewards/margins": 1.0088385343551636, + "rewards/rejected": -1.00883948802948, + "step": 2947 + }, + { + "epoch": 0.17, + "learning_rate": 9.48384256580394e-08, + "logits/chosen": -2.147939682006836, + "logits/rejected": -2.141197919845581, + "logps/chosen": -47.4241943359375, + "logps/rejected": -197.43605041503906, + "loss": 0.5952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0968296080827713, + "rewards/margins": 0.4940258264541626, + "rewards/rejected": -0.5908554196357727, + "step": 2948 + }, + { + "epoch": 0.17, + "learning_rate": 9.483425473115927e-08, + "logits/chosen": -2.0524697303771973, + "logits/rejected": -2.052640676498413, + "logps/chosen": -28.009851455688477, + "logps/rejected": -74.55537414550781, + "loss": 0.5954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08674812316894531, + "rewards/margins": 0.2254253476858139, + "rewards/rejected": -0.1386772245168686, + "step": 2949 + }, + { + "epoch": 0.17, + "learning_rate": 9.48300822115471e-08, + "logits/chosen": -2.1225106716156006, + "logits/rejected": -2.111429452896118, + "logps/chosen": -14.230583190917969, + "logps/rejected": -137.29705810546875, + "loss": 0.4774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023810196667909622, + "rewards/margins": 1.2481396198272705, + "rewards/rejected": -1.2719497680664062, + "step": 2950 + }, + { + "epoch": 0.17, + "learning_rate": 9.482590809935112e-08, + "logits/chosen": -2.1907482147216797, + "logits/rejected": -2.195244312286377, + "logps/chosen": -55.241661071777344, + "logps/rejected": -240.18927001953125, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4916256070137024, + "rewards/margins": 1.5070157051086426, + "rewards/rejected": -1.0153900384902954, + "step": 2951 + }, + { + "epoch": 0.17, + "learning_rate": 9.482173239471962e-08, + "logits/chosen": -2.217331886291504, + "logits/rejected": -2.194274663925171, + "logps/chosen": -20.136869430541992, + "logps/rejected": -212.43572998046875, + "loss": 0.5178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010831832885742188, + "rewards/margins": 0.9722675681114197, + "rewards/rejected": -0.9830994009971619, + "step": 2952 + }, + { + "epoch": 0.17, + "learning_rate": 9.481755509780093e-08, + "logits/chosen": -1.9945296049118042, + "logits/rejected": -1.9875810146331787, + "logps/chosen": -9.50900936126709, + "logps/rejected": -222.5431671142578, + "loss": 0.6058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07504158467054367, + "rewards/margins": 0.3576771020889282, + "rewards/rejected": -0.28263550996780396, + "step": 2953 + }, + { + "epoch": 0.17, + "learning_rate": 9.481337620874348e-08, + "logits/chosen": -1.936863660812378, + "logits/rejected": -1.8769806623458862, + "logps/chosen": -291.0102844238281, + "logps/rejected": -476.71124267578125, + "loss": 0.4237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6162506341934204, + "rewards/margins": 0.31598207354545593, + "rewards/rejected": 0.3002685606479645, + "step": 2954 + }, + { + "epoch": 0.17, + "learning_rate": 9.480919572769568e-08, + "logits/chosen": -2.0452871322631836, + "logits/rejected": -2.0331616401672363, + "logps/chosen": -202.7845458984375, + "logps/rejected": -276.50238037109375, + "loss": 0.4894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33377382159233093, + "rewards/margins": 0.5238281488418579, + "rewards/rejected": -0.19005432724952698, + "step": 2955 + }, + { + "epoch": 0.17, + "learning_rate": 9.480501365480609e-08, + "logits/chosen": -2.177917242050171, + "logits/rejected": -2.1733484268188477, + "logps/chosen": -0.09110675752162933, + "logps/rejected": -80.70780944824219, + "loss": 0.6635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003251974005252123, + "rewards/margins": 0.13072019815444946, + "rewards/rejected": -0.13397216796875, + "step": 2956 + }, + { + "epoch": 0.17, + "learning_rate": 9.480082999022324e-08, + "logits/chosen": -2.1789052486419678, + "logits/rejected": -2.1684932708740234, + "logps/chosen": -4.722280502319336, + "logps/rejected": -131.13600158691406, + "loss": 0.635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1962975710630417, + "rewards/margins": 0.5000753998756409, + "rewards/rejected": -0.6963729858398438, + "step": 2957 + }, + { + "epoch": 0.17, + "learning_rate": 9.479664473409575e-08, + "logits/chosen": -2.078453302383423, + "logits/rejected": -2.020368814468384, + "logps/chosen": -243.87448120117188, + "logps/rejected": -388.2289123535156, + "loss": 0.1315, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.640710473060608, + "rewards/margins": 1.8521331548690796, + "rewards/rejected": -0.21142272651195526, + "step": 2958 + }, + { + "epoch": 0.17, + "learning_rate": 9.479245788657231e-08, + "logits/chosen": -2.152562141418457, + "logits/rejected": -2.1508069038391113, + "logps/chosen": -192.49984741210938, + "logps/rejected": -312.66424560546875, + "loss": 0.1858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0209152698516846, + "rewards/margins": 1.6109848022460938, + "rewards/rejected": -0.590069591999054, + "step": 2959 + }, + { + "epoch": 0.17, + "learning_rate": 9.478826944780167e-08, + "logits/chosen": -2.201195478439331, + "logits/rejected": -2.2064297199249268, + "logps/chosen": -6.525238037109375, + "logps/rejected": -148.8502197265625, + "loss": 0.6554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1562906801700592, + "rewards/margins": 0.3416478633880615, + "rewards/rejected": -0.4979385435581207, + "step": 2960 + }, + { + "epoch": 0.17, + "learning_rate": 9.478407941793261e-08, + "logits/chosen": -2.170639991760254, + "logits/rejected": -2.1501383781433105, + "logps/chosen": -210.23275756835938, + "logps/rejected": -267.82916259765625, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1725982427597046, + "rewards/margins": 0.6240020394325256, + "rewards/rejected": 0.548596203327179, + "step": 2961 + }, + { + "epoch": 0.17, + "learning_rate": 9.4779887797114e-08, + "logits/chosen": -2.145338535308838, + "logits/rejected": -2.1326780319213867, + "logps/chosen": -214.86734008789062, + "logps/rejected": -270.118408203125, + "loss": 0.4417, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0996582508087158, + "rewards/margins": 0.02358400821685791, + "rewards/rejected": 1.076074242591858, + "step": 2962 + }, + { + "epoch": 0.17, + "learning_rate": 9.477569458549472e-08, + "logits/chosen": -2.1241495609283447, + "logits/rejected": -2.0564005374908447, + "logps/chosen": -176.4064178466797, + "logps/rejected": -255.35195922851562, + "loss": 0.4163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9762100577354431, + "rewards/margins": 0.3567795157432556, + "rewards/rejected": 0.6194305419921875, + "step": 2963 + }, + { + "epoch": 0.17, + "learning_rate": 9.477149978322376e-08, + "logits/chosen": -2.184225559234619, + "logits/rejected": -2.1805381774902344, + "logps/chosen": -0.009302079677581787, + "logps/rejected": -232.489013671875, + "loss": 0.4695, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.933767821872607e-05, + "rewards/margins": 1.279405951499939, + "rewards/rejected": -1.2794952392578125, + "step": 2964 + }, + { + "epoch": 0.17, + "learning_rate": 9.476730339045013e-08, + "logits/chosen": -2.1352782249450684, + "logits/rejected": -2.133939504623413, + "logps/chosen": -0.23303046822547913, + "logps/rejected": -112.33407592773438, + "loss": 0.6686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007039213087409735, + "rewards/margins": 0.10783276706933975, + "rewards/rejected": -0.11487197875976562, + "step": 2965 + }, + { + "epoch": 0.17, + "learning_rate": 9.476310540732288e-08, + "logits/chosen": -2.0218605995178223, + "logits/rejected": -1.988635540008545, + "logps/chosen": -261.3854064941406, + "logps/rejected": -502.3360900878906, + "loss": 0.2564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9057525992393494, + "rewards/margins": 1.4930267333984375, + "rewards/rejected": -0.5872741937637329, + "step": 2966 + }, + { + "epoch": 0.17, + "learning_rate": 9.475890583399119e-08, + "logits/chosen": -2.1206274032592773, + "logits/rejected": -2.120258331298828, + "logps/chosen": -31.608339309692383, + "logps/rejected": -176.76539611816406, + "loss": 0.5016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05072536692023277, + "rewards/margins": 0.9553003311157227, + "rewards/rejected": -1.0060256719589233, + "step": 2967 + }, + { + "epoch": 0.17, + "learning_rate": 9.475470467060422e-08, + "logits/chosen": -2.1159491539001465, + "logits/rejected": -2.1126997470855713, + "logps/chosen": -0.39108702540397644, + "logps/rejected": -109.69685363769531, + "loss": 0.6041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009636670583859086, + "rewards/margins": 0.3979576826095581, + "rewards/rejected": -0.396994024515152, + "step": 2968 + }, + { + "epoch": 0.17, + "learning_rate": 9.475050191731121e-08, + "logits/chosen": -2.0745699405670166, + "logits/rejected": -2.0541911125183105, + "logps/chosen": -318.6196594238281, + "logps/rejected": -430.86810302734375, + "loss": 0.449, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1858307123184204, + "rewards/margins": 0.00410151481628418, + "rewards/rejected": 1.1817291975021362, + "step": 2969 + }, + { + "epoch": 0.17, + "learning_rate": 9.474629757426148e-08, + "logits/chosen": -2.102811813354492, + "logits/rejected": -2.100525140762329, + "logps/chosen": -13.999119758605957, + "logps/rejected": -121.64695739746094, + "loss": 0.5275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13473425805568695, + "rewards/margins": 0.6359840035438538, + "rewards/rejected": -0.5012497305870056, + "step": 2970 + }, + { + "epoch": 0.17, + "learning_rate": 9.474209164160438e-08, + "logits/chosen": -2.084432601928711, + "logits/rejected": -2.0901741981506348, + "logps/chosen": -245.56654357910156, + "logps/rejected": -474.27337646484375, + "loss": 0.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9563125967979431, + "rewards/margins": 1.107862949371338, + "rewards/rejected": -0.15155029296875, + "step": 2971 + }, + { + "epoch": 0.17, + "learning_rate": 9.473788411948934e-08, + "logits/chosen": -1.8346056938171387, + "logits/rejected": -1.8159961700439453, + "logps/chosen": -331.4689636230469, + "logps/rejected": -451.9541015625, + "loss": 0.5302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5039734244346619, + "rewards/margins": 0.10273441672325134, + "rewards/rejected": 0.4012390077114105, + "step": 2972 + }, + { + "epoch": 0.17, + "learning_rate": 9.47336750080658e-08, + "logits/chosen": -2.0207154750823975, + "logits/rejected": -2.0112898349761963, + "logps/chosen": -271.681640625, + "logps/rejected": -417.5331726074219, + "loss": 0.429, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3391846418380737, + "rewards/margins": -0.03466486930847168, + "rewards/rejected": 1.3738495111465454, + "step": 2973 + }, + { + "epoch": 0.17, + "learning_rate": 9.472946430748333e-08, + "logits/chosen": -1.9440364837646484, + "logits/rejected": -1.9349243640899658, + "logps/chosen": -88.36180114746094, + "logps/rejected": -200.4722900390625, + "loss": 0.4433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45897600054740906, + "rewards/margins": 0.7074867486953735, + "rewards/rejected": -0.24851074814796448, + "step": 2974 + }, + { + "epoch": 0.17, + "learning_rate": 9.472525201789146e-08, + "logits/chosen": -2.1478490829467773, + "logits/rejected": -2.193631649017334, + "logps/chosen": -177.5682373046875, + "logps/rejected": -252.06924438476562, + "loss": 0.4494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6312225461006165, + "rewards/margins": 0.288504034280777, + "rewards/rejected": 0.3427185118198395, + "step": 2975 + }, + { + "epoch": 0.17, + "learning_rate": 9.472103813943988e-08, + "logits/chosen": -2.1114919185638428, + "logits/rejected": -2.1020967960357666, + "logps/chosen": -14.6422119140625, + "logps/rejected": -205.34036254882812, + "loss": 0.4759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007130336947739124, + "rewards/margins": 1.223799228668213, + "rewards/rejected": -1.2309296131134033, + "step": 2976 + }, + { + "epoch": 0.17, + "learning_rate": 9.471682267227829e-08, + "logits/chosen": -2.1388614177703857, + "logits/rejected": -2.1603758335113525, + "logps/chosen": -285.5740966796875, + "logps/rejected": -331.5903015136719, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3630799055099487, + "rewards/margins": 0.20314645767211914, + "rewards/rejected": 1.1599334478378296, + "step": 2977 + }, + { + "epoch": 0.17, + "learning_rate": 9.471260561655641e-08, + "logits/chosen": -2.231858968734741, + "logits/rejected": -2.2355566024780273, + "logps/chosen": -17.6845645904541, + "logps/rejected": -173.33193969726562, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11948299407958984, + "rewards/margins": 0.6895605325698853, + "rewards/rejected": -0.5700775384902954, + "step": 2978 + }, + { + "epoch": 0.17, + "learning_rate": 9.470838697242405e-08, + "logits/chosen": -1.936047077178955, + "logits/rejected": -1.9460092782974243, + "logps/chosen": -184.947021484375, + "logps/rejected": -331.1635437011719, + "loss": 0.4824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6036956906318665, + "rewards/margins": 0.19568786025047302, + "rewards/rejected": 0.40800783038139343, + "step": 2979 + }, + { + "epoch": 0.17, + "learning_rate": 9.470416674003112e-08, + "logits/chosen": -2.177117347717285, + "logits/rejected": -2.1773569583892822, + "logps/chosen": -1.7725752592086792, + "logps/rejected": -182.72653198242188, + "loss": 0.4943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11195977032184601, + "rewards/margins": 1.2888703346252441, + "rewards/rejected": -1.4008301496505737, + "step": 2980 + }, + { + "epoch": 0.17, + "learning_rate": 9.46999449195275e-08, + "logits/chosen": -2.123758554458618, + "logits/rejected": -2.124654769897461, + "logps/chosen": -322.8882751464844, + "logps/rejected": -601.5274658203125, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5238189697265625, + "rewards/margins": 1.4446990489959717, + "rewards/rejected": -0.920880138874054, + "step": 2981 + }, + { + "epoch": 0.17, + "learning_rate": 9.469572151106319e-08, + "logits/chosen": -2.101581335067749, + "logits/rejected": -2.0865252017974854, + "logps/chosen": -284.5694580078125, + "logps/rejected": -431.7174072265625, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8040863275527954, + "rewards/margins": 0.9164367914199829, + "rewards/rejected": -0.1123504638671875, + "step": 2982 + }, + { + "epoch": 0.17, + "learning_rate": 9.469149651478821e-08, + "logits/chosen": -2.0434117317199707, + "logits/rejected": -1.9986350536346436, + "logps/chosen": -192.04319763183594, + "logps/rejected": -388.66583251953125, + "loss": 0.2147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9424179196357727, + "rewards/margins": 2.3103835582733154, + "rewards/rejected": -1.3679656982421875, + "step": 2983 + }, + { + "epoch": 0.17, + "learning_rate": 9.468726993085267e-08, + "logits/chosen": -2.0519750118255615, + "logits/rejected": -2.0443434715270996, + "logps/chosen": -140.75216674804688, + "logps/rejected": -186.99603271484375, + "loss": 0.6109, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.35076141357421875, + "rewards/margins": -0.02787017822265625, + "rewards/rejected": 0.378631591796875, + "step": 2984 + }, + { + "epoch": 0.17, + "learning_rate": 9.468304175940672e-08, + "logits/chosen": -2.136902332305908, + "logits/rejected": -2.1165475845336914, + "logps/chosen": -12.169917106628418, + "logps/rejected": -426.6289978027344, + "loss": 0.3381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10020112991333008, + "rewards/margins": 3.126742362976074, + "rewards/rejected": -3.026541233062744, + "step": 2985 + }, + { + "epoch": 0.17, + "learning_rate": 9.467881200060056e-08, + "logits/chosen": -2.0496175289154053, + "logits/rejected": -2.045114517211914, + "logps/chosen": -110.15353393554688, + "logps/rejected": -167.4304962158203, + "loss": 0.7264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3500305116176605, + "rewards/margins": 0.24386444687843323, + "rewards/rejected": -0.5938949584960938, + "step": 2986 + }, + { + "epoch": 0.17, + "learning_rate": 9.467458065458445e-08, + "logits/chosen": -2.3071134090423584, + "logits/rejected": -2.301630735397339, + "logps/chosen": -13.429241180419922, + "logps/rejected": -65.78868865966797, + "loss": 0.6774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09826765209436417, + "rewards/margins": 0.2049870491027832, + "rewards/rejected": -0.3032546937465668, + "step": 2987 + }, + { + "epoch": 0.17, + "learning_rate": 9.46703477215087e-08, + "logits/chosen": -2.053502321243286, + "logits/rejected": -2.058647632598877, + "logps/chosen": -178.36618041992188, + "logps/rejected": -324.4027099609375, + "loss": 0.4723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42629241943359375, + "rewards/margins": 0.31179046630859375, + "rewards/rejected": 0.114501953125, + "step": 2988 + }, + { + "epoch": 0.17, + "learning_rate": 9.466611320152369e-08, + "logits/chosen": -2.157954692840576, + "logits/rejected": -2.146466016769409, + "logps/chosen": -234.1088104248047, + "logps/rejected": -337.00738525390625, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6147644519805908, + "rewards/margins": 0.6017731428146362, + "rewards/rejected": 1.0129913091659546, + "step": 2989 + }, + { + "epoch": 0.17, + "learning_rate": 9.466187709477986e-08, + "logits/chosen": -2.057403087615967, + "logits/rejected": -2.042579412460327, + "logps/chosen": -189.65386962890625, + "logps/rejected": -336.3957214355469, + "loss": 0.4089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8694259524345398, + "rewards/margins": 0.29606473445892334, + "rewards/rejected": 0.5733612179756165, + "step": 2990 + }, + { + "epoch": 0.17, + "learning_rate": 9.46576394014277e-08, + "logits/chosen": -2.1134397983551025, + "logits/rejected": -2.1176271438598633, + "logps/chosen": -5.469203472137451, + "logps/rejected": -74.20513153076172, + "loss": 0.6262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022382402792572975, + "rewards/margins": 0.2703949511051178, + "rewards/rejected": -0.24801254272460938, + "step": 2991 + }, + { + "epoch": 0.17, + "learning_rate": 9.465340012161773e-08, + "logits/chosen": -2.1472270488739014, + "logits/rejected": -2.149508237838745, + "logps/chosen": -1.0064440965652466, + "logps/rejected": -34.154109954833984, + "loss": 0.6425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005424905102699995, + "rewards/margins": 0.20383909344673157, + "rewards/rejected": -0.20926399528980255, + "step": 2992 + }, + { + "epoch": 0.17, + "learning_rate": 9.464915925550056e-08, + "logits/chosen": -2.313082695007324, + "logits/rejected": -2.2798469066619873, + "logps/chosen": -201.89828491210938, + "logps/rejected": -353.8821716308594, + "loss": 0.4514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13671265542507172, + "rewards/margins": 1.5212280750274658, + "rewards/rejected": -1.6579407453536987, + "step": 2993 + }, + { + "epoch": 0.17, + "learning_rate": 9.464491680322686e-08, + "logits/chosen": -2.099684953689575, + "logits/rejected": -2.0926530361175537, + "logps/chosen": -281.6338195800781, + "logps/rejected": -368.96875, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.780926525592804, + "rewards/margins": 1.185235619544983, + "rewards/rejected": -0.40430909395217896, + "step": 2994 + }, + { + "epoch": 0.17, + "learning_rate": 9.464067276494734e-08, + "logits/chosen": -1.9293879270553589, + "logits/rejected": -1.8050087690353394, + "logps/chosen": -246.48097229003906, + "logps/rejected": -549.1387939453125, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0204604864120483, + "rewards/margins": 1.0190322399139404, + "rewards/rejected": 0.0014282226329669356, + "step": 2995 + }, + { + "epoch": 0.17, + "learning_rate": 9.463642714081274e-08, + "logits/chosen": -2.16823148727417, + "logits/rejected": -2.1537561416625977, + "logps/chosen": -0.00040430616354569793, + "logps/rejected": -231.92005920410156, + "loss": 0.4115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6220694305957295e-05, + "rewards/margins": 1.8934286832809448, + "rewards/rejected": -1.8934448957443237, + "step": 2996 + }, + { + "epoch": 0.17, + "learning_rate": 9.463217993097395e-08, + "logits/chosen": -1.8781678676605225, + "logits/rejected": -1.8557994365692139, + "logps/chosen": -172.70166015625, + "logps/rejected": -302.3243103027344, + "loss": 0.4357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.518695056438446, + "rewards/margins": 0.5763092041015625, + "rewards/rejected": -0.05761413648724556, + "step": 2997 + }, + { + "epoch": 0.17, + "learning_rate": 9.462793113558176e-08, + "logits/chosen": -2.1633105278015137, + "logits/rejected": -2.160306692123413, + "logps/chosen": -67.32618713378906, + "logps/rejected": -246.05621337890625, + "loss": 0.4808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.133320614695549, + "rewards/margins": 1.5192307233810425, + "rewards/rejected": -1.652551293373108, + "step": 2998 + }, + { + "epoch": 0.17, + "learning_rate": 9.46236807547872e-08, + "logits/chosen": -2.0185375213623047, + "logits/rejected": -2.017941951751709, + "logps/chosen": -18.570987701416016, + "logps/rejected": -35.940765380859375, + "loss": 0.6344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.059449005872011185, + "rewards/margins": 0.2095954865217209, + "rewards/rejected": -0.150146484375, + "step": 2999 + }, + { + "epoch": 0.17, + "learning_rate": 9.461942878874118e-08, + "logits/chosen": -2.1836583614349365, + "logits/rejected": -2.1826882362365723, + "logps/chosen": -25.44785499572754, + "logps/rejected": -178.66744995117188, + "loss": 0.5216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005853843875229359, + "rewards/margins": 0.7175267934799194, + "rewards/rejected": -0.7116729617118835, + "step": 3000 + }, + { + "epoch": 0.17, + "learning_rate": 9.461517523759481e-08, + "logits/chosen": -2.126685857772827, + "logits/rejected": -2.128507137298584, + "logps/chosen": -71.47201538085938, + "logps/rejected": -156.08358764648438, + "loss": 0.3507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46351853013038635, + "rewards/margins": 1.3871971368789673, + "rewards/rejected": -0.9236785769462585, + "step": 3001 + }, + { + "epoch": 0.17, + "learning_rate": 9.461092010149919e-08, + "logits/chosen": -2.250192403793335, + "logits/rejected": -2.2423973083496094, + "logps/chosen": -0.008672724477946758, + "logps/rejected": -120.92010498046875, + "loss": 0.5782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034447104553692043, + "rewards/margins": 0.5169262290000916, + "rewards/rejected": -0.5172706842422485, + "step": 3002 + }, + { + "epoch": 0.17, + "learning_rate": 9.460666338060546e-08, + "logits/chosen": -2.1533420085906982, + "logits/rejected": -2.1543736457824707, + "logps/chosen": -8.021730422973633, + "logps/rejected": -292.4747619628906, + "loss": 0.4767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09790020436048508, + "rewards/margins": 0.8349454998970032, + "rewards/rejected": -0.7370452880859375, + "step": 3003 + }, + { + "epoch": 0.17, + "learning_rate": 9.460240507506486e-08, + "logits/chosen": -2.028053045272827, + "logits/rejected": -2.027742624282837, + "logps/chosen": -2.6973671913146973, + "logps/rejected": -89.399169921875, + "loss": 0.5714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04722774028778076, + "rewards/margins": 0.5029742121696472, + "rewards/rejected": -0.45574647188186646, + "step": 3004 + }, + { + "epoch": 0.17, + "learning_rate": 9.459814518502865e-08, + "logits/chosen": -2.173151969909668, + "logits/rejected": -2.163780927658081, + "logps/chosen": -21.181713104248047, + "logps/rejected": -133.75857543945312, + "loss": 0.5963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04618530347943306, + "rewards/margins": 0.521557629108429, + "rewards/rejected": -0.475372314453125, + "step": 3005 + }, + { + "epoch": 0.17, + "learning_rate": 9.459388371064818e-08, + "logits/chosen": -2.1084654331207275, + "logits/rejected": -2.067178249359131, + "logps/chosen": -215.96633911132812, + "logps/rejected": -302.3509826660156, + "loss": 0.5009, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9192779660224915, + "rewards/margins": -0.08092039823532104, + "rewards/rejected": 1.0001983642578125, + "step": 3006 + }, + { + "epoch": 0.17, + "learning_rate": 9.458962065207481e-08, + "logits/chosen": -2.1465084552764893, + "logits/rejected": -2.15384578704834, + "logps/chosen": -112.68031311035156, + "logps/rejected": -267.1463928222656, + "loss": 0.5143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2224380522966385, + "rewards/margins": 0.6062790155410767, + "rewards/rejected": -0.383840948343277, + "step": 3007 + }, + { + "epoch": 0.18, + "learning_rate": 9.458535600946003e-08, + "logits/chosen": -1.940432071685791, + "logits/rejected": -1.9402542114257812, + "logps/chosen": -20.983287811279297, + "logps/rejected": -97.98959350585938, + "loss": 0.6599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04956245422363281, + "rewards/margins": 0.10189323127269745, + "rewards/rejected": -0.052330780774354935, + "step": 3008 + }, + { + "epoch": 0.18, + "learning_rate": 9.45810897829553e-08, + "logits/chosen": -2.087052345275879, + "logits/rejected": -2.054021120071411, + "logps/chosen": -178.76837158203125, + "logps/rejected": -262.85455322265625, + "loss": 0.2913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8433334231376648, + "rewards/margins": 0.9984878301620483, + "rewards/rejected": -0.15515442192554474, + "step": 3009 + }, + { + "epoch": 0.18, + "learning_rate": 9.457682197271219e-08, + "logits/chosen": -1.9869464635849, + "logits/rejected": -1.9484527111053467, + "logps/chosen": -178.4813232421875, + "logps/rejected": -238.86041259765625, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4459594786167145, + "rewards/margins": 1.3915833234786987, + "rewards/rejected": -0.9456238150596619, + "step": 3010 + }, + { + "epoch": 0.18, + "learning_rate": 9.457255257888231e-08, + "logits/chosen": -1.962750792503357, + "logits/rejected": -1.9353328943252563, + "logps/chosen": -249.89028930664062, + "logps/rejected": -418.4640197753906, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6571563482284546, + "rewards/margins": 0.3625304698944092, + "rewards/rejected": 1.2946258783340454, + "step": 3011 + }, + { + "epoch": 0.18, + "learning_rate": 9.456828160161734e-08, + "logits/chosen": -2.0500080585479736, + "logits/rejected": -2.0480480194091797, + "logps/chosen": -63.658260345458984, + "logps/rejected": -237.8685302734375, + "loss": 0.5556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1521228849887848, + "rewards/margins": 0.4511585235595703, + "rewards/rejected": -0.2990356385707855, + "step": 3012 + }, + { + "epoch": 0.18, + "learning_rate": 9.4564009041069e-08, + "logits/chosen": -2.107264518737793, + "logits/rejected": -2.153930902481079, + "logps/chosen": -291.998291015625, + "logps/rejected": -325.43646240234375, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3636292219161987, + "rewards/margins": 0.6470642685890198, + "rewards/rejected": 0.716564953327179, + "step": 3013 + }, + { + "epoch": 0.18, + "learning_rate": 9.455973489738906e-08, + "logits/chosen": -2.115680694580078, + "logits/rejected": -2.1070480346679688, + "logps/chosen": -58.59888458251953, + "logps/rejected": -123.3020248413086, + "loss": 0.6607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04636688157916069, + "rewards/margins": 0.1585342437028885, + "rewards/rejected": -0.20490112900733948, + "step": 3014 + }, + { + "epoch": 0.18, + "learning_rate": 9.45554591707294e-08, + "logits/chosen": -2.0007028579711914, + "logits/rejected": -1.9719346761703491, + "logps/chosen": -30.319488525390625, + "logps/rejected": -301.4191589355469, + "loss": 0.4478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07966365665197372, + "rewards/margins": 1.144092321395874, + "rewards/rejected": -1.064428687095642, + "step": 3015 + }, + { + "epoch": 0.18, + "learning_rate": 9.455118186124186e-08, + "logits/chosen": -2.050963878631592, + "logits/rejected": -2.053712844848633, + "logps/chosen": -118.23741912841797, + "logps/rejected": -249.75909423828125, + "loss": 0.4922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.126170352101326, + "rewards/margins": 0.5774566531181335, + "rewards/rejected": -0.45128631591796875, + "step": 3016 + }, + { + "epoch": 0.18, + "learning_rate": 9.454690296907844e-08, + "logits/chosen": -2.03131365776062, + "logits/rejected": -2.037898302078247, + "logps/chosen": -244.38856506347656, + "logps/rejected": -319.09136962890625, + "loss": 0.3917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0633041858673096, + "rewards/margins": 0.3534775376319885, + "rewards/rejected": 0.709826648235321, + "step": 3017 + }, + { + "epoch": 0.18, + "learning_rate": 9.454262249439112e-08, + "logits/chosen": -2.3110392093658447, + "logits/rejected": -2.308408260345459, + "logps/chosen": -0.0012233764864504337, + "logps/rejected": -57.82284927368164, + "loss": 0.6868, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.453663885011338e-05, + "rewards/margins": 0.025530420243740082, + "rewards/rejected": -0.025564957410097122, + "step": 3018 + }, + { + "epoch": 0.18, + "learning_rate": 9.453834043733194e-08, + "logits/chosen": -1.988749623298645, + "logits/rejected": -1.9697736501693726, + "logps/chosen": -295.8039855957031, + "logps/rejected": -490.24029541015625, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47942811250686646, + "rewards/margins": 2.1497528553009033, + "rewards/rejected": -1.670324683189392, + "step": 3019 + }, + { + "epoch": 0.18, + "learning_rate": 9.453405679805306e-08, + "logits/chosen": -2.1699907779693604, + "logits/rejected": -2.156263589859009, + "logps/chosen": -31.15393829345703, + "logps/rejected": -121.28084564208984, + "loss": 0.6156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13748779892921448, + "rewards/margins": 0.07928925007581711, + "rewards/rejected": 0.05819854885339737, + "step": 3020 + }, + { + "epoch": 0.18, + "learning_rate": 9.452977157670665e-08, + "logits/chosen": -2.3465943336486816, + "logits/rejected": -2.3303263187408447, + "logps/chosen": -21.933210372924805, + "logps/rejected": -137.7213134765625, + "loss": 0.5112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08169174194335938, + "rewards/margins": 0.7659843564033508, + "rewards/rejected": -0.6842926144599915, + "step": 3021 + }, + { + "epoch": 0.18, + "learning_rate": 9.452548477344494e-08, + "logits/chosen": -2.0563251972198486, + "logits/rejected": -2.0379602909088135, + "logps/chosen": -228.10113525390625, + "logps/rejected": -319.1104736328125, + "loss": 0.4576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6363357901573181, + "rewards/margins": 0.12011265754699707, + "rewards/rejected": 0.516223132610321, + "step": 3022 + }, + { + "epoch": 0.18, + "learning_rate": 9.45211963884202e-08, + "logits/chosen": -2.216146469116211, + "logits/rejected": -2.2153990268707275, + "logps/chosen": -0.12701566517353058, + "logps/rejected": -137.13528442382812, + "loss": 0.4861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003329247236251831, + "rewards/margins": 1.1270231008529663, + "rewards/rejected": -1.123693823814392, + "step": 3023 + }, + { + "epoch": 0.18, + "learning_rate": 9.451690642178481e-08, + "logits/chosen": -1.9018133878707886, + "logits/rejected": -1.821478247642517, + "logps/chosen": -282.81915283203125, + "logps/rejected": -370.90869140625, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.858081042766571, + "rewards/margins": 1.3595917224884033, + "rewards/rejected": -0.5015106201171875, + "step": 3024 + }, + { + "epoch": 0.18, + "learning_rate": 9.451261487369112e-08, + "logits/chosen": -2.3945305347442627, + "logits/rejected": -2.3934073448181152, + "logps/chosen": -41.503013610839844, + "logps/rejected": -182.64190673828125, + "loss": 0.5358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05733489990234375, + "rewards/margins": 0.7585800290107727, + "rewards/rejected": -0.701245129108429, + "step": 3025 + }, + { + "epoch": 0.18, + "learning_rate": 9.450832174429162e-08, + "logits/chosen": -2.0170645713806152, + "logits/rejected": -2.021881103515625, + "logps/chosen": -38.66938018798828, + "logps/rejected": -174.67669677734375, + "loss": 0.4009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3841758668422699, + "rewards/margins": 1.0953147411346436, + "rewards/rejected": -0.711138904094696, + "step": 3026 + }, + { + "epoch": 0.18, + "learning_rate": 9.450402703373883e-08, + "logits/chosen": -2.144505739212036, + "logits/rejected": -2.135746717453003, + "logps/chosen": -125.28308868408203, + "logps/rejected": -213.22586059570312, + "loss": 0.6104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06003418192267418, + "rewards/margins": 0.013606265187263489, + "rewards/rejected": 0.04642791673541069, + "step": 3027 + }, + { + "epoch": 0.18, + "learning_rate": 9.449973074218531e-08, + "logits/chosen": -2.016145944595337, + "logits/rejected": -2.016148328781128, + "logps/chosen": -8.6663818359375, + "logps/rejected": -62.08629608154297, + "loss": 0.6261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08216018974781036, + "rewards/margins": 0.32487523555755615, + "rewards/rejected": -0.4070354402065277, + "step": 3028 + }, + { + "epoch": 0.18, + "learning_rate": 9.449543286978368e-08, + "logits/chosen": -1.8878469467163086, + "logits/rejected": -1.8090460300445557, + "logps/chosen": -346.7358703613281, + "logps/rejected": -697.28759765625, + "loss": 0.201, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0648224353790283, + "rewards/margins": 1.603524923324585, + "rewards/rejected": -0.5387024283409119, + "step": 3029 + }, + { + "epoch": 0.18, + "learning_rate": 9.449113341668662e-08, + "logits/chosen": -2.1425631046295166, + "logits/rejected": -2.1323766708374023, + "logps/chosen": -216.12387084960938, + "logps/rejected": -356.1518859863281, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9750869870185852, + "rewards/margins": 0.5678756833076477, + "rewards/rejected": 0.4072113037109375, + "step": 3030 + }, + { + "epoch": 0.18, + "learning_rate": 9.448683238304687e-08, + "logits/chosen": -2.0451455116271973, + "logits/rejected": -2.0382790565490723, + "logps/chosen": -210.54080200195312, + "logps/rejected": -226.53579711914062, + "loss": 0.4746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6814681887626648, + "rewards/margins": 0.1953231692314148, + "rewards/rejected": 0.48614501953125, + "step": 3031 + }, + { + "epoch": 0.18, + "learning_rate": 9.448252976901721e-08, + "logits/chosen": -2.0814549922943115, + "logits/rejected": -2.066826105117798, + "logps/chosen": -5.074289798736572, + "logps/rejected": -189.95782470703125, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00770835904404521, + "rewards/margins": 1.3005086183547974, + "rewards/rejected": -1.30821692943573, + "step": 3032 + }, + { + "epoch": 0.18, + "learning_rate": 9.447822557475053e-08, + "logits/chosen": -2.129493236541748, + "logits/rejected": -2.101928234100342, + "logps/chosen": -183.96630859375, + "logps/rejected": -330.5563659667969, + "loss": 0.4005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6520324945449829, + "rewards/margins": 0.6705597043037415, + "rewards/rejected": -0.01852722279727459, + "step": 3033 + }, + { + "epoch": 0.18, + "learning_rate": 9.44739198003997e-08, + "logits/chosen": -2.2990493774414062, + "logits/rejected": -2.299062967300415, + "logps/chosen": -0.8773515820503235, + "logps/rejected": -157.0513458251953, + "loss": 0.6385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021136457100510597, + "rewards/margins": 0.3211105465888977, + "rewards/rejected": -0.34224700927734375, + "step": 3034 + }, + { + "epoch": 0.18, + "learning_rate": 9.446961244611769e-08, + "logits/chosen": -2.089921712875366, + "logits/rejected": -2.1201882362365723, + "logps/chosen": -306.877197265625, + "logps/rejected": -321.58892822265625, + "loss": 0.1744, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6508148908615112, + "rewards/margins": 1.2739777565002441, + "rewards/rejected": 0.3768371641635895, + "step": 3035 + }, + { + "epoch": 0.18, + "learning_rate": 9.446530351205752e-08, + "logits/chosen": -1.9859485626220703, + "logits/rejected": -1.9900449514389038, + "logps/chosen": -0.6967620849609375, + "logps/rejected": -72.50651550292969, + "loss": 0.428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013806277886033058, + "rewards/margins": 1.5939217805862427, + "rewards/rejected": -1.580115556716919, + "step": 3036 + }, + { + "epoch": 0.18, + "learning_rate": 9.446099299837228e-08, + "logits/chosen": -2.0698883533477783, + "logits/rejected": -2.072964668273926, + "logps/chosen": -0.00252987421117723, + "logps/rejected": -99.4291000366211, + "loss": 0.6502, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.761641558725387e-05, + "rewards/margins": 0.1748756766319275, + "rewards/rejected": -0.1749732941389084, + "step": 3037 + }, + { + "epoch": 0.18, + "learning_rate": 9.445668090521509e-08, + "logits/chosen": -1.9715361595153809, + "logits/rejected": -1.913957953453064, + "logps/chosen": -224.63201904296875, + "logps/rejected": -537.6553344726562, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47718507051467896, + "rewards/margins": 0.809338390827179, + "rewards/rejected": -0.3321533203125, + "step": 3038 + }, + { + "epoch": 0.18, + "learning_rate": 9.445236723273911e-08, + "logits/chosen": -1.9899115562438965, + "logits/rejected": -1.9722239971160889, + "logps/chosen": -180.0399169921875, + "logps/rejected": -236.1275177001953, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.691497802734375, + "rewards/margins": 0.7627105712890625, + "rewards/rejected": -0.0712127685546875, + "step": 3039 + }, + { + "epoch": 0.18, + "learning_rate": 9.444805198109762e-08, + "logits/chosen": -2.0271401405334473, + "logits/rejected": -2.016378402709961, + "logps/chosen": -321.3236083984375, + "logps/rejected": -319.04443359375, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485174536705017, + "rewards/margins": 0.8562011122703552, + "rewards/rejected": 0.6289734244346619, + "step": 3040 + }, + { + "epoch": 0.18, + "learning_rate": 9.44437351504439e-08, + "logits/chosen": -2.1533589363098145, + "logits/rejected": -2.0894007682800293, + "logps/chosen": -244.57174682617188, + "logps/rejected": -437.1166076660156, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8686447143554688, + "rewards/margins": 1.038996934890747, + "rewards/rejected": -0.17035217583179474, + "step": 3041 + }, + { + "epoch": 0.18, + "learning_rate": 9.44394167409313e-08, + "logits/chosen": -2.0021092891693115, + "logits/rejected": -2.011274576187134, + "logps/chosen": -0.10473685711622238, + "logps/rejected": -64.91498565673828, + "loss": 0.67, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008750855922698975, + "rewards/margins": 0.07813416421413422, + "rewards/rejected": -0.07900924980640411, + "step": 3042 + }, + { + "epoch": 0.18, + "learning_rate": 9.443509675271326e-08, + "logits/chosen": -2.0418670177459717, + "logits/rejected": -2.0420005321502686, + "logps/chosen": -0.01966516301035881, + "logps/rejected": -36.35634231567383, + "loss": 0.6585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004223914002068341, + "rewards/margins": 0.14239034056663513, + "rewards/rejected": -0.14281272888183594, + "step": 3043 + }, + { + "epoch": 0.18, + "learning_rate": 9.443077518594323e-08, + "logits/chosen": -2.0360496044158936, + "logits/rejected": -2.015596389770508, + "logps/chosen": -113.72557067871094, + "logps/rejected": -326.8962097167969, + "loss": 0.5023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3110817074775696, + "rewards/margins": 1.6106376647949219, + "rewards/rejected": -1.9217194318771362, + "step": 3044 + }, + { + "epoch": 0.18, + "learning_rate": 9.442645204077471e-08, + "logits/chosen": -2.1594150066375732, + "logits/rejected": -2.150512933731079, + "logps/chosen": -14.002842903137207, + "logps/rejected": -146.96343994140625, + "loss": 0.5471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026354504749178886, + "rewards/margins": 0.6881413459777832, + "rewards/rejected": -0.714495837688446, + "step": 3045 + }, + { + "epoch": 0.18, + "learning_rate": 9.442212731736131e-08, + "logits/chosen": -2.056145668029785, + "logits/rejected": -2.057044267654419, + "logps/chosen": -15.046736717224121, + "logps/rejected": -62.09834671020508, + "loss": 0.6636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22003717720508575, + "rewards/margins": 0.2947741746902466, + "rewards/rejected": -0.5148113369941711, + "step": 3046 + }, + { + "epoch": 0.18, + "learning_rate": 9.441780101585665e-08, + "logits/chosen": -1.979731559753418, + "logits/rejected": -1.9802231788635254, + "logps/chosen": -1.0037959814071655, + "logps/rejected": -82.1654052734375, + "loss": 0.654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018868638202548027, + "rewards/margins": 0.18384285271167755, + "rewards/rejected": -0.20271149277687073, + "step": 3047 + }, + { + "epoch": 0.18, + "learning_rate": 9.441347313641444e-08, + "logits/chosen": -1.888941764831543, + "logits/rejected": -1.8475077152252197, + "logps/chosen": -129.9473876953125, + "logps/rejected": -265.6611328125, + "loss": 0.4989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1508834809064865, + "rewards/margins": 0.8524185419082642, + "rewards/rejected": -0.7015350461006165, + "step": 3048 + }, + { + "epoch": 0.18, + "learning_rate": 9.440914367918842e-08, + "logits/chosen": -2.0948729515075684, + "logits/rejected": -2.0659019947052, + "logps/chosen": -211.24594116210938, + "logps/rejected": -330.39630126953125, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3712478876113892, + "rewards/margins": 0.6202041506767273, + "rewards/rejected": 0.7510437369346619, + "step": 3049 + }, + { + "epoch": 0.18, + "learning_rate": 9.440481264433236e-08, + "logits/chosen": -2.0960466861724854, + "logits/rejected": -2.095860242843628, + "logps/chosen": -0.00011539120168890804, + "logps/rejected": -88.60385131835938, + "loss": 0.5851, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.503380471807759e-07, + "rewards/margins": 0.4766934812068939, + "rewards/rejected": -0.476693719625473, + "step": 3050 + }, + { + "epoch": 0.18, + "learning_rate": 9.440048003200016e-08, + "logits/chosen": -2.0133233070373535, + "logits/rejected": -2.005786418914795, + "logps/chosen": -143.37033081054688, + "logps/rejected": -233.86700439453125, + "loss": 0.5293, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.050164818763733, + "rewards/margins": -0.248687744140625, + "rewards/rejected": 1.298852562904358, + "step": 3051 + }, + { + "epoch": 0.18, + "learning_rate": 9.439614584234575e-08, + "logits/chosen": -1.7031469345092773, + "logits/rejected": -1.702112078666687, + "logps/chosen": -115.04585266113281, + "logps/rejected": -161.08084106445312, + "loss": 0.5272, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6791549921035767, + "rewards/margins": -0.06941372156143188, + "rewards/rejected": 0.7485687136650085, + "step": 3052 + }, + { + "epoch": 0.18, + "learning_rate": 9.439181007552303e-08, + "logits/chosen": -1.990649938583374, + "logits/rejected": -1.961933970451355, + "logps/chosen": -144.5960693359375, + "logps/rejected": -227.053466796875, + "loss": 0.5541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26081544160842896, + "rewards/margins": 0.30302736163139343, + "rewards/rejected": -0.04221191629767418, + "step": 3053 + }, + { + "epoch": 0.18, + "learning_rate": 9.43874727316861e-08, + "logits/chosen": -2.1992146968841553, + "logits/rejected": -2.2002623081207275, + "logps/chosen": -1.502151370048523, + "logps/rejected": -138.57510375976562, + "loss": 0.4747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019944285973906517, + "rewards/margins": 1.185255765914917, + "rewards/rejected": -1.1653114557266235, + "step": 3054 + }, + { + "epoch": 0.18, + "learning_rate": 9.438313381098901e-08, + "logits/chosen": -2.138148546218872, + "logits/rejected": -2.138363838195801, + "logps/chosen": -258.55712890625, + "logps/rejected": -403.5355529785156, + "loss": 0.1288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.582061767578125, + "rewards/margins": 2.2156097888946533, + "rewards/rejected": -0.6335479617118835, + "step": 3055 + }, + { + "epoch": 0.18, + "learning_rate": 9.437879331358591e-08, + "logits/chosen": -2.0789711475372314, + "logits/rejected": -2.059476137161255, + "logps/chosen": -231.59490966796875, + "logps/rejected": -348.62371826171875, + "loss": 0.2347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2502853870391846, + "rewards/margins": 1.0853134393692017, + "rewards/rejected": 0.16497193276882172, + "step": 3056 + }, + { + "epoch": 0.18, + "learning_rate": 9.437445123963098e-08, + "logits/chosen": -2.043898344039917, + "logits/rejected": -2.0151989459991455, + "logps/chosen": -185.41412353515625, + "logps/rejected": -296.941162109375, + "loss": 0.2676, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1022003889083862, + "rewards/margins": 0.9258087873458862, + "rewards/rejected": 0.1763916015625, + "step": 3057 + }, + { + "epoch": 0.18, + "learning_rate": 9.437010758927849e-08, + "logits/chosen": -1.981414794921875, + "logits/rejected": -1.9610413312911987, + "logps/chosen": -116.13935852050781, + "logps/rejected": -468.13018798828125, + "loss": 0.3955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06545334309339523, + "rewards/margins": 2.5022225379943848, + "rewards/rejected": -2.567675828933716, + "step": 3058 + }, + { + "epoch": 0.18, + "learning_rate": 9.436576236268275e-08, + "logits/chosen": -2.1380600929260254, + "logits/rejected": -2.13879132270813, + "logps/chosen": -23.919151306152344, + "logps/rejected": -41.55170440673828, + "loss": 0.6545, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03939495235681534, + "rewards/margins": 0.022241784259676933, + "rewards/rejected": 0.017153168097138405, + "step": 3059 + }, + { + "epoch": 0.18, + "learning_rate": 9.43614155599981e-08, + "logits/chosen": -2.222480058670044, + "logits/rejected": -2.221329927444458, + "logps/chosen": -6.9555344581604, + "logps/rejected": -161.12973022460938, + "loss": 0.5634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0297867301851511, + "rewards/margins": 0.5694641470909119, + "rewards/rejected": -0.5396774411201477, + "step": 3060 + }, + { + "epoch": 0.18, + "learning_rate": 9.435706718137897e-08, + "logits/chosen": -2.0374515056610107, + "logits/rejected": -1.9946062564849854, + "logps/chosen": -312.13690185546875, + "logps/rejected": -472.92236328125, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5547302961349487, + "rewards/margins": 0.7713623642921448, + "rewards/rejected": 0.783367931842804, + "step": 3061 + }, + { + "epoch": 0.18, + "learning_rate": 9.435271722697985e-08, + "logits/chosen": -2.0029690265655518, + "logits/rejected": -1.9996854066848755, + "logps/chosen": -82.41432189941406, + "logps/rejected": -373.72930908203125, + "loss": 0.4644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22634582221508026, + "rewards/margins": 3.3187317848205566, + "rewards/rejected": -3.5450775623321533, + "step": 3062 + }, + { + "epoch": 0.18, + "learning_rate": 9.434836569695526e-08, + "logits/chosen": -2.0228629112243652, + "logits/rejected": -2.026277542114258, + "logps/chosen": -96.84008026123047, + "logps/rejected": -205.09596252441406, + "loss": 0.5551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11545105278491974, + "rewards/margins": 0.3325347900390625, + "rewards/rejected": -0.21708373725414276, + "step": 3063 + }, + { + "epoch": 0.18, + "learning_rate": 9.434401259145979e-08, + "logits/chosen": -2.2399537563323975, + "logits/rejected": -2.2240161895751953, + "logps/chosen": -201.66232299804688, + "logps/rejected": -303.39031982421875, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9070678949356079, + "rewards/margins": 0.6583130359649658, + "rewards/rejected": 0.24875488877296448, + "step": 3064 + }, + { + "epoch": 0.18, + "learning_rate": 9.433965791064808e-08, + "logits/chosen": -2.0107767581939697, + "logits/rejected": -2.0357937812805176, + "logps/chosen": -149.5543212890625, + "logps/rejected": -296.52813720703125, + "loss": 0.4767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7620574831962585, + "rewards/margins": 0.06396788358688354, + "rewards/rejected": 0.698089599609375, + "step": 3065 + }, + { + "epoch": 0.18, + "learning_rate": 9.433530165467483e-08, + "logits/chosen": -1.955670714378357, + "logits/rejected": -1.9718085527420044, + "logps/chosen": -163.1019287109375, + "logps/rejected": -297.32720947265625, + "loss": 0.4172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.64581298828125, + "rewards/margins": 0.31240537762641907, + "rewards/rejected": 0.33340761065483093, + "step": 3066 + }, + { + "epoch": 0.18, + "learning_rate": 9.433094382369482e-08, + "logits/chosen": -2.018831729888916, + "logits/rejected": -2.0116024017333984, + "logps/chosen": -67.96434020996094, + "logps/rejected": -263.48077392578125, + "loss": 0.5055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18921127915382385, + "rewards/margins": 0.6282127499580383, + "rewards/rejected": -0.4390014708042145, + "step": 3067 + }, + { + "epoch": 0.18, + "learning_rate": 9.432658441786281e-08, + "logits/chosen": -2.142310857772827, + "logits/rejected": -2.147413730621338, + "logps/chosen": -0.6214655041694641, + "logps/rejected": -133.57472229003906, + "loss": 0.4216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004934060852974653, + "rewards/margins": 1.802640676498413, + "rewards/rejected": -1.7977066040039062, + "step": 3068 + }, + { + "epoch": 0.18, + "learning_rate": 9.432222343733372e-08, + "logits/chosen": -2.1908106803894043, + "logits/rejected": -2.186063051223755, + "logps/chosen": -55.781185150146484, + "logps/rejected": -147.51393127441406, + "loss": 0.5079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.323111355304718, + "rewards/margins": 0.6527378559112549, + "rewards/rejected": -0.3296264708042145, + "step": 3069 + }, + { + "epoch": 0.18, + "learning_rate": 9.431786088226243e-08, + "logits/chosen": -2.244379758834839, + "logits/rejected": -2.228177309036255, + "logps/chosen": -25.563129425048828, + "logps/rejected": -213.28123474121094, + "loss": 0.4421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07608204334974289, + "rewards/margins": 1.1647371053695679, + "rewards/rejected": -1.0886551141738892, + "step": 3070 + }, + { + "epoch": 0.18, + "learning_rate": 9.431349675280395e-08, + "logits/chosen": -1.9563629627227783, + "logits/rejected": -1.9444762468338013, + "logps/chosen": -36.35152053833008, + "logps/rejected": -138.85702514648438, + "loss": 0.7082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3354072570800781, + "rewards/margins": 0.20293045043945312, + "rewards/rejected": -0.5383377075195312, + "step": 3071 + }, + { + "epoch": 0.18, + "learning_rate": 9.43091310491133e-08, + "logits/chosen": -2.08548903465271, + "logits/rejected": -2.0525143146514893, + "logps/chosen": -191.91958618164062, + "logps/rejected": -223.25006103515625, + "loss": 0.4617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3134475648403168, + "rewards/margins": 0.6513625979423523, + "rewards/rejected": -0.3379150331020355, + "step": 3072 + }, + { + "epoch": 0.18, + "learning_rate": 9.430476377134557e-08, + "logits/chosen": -2.0416653156280518, + "logits/rejected": -2.038947582244873, + "logps/chosen": -25.369140625, + "logps/rejected": -113.1943130493164, + "loss": 0.6351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02870502509176731, + "rewards/margins": 0.30018940567970276, + "rewards/rejected": -0.271484375, + "step": 3073 + }, + { + "epoch": 0.18, + "learning_rate": 9.430039491965593e-08, + "logits/chosen": -2.2447264194488525, + "logits/rejected": -2.2352335453033447, + "logps/chosen": -76.9295654296875, + "logps/rejected": -261.1488037109375, + "loss": 0.4521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1557212918996811, + "rewards/margins": 2.3657143115997314, + "rewards/rejected": -2.521435499191284, + "step": 3074 + }, + { + "epoch": 0.18, + "learning_rate": 9.429602449419955e-08, + "logits/chosen": -1.9232640266418457, + "logits/rejected": -1.9291114807128906, + "logps/chosen": -134.88885498046875, + "logps/rejected": -281.3033142089844, + "loss": 0.462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8737152218818665, + "rewards/margins": 0.024533092975616455, + "rewards/rejected": 0.84918212890625, + "step": 3075 + }, + { + "epoch": 0.18, + "learning_rate": 9.429165249513171e-08, + "logits/chosen": -2.1890485286712646, + "logits/rejected": -2.175579309463501, + "logps/chosen": -39.611358642578125, + "logps/rejected": -226.9527130126953, + "loss": 0.5707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027194976806640625, + "rewards/margins": 0.4983970820903778, + "rewards/rejected": -0.4712021052837372, + "step": 3076 + }, + { + "epoch": 0.18, + "learning_rate": 9.428727892260773e-08, + "logits/chosen": -2.0738301277160645, + "logits/rejected": -2.0660359859466553, + "logps/chosen": -57.754600524902344, + "logps/rejected": -149.6773223876953, + "loss": 0.5503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2508430480957031, + "rewards/margins": 1.1364113092422485, + "rewards/rejected": -1.3872543573379517, + "step": 3077 + }, + { + "epoch": 0.18, + "learning_rate": 9.428290377678296e-08, + "logits/chosen": -2.1789052486419678, + "logits/rejected": -2.2049150466918945, + "logps/chosen": -267.8466491699219, + "logps/rejected": -401.8768615722656, + "loss": 0.1687, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6189240217208862, + "rewards/margins": 1.33441162109375, + "rewards/rejected": 0.28451234102249146, + "step": 3078 + }, + { + "epoch": 0.18, + "learning_rate": 9.427852705781284e-08, + "logits/chosen": -2.151923179626465, + "logits/rejected": -2.1539390087127686, + "logps/chosen": -0.7070069313049316, + "logps/rejected": -37.63560485839844, + "loss": 0.6979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029342427849769592, + "rewards/margins": 0.048564374446868896, + "rewards/rejected": -0.07790680229663849, + "step": 3079 + }, + { + "epoch": 0.18, + "learning_rate": 9.427414876585284e-08, + "logits/chosen": -2.05440092086792, + "logits/rejected": -2.0484209060668945, + "logps/chosen": -23.768009185791016, + "logps/rejected": -117.73267364501953, + "loss": 0.7829, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3332696855068207, + "rewards/margins": -0.010378450155258179, + "rewards/rejected": -0.3228912353515625, + "step": 3080 + }, + { + "epoch": 0.18, + "learning_rate": 9.426976890105851e-08, + "logits/chosen": -2.1714954376220703, + "logits/rejected": -2.159604549407959, + "logps/chosen": -5.517791748046875, + "logps/rejected": -278.0697021484375, + "loss": 0.3828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0716487392783165, + "rewards/margins": 2.0297298431396484, + "rewards/rejected": -1.9580811262130737, + "step": 3081 + }, + { + "epoch": 0.18, + "learning_rate": 9.426538746358545e-08, + "logits/chosen": -2.0660459995269775, + "logits/rejected": -2.050123929977417, + "logps/chosen": -241.85690307617188, + "logps/rejected": -456.2337646484375, + "loss": 0.1426, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4225494861602783, + "rewards/margins": 2.0365142822265625, + "rewards/rejected": -0.613964855670929, + "step": 3082 + }, + { + "epoch": 0.18, + "learning_rate": 9.426100445358931e-08, + "logits/chosen": -2.1136844158172607, + "logits/rejected": -2.089080810546875, + "logps/chosen": -60.460567474365234, + "logps/rejected": -278.207763671875, + "loss": 0.3869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049452971667051315, + "rewards/margins": 1.9962273836135864, + "rewards/rejected": -1.9467743635177612, + "step": 3083 + }, + { + "epoch": 0.18, + "learning_rate": 9.425661987122577e-08, + "logits/chosen": -2.059067964553833, + "logits/rejected": -1.9656970500946045, + "logps/chosen": -238.0028533935547, + "logps/rejected": -502.83050537109375, + "loss": 0.4382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4491867125034332, + "rewards/margins": 0.6451461911201477, + "rewards/rejected": -0.19595947861671448, + "step": 3084 + }, + { + "epoch": 0.18, + "learning_rate": 9.425223371665061e-08, + "logits/chosen": -2.04137921333313, + "logits/rejected": -1.9884146451950073, + "logps/chosen": -311.3939208984375, + "logps/rejected": -470.575927734375, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.779339611530304, + "rewards/margins": 1.005743384361267, + "rewards/rejected": -0.22640381753444672, + "step": 3085 + }, + { + "epoch": 0.18, + "learning_rate": 9.424784599001966e-08, + "logits/chosen": -1.9194884300231934, + "logits/rejected": -1.9809924364089966, + "logps/chosen": -256.84918212890625, + "logps/rejected": -373.06634521484375, + "loss": 0.2382, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3119720220565796, + "rewards/margins": 1.0770293474197388, + "rewards/rejected": 0.23494262993335724, + "step": 3086 + }, + { + "epoch": 0.18, + "learning_rate": 9.424345669148876e-08, + "logits/chosen": -2.1137068271636963, + "logits/rejected": -2.161872625350952, + "logps/chosen": -220.33624267578125, + "logps/rejected": -349.69122314453125, + "loss": 0.252, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1604140996932983, + "rewards/margins": 1.0131728649139404, + "rewards/rejected": 0.14724121987819672, + "step": 3087 + }, + { + "epoch": 0.18, + "learning_rate": 9.423906582121389e-08, + "logits/chosen": -2.193835735321045, + "logits/rejected": -2.184894561767578, + "logps/chosen": -45.13765335083008, + "logps/rejected": -128.7688446044922, + "loss": 0.54, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18049012124538422, + "rewards/margins": 0.44560396671295166, + "rewards/rejected": -0.26511383056640625, + "step": 3088 + }, + { + "epoch": 0.18, + "learning_rate": 9.423467337935099e-08, + "logits/chosen": -2.2459731101989746, + "logits/rejected": -2.2416982650756836, + "logps/chosen": -19.258712768554688, + "logps/rejected": -173.85861206054688, + "loss": 0.3993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03096771240234375, + "rewards/margins": 1.8716567754745483, + "rewards/rejected": -1.902624487876892, + "step": 3089 + }, + { + "epoch": 0.18, + "learning_rate": 9.423027936605611e-08, + "logits/chosen": -2.1095402240753174, + "logits/rejected": -2.1007163524627686, + "logps/chosen": -22.87476921081543, + "logps/rejected": -194.63888549804688, + "loss": 0.3625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2462320327758789, + "rewards/margins": 1.3740885257720947, + "rewards/rejected": -1.1278564929962158, + "step": 3090 + }, + { + "epoch": 0.18, + "learning_rate": 9.422588378148535e-08, + "logits/chosen": -2.0336458683013916, + "logits/rejected": -1.9354792833328247, + "logps/chosen": -293.97467041015625, + "logps/rejected": -540.5950927734375, + "loss": 0.1926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4259796142578125, + "rewards/margins": 1.6160675287246704, + "rewards/rejected": -0.19008789956569672, + "step": 3091 + }, + { + "epoch": 0.18, + "learning_rate": 9.422148662579487e-08, + "logits/chosen": -1.8855005502700806, + "logits/rejected": -1.9905035495758057, + "logps/chosen": -347.82305908203125, + "logps/rejected": -414.759765625, + "loss": 0.0809, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.023519992828369, + "rewards/margins": 2.550811767578125, + "rewards/rejected": -0.5272918939590454, + "step": 3092 + }, + { + "epoch": 0.18, + "learning_rate": 9.421708789914089e-08, + "logits/chosen": -2.0400373935699463, + "logits/rejected": -2.0192229747772217, + "logps/chosen": -206.6101531982422, + "logps/rejected": -281.11309814453125, + "loss": 0.3999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2831391096115112, + "rewards/margins": 0.16784977912902832, + "rewards/rejected": 1.115289330482483, + "step": 3093 + }, + { + "epoch": 0.18, + "learning_rate": 9.421268760167965e-08, + "logits/chosen": -2.0903663635253906, + "logits/rejected": -2.068877696990967, + "logps/chosen": -155.20945739746094, + "logps/rejected": -346.5299987792969, + "loss": 0.4763, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8786422610282898, + "rewards/margins": 0.04070281982421875, + "rewards/rejected": 0.837939441204071, + "step": 3094 + }, + { + "epoch": 0.18, + "learning_rate": 9.420828573356748e-08, + "logits/chosen": -2.145112991333008, + "logits/rejected": -2.1574602127075195, + "logps/chosen": -237.71034240722656, + "logps/rejected": -356.529052734375, + "loss": 0.2401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5452377796173096, + "rewards/margins": 0.8713028430938721, + "rewards/rejected": 0.6739349365234375, + "step": 3095 + }, + { + "epoch": 0.18, + "learning_rate": 9.420388229496074e-08, + "logits/chosen": -2.0252013206481934, + "logits/rejected": -1.9981582164764404, + "logps/chosen": -183.0598907470703, + "logps/rejected": -398.2352600097656, + "loss": 0.4083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27983856201171875, + "rewards/margins": 1.1482864618301392, + "rewards/rejected": -0.8684478998184204, + "step": 3096 + }, + { + "epoch": 0.18, + "learning_rate": 9.41994772860159e-08, + "logits/chosen": -2.0603573322296143, + "logits/rejected": -2.08034086227417, + "logps/chosen": -168.60147094726562, + "logps/rejected": -298.1949768066406, + "loss": 0.1627, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.181544542312622, + "rewards/margins": 2.1384658813476562, + "rewards/rejected": -0.956921398639679, + "step": 3097 + }, + { + "epoch": 0.18, + "learning_rate": 9.419507070688941e-08, + "logits/chosen": -2.2708396911621094, + "logits/rejected": -2.244279384613037, + "logps/chosen": -17.72732162475586, + "logps/rejected": -325.4231262207031, + "loss": 0.4182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0916873961687088, + "rewards/margins": 2.2734005451202393, + "rewards/rejected": -2.3650879859924316, + "step": 3098 + }, + { + "epoch": 0.18, + "learning_rate": 9.419066255773782e-08, + "logits/chosen": -2.2749831676483154, + "logits/rejected": -2.229827404022217, + "logps/chosen": -58.90864181518555, + "logps/rejected": -242.39385986328125, + "loss": 0.5898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010652542114257812, + "rewards/margins": 0.4332035183906555, + "rewards/rejected": -0.4225509762763977, + "step": 3099 + }, + { + "epoch": 0.18, + "learning_rate": 9.418625283871775e-08, + "logits/chosen": -2.130237102508545, + "logits/rejected": -2.0872387886047363, + "logps/chosen": -202.786376953125, + "logps/rejected": -300.71490478515625, + "loss": 0.3654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8823516964912415, + "rewards/margins": 0.8418365716934204, + "rewards/rejected": 0.04051513597369194, + "step": 3100 + }, + { + "epoch": 0.18, + "learning_rate": 9.418184154998583e-08, + "logits/chosen": -2.081852674484253, + "logits/rejected": -2.078425884246826, + "logps/chosen": -139.2296142578125, + "logps/rejected": -250.9693145751953, + "loss": 0.3872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6444458365440369, + "rewards/margins": 0.6045059561729431, + "rewards/rejected": 0.03993988037109375, + "step": 3101 + }, + { + "epoch": 0.18, + "learning_rate": 9.41774286916988e-08, + "logits/chosen": -2.2513904571533203, + "logits/rejected": -2.2313392162323, + "logps/chosen": -3.2782303605927154e-05, + "logps/rejected": -168.21014404296875, + "loss": 0.606, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960610138799893e-08, + "rewards/margins": 0.3854216933250427, + "rewards/rejected": -0.3854217529296875, + "step": 3102 + }, + { + "epoch": 0.18, + "learning_rate": 9.417301426401338e-08, + "logits/chosen": -2.0861752033233643, + "logits/rejected": -2.082188129425049, + "logps/chosen": -29.939743041992188, + "logps/rejected": -152.04833984375, + "loss": 0.4143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4576354920864105, + "rewards/margins": 0.8524124026298523, + "rewards/rejected": -0.3947769105434418, + "step": 3103 + }, + { + "epoch": 0.18, + "learning_rate": 9.416859826708644e-08, + "logits/chosen": -1.9213913679122925, + "logits/rejected": -1.8648037910461426, + "logps/chosen": -255.021728515625, + "logps/rejected": -442.3685302734375, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2281982898712158, + "rewards/margins": 0.7470459342002869, + "rewards/rejected": 0.48115235567092896, + "step": 3104 + }, + { + "epoch": 0.18, + "learning_rate": 9.416418070107484e-08, + "logits/chosen": -2.0084035396575928, + "logits/rejected": -2.004101514816284, + "logps/chosen": -255.85984802246094, + "logps/rejected": -293.9549560546875, + "loss": 0.536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48238372802734375, + "rewards/margins": 0.0011154115200042725, + "rewards/rejected": 0.4812683165073395, + "step": 3105 + }, + { + "epoch": 0.18, + "learning_rate": 9.41597615661355e-08, + "logits/chosen": -2.237046718597412, + "logits/rejected": -2.2359519004821777, + "logps/chosen": -0.12259266525506973, + "logps/rejected": -261.5399169921875, + "loss": 0.4425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007534222211688757, + "rewards/margins": 1.5984121561050415, + "rewards/rejected": -1.60594642162323, + "step": 3106 + }, + { + "epoch": 0.18, + "learning_rate": 9.415534086242543e-08, + "logits/chosen": -2.182940721511841, + "logits/rejected": -2.182034730911255, + "logps/chosen": -5.22865104675293, + "logps/rejected": -142.88241577148438, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003538036486133933, + "rewards/margins": 0.6795786619186401, + "rewards/rejected": -0.6760406494140625, + "step": 3107 + }, + { + "epoch": 0.18, + "learning_rate": 9.415091859010165e-08, + "logits/chosen": -2.221585988998413, + "logits/rejected": -2.221158027648926, + "logps/chosen": -36.67659378051758, + "logps/rejected": -163.67474365234375, + "loss": 0.5874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06277771294116974, + "rewards/margins": 0.46402132511138916, + "rewards/rejected": -0.5267990231513977, + "step": 3108 + }, + { + "epoch": 0.18, + "learning_rate": 9.414649474932128e-08, + "logits/chosen": -2.2051475048065186, + "logits/rejected": -2.256962537765503, + "logps/chosen": -158.8146514892578, + "logps/rejected": -270.56158447265625, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8763290643692017, + "rewards/margins": 0.7505295276641846, + "rewards/rejected": 0.12579956650733948, + "step": 3109 + }, + { + "epoch": 0.18, + "learning_rate": 9.41420693402415e-08, + "logits/chosen": -1.9852410554885864, + "logits/rejected": -1.9935358762741089, + "logps/chosen": -341.2001647949219, + "logps/rejected": -339.8612060546875, + "loss": 0.2606, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6465729475021362, + "rewards/margins": 0.7040222883224487, + "rewards/rejected": 0.9425506591796875, + "step": 3110 + }, + { + "epoch": 0.18, + "learning_rate": 9.413764236301947e-08, + "logits/chosen": -1.9964814186096191, + "logits/rejected": -1.9741312265396118, + "logps/chosen": -89.40458679199219, + "logps/rejected": -215.0889434814453, + "loss": 0.4403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2210235595703125, + "rewards/margins": 1.36156165599823, + "rewards/rejected": -1.1405380964279175, + "step": 3111 + }, + { + "epoch": 0.18, + "learning_rate": 9.413321381781247e-08, + "logits/chosen": -2.046220541000366, + "logits/rejected": -2.036324977874756, + "logps/chosen": -0.32528606057167053, + "logps/rejected": -151.50827026367188, + "loss": 0.6778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00806611217558384, + "rewards/margins": 0.04623381793498993, + "rewards/rejected": -0.05429992824792862, + "step": 3112 + }, + { + "epoch": 0.18, + "learning_rate": 9.412878370477786e-08, + "logits/chosen": -2.343646287918091, + "logits/rejected": -2.3366682529449463, + "logps/chosen": -66.15019226074219, + "logps/rejected": -198.55670166015625, + "loss": 0.5133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13022994995117188, + "rewards/margins": 1.2505470514297485, + "rewards/rejected": -1.3807770013809204, + "step": 3113 + }, + { + "epoch": 0.18, + "learning_rate": 9.412435202407299e-08, + "logits/chosen": -2.230517625808716, + "logits/rejected": -2.2386415004730225, + "logps/chosen": -9.430916786193848, + "logps/rejected": -116.74674987792969, + "loss": 0.4969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05257081985473633, + "rewards/margins": 1.1345950365066528, + "rewards/rejected": -1.1871658563613892, + "step": 3114 + }, + { + "epoch": 0.18, + "learning_rate": 9.411991877585529e-08, + "logits/chosen": -2.0321104526519775, + "logits/rejected": -2.025331735610962, + "logps/chosen": -226.45318603515625, + "logps/rejected": -519.7734375, + "loss": 0.1141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.579833984375, + "rewards/margins": 2.1179261207580566, + "rewards/rejected": -0.5380920767784119, + "step": 3115 + }, + { + "epoch": 0.18, + "learning_rate": 9.411548396028226e-08, + "logits/chosen": -2.145664691925049, + "logits/rejected": -2.150200366973877, + "logps/chosen": -2.204450845718384, + "logps/rejected": -64.03013610839844, + "loss": 0.6068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13839784264564514, + "rewards/margins": 0.37893423438072205, + "rewards/rejected": -0.5173320770263672, + "step": 3116 + }, + { + "epoch": 0.18, + "learning_rate": 9.411104757751147e-08, + "logits/chosen": -2.1580121517181396, + "logits/rejected": -2.154808759689331, + "logps/chosen": -10.85331916809082, + "logps/rejected": -148.37078857421875, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07235126942396164, + "rewards/margins": 0.5480455160140991, + "rewards/rejected": -0.4756942689418793, + "step": 3117 + }, + { + "epoch": 0.18, + "learning_rate": 9.410660962770047e-08, + "logits/chosen": -2.160982131958008, + "logits/rejected": -2.145172357559204, + "logps/chosen": -171.47119140625, + "logps/rejected": -437.82647705078125, + "loss": 0.183, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1585617065429688, + "rewards/margins": 1.7377610206604004, + "rewards/rejected": -0.5791992545127869, + "step": 3118 + }, + { + "epoch": 0.18, + "learning_rate": 9.410217011100698e-08, + "logits/chosen": -2.203610897064209, + "logits/rejected": -2.1940853595733643, + "logps/chosen": -67.74761962890625, + "logps/rejected": -213.427734375, + "loss": 0.5486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3315185606479645, + "rewards/margins": 0.320382684469223, + "rewards/rejected": 0.011135864071547985, + "step": 3119 + }, + { + "epoch": 0.18, + "learning_rate": 9.409772902758865e-08, + "logits/chosen": -2.159916877746582, + "logits/rejected": -2.1456665992736816, + "logps/chosen": -21.489768981933594, + "logps/rejected": -201.10122680664062, + "loss": 0.5142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021811677142977715, + "rewards/margins": 0.631604790687561, + "rewards/rejected": -0.6097931265830994, + "step": 3120 + }, + { + "epoch": 0.18, + "learning_rate": 9.40932863776033e-08, + "logits/chosen": -2.091600179672241, + "logits/rejected": -2.07185697555542, + "logps/chosen": -216.06219482421875, + "logps/rejected": -269.92578125, + "loss": 0.4591, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8850982785224915, + "rewards/margins": -0.00908815860748291, + "rewards/rejected": 0.8941864371299744, + "step": 3121 + }, + { + "epoch": 0.18, + "learning_rate": 9.408884216120874e-08, + "logits/chosen": -2.1814193725585938, + "logits/rejected": -2.1742818355560303, + "logps/chosen": -0.862612247467041, + "logps/rejected": -170.75338745117188, + "loss": 0.3948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017110109911300242, + "rewards/margins": 2.2893764972686768, + "rewards/rejected": -2.2895476818084717, + "step": 3122 + }, + { + "epoch": 0.18, + "learning_rate": 9.408439637856282e-08, + "logits/chosen": -2.1625771522521973, + "logits/rejected": -2.149623394012451, + "logps/chosen": -0.018508316949009895, + "logps/rejected": -244.62582397460938, + "loss": 0.4001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036801863461732864, + "rewards/margins": 2.2029170989990234, + "rewards/rejected": -2.2032852172851562, + "step": 3123 + }, + { + "epoch": 0.18, + "learning_rate": 9.407994902982351e-08, + "logits/chosen": -2.05014705657959, + "logits/rejected": -2.051881790161133, + "logps/chosen": -0.0001255228416994214, + "logps/rejected": -63.104026794433594, + "loss": 0.4924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5747474410309223e-06, + "rewards/margins": 1.081080675125122, + "rewards/rejected": -1.0810832977294922, + "step": 3124 + }, + { + "epoch": 0.18, + "learning_rate": 9.40755001151488e-08, + "logits/chosen": -2.1288580894470215, + "logits/rejected": -2.125858783721924, + "logps/chosen": -0.06683015823364258, + "logps/rejected": -130.17791748046875, + "loss": 0.4639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007354982080869377, + "rewards/margins": 1.3317664861679077, + "rewards/rejected": -1.332502007484436, + "step": 3125 + }, + { + "epoch": 0.18, + "learning_rate": 9.407104963469672e-08, + "logits/chosen": -2.2157652378082275, + "logits/rejected": -2.2062838077545166, + "logps/chosen": -123.4501724243164, + "logps/rejected": -346.0410461425781, + "loss": 0.4091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28336867690086365, + "rewards/margins": 1.0831184387207031, + "rewards/rejected": -0.7997497916221619, + "step": 3126 + }, + { + "epoch": 0.18, + "learning_rate": 9.406659758862539e-08, + "logits/chosen": -2.1965701580047607, + "logits/rejected": -2.1694540977478027, + "logps/chosen": -120.15309143066406, + "logps/rejected": -230.13446044921875, + "loss": 0.5583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1734664887189865, + "rewards/margins": 0.9101303219795227, + "rewards/rejected": -1.0835968255996704, + "step": 3127 + }, + { + "epoch": 0.18, + "learning_rate": 9.406214397709295e-08, + "logits/chosen": -2.0509867668151855, + "logits/rejected": -2.041025161743164, + "logps/chosen": -6.592625617980957, + "logps/rejected": -139.2565155029297, + "loss": 0.4348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026247501373291016, + "rewards/margins": 1.6841187477111816, + "rewards/rejected": -1.6578712463378906, + "step": 3128 + }, + { + "epoch": 0.18, + "learning_rate": 9.405768880025764e-08, + "logits/chosen": -2.0941922664642334, + "logits/rejected": -2.1062636375427246, + "logps/chosen": -170.902099609375, + "logps/rejected": -282.1505126953125, + "loss": 0.5524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17796173691749573, + "rewards/margins": 0.4544265866279602, + "rewards/rejected": -0.2764648497104645, + "step": 3129 + }, + { + "epoch": 0.18, + "learning_rate": 9.405323205827772e-08, + "logits/chosen": -2.0319621562957764, + "logits/rejected": -2.0114762783050537, + "logps/chosen": -221.65048217773438, + "logps/rejected": -356.66510009765625, + "loss": 0.6526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03352050855755806, + "rewards/margins": 0.21105347573757172, + "rewards/rejected": -0.24457398056983948, + "step": 3130 + }, + { + "epoch": 0.18, + "learning_rate": 9.404877375131148e-08, + "logits/chosen": -2.1901845932006836, + "logits/rejected": -2.177582025527954, + "logps/chosen": -2.9365074634552, + "logps/rejected": -247.51828002929688, + "loss": 0.4159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0070140124298632145, + "rewards/margins": 1.6781779527664185, + "rewards/rejected": -1.6711639165878296, + "step": 3131 + }, + { + "epoch": 0.18, + "learning_rate": 9.404431387951735e-08, + "logits/chosen": -2.0509285926818848, + "logits/rejected": -2.0549700260162354, + "logps/chosen": -15.837248802185059, + "logps/rejected": -245.30877685546875, + "loss": 0.3701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07042532414197922, + "rewards/margins": 2.7378692626953125, + "rewards/rejected": -2.6674439907073975, + "step": 3132 + }, + { + "epoch": 0.18, + "learning_rate": 9.403985244305374e-08, + "logits/chosen": -1.9636473655700684, + "logits/rejected": -1.9663909673690796, + "logps/chosen": -8.105971937766299e-05, + "logps/rejected": -134.2794189453125, + "loss": 0.5363, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.463408676107065e-07, + "rewards/margins": 0.7939704060554504, + "rewards/rejected": -0.7939712405204773, + "step": 3133 + }, + { + "epoch": 0.18, + "learning_rate": 9.403538944207916e-08, + "logits/chosen": -2.1559855937957764, + "logits/rejected": -2.1357040405273438, + "logps/chosen": -87.72106170654297, + "logps/rejected": -269.57763671875, + "loss": 0.3601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4376976191997528, + "rewards/margins": 1.472689151763916, + "rewards/rejected": -1.0349915027618408, + "step": 3134 + }, + { + "epoch": 0.18, + "learning_rate": 9.403092487675212e-08, + "logits/chosen": -2.0288808345794678, + "logits/rejected": -2.063389301300049, + "logps/chosen": -203.9362030029297, + "logps/rejected": -375.93524169921875, + "loss": 0.2182, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.12254798412323, + "rewards/margins": 1.4060776233673096, + "rewards/rejected": -0.283529669046402, + "step": 3135 + }, + { + "epoch": 0.18, + "learning_rate": 9.402645874723128e-08, + "logits/chosen": -2.078859806060791, + "logits/rejected": -2.075997829437256, + "logps/chosen": -18.897048950195312, + "logps/rejected": -151.93048095703125, + "loss": 0.5446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026141930371522903, + "rewards/margins": 0.9064325094223022, + "rewards/rejected": -0.932574450969696, + "step": 3136 + }, + { + "epoch": 0.18, + "learning_rate": 9.402199105367525e-08, + "logits/chosen": -1.8667380809783936, + "logits/rejected": -1.8897560834884644, + "logps/chosen": -245.87281799316406, + "logps/rejected": -601.1902465820312, + "loss": 0.0954, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5832871198654175, + "rewards/margins": 2.5068283081054688, + "rewards/rejected": -0.923541247844696, + "step": 3137 + }, + { + "epoch": 0.18, + "learning_rate": 9.401752179624279e-08, + "logits/chosen": -2.1931188106536865, + "logits/rejected": -2.1809370517730713, + "logps/chosen": -18.490779876708984, + "logps/rejected": -208.95053100585938, + "loss": 0.4496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0586305633187294, + "rewards/margins": 1.14528226852417, + "rewards/rejected": -1.0866516828536987, + "step": 3138 + }, + { + "epoch": 0.18, + "learning_rate": 9.401305097509262e-08, + "logits/chosen": -2.0587315559387207, + "logits/rejected": -2.0538852214813232, + "logps/chosen": -52.826908111572266, + "logps/rejected": -93.52950286865234, + "loss": 0.8345, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5340480804443359, + "rewards/margins": -0.018739700317382812, + "rewards/rejected": -0.5153083801269531, + "step": 3139 + }, + { + "epoch": 0.18, + "learning_rate": 9.400857859038357e-08, + "logits/chosen": -2.124178409576416, + "logits/rejected": -2.1202621459960938, + "logps/chosen": -31.64563751220703, + "logps/rejected": -138.77694702148438, + "loss": 0.7271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07023697346448898, + "rewards/margins": 0.021028898656368256, + "rewards/rejected": -0.09126587212085724, + "step": 3140 + }, + { + "epoch": 0.18, + "learning_rate": 9.400410464227457e-08, + "logits/chosen": -2.3243184089660645, + "logits/rejected": -2.327094078063965, + "logps/chosen": -28.127826690673828, + "logps/rejected": -98.51947021484375, + "loss": 0.7839, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4456291198730469, + "rewards/margins": -0.05769500136375427, + "rewards/rejected": -0.3879341185092926, + "step": 3141 + }, + { + "epoch": 0.18, + "learning_rate": 9.399962913092452e-08, + "logits/chosen": -2.201563835144043, + "logits/rejected": -2.1782774925231934, + "logps/chosen": -153.3292999267578, + "logps/rejected": -285.0436096191406, + "loss": 0.4397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.618084728717804, + "rewards/margins": 0.26118776202201843, + "rewards/rejected": 0.3568969666957855, + "step": 3142 + }, + { + "epoch": 0.18, + "learning_rate": 9.399515205649242e-08, + "logits/chosen": -2.307389974594116, + "logits/rejected": -2.3041141033172607, + "logps/chosen": -76.27301025390625, + "logps/rejected": -249.18475341796875, + "loss": 0.4811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10479965060949326, + "rewards/margins": 0.976934015750885, + "rewards/rejected": -0.8721343874931335, + "step": 3143 + }, + { + "epoch": 0.18, + "learning_rate": 9.399067341913732e-08, + "logits/chosen": -2.0605077743530273, + "logits/rejected": -1.999952793121338, + "logps/chosen": -48.81331253051758, + "logps/rejected": -300.5671691894531, + "loss": 0.3742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1894844025373459, + "rewards/margins": 1.5876747369766235, + "rewards/rejected": -1.3981903791427612, + "step": 3144 + }, + { + "epoch": 0.18, + "learning_rate": 9.39861932190183e-08, + "logits/chosen": -2.1433069705963135, + "logits/rejected": -2.1407978534698486, + "logps/chosen": -11.138612747192383, + "logps/rejected": -86.697998046875, + "loss": 0.7655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2928234040737152, + "rewards/margins": 0.01198616623878479, + "rewards/rejected": -0.3048095703125, + "step": 3145 + }, + { + "epoch": 0.18, + "learning_rate": 9.398171145629454e-08, + "logits/chosen": -1.915368676185608, + "logits/rejected": -1.9108127355575562, + "logps/chosen": -74.19583129882812, + "logps/rejected": -188.89205932617188, + "loss": 0.6228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05842285230755806, + "rewards/margins": 0.4651733636856079, + "rewards/rejected": -0.5235962271690369, + "step": 3146 + }, + { + "epoch": 0.18, + "learning_rate": 9.397722813112525e-08, + "logits/chosen": -2.313962936401367, + "logits/rejected": -2.3093526363372803, + "logps/chosen": -0.6342568397521973, + "logps/rejected": -110.246826171875, + "loss": 0.5012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005134105682373047, + "rewards/margins": 0.8022105097770691, + "rewards/rejected": -0.797076404094696, + "step": 3147 + }, + { + "epoch": 0.18, + "learning_rate": 9.397274324366971e-08, + "logits/chosen": -1.8176418542861938, + "logits/rejected": -1.816545009613037, + "logps/chosen": -46.498294830322266, + "logps/rejected": -190.7823486328125, + "loss": 0.6546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19810448586940765, + "rewards/margins": 0.3778613805770874, + "rewards/rejected": -0.5759658813476562, + "step": 3148 + }, + { + "epoch": 0.18, + "learning_rate": 9.396825679408725e-08, + "logits/chosen": -1.9991086721420288, + "logits/rejected": -1.9832661151885986, + "logps/chosen": -96.1780014038086, + "logps/rejected": -344.19061279296875, + "loss": 0.4723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14310608804225922, + "rewards/margins": 0.9653900265693665, + "rewards/rejected": -0.822283923625946, + "step": 3149 + }, + { + "epoch": 0.18, + "learning_rate": 9.396376878253721e-08, + "logits/chosen": -2.1899378299713135, + "logits/rejected": -2.2042808532714844, + "logps/chosen": -158.04261779785156, + "logps/rejected": -347.4208679199219, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5881423950195312, + "rewards/margins": 0.8284775018692017, + "rewards/rejected": -0.24033509194850922, + "step": 3150 + }, + { + "epoch": 0.18, + "learning_rate": 9.395927920917907e-08, + "logits/chosen": -2.001987934112549, + "logits/rejected": -2.0082576274871826, + "logps/chosen": -0.005343428812921047, + "logps/rejected": -101.20695495605469, + "loss": 0.4945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016472609422635287, + "rewards/margins": 1.0670174360275269, + "rewards/rejected": -1.067182183265686, + "step": 3151 + }, + { + "epoch": 0.18, + "learning_rate": 9.39547880741723e-08, + "logits/chosen": -2.16011381149292, + "logits/rejected": -2.1393675804138184, + "logps/chosen": -27.37853240966797, + "logps/rejected": -193.10174560546875, + "loss": 0.4509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08462562412023544, + "rewards/margins": 1.3865512609481812, + "rewards/rejected": -1.3019256591796875, + "step": 3152 + }, + { + "epoch": 0.18, + "learning_rate": 9.395029537767644e-08, + "logits/chosen": -1.9075355529785156, + "logits/rejected": -1.9144383668899536, + "logps/chosen": -63.979042053222656, + "logps/rejected": -277.003662109375, + "loss": 0.4842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20018921792507172, + "rewards/margins": 0.6439178586006165, + "rewards/rejected": -0.44372865557670593, + "step": 3153 + }, + { + "epoch": 0.18, + "learning_rate": 9.39458011198511e-08, + "logits/chosen": -2.0836167335510254, + "logits/rejected": -2.066167116165161, + "logps/chosen": -0.03032670170068741, + "logps/rejected": -248.18191528320312, + "loss": 0.3713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006795350345782936, + "rewards/margins": 2.8722453117370605, + "rewards/rejected": -2.8729248046875, + "step": 3154 + }, + { + "epoch": 0.18, + "learning_rate": 9.394130530085598e-08, + "logits/chosen": -1.9920408725738525, + "logits/rejected": -1.9714804887771606, + "logps/chosen": -179.0790252685547, + "logps/rejected": -319.5983581542969, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8818069696426392, + "rewards/margins": 1.2144180536270142, + "rewards/rejected": -0.332611083984375, + "step": 3155 + }, + { + "epoch": 0.18, + "learning_rate": 9.393680792085072e-08, + "logits/chosen": -2.25203537940979, + "logits/rejected": -2.2548627853393555, + "logps/chosen": -0.0003399541601538658, + "logps/rejected": -108.50732421875, + "loss": 0.4409, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.970399004378123e-06, + "rewards/margins": 1.572011947631836, + "rewards/rejected": -1.572016954421997, + "step": 3156 + }, + { + "epoch": 0.18, + "learning_rate": 9.393230897999514e-08, + "logits/chosen": -2.182730197906494, + "logits/rejected": -2.125476121902466, + "logps/chosen": -242.45022583007812, + "logps/rejected": -358.67486572265625, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8131820559501648, + "rewards/margins": 1.4219833612442017, + "rewards/rejected": -0.6088013052940369, + "step": 3157 + }, + { + "epoch": 0.18, + "learning_rate": 9.392780847844903e-08, + "logits/chosen": -2.124743700027466, + "logits/rejected": -2.124434232711792, + "logps/chosen": -33.218894958496094, + "logps/rejected": -80.3004379272461, + "loss": 0.6481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020160675048828125, + "rewards/margins": 0.2723190486431122, + "rewards/rejected": -0.2924797236919403, + "step": 3158 + }, + { + "epoch": 0.18, + "learning_rate": 9.392330641637232e-08, + "logits/chosen": -2.1700804233551025, + "logits/rejected": -2.1383421421051025, + "logps/chosen": -129.74432373046875, + "logps/rejected": -397.9366455078125, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4291320741176605, + "rewards/margins": 1.329248070716858, + "rewards/rejected": -0.900115966796875, + "step": 3159 + }, + { + "epoch": 0.18, + "learning_rate": 9.391880279392489e-08, + "logits/chosen": -1.8957637548446655, + "logits/rejected": -1.8925390243530273, + "logps/chosen": -25.87519645690918, + "logps/rejected": -184.0050048828125, + "loss": 0.5268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0712137222290039, + "rewards/margins": 0.7288050055503845, + "rewards/rejected": -0.6575912833213806, + "step": 3160 + }, + { + "epoch": 0.18, + "learning_rate": 9.391429761126677e-08, + "logits/chosen": -2.091764211654663, + "logits/rejected": -2.084115982055664, + "logps/chosen": -22.498849868774414, + "logps/rejected": -85.75590515136719, + "loss": 0.6392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06544323265552521, + "rewards/margins": 0.13402995467185974, + "rewards/rejected": -0.06858672946691513, + "step": 3161 + }, + { + "epoch": 0.18, + "learning_rate": 9.390979086855796e-08, + "logits/chosen": -2.303463935852051, + "logits/rejected": -2.2966508865356445, + "logps/chosen": -22.92632293701172, + "logps/rejected": -171.77926635742188, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05970344692468643, + "rewards/margins": 2.185542583465576, + "rewards/rejected": -2.1258392333984375, + "step": 3162 + }, + { + "epoch": 0.18, + "learning_rate": 9.390528256595863e-08, + "logits/chosen": -1.9410656690597534, + "logits/rejected": -1.9307708740234375, + "logps/chosen": -57.98233413696289, + "logps/rejected": -229.48333740234375, + "loss": 0.5475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34906846284866333, + "rewards/margins": 1.3692574501037598, + "rewards/rejected": -1.7183258533477783, + "step": 3163 + }, + { + "epoch": 0.18, + "learning_rate": 9.390077270362889e-08, + "logits/chosen": -2.0932936668395996, + "logits/rejected": -2.1545028686523438, + "logps/chosen": -325.9625244140625, + "logps/rejected": -344.2848815917969, + "loss": 0.2615, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.492315649986267, + "rewards/margins": 0.8132415413856506, + "rewards/rejected": 0.6790741086006165, + "step": 3164 + }, + { + "epoch": 0.18, + "learning_rate": 9.389626128172894e-08, + "logits/chosen": -1.919193148612976, + "logits/rejected": -1.9209707975387573, + "logps/chosen": -0.006399896461516619, + "logps/rejected": -63.06436538696289, + "loss": 0.6645, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.468960833037272e-05, + "rewards/margins": 0.11758466809988022, + "rewards/rejected": -0.11753997951745987, + "step": 3165 + }, + { + "epoch": 0.18, + "learning_rate": 9.38917483004191e-08, + "logits/chosen": -2.038588762283325, + "logits/rejected": -2.0301828384399414, + "logps/chosen": -19.82378387451172, + "logps/rejected": -264.68798828125, + "loss": 0.3676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011780166998505592, + "rewards/margins": 2.642745018005371, + "rewards/rejected": -2.630964756011963, + "step": 3166 + }, + { + "epoch": 0.18, + "learning_rate": 9.388723375985964e-08, + "logits/chosen": -2.156622886657715, + "logits/rejected": -2.109683036804199, + "logps/chosen": -194.94468688964844, + "logps/rejected": -419.7762451171875, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9753738641738892, + "rewards/margins": 0.8910660147666931, + "rewards/rejected": 0.08430786430835724, + "step": 3167 + }, + { + "epoch": 0.18, + "learning_rate": 9.388271766021096e-08, + "logits/chosen": -2.0389645099639893, + "logits/rejected": -2.0378830432891846, + "logps/chosen": -1.2242273092269897, + "logps/rejected": -82.78987121582031, + "loss": 0.6338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02493377961218357, + "rewards/margins": 0.2824133336544037, + "rewards/rejected": -0.3073471188545227, + "step": 3168 + }, + { + "epoch": 0.18, + "learning_rate": 9.387820000163352e-08, + "logits/chosen": -2.1160988807678223, + "logits/rejected": -2.105614423751831, + "logps/chosen": -15.680447578430176, + "logps/rejected": -204.69898986816406, + "loss": 0.4091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03627653047442436, + "rewards/margins": 1.8620563745498657, + "rewards/rejected": -1.8257797956466675, + "step": 3169 + }, + { + "epoch": 0.18, + "learning_rate": 9.387368078428777e-08, + "logits/chosen": -1.9076098203659058, + "logits/rejected": -1.8888814449310303, + "logps/chosen": -198.09474182128906, + "logps/rejected": -510.8883972167969, + "loss": 0.2356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0514297485351562, + "rewards/margins": 1.2163559198379517, + "rewards/rejected": -0.16492615640163422, + "step": 3170 + }, + { + "epoch": 0.18, + "learning_rate": 9.386916000833426e-08, + "logits/chosen": -2.059577465057373, + "logits/rejected": -2.058729648590088, + "logps/chosen": -28.08271026611328, + "logps/rejected": -212.156982421875, + "loss": 0.7019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6405872702598572, + "rewards/margins": 0.9366374611854553, + "rewards/rejected": -1.5772247314453125, + "step": 3171 + }, + { + "epoch": 0.18, + "learning_rate": 9.38646376739336e-08, + "logits/chosen": -1.9398410320281982, + "logits/rejected": -1.9566973447799683, + "logps/chosen": -169.04696655273438, + "logps/rejected": -300.6255187988281, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0499786138534546, + "rewards/margins": 0.6872192025184631, + "rewards/rejected": 0.36275941133499146, + "step": 3172 + }, + { + "epoch": 0.18, + "learning_rate": 9.386011378124646e-08, + "logits/chosen": -2.0074684619903564, + "logits/rejected": -2.006920576095581, + "logps/chosen": -22.43401527404785, + "logps/rejected": -105.57553100585938, + "loss": 0.6874, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.007573127746582031, + "rewards/margins": -0.04795055463910103, + "rewards/rejected": 0.05552368238568306, + "step": 3173 + }, + { + "epoch": 0.18, + "learning_rate": 9.385558833043353e-08, + "logits/chosen": -2.198521614074707, + "logits/rejected": -2.1997129917144775, + "logps/chosen": -61.141937255859375, + "logps/rejected": -144.08694458007812, + "loss": 0.5585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06765174865722656, + "rewards/margins": 0.5296649932861328, + "rewards/rejected": -0.46201324462890625, + "step": 3174 + }, + { + "epoch": 0.18, + "learning_rate": 9.385106132165558e-08, + "logits/chosen": -2.189509868621826, + "logits/rejected": -2.149637222290039, + "logps/chosen": -183.73056030273438, + "logps/rejected": -385.609375, + "loss": 0.3328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7747589349746704, + "rewards/margins": 0.8123535513877869, + "rewards/rejected": -0.03759460523724556, + "step": 3175 + }, + { + "epoch": 0.18, + "learning_rate": 9.384653275507343e-08, + "logits/chosen": -1.9127600193023682, + "logits/rejected": -1.8756498098373413, + "logps/chosen": -105.52618408203125, + "logps/rejected": -277.2333984375, + "loss": 0.3922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17742691934108734, + "rewards/margins": 1.5194557905197144, + "rewards/rejected": -1.3420288562774658, + "step": 3176 + }, + { + "epoch": 0.18, + "learning_rate": 9.384200263084797e-08, + "logits/chosen": -2.162214994430542, + "logits/rejected": -2.150655746459961, + "logps/chosen": -40.53002166748047, + "logps/rejected": -176.69325256347656, + "loss": 0.4604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05116462707519531, + "rewards/margins": 1.2509602308273315, + "rewards/rejected": -1.1997956037521362, + "step": 3177 + }, + { + "epoch": 0.18, + "learning_rate": 9.38374709491401e-08, + "logits/chosen": -2.291239023208618, + "logits/rejected": -2.2684428691864014, + "logps/chosen": -14.745223999023438, + "logps/rejected": -275.9813232421875, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06346970051527023, + "rewards/margins": 2.172687530517578, + "rewards/rejected": -2.109217882156372, + "step": 3178 + }, + { + "epoch": 0.18, + "learning_rate": 9.383293771011085e-08, + "logits/chosen": -2.022301197052002, + "logits/rejected": -2.013953924179077, + "logps/chosen": -229.70443725585938, + "logps/rejected": -327.0823974609375, + "loss": 0.6481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09691772609949112, + "rewards/margins": 0.36458131670951843, + "rewards/rejected": -0.46149903535842896, + "step": 3179 + }, + { + "epoch": 0.19, + "learning_rate": 9.382840291392123e-08, + "logits/chosen": -2.1655735969543457, + "logits/rejected": -2.163630247116089, + "logps/chosen": -0.0011366839753463864, + "logps/rejected": -124.24712371826172, + "loss": 0.5175, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4340128220501356e-05, + "rewards/margins": 0.8958777785301208, + "rewards/rejected": -0.8958534598350525, + "step": 3180 + }, + { + "epoch": 0.19, + "learning_rate": 9.382386656073235e-08, + "logits/chosen": -2.1781110763549805, + "logits/rejected": -2.2014355659484863, + "logps/chosen": -311.3995361328125, + "logps/rejected": -318.28680419921875, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5468018054962158, + "rewards/margins": 0.4455505609512329, + "rewards/rejected": 1.101251244544983, + "step": 3181 + }, + { + "epoch": 0.19, + "learning_rate": 9.381932865070539e-08, + "logits/chosen": -2.195279598236084, + "logits/rejected": -2.201730251312256, + "logps/chosen": -70.25332641601562, + "logps/rejected": -123.87454223632812, + "loss": 0.7527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28208619356155396, + "rewards/margins": 0.02236860990524292, + "rewards/rejected": -0.3044548034667969, + "step": 3182 + }, + { + "epoch": 0.19, + "learning_rate": 9.381478918400151e-08, + "logits/chosen": -2.244793653488159, + "logits/rejected": -2.237717866897583, + "logps/chosen": -34.2039794921875, + "logps/rejected": -249.0631103515625, + "loss": 0.4997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04827537760138512, + "rewards/margins": 1.1044422388076782, + "rewards/rejected": -1.1527175903320312, + "step": 3183 + }, + { + "epoch": 0.19, + "learning_rate": 9.3810248160782e-08, + "logits/chosen": -2.115494728088379, + "logits/rejected": -2.1076643466949463, + "logps/chosen": -0.0004260165151208639, + "logps/rejected": -217.85208129882812, + "loss": 0.3751, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.152806913130917e-06, + "rewards/margins": 2.7123332023620605, + "rewards/rejected": -2.71234130859375, + "step": 3184 + }, + { + "epoch": 0.19, + "learning_rate": 9.380570558120818e-08, + "logits/chosen": -1.9383224248886108, + "logits/rejected": -1.941715121269226, + "logps/chosen": -59.85736846923828, + "logps/rejected": -264.19659423828125, + "loss": 0.4359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17135123908519745, + "rewards/margins": 1.0356929302215576, + "rewards/rejected": -0.8643417358398438, + "step": 3185 + }, + { + "epoch": 0.19, + "learning_rate": 9.380116144544141e-08, + "logits/chosen": -2.232992172241211, + "logits/rejected": -2.232691526412964, + "logps/chosen": -0.10192056745290756, + "logps/rejected": -104.90945434570312, + "loss": 0.611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038128711748868227, + "rewards/margins": 0.3400347828865051, + "rewards/rejected": -0.3438476622104645, + "step": 3186 + }, + { + "epoch": 0.19, + "learning_rate": 9.379661575364314e-08, + "logits/chosen": -1.964309573173523, + "logits/rejected": -1.9516890048980713, + "logps/chosen": -206.87322998046875, + "logps/rejected": -394.89007568359375, + "loss": 0.5308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1994888335466385, + "rewards/margins": 0.3115341067314148, + "rewards/rejected": -0.1120452880859375, + "step": 3187 + }, + { + "epoch": 0.19, + "learning_rate": 9.379206850597485e-08, + "logits/chosen": -2.081108331680298, + "logits/rejected": -2.0696170330047607, + "logps/chosen": -4.552436351776123, + "logps/rejected": -153.7386016845703, + "loss": 0.3904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02085280418395996, + "rewards/margins": 2.29184889793396, + "rewards/rejected": -2.27099609375, + "step": 3188 + }, + { + "epoch": 0.19, + "learning_rate": 9.378751970259806e-08, + "logits/chosen": -2.1035492420196533, + "logits/rejected": -2.0779106616973877, + "logps/chosen": -98.58566284179688, + "logps/rejected": -370.25457763671875, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12751007080078125, + "rewards/margins": 2.2240066528320312, + "rewards/rejected": -2.3515167236328125, + "step": 3189 + }, + { + "epoch": 0.19, + "learning_rate": 9.37829693436744e-08, + "logits/chosen": -2.043510913848877, + "logits/rejected": -2.0379531383514404, + "logps/chosen": -53.45628356933594, + "logps/rejected": -160.19357299804688, + "loss": 0.8836, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5062370300292969, + "rewards/margins": -0.16060104966163635, + "rewards/rejected": -0.3456359803676605, + "step": 3190 + }, + { + "epoch": 0.19, + "learning_rate": 9.37784174293655e-08, + "logits/chosen": -2.028787851333618, + "logits/rejected": -1.972259283065796, + "logps/chosen": -290.5148010253906, + "logps/rejected": -295.6363220214844, + "loss": 0.2256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2553313970565796, + "rewards/margins": 1.300408959388733, + "rewards/rejected": -0.04507751390337944, + "step": 3191 + }, + { + "epoch": 0.19, + "learning_rate": 9.377386395983306e-08, + "logits/chosen": -2.027958631515503, + "logits/rejected": -2.0248546600341797, + "logps/chosen": -10.99416446685791, + "logps/rejected": -110.01481628417969, + "loss": 0.6627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24719372391700745, + "rewards/margins": 0.41477373242378235, + "rewards/rejected": -0.6619674563407898, + "step": 3192 + }, + { + "epoch": 0.19, + "learning_rate": 9.376930893523887e-08, + "logits/chosen": -2.13712215423584, + "logits/rejected": -2.1258161067962646, + "logps/chosen": -7.805275917053223, + "logps/rejected": -165.21728515625, + "loss": 0.4953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08958254009485245, + "rewards/margins": 0.8360639214515686, + "rewards/rejected": -0.7464813590049744, + "step": 3193 + }, + { + "epoch": 0.19, + "learning_rate": 9.37647523557447e-08, + "logits/chosen": -2.009546995162964, + "logits/rejected": -1.9695899486541748, + "logps/chosen": -125.34979248046875, + "logps/rejected": -325.4923095703125, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3067382872104645, + "rewards/margins": 1.6987823247909546, + "rewards/rejected": -1.3920440673828125, + "step": 3194 + }, + { + "epoch": 0.19, + "learning_rate": 9.376019422151247e-08, + "logits/chosen": -2.272810220718384, + "logits/rejected": -2.2646484375, + "logps/chosen": -0.004118145443499088, + "logps/rejected": -170.41021728515625, + "loss": 0.4323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001853656576713547, + "rewards/margins": 1.6212059259414673, + "rewards/rejected": -1.6213912963867188, + "step": 3195 + }, + { + "epoch": 0.19, + "learning_rate": 9.375563453270408e-08, + "logits/chosen": -1.9184308052062988, + "logits/rejected": -1.7884141206741333, + "logps/chosen": -172.45689392089844, + "logps/rejected": -432.5118103027344, + "loss": 0.429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6408798098564148, + "rewards/margins": 0.6050979495048523, + "rewards/rejected": 0.0357818603515625, + "step": 3196 + }, + { + "epoch": 0.19, + "learning_rate": 9.375107328948152e-08, + "logits/chosen": -1.9065241813659668, + "logits/rejected": -1.9632149934768677, + "logps/chosen": -148.09739685058594, + "logps/rejected": -220.1685333251953, + "loss": 0.2696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7557601928710938, + "rewards/margins": 1.229888916015625, + "rewards/rejected": -0.47412872314453125, + "step": 3197 + }, + { + "epoch": 0.19, + "learning_rate": 9.374651049200684e-08, + "logits/chosen": -2.084778308868408, + "logits/rejected": -2.0744264125823975, + "logps/chosen": -7.3100080490112305, + "logps/rejected": -300.779296875, + "loss": 0.346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05864829942584038, + "rewards/margins": 3.4692928791046143, + "rewards/rejected": -3.41064453125, + "step": 3198 + }, + { + "epoch": 0.19, + "learning_rate": 9.37419461404421e-08, + "logits/chosen": -2.2517237663269043, + "logits/rejected": -2.2390294075012207, + "logps/chosen": -101.46511840820312, + "logps/rejected": -248.787841796875, + "loss": 0.557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04001007229089737, + "rewards/margins": 0.5783416628837585, + "rewards/rejected": -0.6183517575263977, + "step": 3199 + }, + { + "epoch": 0.19, + "learning_rate": 9.373738023494949e-08, + "logits/chosen": -2.236642360687256, + "logits/rejected": -2.219977855682373, + "logps/chosen": -0.02835254929959774, + "logps/rejected": -304.48602294921875, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009902858873829246, + "rewards/margins": 2.681046962738037, + "rewards/rejected": -2.682037353515625, + "step": 3200 + }, + { + "epoch": 0.19, + "learning_rate": 9.373281277569116e-08, + "logits/chosen": -2.0423452854156494, + "logits/rejected": -2.044715642929077, + "logps/chosen": -93.43128967285156, + "logps/rejected": -51.2528076171875, + "loss": 0.7542, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08355408161878586, + "rewards/margins": -0.20307999849319458, + "rewards/rejected": 0.11952590942382812, + "step": 3201 + }, + { + "epoch": 0.19, + "learning_rate": 9.372824376282941e-08, + "logits/chosen": -2.022822380065918, + "logits/rejected": -2.0068912506103516, + "logps/chosen": -136.89913940429688, + "logps/rejected": -326.5436706542969, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0255035161972046, + "rewards/margins": 0.8403289318084717, + "rewards/rejected": 0.18517456948757172, + "step": 3202 + }, + { + "epoch": 0.19, + "learning_rate": 9.372367319652656e-08, + "logits/chosen": -1.9713042974472046, + "logits/rejected": -1.919559359550476, + "logps/chosen": -184.7504425048828, + "logps/rejected": -449.2950439453125, + "loss": 0.3979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5364853143692017, + "rewards/margins": 0.7704391479492188, + "rewards/rejected": -0.23395386338233948, + "step": 3203 + }, + { + "epoch": 0.19, + "learning_rate": 9.371910107694495e-08, + "logits/chosen": -2.1024668216705322, + "logits/rejected": -2.068626642227173, + "logps/chosen": -215.3827362060547, + "logps/rejected": -351.8235778808594, + "loss": 0.359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.66729736328125, + "rewards/margins": 1.0477569103240967, + "rewards/rejected": -0.38045960664749146, + "step": 3204 + }, + { + "epoch": 0.19, + "learning_rate": 9.371452740424701e-08, + "logits/chosen": -2.2044315338134766, + "logits/rejected": -2.210456609725952, + "logps/chosen": -154.07623291015625, + "logps/rejected": -261.87091064453125, + "loss": 0.5708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1747482270002365, + "rewards/margins": 0.907295286655426, + "rewards/rejected": -1.0820435285568237, + "step": 3205 + }, + { + "epoch": 0.19, + "learning_rate": 9.370995217859525e-08, + "logits/chosen": -1.797420859336853, + "logits/rejected": -1.755892038345337, + "logps/chosen": -210.03099060058594, + "logps/rejected": -471.48748779296875, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9465072751045227, + "rewards/margins": 1.0612777471542358, + "rewards/rejected": -0.11477050930261612, + "step": 3206 + }, + { + "epoch": 0.19, + "learning_rate": 9.370537540015217e-08, + "logits/chosen": -2.0902693271636963, + "logits/rejected": -2.0885727405548096, + "logps/chosen": -2.6583447834127583e-05, + "logps/rejected": -116.01260375976562, + "loss": 0.5461, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.337798375468992e-07, + "rewards/margins": 0.7112835049629211, + "rewards/rejected": -0.7112838625907898, + "step": 3207 + }, + { + "epoch": 0.19, + "learning_rate": 9.370079706908036e-08, + "logits/chosen": -2.0051965713500977, + "logits/rejected": -2.0029754638671875, + "logps/chosen": -16.0512638092041, + "logps/rejected": -148.72361755371094, + "loss": 0.6371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2932470440864563, + "rewards/margins": 0.4840143322944641, + "rewards/rejected": -0.7772613763809204, + "step": 3208 + }, + { + "epoch": 0.19, + "learning_rate": 9.369621718554247e-08, + "logits/chosen": -1.932694435119629, + "logits/rejected": -1.9036738872528076, + "logps/chosen": -202.72605895996094, + "logps/rejected": -437.48919677734375, + "loss": 0.2925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9538009762763977, + "rewards/margins": 0.8761154413223267, + "rewards/rejected": 0.07768554985523224, + "step": 3209 + }, + { + "epoch": 0.19, + "learning_rate": 9.369163574970123e-08, + "logits/chosen": -2.163358688354492, + "logits/rejected": -2.1495277881622314, + "logps/chosen": -179.0446319580078, + "logps/rejected": -272.6690673828125, + "loss": 0.4178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0972336530685425, + "rewards/margins": 0.08583831787109375, + "rewards/rejected": 1.0113953351974487, + "step": 3210 + }, + { + "epoch": 0.19, + "learning_rate": 9.368705276171935e-08, + "logits/chosen": -2.2077560424804688, + "logits/rejected": -2.2678496837615967, + "logps/chosen": -182.48385620117188, + "logps/rejected": -293.87933349609375, + "loss": 0.4223, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1193512678146362, + "rewards/margins": 0.09220278263092041, + "rewards/rejected": 1.0271484851837158, + "step": 3211 + }, + { + "epoch": 0.19, + "learning_rate": 9.368246822175967e-08, + "logits/chosen": -2.189182758331299, + "logits/rejected": -2.1742334365844727, + "logps/chosen": -179.23548889160156, + "logps/rejected": -256.2616271972656, + "loss": 0.4399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3685897886753082, + "rewards/margins": 0.7792236804962158, + "rewards/rejected": -0.4106338620185852, + "step": 3212 + }, + { + "epoch": 0.19, + "learning_rate": 9.367788212998504e-08, + "logits/chosen": -2.0333988666534424, + "logits/rejected": -2.079963445663452, + "logps/chosen": -275.8739318847656, + "logps/rejected": -350.30059814453125, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1721038818359375, + "rewards/margins": 0.6001739501953125, + "rewards/rejected": 0.571929931640625, + "step": 3213 + }, + { + "epoch": 0.19, + "learning_rate": 9.367329448655837e-08, + "logits/chosen": -2.2401716709136963, + "logits/rejected": -2.242767095565796, + "logps/chosen": -16.1667423248291, + "logps/rejected": -175.9467315673828, + "loss": 0.5294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08796310424804688, + "rewards/margins": 0.7349220514297485, + "rewards/rejected": -0.8228851556777954, + "step": 3214 + }, + { + "epoch": 0.19, + "learning_rate": 9.366870529164266e-08, + "logits/chosen": -2.111647367477417, + "logits/rejected": -2.1401875019073486, + "logps/chosen": -214.15896606445312, + "logps/rejected": -280.85382080078125, + "loss": 0.6095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06296386569738388, + "rewards/margins": 0.384246826171875, + "rewards/rejected": -0.4472106993198395, + "step": 3215 + }, + { + "epoch": 0.19, + "learning_rate": 9.366411454540094e-08, + "logits/chosen": -2.1113133430480957, + "logits/rejected": -2.102830171585083, + "logps/chosen": -55.408756256103516, + "logps/rejected": -337.96234130859375, + "loss": 0.3695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33262521028518677, + "rewards/margins": 1.3486530780792236, + "rewards/rejected": -1.016027808189392, + "step": 3216 + }, + { + "epoch": 0.19, + "learning_rate": 9.365952224799628e-08, + "logits/chosen": -2.095726490020752, + "logits/rejected": -2.102015733718872, + "logps/chosen": -19.107908248901367, + "logps/rejected": -224.75656127929688, + "loss": 0.8015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45442086458206177, + "rewards/margins": 0.10165518522262573, + "rewards/rejected": -0.5560760498046875, + "step": 3217 + }, + { + "epoch": 0.19, + "learning_rate": 9.365492839959183e-08, + "logits/chosen": -2.051302194595337, + "logits/rejected": -2.0434982776641846, + "logps/chosen": -231.71124267578125, + "logps/rejected": -343.78045654296875, + "loss": 0.3807, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.078942894935608, + "rewards/margins": 0.3327881097793579, + "rewards/rejected": 0.74615478515625, + "step": 3218 + }, + { + "epoch": 0.19, + "learning_rate": 9.365033300035079e-08, + "logits/chosen": -2.0469703674316406, + "logits/rejected": -1.9743199348449707, + "logps/chosen": -193.8294219970703, + "logps/rejected": -412.98345947265625, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7583892941474915, + "rewards/margins": 1.1019439697265625, + "rewards/rejected": -0.34355470538139343, + "step": 3219 + }, + { + "epoch": 0.19, + "learning_rate": 9.364573605043639e-08, + "logits/chosen": -2.0311195850372314, + "logits/rejected": -2.0113210678100586, + "logps/chosen": -66.0411376953125, + "logps/rejected": -198.99850463867188, + "loss": 0.5544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009796142578125, + "rewards/margins": 0.5209609866142273, + "rewards/rejected": -0.5307571291923523, + "step": 3220 + }, + { + "epoch": 0.19, + "learning_rate": 9.364113755001196e-08, + "logits/chosen": -2.260471820831299, + "logits/rejected": -2.276179075241089, + "logps/chosen": -136.84507751464844, + "logps/rejected": -229.46165466308594, + "loss": 0.5527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24117890000343323, + "rewards/margins": 0.1333572417497635, + "rewards/rejected": 0.10782165825366974, + "step": 3221 + }, + { + "epoch": 0.19, + "learning_rate": 9.363653749924086e-08, + "logits/chosen": -2.1950573921203613, + "logits/rejected": -2.19053316116333, + "logps/chosen": -4.792174877366051e-05, + "logps/rejected": -143.56199645996094, + "loss": 0.4421, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3112803571857512e-06, + "rewards/margins": 1.6007883548736572, + "rewards/rejected": -1.6007896661758423, + "step": 3222 + }, + { + "epoch": 0.19, + "learning_rate": 9.363193589828649e-08, + "logits/chosen": -2.136500835418701, + "logits/rejected": -2.1399667263031006, + "logps/chosen": -10.292387008666992, + "logps/rejected": -294.3428955078125, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07889747619628906, + "rewards/margins": 2.2655065059661865, + "rewards/rejected": -2.1866090297698975, + "step": 3223 + }, + { + "epoch": 0.19, + "learning_rate": 9.362733274731234e-08, + "logits/chosen": -1.9993480443954468, + "logits/rejected": -1.984849214553833, + "logps/chosen": -44.488800048828125, + "logps/rejected": -268.71807861328125, + "loss": 0.5687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045679476112127304, + "rewards/margins": 0.9170402884483337, + "rewards/rejected": -0.962719738483429, + "step": 3224 + }, + { + "epoch": 0.19, + "learning_rate": 9.362272804648191e-08, + "logits/chosen": -1.934049367904663, + "logits/rejected": -1.8584614992141724, + "logps/chosen": -302.8594665527344, + "logps/rejected": -498.8133544921875, + "loss": 0.4316, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3768433332443237, + "rewards/margins": -0.02152097225189209, + "rewards/rejected": 1.3983643054962158, + "step": 3225 + }, + { + "epoch": 0.19, + "learning_rate": 9.36181217959588e-08, + "logits/chosen": -2.0018310546875, + "logits/rejected": -1.9862446784973145, + "logps/chosen": -220.75938415527344, + "logps/rejected": -327.73101806640625, + "loss": 0.5432, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.477447509765625, + "rewards/margins": -0.0726318359375, + "rewards/rejected": 0.550079345703125, + "step": 3226 + }, + { + "epoch": 0.19, + "learning_rate": 9.361351399590665e-08, + "logits/chosen": -2.121943712234497, + "logits/rejected": -2.111912965774536, + "logps/chosen": -24.7180118560791, + "logps/rejected": -234.745361328125, + "loss": 0.3888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0734262466430664, + "rewards/margins": 1.9522684812545776, + "rewards/rejected": -1.8788422346115112, + "step": 3227 + }, + { + "epoch": 0.19, + "learning_rate": 9.360890464648915e-08, + "logits/chosen": -2.166804552078247, + "logits/rejected": -2.166619300842285, + "logps/chosen": -18.399642944335938, + "logps/rejected": -92.29473876953125, + "loss": 0.6133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002152824541553855, + "rewards/margins": 0.29731541872024536, + "rewards/rejected": -0.2994682490825653, + "step": 3228 + }, + { + "epoch": 0.19, + "learning_rate": 9.360429374787005e-08, + "logits/chosen": -2.0325615406036377, + "logits/rejected": -2.031949043273926, + "logps/chosen": -16.671201705932617, + "logps/rejected": -245.1494140625, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03291015699505806, + "rewards/margins": 2.388777256011963, + "rewards/rejected": -2.421687364578247, + "step": 3229 + }, + { + "epoch": 0.19, + "learning_rate": 9.359968130021312e-08, + "logits/chosen": -2.1051132678985596, + "logits/rejected": -2.1282358169555664, + "logps/chosen": -212.45616149902344, + "logps/rejected": -288.07012939453125, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1413925886154175, + "rewards/margins": 0.4332504868507385, + "rewards/rejected": 0.708142101764679, + "step": 3230 + }, + { + "epoch": 0.19, + "learning_rate": 9.359506730368225e-08, + "logits/chosen": -1.9802483320236206, + "logits/rejected": -1.9230308532714844, + "logps/chosen": -208.38653564453125, + "logps/rejected": -341.7222595214844, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9086288809776306, + "rewards/margins": 1.048536777496338, + "rewards/rejected": -0.1399078369140625, + "step": 3231 + }, + { + "epoch": 0.19, + "learning_rate": 9.359045175844136e-08, + "logits/chosen": -2.164848804473877, + "logits/rejected": -2.146580457687378, + "logps/chosen": -23.77582550048828, + "logps/rejected": -206.27728271484375, + "loss": 0.3918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10367031395435333, + "rewards/margins": 1.8451635837554932, + "rewards/rejected": -1.7414932250976562, + "step": 3232 + }, + { + "epoch": 0.19, + "learning_rate": 9.358583466465437e-08, + "logits/chosen": -2.1793220043182373, + "logits/rejected": -2.1655962467193604, + "logps/chosen": -169.481201171875, + "logps/rejected": -295.5787353515625, + "loss": 0.2087, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1641693115234375, + "rewards/margins": 1.315679907798767, + "rewards/rejected": -0.15151062607765198, + "step": 3233 + }, + { + "epoch": 0.19, + "learning_rate": 9.358121602248536e-08, + "logits/chosen": -2.0724501609802246, + "logits/rejected": -2.047157049179077, + "logps/chosen": -275.70263671875, + "logps/rejected": -450.47564697265625, + "loss": 0.1279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7431671619415283, + "rewards/margins": 1.8319427967071533, + "rewards/rejected": -0.088775634765625, + "step": 3234 + }, + { + "epoch": 0.19, + "learning_rate": 9.357659583209836e-08, + "logits/chosen": -2.102766990661621, + "logits/rejected": -2.090533494949341, + "logps/chosen": -16.506000518798828, + "logps/rejected": -154.72740173339844, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002685546933207661, + "rewards/margins": 1.362274169921875, + "rewards/rejected": -1.362005591392517, + "step": 3235 + }, + { + "epoch": 0.19, + "learning_rate": 9.357197409365753e-08, + "logits/chosen": -2.03246808052063, + "logits/rejected": -1.988652229309082, + "logps/chosen": -270.8380126953125, + "logps/rejected": -452.85888671875, + "loss": 0.6023, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.84332275390625, + "rewards/margins": -0.45261847972869873, + "rewards/rejected": 1.2959412336349487, + "step": 3236 + }, + { + "epoch": 0.19, + "learning_rate": 9.356735080732704e-08, + "logits/chosen": -2.2577390670776367, + "logits/rejected": -2.257774591445923, + "logps/chosen": -3.686177968978882, + "logps/rejected": -10.812732696533203, + "loss": 0.7053, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.015666795894503593, + "rewards/margins": -0.025839831680059433, + "rewards/rejected": 0.010173034854233265, + "step": 3237 + }, + { + "epoch": 0.19, + "learning_rate": 9.356272597327115e-08, + "logits/chosen": -2.085638999938965, + "logits/rejected": -2.080514907836914, + "logps/chosen": -121.44268798828125, + "logps/rejected": -179.1056671142578, + "loss": 0.5785, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.836163341999054, + "rewards/margins": -0.25553280115127563, + "rewards/rejected": 1.0916961431503296, + "step": 3238 + }, + { + "epoch": 0.19, + "learning_rate": 9.355809959165413e-08, + "logits/chosen": -2.032209873199463, + "logits/rejected": -2.028249740600586, + "logps/chosen": -0.002713576890528202, + "logps/rejected": -246.10775756835938, + "loss": 0.4333, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.848306369036436e-05, + "rewards/margins": 1.663773775100708, + "rewards/rejected": -1.663812279701233, + "step": 3239 + }, + { + "epoch": 0.19, + "learning_rate": 9.355347166264036e-08, + "logits/chosen": -2.0913033485412598, + "logits/rejected": -2.0774550437927246, + "logps/chosen": -0.00019048684043809772, + "logps/rejected": -118.45285034179688, + "loss": 0.6247, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.481905762077076e-06, + "rewards/margins": 0.2952805757522583, + "rewards/rejected": -0.29528504610061646, + "step": 3240 + }, + { + "epoch": 0.19, + "learning_rate": 9.354884218639421e-08, + "logits/chosen": -2.280228853225708, + "logits/rejected": -2.2405295372009277, + "logps/chosen": -0.25446099042892456, + "logps/rejected": -170.5577392578125, + "loss": 0.4176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007304674480110407, + "rewards/margins": 1.780763030052185, + "rewards/rejected": -1.7880676984786987, + "step": 3241 + }, + { + "epoch": 0.19, + "learning_rate": 9.354421116308018e-08, + "logits/chosen": -2.074073314666748, + "logits/rejected": -2.058089017868042, + "logps/chosen": -67.02922058105469, + "logps/rejected": -306.96124267578125, + "loss": 0.4051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02807769738137722, + "rewards/margins": 1.957869052886963, + "rewards/rejected": -1.9297913312911987, + "step": 3242 + }, + { + "epoch": 0.19, + "learning_rate": 9.353957859286279e-08, + "logits/chosen": -2.034658193588257, + "logits/rejected": -1.998665452003479, + "logps/chosen": -266.65594482421875, + "logps/rejected": -335.8553466796875, + "loss": 0.3614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.650219738483429, + "rewards/margins": 0.9190247058868408, + "rewards/rejected": -0.2688049376010895, + "step": 3243 + }, + { + "epoch": 0.19, + "learning_rate": 9.353494447590658e-08, + "logits/chosen": -2.0366528034210205, + "logits/rejected": -2.0217435359954834, + "logps/chosen": -56.318634033203125, + "logps/rejected": -284.8497619628906, + "loss": 0.5398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04345092922449112, + "rewards/margins": 0.777392566204071, + "rewards/rejected": -0.820843517780304, + "step": 3244 + }, + { + "epoch": 0.19, + "learning_rate": 9.353030881237618e-08, + "logits/chosen": -1.8970670700073242, + "logits/rejected": -1.8946877717971802, + "logps/chosen": -10.875582695007324, + "logps/rejected": -136.60186767578125, + "loss": 0.529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024987125769257545, + "rewards/margins": 0.78773432970047, + "rewards/rejected": -0.7627472281455994, + "step": 3245 + }, + { + "epoch": 0.19, + "learning_rate": 9.352567160243627e-08, + "logits/chosen": -1.9583160877227783, + "logits/rejected": -1.9412585496902466, + "logps/chosen": -137.20712280273438, + "logps/rejected": -439.5566101074219, + "loss": 0.4167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8424972891807556, + "rewards/margins": 0.3687424063682556, + "rewards/rejected": 0.4737548828125, + "step": 3246 + }, + { + "epoch": 0.19, + "learning_rate": 9.352103284625163e-08, + "logits/chosen": -2.0631444454193115, + "logits/rejected": -2.052961826324463, + "logps/chosen": -18.90308380126953, + "logps/rejected": -271.90802001953125, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15117226541042328, + "rewards/margins": 1.4485752582550049, + "rewards/rejected": -1.2974029779434204, + "step": 3247 + }, + { + "epoch": 0.19, + "learning_rate": 9.3516392543987e-08, + "logits/chosen": -2.0639877319335938, + "logits/rejected": -2.041546583175659, + "logps/chosen": -20.93993377685547, + "logps/rejected": -310.2867431640625, + "loss": 0.4213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05312023311853409, + "rewards/margins": 1.6556107997894287, + "rewards/rejected": -1.7087310552597046, + "step": 3248 + }, + { + "epoch": 0.19, + "learning_rate": 9.351175069580726e-08, + "logits/chosen": -2.157745122909546, + "logits/rejected": -2.1536996364593506, + "logps/chosen": -21.119003295898438, + "logps/rejected": -93.08342742919922, + "loss": 0.6234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1584096997976303, + "rewards/margins": 0.5095911026000977, + "rewards/rejected": -0.6680008172988892, + "step": 3249 + }, + { + "epoch": 0.19, + "learning_rate": 9.350710730187727e-08, + "logits/chosen": -1.7971911430358887, + "logits/rejected": -1.7734555006027222, + "logps/chosen": -239.90432739257812, + "logps/rejected": -376.5210266113281, + "loss": 0.3935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.753186047077179, + "rewards/margins": 0.7168243527412415, + "rewards/rejected": 0.0363616943359375, + "step": 3250 + }, + { + "epoch": 0.19, + "learning_rate": 9.350246236236204e-08, + "logits/chosen": -2.16396164894104, + "logits/rejected": -2.1519341468811035, + "logps/chosen": -29.503507614135742, + "logps/rejected": -143.79827880859375, + "loss": 0.4951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16811199486255646, + "rewards/margins": 0.8078321218490601, + "rewards/rejected": -0.6397201418876648, + "step": 3251 + }, + { + "epoch": 0.19, + "learning_rate": 9.349781587742654e-08, + "logits/chosen": -2.2280399799346924, + "logits/rejected": -2.205264091491699, + "logps/chosen": -118.80291748046875, + "logps/rejected": -238.17276000976562, + "loss": 0.428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050986479967832565, + "rewards/margins": 1.236981987953186, + "rewards/rejected": -1.1859954595565796, + "step": 3252 + }, + { + "epoch": 0.19, + "learning_rate": 9.349316784723584e-08, + "logits/chosen": -2.1149239540100098, + "logits/rejected": -2.1221907138824463, + "logps/chosen": -70.63023376464844, + "logps/rejected": -256.30743408203125, + "loss": 0.5628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2524002194404602, + "rewards/margins": 0.7175857424736023, + "rewards/rejected": -0.9699859619140625, + "step": 3253 + }, + { + "epoch": 0.19, + "learning_rate": 9.348851827195509e-08, + "logits/chosen": -2.107881784439087, + "logits/rejected": -2.1066219806671143, + "logps/chosen": -0.005556056741625071, + "logps/rejected": -83.72903442382812, + "loss": 0.5683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002357316407142207, + "rewards/margins": 0.5839149951934814, + "rewards/rejected": -0.5841507315635681, + "step": 3254 + }, + { + "epoch": 0.19, + "learning_rate": 9.348386715174943e-08, + "logits/chosen": -2.02225661277771, + "logits/rejected": -2.0110292434692383, + "logps/chosen": -135.2169189453125, + "logps/rejected": -242.30006408691406, + "loss": 0.4062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5077270865440369, + "rewards/margins": 0.8342697620391846, + "rewards/rejected": -0.3265426754951477, + "step": 3255 + }, + { + "epoch": 0.19, + "learning_rate": 9.347921448678411e-08, + "logits/chosen": -2.1428768634796143, + "logits/rejected": -2.134249448776245, + "logps/chosen": -37.986289978027344, + "logps/rejected": -230.49453735351562, + "loss": 0.6496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.430764764547348, + "rewards/margins": 0.665771484375, + "rewards/rejected": -1.0965362787246704, + "step": 3256 + }, + { + "epoch": 0.19, + "learning_rate": 9.347456027722442e-08, + "logits/chosen": -2.009685754776001, + "logits/rejected": -1.9831018447875977, + "logps/chosen": -173.02944946289062, + "logps/rejected": -247.92660522460938, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4073272943496704, + "rewards/margins": 0.3495819568634033, + "rewards/rejected": 1.057745337486267, + "step": 3257 + }, + { + "epoch": 0.19, + "learning_rate": 9.346990452323569e-08, + "logits/chosen": -2.1433372497558594, + "logits/rejected": -2.090099334716797, + "logps/chosen": -238.59698486328125, + "logps/rejected": -431.796630859375, + "loss": 0.4108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2685730457305908, + "rewards/margins": 0.11644589900970459, + "rewards/rejected": 1.1521271467208862, + "step": 3258 + }, + { + "epoch": 0.19, + "learning_rate": 9.346524722498331e-08, + "logits/chosen": -1.8568679094314575, + "logits/rejected": -1.853313684463501, + "logps/chosen": -18.83819580078125, + "logps/rejected": -235.3058319091797, + "loss": 0.568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12417659908533096, + "rewards/margins": 0.7975305318832397, + "rewards/rejected": -0.9217071533203125, + "step": 3259 + }, + { + "epoch": 0.19, + "learning_rate": 9.346058838263273e-08, + "logits/chosen": -2.1352508068084717, + "logits/rejected": -2.1271145343780518, + "logps/chosen": -198.06982421875, + "logps/rejected": -281.149169921875, + "loss": 0.2563, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1674774885177612, + "rewards/margins": 0.9782196879386902, + "rewards/rejected": 0.18925781548023224, + "step": 3260 + }, + { + "epoch": 0.19, + "learning_rate": 9.345592799634948e-08, + "logits/chosen": -2.1737446784973145, + "logits/rejected": -2.1723546981811523, + "logps/chosen": -7.1551008224487305, + "logps/rejected": -82.76007080078125, + "loss": 0.5654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0377713218331337, + "rewards/margins": 0.6543421745300293, + "rewards/rejected": -0.6921135187149048, + "step": 3261 + }, + { + "epoch": 0.19, + "learning_rate": 9.34512660662991e-08, + "logits/chosen": -2.1299846172332764, + "logits/rejected": -2.122042655944824, + "logps/chosen": -21.49777603149414, + "logps/rejected": -339.8843078613281, + "loss": 0.384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11281223595142365, + "rewards/margins": 1.7474863529205322, + "rewards/rejected": -1.634674072265625, + "step": 3262 + }, + { + "epoch": 0.19, + "learning_rate": 9.34466025926472e-08, + "logits/chosen": -2.010892629623413, + "logits/rejected": -2.012505531311035, + "logps/chosen": -25.783363342285156, + "logps/rejected": -73.78128051757812, + "loss": 0.559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1598896086215973, + "rewards/margins": 0.5438045263290405, + "rewards/rejected": -0.3839149475097656, + "step": 3263 + }, + { + "epoch": 0.19, + "learning_rate": 9.344193757555945e-08, + "logits/chosen": -2.172635555267334, + "logits/rejected": -2.1717827320098877, + "logps/chosen": -28.366544723510742, + "logps/rejected": -104.63467407226562, + "loss": 0.5964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10959091037511826, + "rewards/margins": 0.31914520263671875, + "rewards/rejected": -0.2095542997121811, + "step": 3264 + }, + { + "epoch": 0.19, + "learning_rate": 9.343727101520158e-08, + "logits/chosen": -2.1884515285491943, + "logits/rejected": -2.194000720977783, + "logps/chosen": -10.24112319946289, + "logps/rejected": -59.198326110839844, + "loss": 0.6622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01807241514325142, + "rewards/margins": 0.12409467995166779, + "rewards/rejected": -0.10602226108312607, + "step": 3265 + }, + { + "epoch": 0.19, + "learning_rate": 9.343260291173938e-08, + "logits/chosen": -2.2082419395446777, + "logits/rejected": -2.208791971206665, + "logps/chosen": -98.8492202758789, + "logps/rejected": -222.20932006835938, + "loss": 0.6045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28844451904296875, + "rewards/margins": 0.18836669623851776, + "rewards/rejected": 0.10007782280445099, + "step": 3266 + }, + { + "epoch": 0.19, + "learning_rate": 9.342793326533866e-08, + "logits/chosen": -2.0343587398529053, + "logits/rejected": -2.010371446609497, + "logps/chosen": -206.7184600830078, + "logps/rejected": -346.6475830078125, + "loss": 0.4886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.859417736530304, + "rewards/margins": 0.12821656465530396, + "rewards/rejected": 0.731201171875, + "step": 3267 + }, + { + "epoch": 0.19, + "learning_rate": 9.342326207616533e-08, + "logits/chosen": -2.058016538619995, + "logits/rejected": -2.043663263320923, + "logps/chosen": -234.88153076171875, + "logps/rejected": -346.3658752441406, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.758404552936554, + "rewards/margins": 1.1085907220840454, + "rewards/rejected": -0.35018616914749146, + "step": 3268 + }, + { + "epoch": 0.19, + "learning_rate": 9.341858934438533e-08, + "logits/chosen": -2.267552614212036, + "logits/rejected": -2.2583277225494385, + "logps/chosen": -6.644656658172607, + "logps/rejected": -309.8213806152344, + "loss": 0.3571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02458353154361248, + "rewards/margins": 4.578264236450195, + "rewards/rejected": -4.602847576141357, + "step": 3269 + }, + { + "epoch": 0.19, + "learning_rate": 9.341391507016465e-08, + "logits/chosen": -2.2957422733306885, + "logits/rejected": -2.28548264503479, + "logps/chosen": -8.559067646274343e-05, + "logps/rejected": -227.65655517578125, + "loss": 0.431, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1576131530309794e-06, + "rewards/margins": 1.6926339864730835, + "rewards/rejected": -1.6926361322402954, + "step": 3270 + }, + { + "epoch": 0.19, + "learning_rate": 9.340923925366934e-08, + "logits/chosen": -2.0190107822418213, + "logits/rejected": -2.018320083618164, + "logps/chosen": -218.0521240234375, + "logps/rejected": -437.8268127441406, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.554724097251892, + "rewards/margins": 0.1658782958984375, + "rewards/rejected": 1.3888458013534546, + "step": 3271 + }, + { + "epoch": 0.19, + "learning_rate": 9.340456189506552e-08, + "logits/chosen": -2.154526710510254, + "logits/rejected": -2.1380116939544678, + "logps/chosen": -36.37377166748047, + "logps/rejected": -226.2290802001953, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18072204291820526, + "rewards/margins": 1.4431930780410767, + "rewards/rejected": -1.262471079826355, + "step": 3272 + }, + { + "epoch": 0.19, + "learning_rate": 9.339988299451933e-08, + "logits/chosen": -2.2586348056793213, + "logits/rejected": -2.2450520992279053, + "logps/chosen": -0.00012659617641475052, + "logps/rejected": -278.80450439453125, + "loss": 0.3675, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8833845842891606e-06, + "rewards/margins": 3.1946027278900146, + "rewards/rejected": -3.1946046352386475, + "step": 3273 + }, + { + "epoch": 0.19, + "learning_rate": 9.339520255219703e-08, + "logits/chosen": -2.0997462272644043, + "logits/rejected": -2.096792697906494, + "logps/chosen": -0.18094459176063538, + "logps/rejected": -154.57711791992188, + "loss": 0.4745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005331477615982294, + "rewards/margins": 1.242843508720398, + "rewards/rejected": -1.248175024986267, + "step": 3274 + }, + { + "epoch": 0.19, + "learning_rate": 9.339052056826485e-08, + "logits/chosen": -2.0798678398132324, + "logits/rejected": -1.9351022243499756, + "logps/chosen": -350.55438232421875, + "logps/rejected": -945.8026123046875, + "loss": 0.1773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.467596411705017, + "rewards/margins": 1.565557837486267, + "rewards/rejected": -0.09796142578125, + "step": 3275 + }, + { + "epoch": 0.19, + "learning_rate": 9.338583704288913e-08, + "logits/chosen": -2.1732873916625977, + "logits/rejected": -2.1613070964813232, + "logps/chosen": -12.45316219329834, + "logps/rejected": -139.4512939453125, + "loss": 0.5732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09656476974487305, + "rewards/margins": 0.7009968757629395, + "rewards/rejected": -0.7975616455078125, + "step": 3276 + }, + { + "epoch": 0.19, + "learning_rate": 9.338115197623627e-08, + "logits/chosen": -2.1181209087371826, + "logits/rejected": -2.1031084060668945, + "logps/chosen": -0.27892521023750305, + "logps/rejected": -285.46881103515625, + "loss": 0.4233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024706659838557243, + "rewards/margins": 1.8818974494934082, + "rewards/rejected": -1.9066040515899658, + "step": 3277 + }, + { + "epoch": 0.19, + "learning_rate": 9.337646536847268e-08, + "logits/chosen": -2.1614749431610107, + "logits/rejected": -2.1536619663238525, + "logps/chosen": -35.501319885253906, + "logps/rejected": -322.27618408203125, + "loss": 0.3975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05153656005859375, + "rewards/margins": 2.132450819015503, + "rewards/rejected": -2.1839873790740967, + "step": 3278 + }, + { + "epoch": 0.19, + "learning_rate": 9.337177721976488e-08, + "logits/chosen": -2.142644166946411, + "logits/rejected": -2.1340742111206055, + "logps/chosen": -42.89724349975586, + "logps/rejected": -193.46224975585938, + "loss": 0.5098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0939609557390213, + "rewards/margins": 1.0191402435302734, + "rewards/rejected": -1.1131012439727783, + "step": 3279 + }, + { + "epoch": 0.19, + "learning_rate": 9.336708753027938e-08, + "logits/chosen": -2.1791861057281494, + "logits/rejected": -2.182295799255371, + "logps/chosen": -2.9300763607025146, + "logps/rejected": -105.68043518066406, + "loss": 0.5704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04786710813641548, + "rewards/margins": 0.42765533924102783, + "rewards/rejected": -0.37978821992874146, + "step": 3280 + }, + { + "epoch": 0.19, + "learning_rate": 9.33623963001828e-08, + "logits/chosen": -1.8873924016952515, + "logits/rejected": -1.8624883890151978, + "logps/chosen": -350.75885009765625, + "logps/rejected": -521.75, + "loss": 0.2521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2982056140899658, + "rewards/margins": 0.8732178211212158, + "rewards/rejected": 0.42498779296875, + "step": 3281 + }, + { + "epoch": 0.19, + "learning_rate": 9.33577035296418e-08, + "logits/chosen": -2.021375894546509, + "logits/rejected": -2.0207269191741943, + "logps/chosen": -9.118762016296387, + "logps/rejected": -234.58399963378906, + "loss": 0.4646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02534322813153267, + "rewards/margins": 1.444883942604065, + "rewards/rejected": -1.4702271223068237, + "step": 3282 + }, + { + "epoch": 0.19, + "learning_rate": 9.33530092188231e-08, + "logits/chosen": -2.028754711151123, + "logits/rejected": -2.01021146774292, + "logps/chosen": -177.81446838378906, + "logps/rejected": -236.5950469970703, + "loss": 0.6213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03860321268439293, + "rewards/margins": 0.260751336812973, + "rewards/rejected": -0.222148135304451, + "step": 3283 + }, + { + "epoch": 0.19, + "learning_rate": 9.334831336789343e-08, + "logits/chosen": -2.2352750301361084, + "logits/rejected": -2.228522539138794, + "logps/chosen": -0.922844409942627, + "logps/rejected": -78.50077056884766, + "loss": 0.6172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007146209478378296, + "rewards/margins": 0.3046068847179413, + "rewards/rejected": -0.3117530941963196, + "step": 3284 + }, + { + "epoch": 0.19, + "learning_rate": 9.334361597701963e-08, + "logits/chosen": -2.0740602016448975, + "logits/rejected": -2.0385231971740723, + "logps/chosen": -224.16323852539062, + "logps/rejected": -436.1979064941406, + "loss": 0.256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.115228295326233, + "rewards/margins": 1.2103179693222046, + "rewards/rejected": -0.09508972615003586, + "step": 3285 + }, + { + "epoch": 0.19, + "learning_rate": 9.333891704636858e-08, + "logits/chosen": -1.8279368877410889, + "logits/rejected": -1.821229100227356, + "logps/chosen": -236.4073028564453, + "logps/rejected": -349.2141418457031, + "loss": 0.5266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4606125056743622, + "rewards/margins": 0.26765596866607666, + "rewards/rejected": 0.19295655190944672, + "step": 3286 + }, + { + "epoch": 0.19, + "learning_rate": 9.33342165761072e-08, + "logits/chosen": -2.056607246398926, + "logits/rejected": -2.054518461227417, + "logps/chosen": -0.5782595276832581, + "logps/rejected": -136.6129608154297, + "loss": 0.5433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03248380869626999, + "rewards/margins": 0.7463735938072205, + "rewards/rejected": -0.778857409954071, + "step": 3287 + }, + { + "epoch": 0.19, + "learning_rate": 9.332951456640247e-08, + "logits/chosen": -2.109926700592041, + "logits/rejected": -2.1082470417022705, + "logps/chosen": -122.84722137451172, + "logps/rejected": -258.62847900390625, + "loss": 0.3998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4479789733886719, + "rewards/margins": 0.7971885800361633, + "rewards/rejected": -0.34920960664749146, + "step": 3288 + }, + { + "epoch": 0.19, + "learning_rate": 9.332481101742146e-08, + "logits/chosen": -2.093524694442749, + "logits/rejected": -2.088797092437744, + "logps/chosen": -6.484892219305038e-05, + "logps/rejected": -138.87213134765625, + "loss": 0.5983, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.960610138799893e-08, + "rewards/margins": 0.42230841517448425, + "rewards/rejected": -0.4223083555698395, + "step": 3289 + }, + { + "epoch": 0.19, + "learning_rate": 9.332010592933121e-08, + "logits/chosen": -2.0998342037200928, + "logits/rejected": -2.038515329360962, + "logps/chosen": -222.63973999023438, + "logps/rejected": -506.7924499511719, + "loss": 0.247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9739532470703125, + "rewards/margins": 1.599945068359375, + "rewards/rejected": -0.6259918212890625, + "step": 3290 + }, + { + "epoch": 0.19, + "learning_rate": 9.331539930229893e-08, + "logits/chosen": -2.1755964756011963, + "logits/rejected": -2.17297101020813, + "logps/chosen": -0.45610588788986206, + "logps/rejected": -186.25161743164062, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006605613511055708, + "rewards/margins": 1.8555877208709717, + "rewards/rejected": -1.8621933460235596, + "step": 3291 + }, + { + "epoch": 0.19, + "learning_rate": 9.331069113649176e-08, + "logits/chosen": -2.1082260608673096, + "logits/rejected": -2.1223487854003906, + "logps/chosen": -301.41046142578125, + "logps/rejected": -401.13702392578125, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3783081769943237, + "rewards/margins": 0.4287598133087158, + "rewards/rejected": 0.9495483636856079, + "step": 3292 + }, + { + "epoch": 0.19, + "learning_rate": 9.3305981432077e-08, + "logits/chosen": -2.144827365875244, + "logits/rejected": -2.1520752906799316, + "logps/chosen": -250.38868713378906, + "logps/rejected": -280.6277770996094, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1693557500839233, + "rewards/margins": 0.7222640514373779, + "rewards/rejected": 0.447091668844223, + "step": 3293 + }, + { + "epoch": 0.19, + "learning_rate": 9.330127018922194e-08, + "logits/chosen": -1.9039156436920166, + "logits/rejected": -1.8980121612548828, + "logps/chosen": -217.24285888671875, + "logps/rejected": -345.9027404785156, + "loss": 0.2628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.330804467201233, + "rewards/margins": 0.9565521478652954, + "rewards/rejected": 0.3742523193359375, + "step": 3294 + }, + { + "epoch": 0.19, + "learning_rate": 9.329655740809395e-08, + "logits/chosen": -2.2199490070343018, + "logits/rejected": -2.2097673416137695, + "logps/chosen": -11.386205673217773, + "logps/rejected": -127.21961975097656, + "loss": 0.5311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08767195045948029, + "rewards/margins": 0.9599644541740417, + "rewards/rejected": -1.0476363897323608, + "step": 3295 + }, + { + "epoch": 0.19, + "learning_rate": 9.329184308886044e-08, + "logits/chosen": -2.1796083450317383, + "logits/rejected": -2.173851251602173, + "logps/chosen": -5.635406970977783, + "logps/rejected": -50.71929168701172, + "loss": 0.6533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01609940640628338, + "rewards/margins": 0.2078363001346588, + "rewards/rejected": -0.22393570840358734, + "step": 3296 + }, + { + "epoch": 0.19, + "learning_rate": 9.328712723168893e-08, + "logits/chosen": -2.0940706729888916, + "logits/rejected": -2.0551817417144775, + "logps/chosen": -257.5516357421875, + "logps/rejected": -502.4461669921875, + "loss": 0.388, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.216833472251892, + "rewards/margins": 0.25603026151657104, + "rewards/rejected": 0.960803210735321, + "step": 3297 + }, + { + "epoch": 0.19, + "learning_rate": 9.32824098367469e-08, + "logits/chosen": -1.9766056537628174, + "logits/rejected": -1.9114727973937988, + "logps/chosen": -371.11187744140625, + "logps/rejected": -598.17578125, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.325775146484375, + "rewards/margins": 0.40223389863967896, + "rewards/rejected": 0.923541247844696, + "step": 3298 + }, + { + "epoch": 0.19, + "learning_rate": 9.327769090420196e-08, + "logits/chosen": -1.8214093446731567, + "logits/rejected": -1.8230383396148682, + "logps/chosen": -0.028646737337112427, + "logps/rejected": -181.8878631591797, + "loss": 0.505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044345800415612757, + "rewards/margins": 0.9736669659614563, + "rewards/rejected": -0.9741104245185852, + "step": 3299 + }, + { + "epoch": 0.19, + "learning_rate": 9.327297043422175e-08, + "logits/chosen": -2.156493663787842, + "logits/rejected": -2.1455233097076416, + "logps/chosen": -0.16296619176864624, + "logps/rejected": -168.57598876953125, + "loss": 0.4457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026983649004250765, + "rewards/margins": 1.5258065462112427, + "rewards/rejected": -1.5285049676895142, + "step": 3300 + }, + { + "epoch": 0.19, + "learning_rate": 9.326824842697397e-08, + "logits/chosen": -1.9568792581558228, + "logits/rejected": -1.9497454166412354, + "logps/chosen": -48.809783935546875, + "logps/rejected": -196.01568603515625, + "loss": 0.4863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11345825344324112, + "rewards/margins": 1.7809631824493408, + "rewards/rejected": -1.8944214582443237, + "step": 3301 + }, + { + "epoch": 0.19, + "learning_rate": 9.326352488262634e-08, + "logits/chosen": -2.100602149963379, + "logits/rejected": -2.0605738162994385, + "logps/chosen": -203.50418090820312, + "logps/rejected": -356.4678955078125, + "loss": 0.2396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6496933698654175, + "rewards/margins": 0.8194870352745056, + "rewards/rejected": 0.8302063345909119, + "step": 3302 + }, + { + "epoch": 0.19, + "learning_rate": 9.325879980134669e-08, + "logits/chosen": -2.1290149688720703, + "logits/rejected": -2.113018274307251, + "logps/chosen": -64.08517456054688, + "logps/rejected": -312.6896057128906, + "loss": 0.3483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2428489774465561, + "rewards/margins": 1.7644157409667969, + "rewards/rejected": -1.5215667486190796, + "step": 3303 + }, + { + "epoch": 0.19, + "learning_rate": 9.325407318330286e-08, + "logits/chosen": -2.1023776531219482, + "logits/rejected": -2.1042799949645996, + "logps/chosen": -17.62513542175293, + "logps/rejected": -71.63162231445312, + "loss": 0.7194, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.031249618157744408, + "rewards/margins": -0.1315792202949524, + "rewards/rejected": 0.16282883286476135, + "step": 3304 + }, + { + "epoch": 0.19, + "learning_rate": 9.324934502866277e-08, + "logits/chosen": -2.081721305847168, + "logits/rejected": -2.0705056190490723, + "logps/chosen": -234.1640625, + "logps/rejected": -313.3353576660156, + "loss": 0.3926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.406367540359497, + "rewards/margins": 0.10236060619354248, + "rewards/rejected": 1.3040069341659546, + "step": 3305 + }, + { + "epoch": 0.19, + "learning_rate": 9.32446153375944e-08, + "logits/chosen": -2.294189691543579, + "logits/rejected": -2.2912042140960693, + "logps/chosen": -15.452352523803711, + "logps/rejected": -62.82792663574219, + "loss": 0.6785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008560562506318092, + "rewards/margins": 0.04891624301671982, + "rewards/rejected": -0.05747680738568306, + "step": 3306 + }, + { + "epoch": 0.19, + "learning_rate": 9.323988411026575e-08, + "logits/chosen": -2.0846047401428223, + "logits/rejected": -2.0877606868743896, + "logps/chosen": -27.534690856933594, + "logps/rejected": -151.27537536621094, + "loss": 0.5146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09009227901697159, + "rewards/margins": 1.0279146432876587, + "rewards/rejected": -1.118006944656372, + "step": 3307 + }, + { + "epoch": 0.19, + "learning_rate": 9.323515134684492e-08, + "logits/chosen": -2.1382131576538086, + "logits/rejected": -2.1314761638641357, + "logps/chosen": -210.26426696777344, + "logps/rejected": -529.1651000976562, + "loss": 0.2648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1405853033065796, + "rewards/margins": 0.9476104378700256, + "rewards/rejected": 0.19297485053539276, + "step": 3308 + }, + { + "epoch": 0.19, + "learning_rate": 9.323041704750003e-08, + "logits/chosen": -1.9481486082077026, + "logits/rejected": -1.9105013608932495, + "logps/chosen": -176.94775390625, + "logps/rejected": -355.27752685546875, + "loss": 0.1938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.02696692943573, + "rewards/margins": 1.713435411453247, + "rewards/rejected": -0.6864685416221619, + "step": 3309 + }, + { + "epoch": 0.19, + "learning_rate": 9.322568121239927e-08, + "logits/chosen": -1.789000153541565, + "logits/rejected": -1.7819803953170776, + "logps/chosen": -282.36920166015625, + "logps/rejected": -439.927001953125, + "loss": 0.5145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48133546113967896, + "rewards/margins": 0.27619630098342896, + "rewards/rejected": 0.20513916015625, + "step": 3310 + }, + { + "epoch": 0.19, + "learning_rate": 9.322094384171086e-08, + "logits/chosen": -2.2682559490203857, + "logits/rejected": -2.262162923812866, + "logps/chosen": -30.804523468017578, + "logps/rejected": -177.065673828125, + "loss": 0.6173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010192490182816982, + "rewards/margins": 0.31496238708496094, + "rewards/rejected": -0.304769903421402, + "step": 3311 + }, + { + "epoch": 0.19, + "learning_rate": 9.32162049356031e-08, + "logits/chosen": -2.164477586746216, + "logits/rejected": -2.1634159088134766, + "logps/chosen": -0.00024018797557801008, + "logps/rejected": -98.34728240966797, + "loss": 0.6909, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6472971765324473e-05, + "rewards/margins": 0.021123817190527916, + "rewards/rejected": -0.021140290424227715, + "step": 3312 + }, + { + "epoch": 0.19, + "learning_rate": 9.321146449424435e-08, + "logits/chosen": -1.9027457237243652, + "logits/rejected": -1.9159467220306396, + "logps/chosen": -12.248332023620605, + "logps/rejected": -153.38356018066406, + "loss": 0.4301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016945648938417435, + "rewards/margins": 1.760286808013916, + "rewards/rejected": -1.7772324085235596, + "step": 3313 + }, + { + "epoch": 0.19, + "learning_rate": 9.320672251780303e-08, + "logits/chosen": -2.1175177097320557, + "logits/rejected": -2.112752914428711, + "logps/chosen": -16.66160011291504, + "logps/rejected": -183.73211669921875, + "loss": 0.4792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04223480448126793, + "rewards/margins": 0.9944198727607727, + "rewards/rejected": -0.9521850943565369, + "step": 3314 + }, + { + "epoch": 0.19, + "learning_rate": 9.320197900644757e-08, + "logits/chosen": -2.0205726623535156, + "logits/rejected": -2.011219024658203, + "logps/chosen": -98.68087768554688, + "logps/rejected": -408.74420166015625, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14475707709789276, + "rewards/margins": 2.205819606781006, + "rewards/rejected": -2.0610625743865967, + "step": 3315 + }, + { + "epoch": 0.19, + "learning_rate": 9.319723396034648e-08, + "logits/chosen": -2.109318971633911, + "logits/rejected": -2.1215460300445557, + "logps/chosen": -176.91006469726562, + "logps/rejected": -195.41769409179688, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6306396722793579, + "rewards/margins": 0.46510928869247437, + "rewards/rejected": 0.16553039848804474, + "step": 3316 + }, + { + "epoch": 0.19, + "learning_rate": 9.319248737966836e-08, + "logits/chosen": -2.055137872695923, + "logits/rejected": -2.0316379070281982, + "logps/chosen": -18.050350189208984, + "logps/rejected": -184.22775268554688, + "loss": 0.5089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061037253588438034, + "rewards/margins": 0.7122106552124023, + "rewards/rejected": -0.6511734127998352, + "step": 3317 + }, + { + "epoch": 0.19, + "learning_rate": 9.318773926458179e-08, + "logits/chosen": -2.056926727294922, + "logits/rejected": -2.053863048553467, + "logps/chosen": -85.67146301269531, + "logps/rejected": -285.494384765625, + "loss": 0.5779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5076927542686462, + "rewards/margins": 1.378049373626709, + "rewards/rejected": -1.8857421875, + "step": 3318 + }, + { + "epoch": 0.19, + "learning_rate": 9.318298961525548e-08, + "logits/chosen": -1.788638710975647, + "logits/rejected": -1.7801318168640137, + "logps/chosen": -284.1215515136719, + "logps/rejected": -336.5101318359375, + "loss": 0.4587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.372702032327652, + "rewards/margins": 0.4386749267578125, + "rewards/rejected": -0.06597290188074112, + "step": 3319 + }, + { + "epoch": 0.19, + "learning_rate": 9.317823843185815e-08, + "logits/chosen": -2.130544424057007, + "logits/rejected": -2.0871734619140625, + "logps/chosen": -180.65867614746094, + "logps/rejected": -305.23052978515625, + "loss": 0.5375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3849243223667145, + "rewards/margins": 0.3469390869140625, + "rewards/rejected": 0.03798523172736168, + "step": 3320 + }, + { + "epoch": 0.19, + "learning_rate": 9.317348571455856e-08, + "logits/chosen": -1.9470237493515015, + "logits/rejected": -1.9248870611190796, + "logps/chosen": -252.7652587890625, + "logps/rejected": -500.9380187988281, + "loss": 0.1496, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.433203101158142, + "rewards/margins": 1.4810149669647217, + "rewards/rejected": -0.04781189188361168, + "step": 3321 + }, + { + "epoch": 0.19, + "learning_rate": 9.31687314635256e-08, + "logits/chosen": -2.147101879119873, + "logits/rejected": -2.151390790939331, + "logps/chosen": -0.00036926145548932254, + "logps/rejected": -101.90937042236328, + "loss": 0.5879, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.057642273546662e-06, + "rewards/margins": 0.47779321670532227, + "rewards/rejected": -0.4778022766113281, + "step": 3322 + }, + { + "epoch": 0.19, + "learning_rate": 9.316397567892813e-08, + "logits/chosen": -1.9558422565460205, + "logits/rejected": -1.9424840211868286, + "logps/chosen": -316.0605773925781, + "logps/rejected": -309.0766906738281, + "loss": 0.1712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7571258544921875, + "rewards/margins": 1.270941138267517, + "rewards/rejected": 0.486184686422348, + "step": 3323 + }, + { + "epoch": 0.19, + "learning_rate": 9.31592183609351e-08, + "logits/chosen": -1.9916878938674927, + "logits/rejected": -2.019217014312744, + "logps/chosen": -203.290283203125, + "logps/rejected": -253.64913940429688, + "loss": 0.2342, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5805816650390625, + "rewards/margins": 0.9441466927528381, + "rewards/rejected": 0.6364349722862244, + "step": 3324 + }, + { + "epoch": 0.19, + "learning_rate": 9.315445950971552e-08, + "logits/chosen": -2.0423827171325684, + "logits/rejected": -2.0475010871887207, + "logps/chosen": -422.7093505859375, + "logps/rejected": -405.9239501953125, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.978118896484375, + "rewards/margins": 0.8123260736465454, + "rewards/rejected": 0.16579285264015198, + "step": 3325 + }, + { + "epoch": 0.19, + "learning_rate": 9.314969912543845e-08, + "logits/chosen": -2.0234713554382324, + "logits/rejected": -2.0559542179107666, + "logps/chosen": -153.97360229492188, + "logps/rejected": -232.14523315429688, + "loss": 0.4852, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6686111688613892, + "rewards/margins": 0.06932371854782104, + "rewards/rejected": 0.5992874503135681, + "step": 3326 + }, + { + "epoch": 0.19, + "learning_rate": 9.3144937208273e-08, + "logits/chosen": -2.066978931427002, + "logits/rejected": -2.0488362312316895, + "logps/chosen": -218.73196411132812, + "logps/rejected": -315.8643798828125, + "loss": 0.2787, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4080063104629517, + "rewards/margins": 0.7474045157432556, + "rewards/rejected": 0.660601794719696, + "step": 3327 + }, + { + "epoch": 0.19, + "learning_rate": 9.314017375838835e-08, + "logits/chosen": -2.1030261516571045, + "logits/rejected": -2.0946686267852783, + "logps/chosen": -221.1219482421875, + "logps/rejected": -369.219482421875, + "loss": 0.4014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4881210327148438, + "rewards/margins": 0.02843475341796875, + "rewards/rejected": 1.459686279296875, + "step": 3328 + }, + { + "epoch": 0.19, + "learning_rate": 9.313540877595368e-08, + "logits/chosen": -2.1085519790649414, + "logits/rejected": -2.0894367694854736, + "logps/chosen": -43.074806213378906, + "logps/rejected": -255.9561309814453, + "loss": 0.3822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12085419148206711, + "rewards/margins": 1.8711425065994263, + "rewards/rejected": -1.7502883672714233, + "step": 3329 + }, + { + "epoch": 0.19, + "learning_rate": 9.31306422611383e-08, + "logits/chosen": -2.0620357990264893, + "logits/rejected": -2.0132908821105957, + "logps/chosen": -182.85525512695312, + "logps/rejected": -254.98028564453125, + "loss": 0.5261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5224609375, + "rewards/margins": 0.36491698026657104, + "rewards/rejected": 0.15754394233226776, + "step": 3330 + }, + { + "epoch": 0.19, + "learning_rate": 9.312587421411153e-08, + "logits/chosen": -2.3400397300720215, + "logits/rejected": -2.3319602012634277, + "logps/chosen": -2.130539655685425, + "logps/rejected": -208.1307830810547, + "loss": 0.5003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016469955153297633, + "rewards/margins": 1.0264267921447754, + "rewards/rejected": -1.0265915393829346, + "step": 3331 + }, + { + "epoch": 0.19, + "learning_rate": 9.312110463504277e-08, + "logits/chosen": -1.965305209159851, + "logits/rejected": -1.9815729856491089, + "logps/chosen": -156.40463256835938, + "logps/rejected": -217.91366577148438, + "loss": 0.5036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3843017518520355, + "rewards/margins": 0.50457763671875, + "rewards/rejected": -0.12027587741613388, + "step": 3332 + }, + { + "epoch": 0.19, + "learning_rate": 9.311633352410144e-08, + "logits/chosen": -2.1299479007720947, + "logits/rejected": -2.123422145843506, + "logps/chosen": -54.64055633544922, + "logps/rejected": -218.69943237304688, + "loss": 0.5164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022297287359833717, + "rewards/margins": 0.6178417205810547, + "rewards/rejected": -0.5955444574356079, + "step": 3333 + }, + { + "epoch": 0.19, + "learning_rate": 9.311156088145704e-08, + "logits/chosen": -2.1615750789642334, + "logits/rejected": -2.161397695541382, + "logps/chosen": -6.767592906951904, + "logps/rejected": -187.44979858398438, + "loss": 0.4839, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08078112453222275, + "rewards/margins": 1.3447407484054565, + "rewards/rejected": -1.4255218505859375, + "step": 3334 + }, + { + "epoch": 0.19, + "learning_rate": 9.310678670727912e-08, + "logits/chosen": -1.8374379873275757, + "logits/rejected": -1.8078200817108154, + "logps/chosen": -220.40567016601562, + "logps/rejected": -247.5909423828125, + "loss": 0.5104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44781190156936646, + "rewards/margins": 0.29759520292282104, + "rewards/rejected": 0.15021668374538422, + "step": 3335 + }, + { + "epoch": 0.19, + "learning_rate": 9.310201100173727e-08, + "logits/chosen": -2.0758891105651855, + "logits/rejected": -2.0711472034454346, + "logps/chosen": -0.00015759174129925668, + "logps/rejected": -120.60868835449219, + "loss": 0.6354, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0741631487908307e-06, + "rewards/margins": 0.24660056829452515, + "rewards/rejected": -0.2466026395559311, + "step": 3336 + }, + { + "epoch": 0.19, + "learning_rate": 9.309723376500114e-08, + "logits/chosen": -2.104342222213745, + "logits/rejected": -2.0710933208465576, + "logps/chosen": -221.41419982910156, + "logps/rejected": -411.9265441894531, + "loss": 0.1527, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0165131092071533, + "rewards/margins": 1.3227112293243408, + "rewards/rejected": 0.6938018798828125, + "step": 3337 + }, + { + "epoch": 0.19, + "learning_rate": 9.309245499724048e-08, + "logits/chosen": -2.1454858779907227, + "logits/rejected": -2.110828399658203, + "logps/chosen": -226.5096435546875, + "logps/rejected": -589.2470092773438, + "loss": 0.0817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6077362298965454, + "rewards/margins": 3.400521755218506, + "rewards/rejected": -1.79278564453125, + "step": 3338 + }, + { + "epoch": 0.19, + "learning_rate": 9.308767469862502e-08, + "logits/chosen": -2.132941246032715, + "logits/rejected": -2.1104466915130615, + "logps/chosen": -29.2641544342041, + "logps/rejected": -229.34735107421875, + "loss": 0.3991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36100560426712036, + "rewards/margins": 1.0305582284927368, + "rewards/rejected": -0.6695526242256165, + "step": 3339 + }, + { + "epoch": 0.19, + "learning_rate": 9.308289286932458e-08, + "logits/chosen": -2.053607702255249, + "logits/rejected": -2.0541343688964844, + "logps/chosen": -3.087504956056364e-05, + "logps/rejected": -219.35943603515625, + "loss": 0.4136, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.721998945773521e-07, + "rewards/margins": 1.7544580698013306, + "rewards/rejected": -1.7544586658477783, + "step": 3340 + }, + { + "epoch": 0.19, + "learning_rate": 9.307810950950906e-08, + "logits/chosen": -2.001588821411133, + "logits/rejected": -1.9974501132965088, + "logps/chosen": -12.875995635986328, + "logps/rejected": -120.98635864257812, + "loss": 0.4334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01248712558299303, + "rewards/margins": 1.6242845058441162, + "rewards/rejected": -1.6117973327636719, + "step": 3341 + }, + { + "epoch": 0.19, + "learning_rate": 9.307332461934837e-08, + "logits/chosen": -2.084174633026123, + "logits/rejected": -2.01421856880188, + "logps/chosen": -242.0706787109375, + "logps/rejected": -310.89056396484375, + "loss": 0.2033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6641358137130737, + "rewards/margins": 1.0732239484786987, + "rewards/rejected": 0.590911865234375, + "step": 3342 + }, + { + "epoch": 0.19, + "learning_rate": 9.30685381990125e-08, + "logits/chosen": -1.891127586364746, + "logits/rejected": -1.8850260972976685, + "logps/chosen": -6.848624229431152, + "logps/rejected": -107.00333404541016, + "loss": 0.4826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05405845865607262, + "rewards/margins": 1.3339756727218628, + "rewards/rejected": -1.3880341053009033, + "step": 3343 + }, + { + "epoch": 0.19, + "learning_rate": 9.306375024867147e-08, + "logits/chosen": -1.9286712408065796, + "logits/rejected": -1.9195396900177002, + "logps/chosen": -187.4464874267578, + "logps/rejected": -280.8779296875, + "loss": 0.2567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.204278588294983, + "rewards/margins": 1.0431641340255737, + "rewards/rejected": 0.16111449897289276, + "step": 3344 + }, + { + "epoch": 0.19, + "learning_rate": 9.305896076849538e-08, + "logits/chosen": -2.0183677673339844, + "logits/rejected": -2.0495150089263916, + "logps/chosen": -221.129638671875, + "logps/rejected": -362.8171691894531, + "loss": 0.1153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.11406409740448, + "rewards/margins": 3.0178422927856445, + "rewards/rejected": -1.903778076171875, + "step": 3345 + }, + { + "epoch": 0.19, + "learning_rate": 9.305416975865439e-08, + "logits/chosen": -1.9496625661849976, + "logits/rejected": -1.9027639627456665, + "logps/chosen": -258.8023376464844, + "logps/rejected": -397.8549499511719, + "loss": 0.5111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31463930010795593, + "rewards/margins": 0.36234742403030396, + "rewards/rejected": -0.04770813137292862, + "step": 3346 + }, + { + "epoch": 0.19, + "learning_rate": 9.304937721931869e-08, + "logits/chosen": -2.1541504859924316, + "logits/rejected": -2.1570913791656494, + "logps/chosen": -35.14466094970703, + "logps/rejected": -236.42453002929688, + "loss": 0.5543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1365024596452713, + "rewards/margins": 0.4179660677909851, + "rewards/rejected": -0.281463623046875, + "step": 3347 + }, + { + "epoch": 0.19, + "learning_rate": 9.304458315065853e-08, + "logits/chosen": -2.1066462993621826, + "logits/rejected": -2.099323034286499, + "logps/chosen": -109.43675231933594, + "logps/rejected": -310.83551025390625, + "loss": 0.5464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3561035096645355, + "rewards/margins": 1.4933350086212158, + "rewards/rejected": -1.8494385480880737, + "step": 3348 + }, + { + "epoch": 0.19, + "learning_rate": 9.303978755284423e-08, + "logits/chosen": -2.104243040084839, + "logits/rejected": -2.1105716228485107, + "logps/chosen": -17.06899642944336, + "logps/rejected": -91.802978515625, + "loss": 0.6024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02188720740377903, + "rewards/margins": 0.43131789565086365, + "rewards/rejected": -0.4532051086425781, + "step": 3349 + }, + { + "epoch": 0.19, + "learning_rate": 9.303499042604614e-08, + "logits/chosen": -1.9219385385513306, + "logits/rejected": -1.87582266330719, + "logps/chosen": -174.70193481445312, + "logps/rejected": -423.0451965332031, + "loss": 0.3834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5717865228652954, + "rewards/margins": 0.9462860822677612, + "rewards/rejected": -0.37449952960014343, + "step": 3350 + }, + { + "epoch": 0.2, + "learning_rate": 9.303019177043467e-08, + "logits/chosen": -2.1470110416412354, + "logits/rejected": -2.137059450149536, + "logps/chosen": -35.85192108154297, + "logps/rejected": -173.3178253173828, + "loss": 0.5851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025267792865633965, + "rewards/margins": 0.5320701599121094, + "rewards/rejected": -0.5573379397392273, + "step": 3351 + }, + { + "epoch": 0.2, + "learning_rate": 9.302539158618033e-08, + "logits/chosen": -2.0692477226257324, + "logits/rejected": -2.126183032989502, + "logps/chosen": -222.50135803222656, + "logps/rejected": -292.357177734375, + "loss": 0.2257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.183264136314392, + "rewards/margins": 1.1842162609100342, + "rewards/rejected": -0.0009521484607830644, + "step": 3352 + }, + { + "epoch": 0.2, + "learning_rate": 9.30205898734536e-08, + "logits/chosen": -2.006720542907715, + "logits/rejected": -2.0006699562072754, + "logps/chosen": -198.44561767578125, + "logps/rejected": -234.5199432373047, + "loss": 0.369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9462356567382812, + "rewards/margins": 0.0833892822265625, + "rewards/rejected": 1.8628463745117188, + "step": 3353 + }, + { + "epoch": 0.2, + "learning_rate": 9.301578663242509e-08, + "logits/chosen": -2.0162010192871094, + "logits/rejected": -2.015690326690674, + "logps/chosen": -2.1510226726531982, + "logps/rejected": -198.1621551513672, + "loss": 0.4944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030405377969145775, + "rewards/margins": 1.1655510663986206, + "rewards/rejected": -1.1959564685821533, + "step": 3354 + }, + { + "epoch": 0.2, + "learning_rate": 9.301098186326543e-08, + "logits/chosen": -2.144299268722534, + "logits/rejected": -2.149162769317627, + "logps/chosen": -22.25425148010254, + "logps/rejected": -100.36869812011719, + "loss": 0.4701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3864198625087738, + "rewards/margins": 0.6214219927787781, + "rewards/rejected": -0.23500214517116547, + "step": 3355 + }, + { + "epoch": 0.2, + "learning_rate": 9.30061755661453e-08, + "logits/chosen": -2.091585159301758, + "logits/rejected": -2.091761350631714, + "logps/chosen": -27.143844604492188, + "logps/rejected": -124.66453552246094, + "loss": 0.6719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0830177292227745, + "rewards/margins": 0.213287353515625, + "rewards/rejected": -0.2963050901889801, + "step": 3356 + }, + { + "epoch": 0.2, + "learning_rate": 9.300136774123544e-08, + "logits/chosen": -1.974987268447876, + "logits/rejected": -1.9774976968765259, + "logps/chosen": -7.965970516204834, + "logps/rejected": -127.28855895996094, + "loss": 0.4095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15180182456970215, + "rewards/margins": 1.5029516220092773, + "rewards/rejected": -1.3511497974395752, + "step": 3357 + }, + { + "epoch": 0.2, + "learning_rate": 9.299655838870667e-08, + "logits/chosen": -2.0228793621063232, + "logits/rejected": -2.022446870803833, + "logps/chosen": -209.64718627929688, + "logps/rejected": -257.46966552734375, + "loss": 0.5874, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08971405029296875, + "rewards/margins": -0.06793366372585297, + "rewards/rejected": 0.15764771401882172, + "step": 3358 + }, + { + "epoch": 0.2, + "learning_rate": 9.299174750872983e-08, + "logits/chosen": -2.27526593208313, + "logits/rejected": -2.263413906097412, + "logps/chosen": -200.76962280273438, + "logps/rejected": -263.6954345703125, + "loss": 0.4651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7262207269668579, + "rewards/margins": 0.42561647295951843, + "rewards/rejected": 0.3006042540073395, + "step": 3359 + }, + { + "epoch": 0.2, + "learning_rate": 9.298693510147579e-08, + "logits/chosen": -2.0108907222747803, + "logits/rejected": -1.9902161359786987, + "logps/chosen": -294.4647216796875, + "logps/rejected": -374.70355224609375, + "loss": 0.179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1371430158615112, + "rewards/margins": 1.4692200422286987, + "rewards/rejected": -0.3320770263671875, + "step": 3360 + }, + { + "epoch": 0.2, + "learning_rate": 9.298212116711558e-08, + "logits/chosen": -2.1636555194854736, + "logits/rejected": -2.1641807556152344, + "logps/chosen": -340.90423583984375, + "logps/rejected": -339.55926513671875, + "loss": 0.4189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.151739478111267, + "rewards/margins": 0.14323115348815918, + "rewards/rejected": 1.008508324623108, + "step": 3361 + }, + { + "epoch": 0.2, + "learning_rate": 9.297730570582016e-08, + "logits/chosen": -1.9119267463684082, + "logits/rejected": -1.8910688161849976, + "logps/chosen": -194.8001708984375, + "logps/rejected": -496.32763671875, + "loss": 0.4231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4269607663154602, + "rewards/margins": 0.49945831298828125, + "rewards/rejected": -0.07249756157398224, + "step": 3362 + }, + { + "epoch": 0.2, + "learning_rate": 9.297248871776062e-08, + "logits/chosen": -2.0533134937286377, + "logits/rejected": -2.067155361175537, + "logps/chosen": -123.4346923828125, + "logps/rejected": -231.78358459472656, + "loss": 0.516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3553421199321747, + "rewards/margins": 0.45044252276420593, + "rewards/rejected": -0.09510040283203125, + "step": 3363 + }, + { + "epoch": 0.2, + "learning_rate": 9.29676702031081e-08, + "logits/chosen": -2.101001739501953, + "logits/rejected": -2.1087968349456787, + "logps/chosen": -34.77339172363281, + "logps/rejected": -71.59373474121094, + "loss": 0.6951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09309997409582138, + "rewards/margins": 0.093419648706913, + "rewards/rejected": -0.18651962280273438, + "step": 3364 + }, + { + "epoch": 0.2, + "learning_rate": 9.296285016203373e-08, + "logits/chosen": -2.015674591064453, + "logits/rejected": -2.020054817199707, + "logps/chosen": -4.3018927574157715, + "logps/rejected": -94.05839538574219, + "loss": 0.5679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008974266238510609, + "rewards/margins": 0.557045578956604, + "rewards/rejected": -0.5480713248252869, + "step": 3365 + }, + { + "epoch": 0.2, + "learning_rate": 9.295802859470877e-08, + "logits/chosen": -1.9825758934020996, + "logits/rejected": -1.9999592304229736, + "logps/chosen": -347.550048828125, + "logps/rejected": -517.6396484375, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.330804467201233, + "rewards/margins": 4.154595851898193, + "rewards/rejected": -2.82379150390625, + "step": 3366 + }, + { + "epoch": 0.2, + "learning_rate": 9.295320550130451e-08, + "logits/chosen": -2.063823699951172, + "logits/rejected": -2.1134555339813232, + "logps/chosen": -224.068603515625, + "logps/rejected": -173.37445068359375, + "loss": 0.5182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.662792980670929, + "rewards/margins": 0.01312863826751709, + "rewards/rejected": 0.6496643424034119, + "step": 3367 + }, + { + "epoch": 0.2, + "learning_rate": 9.294838088199228e-08, + "logits/chosen": -1.986006259918213, + "logits/rejected": -1.9717168807983398, + "logps/chosen": -0.0027721745427697897, + "logps/rejected": -193.47625732421875, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001563242549309507, + "rewards/margins": 2.8155784606933594, + "rewards/rejected": -2.81573486328125, + "step": 3368 + }, + { + "epoch": 0.2, + "learning_rate": 9.294355473694349e-08, + "logits/chosen": -2.0629093647003174, + "logits/rejected": -2.06173038482666, + "logps/chosen": -78.57958984375, + "logps/rejected": -218.87974548339844, + "loss": 0.3616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2804306149482727, + "rewards/margins": 1.464552402496338, + "rewards/rejected": -1.1841217279434204, + "step": 3369 + }, + { + "epoch": 0.2, + "learning_rate": 9.293872706632957e-08, + "logits/chosen": -2.2912003993988037, + "logits/rejected": -2.2831881046295166, + "logps/chosen": -46.16878890991211, + "logps/rejected": -228.02996826171875, + "loss": 0.3918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05831336975097656, + "rewards/margins": 2.0382511615753174, + "rewards/rejected": -1.9799377918243408, + "step": 3370 + }, + { + "epoch": 0.2, + "learning_rate": 9.293389787032203e-08, + "logits/chosen": -1.9627728462219238, + "logits/rejected": -1.9282315969467163, + "logps/chosen": -300.57763671875, + "logps/rejected": -374.04119873046875, + "loss": 0.4479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7226959466934204, + "rewards/margins": 0.38663026690483093, + "rewards/rejected": 0.3360656797885895, + "step": 3371 + }, + { + "epoch": 0.2, + "learning_rate": 9.292906714909241e-08, + "logits/chosen": -1.7333238124847412, + "logits/rejected": -1.6204890012741089, + "logps/chosen": -267.934326171875, + "logps/rejected": -576.7258911132812, + "loss": 0.4094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.447967529296875, + "rewards/margins": 0.965405285358429, + "rewards/rejected": -0.517437756061554, + "step": 3372 + }, + { + "epoch": 0.2, + "learning_rate": 9.292423490281235e-08, + "logits/chosen": -2.0921034812927246, + "logits/rejected": -2.086102247238159, + "logps/chosen": -29.07333755493164, + "logps/rejected": -172.7364044189453, + "loss": 0.4251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1455976516008377, + "rewards/margins": 1.2222914695739746, + "rewards/rejected": -1.0766937732696533, + "step": 3373 + }, + { + "epoch": 0.2, + "learning_rate": 9.291940113165351e-08, + "logits/chosen": -1.9388097524642944, + "logits/rejected": -1.935356616973877, + "logps/chosen": -217.65476989746094, + "logps/rejected": -393.06500244140625, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8321273922920227, + "rewards/margins": 1.0507370233535767, + "rewards/rejected": -0.21860961616039276, + "step": 3374 + }, + { + "epoch": 0.2, + "learning_rate": 9.291456583578758e-08, + "logits/chosen": -2.1378917694091797, + "logits/rejected": -2.1087679862976074, + "logps/chosen": -250.56112670898438, + "logps/rejected": -430.2074890136719, + "loss": 0.1917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4643462896347046, + "rewards/margins": 1.28533935546875, + "rewards/rejected": 0.17900696396827698, + "step": 3375 + }, + { + "epoch": 0.2, + "learning_rate": 9.290972901538637e-08, + "logits/chosen": -2.0231215953826904, + "logits/rejected": -2.0163936614990234, + "logps/chosen": -61.09897232055664, + "logps/rejected": -97.99102783203125, + "loss": 0.4841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31761589646339417, + "rewards/margins": 0.5953632593154907, + "rewards/rejected": -0.27774736285209656, + "step": 3376 + }, + { + "epoch": 0.2, + "learning_rate": 9.290489067062168e-08, + "logits/chosen": -2.081364631652832, + "logits/rejected": -2.1066231727600098, + "logps/chosen": -222.53785705566406, + "logps/rejected": -430.9957275390625, + "loss": 0.107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3857040405273438, + "rewards/margins": 2.720106601715088, + "rewards/rejected": -1.3344024419784546, + "step": 3377 + }, + { + "epoch": 0.2, + "learning_rate": 9.290005080166541e-08, + "logits/chosen": -1.9995521306991577, + "logits/rejected": -1.9789762496948242, + "logps/chosen": -145.19268798828125, + "logps/rejected": -213.66941833496094, + "loss": 0.4512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.661206066608429, + "rewards/margins": 0.49095308780670166, + "rewards/rejected": 0.1702529937028885, + "step": 3378 + }, + { + "epoch": 0.2, + "learning_rate": 9.289520940868948e-08, + "logits/chosen": -1.9823336601257324, + "logits/rejected": -1.9773603677749634, + "logps/chosen": -0.012273644097149372, + "logps/rejected": -99.47866821289062, + "loss": 0.4468, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.910754094249569e-05, + "rewards/margins": 1.5078744888305664, + "rewards/rejected": -1.5078353881835938, + "step": 3379 + }, + { + "epoch": 0.2, + "learning_rate": 9.289036649186589e-08, + "logits/chosen": -2.16375994682312, + "logits/rejected": -2.1548142433166504, + "logps/chosen": -69.43282318115234, + "logps/rejected": -272.4060974121094, + "loss": 0.5291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3111869990825653, + "rewards/margins": 1.462241291999817, + "rewards/rejected": -1.7734283208847046, + "step": 3380 + }, + { + "epoch": 0.2, + "learning_rate": 9.288552205136669e-08, + "logits/chosen": -2.078667640686035, + "logits/rejected": -2.0559139251708984, + "logps/chosen": -228.93118286132812, + "logps/rejected": -359.651611328125, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7774505615234375, + "rewards/margins": 1.1313965320587158, + "rewards/rejected": 0.6460540890693665, + "step": 3381 + }, + { + "epoch": 0.2, + "learning_rate": 9.288067608736396e-08, + "logits/chosen": -2.0114588737487793, + "logits/rejected": -2.008147716522217, + "logps/chosen": -67.65592956542969, + "logps/rejected": -200.59613037109375, + "loss": 0.6016, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11589660495519638, + "rewards/margins": 0.23218078911304474, + "rewards/rejected": -0.11628418415784836, + "step": 3382 + }, + { + "epoch": 0.2, + "learning_rate": 9.287582860002988e-08, + "logits/chosen": -2.0505902767181396, + "logits/rejected": -2.0612642765045166, + "logps/chosen": -160.88885498046875, + "logps/rejected": -212.85580444335938, + "loss": 0.5436, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7069031000137329, + "rewards/margins": -0.08942872285842896, + "rewards/rejected": 0.7963318228721619, + "step": 3383 + }, + { + "epoch": 0.2, + "learning_rate": 9.287097958953663e-08, + "logits/chosen": -2.1372549533843994, + "logits/rejected": -2.114558696746826, + "logps/chosen": -221.38861083984375, + "logps/rejected": -360.4254150390625, + "loss": 0.5007, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1432037353515625, + "rewards/margins": -0.1687774658203125, + "rewards/rejected": 1.311981201171875, + "step": 3384 + }, + { + "epoch": 0.2, + "learning_rate": 9.286612905605649e-08, + "logits/chosen": -2.0058279037475586, + "logits/rejected": -1.988568663597107, + "logps/chosen": -65.85850524902344, + "logps/rejected": -286.42974853515625, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3607544004917145, + "rewards/margins": 1.7935394048690796, + "rewards/rejected": -1.4327850341796875, + "step": 3385 + }, + { + "epoch": 0.2, + "learning_rate": 9.286127699976174e-08, + "logits/chosen": -2.03080677986145, + "logits/rejected": -2.031763792037964, + "logps/chosen": -9.47692824411206e-05, + "logps/rejected": -208.26255798339844, + "loss": 0.3967, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0847601288332953e-06, + "rewards/margins": 2.25060772895813, + "rewards/rejected": -2.2506089210510254, + "step": 3386 + }, + { + "epoch": 0.2, + "learning_rate": 9.285642342082481e-08, + "logits/chosen": -2.070505380630493, + "logits/rejected": -2.048705816268921, + "logps/chosen": -299.0895690917969, + "logps/rejected": -500.41790771484375, + "loss": 0.154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.138952612876892, + "rewards/margins": 1.58294677734375, + "rewards/rejected": -0.4439941346645355, + "step": 3387 + }, + { + "epoch": 0.2, + "learning_rate": 9.285156831941805e-08, + "logits/chosen": -2.0241270065307617, + "logits/rejected": -2.088099718093872, + "logps/chosen": -240.32586669921875, + "logps/rejected": -318.5230712890625, + "loss": 0.2632, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.483831763267517, + "rewards/margins": 0.8061187267303467, + "rewards/rejected": 0.6777130365371704, + "step": 3388 + }, + { + "epoch": 0.2, + "learning_rate": 9.284671169571399e-08, + "logits/chosen": -2.056954860687256, + "logits/rejected": -2.0399348735809326, + "logps/chosen": -211.72988891601562, + "logps/rejected": -346.1904296875, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.739013671875, + "rewards/margins": 0.7568359375, + "rewards/rejected": -0.017822265625, + "step": 3389 + }, + { + "epoch": 0.2, + "learning_rate": 9.284185354988515e-08, + "logits/chosen": -2.0308220386505127, + "logits/rejected": -1.9653040170669556, + "logps/chosen": -223.88902282714844, + "logps/rejected": -422.08917236328125, + "loss": 0.3572, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2481536865234375, + "rewards/margins": 0.3857574462890625, + "rewards/rejected": 0.862396240234375, + "step": 3390 + }, + { + "epoch": 0.2, + "learning_rate": 9.283699388210411e-08, + "logits/chosen": -2.0576274394989014, + "logits/rejected": -2.037174701690674, + "logps/chosen": -165.83053588867188, + "logps/rejected": -187.40911865234375, + "loss": 0.5657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38269805908203125, + "rewards/margins": 0.22527313232421875, + "rewards/rejected": 0.1574249267578125, + "step": 3391 + }, + { + "epoch": 0.2, + "learning_rate": 9.28321326925435e-08, + "logits/chosen": -1.889589548110962, + "logits/rejected": -1.8803733587265015, + "logps/chosen": -262.48101806640625, + "logps/rejected": -423.61114501953125, + "loss": 0.1519, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.817047119140625, + "rewards/margins": 1.6347320079803467, + "rewards/rejected": 0.18231506645679474, + "step": 3392 + }, + { + "epoch": 0.2, + "learning_rate": 9.282726998137602e-08, + "logits/chosen": -1.996345043182373, + "logits/rejected": -1.9706491231918335, + "logps/chosen": -201.48472595214844, + "logps/rejected": -461.9571228027344, + "loss": 0.185, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2663803100585938, + "rewards/margins": 1.8382644653320312, + "rewards/rejected": -0.5718841552734375, + "step": 3393 + }, + { + "epoch": 0.2, + "learning_rate": 9.282240574877442e-08, + "logits/chosen": -2.2167699337005615, + "logits/rejected": -2.218980073928833, + "logps/chosen": -0.016357747837901115, + "logps/rejected": -42.393218994140625, + "loss": 0.6063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002126919134752825, + "rewards/margins": 0.3843976855278015, + "rewards/rejected": -0.3846103847026825, + "step": 3394 + }, + { + "epoch": 0.2, + "learning_rate": 9.281753999491152e-08, + "logits/chosen": -2.2392935752868652, + "logits/rejected": -2.236635684967041, + "logps/chosen": -2.647103786468506, + "logps/rejected": -91.95073699951172, + "loss": 0.6823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02995009534060955, + "rewards/margins": 0.05107283592224121, + "rewards/rejected": -0.02112274244427681, + "step": 3395 + }, + { + "epoch": 0.2, + "learning_rate": 9.281267271996014e-08, + "logits/chosen": -2.270413875579834, + "logits/rejected": -2.259030818939209, + "logps/chosen": -7.239681720733643, + "logps/rejected": -238.24209594726562, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19550739228725433, + "rewards/margins": 2.063291549682617, + "rewards/rejected": -1.8677841424942017, + "step": 3396 + }, + { + "epoch": 0.2, + "learning_rate": 9.28078039240932e-08, + "logits/chosen": -2.0810885429382324, + "logits/rejected": -2.09108304977417, + "logps/chosen": -170.88514709472656, + "logps/rejected": -206.24806213378906, + "loss": 0.3299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1873703002929688, + "rewards/margins": 0.5096023678779602, + "rewards/rejected": 0.6777679324150085, + "step": 3397 + }, + { + "epoch": 0.2, + "learning_rate": 9.280293360748367e-08, + "logits/chosen": -2.15030837059021, + "logits/rejected": -2.1489717960357666, + "logps/chosen": -0.016752466559410095, + "logps/rejected": -91.34416961669922, + "loss": 0.5601, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005025248974561691, + "rewards/margins": 0.6302485466003418, + "rewards/rejected": -0.6307510733604431, + "step": 3398 + }, + { + "epoch": 0.2, + "learning_rate": 9.279806177030458e-08, + "logits/chosen": -1.9977575540542603, + "logits/rejected": -1.994763731956482, + "logps/chosen": -276.36468505859375, + "logps/rejected": -385.46954345703125, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.947808861732483, + "rewards/margins": 0.3180053234100342, + "rewards/rejected": 1.6298035383224487, + "step": 3399 + }, + { + "epoch": 0.2, + "learning_rate": 9.279318841272899e-08, + "logits/chosen": -2.1235806941986084, + "logits/rejected": -2.1224000453948975, + "logps/chosen": -160.67691040039062, + "logps/rejected": -263.3206481933594, + "loss": 0.4, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.577174425125122, + "rewards/margins": 0.09772193431854248, + "rewards/rejected": 1.4794524908065796, + "step": 3400 + }, + { + "epoch": 0.2, + "learning_rate": 9.278831353493002e-08, + "logits/chosen": -1.8701328039169312, + "logits/rejected": -1.8613766431808472, + "logps/chosen": -53.3882942199707, + "logps/rejected": -271.5356750488281, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41995278000831604, + "rewards/margins": 1.5496097803115845, + "rewards/rejected": -1.1296570301055908, + "step": 3401 + }, + { + "epoch": 0.2, + "learning_rate": 9.278343713708085e-08, + "logits/chosen": -2.0314736366271973, + "logits/rejected": -1.9530653953552246, + "logps/chosen": -176.59954833984375, + "logps/rejected": -296.82366943359375, + "loss": 0.4816, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40885621309280396, + "rewards/margins": 0.5858246088027954, + "rewards/rejected": -0.17696838080883026, + "step": 3402 + }, + { + "epoch": 0.2, + "learning_rate": 9.277855921935471e-08, + "logits/chosen": -2.073042869567871, + "logits/rejected": -2.0804476737976074, + "logps/chosen": -203.6283721923828, + "logps/rejected": -362.83038330078125, + "loss": 0.2651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7930251955986023, + "rewards/margins": 1.1718918085098267, + "rewards/rejected": -0.378866583108902, + "step": 3403 + }, + { + "epoch": 0.2, + "learning_rate": 9.27736797819249e-08, + "logits/chosen": -1.9678618907928467, + "logits/rejected": -1.9655179977416992, + "logps/chosen": -135.64959716796875, + "logps/rejected": -272.0487365722656, + "loss": 0.6887, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3272293210029602, + "rewards/margins": -0.4893752932548523, + "rewards/rejected": 0.8166046142578125, + "step": 3404 + }, + { + "epoch": 0.2, + "learning_rate": 9.276879882496476e-08, + "logits/chosen": -1.9755539894104004, + "logits/rejected": -1.9109561443328857, + "logps/chosen": -213.98992919921875, + "logps/rejected": -408.24639892578125, + "loss": 0.297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5445327758789062, + "rewards/margins": 1.81059730052948, + "rewards/rejected": -1.2660645246505737, + "step": 3405 + }, + { + "epoch": 0.2, + "learning_rate": 9.276391634864766e-08, + "logits/chosen": -1.692999243736267, + "logits/rejected": -1.692933201789856, + "logps/chosen": -38.25389862060547, + "logps/rejected": -124.45378112792969, + "loss": 0.6637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017087554559111595, + "rewards/margins": 0.02801513858139515, + "rewards/rejected": -0.045102693140506744, + "step": 3406 + }, + { + "epoch": 0.2, + "learning_rate": 9.275903235314708e-08, + "logits/chosen": -2.0788354873657227, + "logits/rejected": -2.0988266468048096, + "logps/chosen": -194.13885498046875, + "logps/rejected": -288.7933349609375, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9902206659317017, + "rewards/margins": 0.27533113956451416, + "rewards/rejected": 0.7148895263671875, + "step": 3407 + }, + { + "epoch": 0.2, + "learning_rate": 9.275414683863653e-08, + "logits/chosen": -1.8836504220962524, + "logits/rejected": -1.8552783727645874, + "logps/chosen": -0.12999065220355988, + "logps/rejected": -344.9270324707031, + "loss": 0.4461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030096829868853092, + "rewards/margins": 1.5138177871704102, + "rewards/rejected": -1.5168274641036987, + "step": 3408 + }, + { + "epoch": 0.2, + "learning_rate": 9.274925980528954e-08, + "logits/chosen": -2.110562801361084, + "logits/rejected": -2.1113617420196533, + "logps/chosen": -4.339170118328184e-05, + "logps/rejected": -42.89790725708008, + "loss": 0.5245, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4570402362987807e-07, + "rewards/margins": 0.8497097492218018, + "rewards/rejected": -0.8497101068496704, + "step": 3409 + }, + { + "epoch": 0.2, + "learning_rate": 9.274437125327973e-08, + "logits/chosen": -2.013775587081909, + "logits/rejected": -2.004354476928711, + "logps/chosen": -66.07563781738281, + "logps/rejected": -343.72021484375, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.418670654296875, + "rewards/margins": 1.7948426008224487, + "rewards/rejected": -1.3761719465255737, + "step": 3410 + }, + { + "epoch": 0.2, + "learning_rate": 9.273948118278077e-08, + "logits/chosen": -2.087904453277588, + "logits/rejected": -2.075185775756836, + "logps/chosen": -219.32493591308594, + "logps/rejected": -350.5654602050781, + "loss": 0.2337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.520259141921997, + "rewards/margins": 0.9332535266876221, + "rewards/rejected": 0.587005615234375, + "step": 3411 + }, + { + "epoch": 0.2, + "learning_rate": 9.273458959396639e-08, + "logits/chosen": -2.152945041656494, + "logits/rejected": -2.1792805194854736, + "logps/chosen": -208.43875122070312, + "logps/rejected": -244.283447265625, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.008215308189392, + "rewards/margins": 0.14640802145004272, + "rewards/rejected": 0.8618072867393494, + "step": 3412 + }, + { + "epoch": 0.2, + "learning_rate": 9.272969648701032e-08, + "logits/chosen": -2.166736125946045, + "logits/rejected": -2.1680052280426025, + "logps/chosen": -15.992895126342773, + "logps/rejected": -106.29652404785156, + "loss": 0.6186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09951019287109375, + "rewards/margins": 0.20614013075828552, + "rewards/rejected": -0.10662994533777237, + "step": 3413 + }, + { + "epoch": 0.2, + "learning_rate": 9.272480186208644e-08, + "logits/chosen": -2.074075698852539, + "logits/rejected": -2.047253370285034, + "logps/chosen": -129.87802124023438, + "logps/rejected": -418.7569885253906, + "loss": 0.222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8378769159317017, + "rewards/margins": 1.607142686843872, + "rewards/rejected": -0.7692657709121704, + "step": 3414 + }, + { + "epoch": 0.2, + "learning_rate": 9.27199057193686e-08, + "logits/chosen": -1.9046940803527832, + "logits/rejected": -1.8167756795883179, + "logps/chosen": -262.86175537109375, + "logps/rejected": -590.3890380859375, + "loss": 0.0815, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7101563215255737, + "rewards/margins": 2.7767457962036133, + "rewards/rejected": -1.06658935546875, + "step": 3415 + }, + { + "epoch": 0.2, + "learning_rate": 9.271500805903075e-08, + "logits/chosen": -2.0487942695617676, + "logits/rejected": -2.028618097305298, + "logps/chosen": -201.55935668945312, + "logps/rejected": -370.6788024902344, + "loss": 0.3715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6262558698654175, + "rewards/margins": 0.12258458137512207, + "rewards/rejected": 1.5036712884902954, + "step": 3416 + }, + { + "epoch": 0.2, + "learning_rate": 9.271010888124687e-08, + "logits/chosen": -2.135535955429077, + "logits/rejected": -2.127370595932007, + "logps/chosen": -1.0197585821151733, + "logps/rejected": -213.27256774902344, + "loss": 0.358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007236564066261053, + "rewards/margins": 3.616414785385132, + "rewards/rejected": -3.609178304672241, + "step": 3417 + }, + { + "epoch": 0.2, + "learning_rate": 9.2705208186191e-08, + "logits/chosen": -2.1123404502868652, + "logits/rejected": -2.1052587032318115, + "logps/chosen": -175.97610473632812, + "logps/rejected": -269.2787780761719, + "loss": 0.3846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1210678815841675, + "rewards/margins": 0.16161960363388062, + "rewards/rejected": 0.9594482779502869, + "step": 3418 + }, + { + "epoch": 0.2, + "learning_rate": 9.270030597403724e-08, + "logits/chosen": -2.080138921737671, + "logits/rejected": -2.06900691986084, + "logps/chosen": -0.014542431570589542, + "logps/rejected": -94.62504577636719, + "loss": 0.627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007534275646321476, + "rewards/margins": 0.2837207615375519, + "rewards/rejected": -0.2844741940498352, + "step": 3419 + }, + { + "epoch": 0.2, + "learning_rate": 9.269540224495976e-08, + "logits/chosen": -1.9392831325531006, + "logits/rejected": -1.9216989278793335, + "logps/chosen": -243.49244689941406, + "logps/rejected": -407.13226318359375, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.788525402545929, + "rewards/margins": 1.0205291509628296, + "rewards/rejected": -0.23200379312038422, + "step": 3420 + }, + { + "epoch": 0.2, + "learning_rate": 9.269049699913273e-08, + "logits/chosen": -2.0477492809295654, + "logits/rejected": -2.046936273574829, + "logps/chosen": -11.237569808959961, + "logps/rejected": -129.75027465820312, + "loss": 0.6104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18902531266212463, + "rewards/margins": 0.6639401912689209, + "rewards/rejected": -0.8529655337333679, + "step": 3421 + }, + { + "epoch": 0.2, + "learning_rate": 9.268559023673042e-08, + "logits/chosen": -1.8285787105560303, + "logits/rejected": -1.7765204906463623, + "logps/chosen": -300.4726867675781, + "logps/rejected": -585.245849609375, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6710662841796875, + "rewards/margins": 1.1685638427734375, + "rewards/rejected": -0.49749755859375, + "step": 3422 + }, + { + "epoch": 0.2, + "learning_rate": 9.268068195792715e-08, + "logits/chosen": -2.0142946243286133, + "logits/rejected": -2.015681743621826, + "logps/chosen": -187.5635986328125, + "logps/rejected": -257.35943603515625, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1590622663497925, + "rewards/margins": 0.6743698716163635, + "rewards/rejected": 0.48469239473342896, + "step": 3423 + }, + { + "epoch": 0.2, + "learning_rate": 9.267577216289731e-08, + "logits/chosen": -1.9134478569030762, + "logits/rejected": -1.9119681119918823, + "logps/chosen": -1.7182283401489258, + "logps/rejected": -158.28488159179688, + "loss": 0.4991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10129878669977188, + "rewards/margins": 1.247289776802063, + "rewards/rejected": -1.3485885858535767, + "step": 3424 + }, + { + "epoch": 0.2, + "learning_rate": 9.267086085181526e-08, + "logits/chosen": -1.9822262525558472, + "logits/rejected": -1.975511074066162, + "logps/chosen": -161.81964111328125, + "logps/rejected": -256.1932373046875, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.060810923576355, + "rewards/margins": 0.7910171747207642, + "rewards/rejected": 0.26979371905326843, + "step": 3425 + }, + { + "epoch": 0.2, + "learning_rate": 9.266594802485552e-08, + "logits/chosen": -1.9700629711151123, + "logits/rejected": -1.9510202407836914, + "logps/chosen": -177.6921844482422, + "logps/rejected": -329.23101806640625, + "loss": 0.5622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.317239373922348, + "rewards/margins": 0.14521178603172302, + "rewards/rejected": 0.172027587890625, + "step": 3426 + }, + { + "epoch": 0.2, + "learning_rate": 9.26610336821926e-08, + "logits/chosen": -2.1025750637054443, + "logits/rejected": -2.0956037044525146, + "logps/chosen": -49.07710266113281, + "logps/rejected": -162.08348083496094, + "loss": 0.8368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5552974939346313, + "rewards/margins": 0.29015082120895386, + "rewards/rejected": -0.8454483151435852, + "step": 3427 + }, + { + "epoch": 0.2, + "learning_rate": 9.26561178240011e-08, + "logits/chosen": -2.098346710205078, + "logits/rejected": -2.0742785930633545, + "logps/chosen": -29.843347549438477, + "logps/rejected": -249.22451782226562, + "loss": 0.3978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06769218295812607, + "rewards/margins": 1.9285366535186768, + "rewards/rejected": -1.8608444929122925, + "step": 3428 + }, + { + "epoch": 0.2, + "learning_rate": 9.265120045045561e-08, + "logits/chosen": -2.01216459274292, + "logits/rejected": -2.0164313316345215, + "logps/chosen": -2.089810371398926, + "logps/rejected": -147.25735473632812, + "loss": 0.6652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1016264483332634, + "rewards/margins": 0.26435407996177673, + "rewards/rejected": -0.3659805357456207, + "step": 3429 + }, + { + "epoch": 0.2, + "learning_rate": 9.264628156173087e-08, + "logits/chosen": -1.9856916666030884, + "logits/rejected": -2.070464611053467, + "logps/chosen": -286.3407897949219, + "logps/rejected": -355.8422546386719, + "loss": 0.1701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4918396472930908, + "rewards/margins": 1.3881134986877441, + "rewards/rejected": 0.10372620075941086, + "step": 3430 + }, + { + "epoch": 0.2, + "learning_rate": 9.264136115800161e-08, + "logits/chosen": -2.1497623920440674, + "logits/rejected": -2.1342310905456543, + "logps/chosen": -0.0021928001660853624, + "logps/rejected": -305.86712646484375, + "loss": 0.3551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001299926807405427, + "rewards/margins": 4.3797221183776855, + "rewards/rejected": -4.379852294921875, + "step": 3431 + }, + { + "epoch": 0.2, + "learning_rate": 9.263643923944261e-08, + "logits/chosen": -2.1508994102478027, + "logits/rejected": -2.0902318954467773, + "logps/chosen": -330.85675048828125, + "logps/rejected": -487.80670166015625, + "loss": 0.497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4725708067417145, + "rewards/margins": 0.2786804437637329, + "rewards/rejected": 0.19389037787914276, + "step": 3432 + }, + { + "epoch": 0.2, + "learning_rate": 9.263151580622873e-08, + "logits/chosen": -2.014237403869629, + "logits/rejected": -2.0178380012512207, + "logps/chosen": -17.486427307128906, + "logps/rejected": -92.6731185913086, + "loss": 0.49, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012327193981036544, + "rewards/margins": 1.1389403343200684, + "rewards/rejected": -1.1377075910568237, + "step": 3433 + }, + { + "epoch": 0.2, + "learning_rate": 9.262659085853488e-08, + "logits/chosen": -2.077117681503296, + "logits/rejected": -2.0759284496307373, + "logps/chosen": -52.5926399230957, + "logps/rejected": -166.2166748046875, + "loss": 0.5823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24090079963207245, + "rewards/margins": 0.3540157377719879, + "rewards/rejected": -0.11311493068933487, + "step": 3434 + }, + { + "epoch": 0.2, + "learning_rate": 9.262166439653602e-08, + "logits/chosen": -2.20107102394104, + "logits/rejected": -2.1713297367095947, + "logps/chosen": -155.5685577392578, + "logps/rejected": -387.8529052734375, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9785354733467102, + "rewards/margins": 0.42508697509765625, + "rewards/rejected": 0.553448498249054, + "step": 3435 + }, + { + "epoch": 0.2, + "learning_rate": 9.261673642040714e-08, + "logits/chosen": -1.9352718591690063, + "logits/rejected": -1.9343864917755127, + "logps/chosen": -235.2423095703125, + "logps/rejected": -372.921630859375, + "loss": 0.2398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3509399890899658, + "rewards/margins": 0.94427490234375, + "rewards/rejected": 0.40666505694389343, + "step": 3436 + }, + { + "epoch": 0.2, + "learning_rate": 9.261180693032333e-08, + "logits/chosen": -2.047055721282959, + "logits/rejected": -2.0419681072235107, + "logps/chosen": -55.56449890136719, + "logps/rejected": -130.20913696289062, + "loss": 0.8473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6891242861747742, + "rewards/margins": 0.11760711669921875, + "rewards/rejected": -0.8067314028739929, + "step": 3437 + }, + { + "epoch": 0.2, + "learning_rate": 9.26068759264597e-08, + "logits/chosen": -2.0727109909057617, + "logits/rejected": -2.0614917278289795, + "logps/chosen": -171.1932373046875, + "logps/rejected": -275.2064208984375, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5556136965751648, + "rewards/margins": 0.7578750848770142, + "rewards/rejected": -0.20226135849952698, + "step": 3438 + }, + { + "epoch": 0.2, + "learning_rate": 9.260194340899144e-08, + "logits/chosen": -1.949766755104065, + "logits/rejected": -1.9456759691238403, + "logps/chosen": -115.89854431152344, + "logps/rejected": -408.87890625, + "loss": 0.3021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5717849731445312, + "rewards/margins": 1.4524551630020142, + "rewards/rejected": -0.8806701898574829, + "step": 3439 + }, + { + "epoch": 0.2, + "learning_rate": 9.259700937809373e-08, + "logits/chosen": -2.0442984104156494, + "logits/rejected": -1.9710612297058105, + "logps/chosen": -317.6536865234375, + "logps/rejected": -454.7947692871094, + "loss": 0.1524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.824041724205017, + "rewards/margins": 1.6014556884765625, + "rewards/rejected": 0.22258606553077698, + "step": 3440 + }, + { + "epoch": 0.2, + "learning_rate": 9.259207383394191e-08, + "logits/chosen": -1.934590458869934, + "logits/rejected": -1.9268782138824463, + "logps/chosen": -76.61864471435547, + "logps/rejected": -212.23208618164062, + "loss": 0.6289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5787849426269531, + "rewards/margins": 0.9853843450546265, + "rewards/rejected": -1.5641692876815796, + "step": 3441 + }, + { + "epoch": 0.2, + "learning_rate": 9.258713677671128e-08, + "logits/chosen": -1.8504959344863892, + "logits/rejected": -1.836442470550537, + "logps/chosen": -200.41647338867188, + "logps/rejected": -255.7233123779297, + "loss": 0.242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.047570824623108, + "rewards/margins": 1.1676864624023438, + "rewards/rejected": -0.12011566013097763, + "step": 3442 + }, + { + "epoch": 0.2, + "learning_rate": 9.258219820657722e-08, + "logits/chosen": -2.055755853652954, + "logits/rejected": -2.059255361557007, + "logps/chosen": -8.99265193939209, + "logps/rejected": -45.073368072509766, + "loss": 0.7088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07570105046033859, + "rewards/margins": 0.004035279154777527, + "rewards/rejected": -0.07973632961511612, + "step": 3443 + }, + { + "epoch": 0.2, + "learning_rate": 9.25772581237152e-08, + "logits/chosen": -1.8771369457244873, + "logits/rejected": -1.8779300451278687, + "logps/chosen": -14.086786270141602, + "logps/rejected": -286.7635498046875, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3206779658794403, + "rewards/margins": 2.4769158363342285, + "rewards/rejected": -2.156237840652466, + "step": 3444 + }, + { + "epoch": 0.2, + "learning_rate": 9.257231652830071e-08, + "logits/chosen": -2.1061606407165527, + "logits/rejected": -2.092985153198242, + "logps/chosen": -6.9145708084106445, + "logps/rejected": -96.77001190185547, + "loss": 0.4734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07083659619092941, + "rewards/margins": 0.9060027599334717, + "rewards/rejected": -0.8351661562919617, + "step": 3445 + }, + { + "epoch": 0.2, + "learning_rate": 9.256737342050928e-08, + "logits/chosen": -2.1143858432769775, + "logits/rejected": -2.152939558029175, + "logps/chosen": -146.8388214111328, + "logps/rejected": -481.43634033203125, + "loss": 0.144, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2017288208007812, + "rewards/margins": 2.280146837234497, + "rewards/rejected": -1.0784180164337158, + "step": 3446 + }, + { + "epoch": 0.2, + "learning_rate": 9.256242880051654e-08, + "logits/chosen": -2.203211784362793, + "logits/rejected": -2.2201199531555176, + "logps/chosen": -262.6589660644531, + "logps/rejected": -315.75054931640625, + "loss": 0.2652, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.109100341796875, + "rewards/margins": 0.848406970500946, + "rewards/rejected": 0.26069337129592896, + "step": 3447 + }, + { + "epoch": 0.2, + "learning_rate": 9.255748266849812e-08, + "logits/chosen": -2.1676602363586426, + "logits/rejected": -2.156914710998535, + "logps/chosen": -12.240082740783691, + "logps/rejected": -99.53709411621094, + "loss": 0.496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01702737808227539, + "rewards/margins": 1.0175279378890991, + "rewards/rejected": -1.0005005598068237, + "step": 3448 + }, + { + "epoch": 0.2, + "learning_rate": 9.255253502462975e-08, + "logits/chosen": -2.0244014263153076, + "logits/rejected": -2.0196399688720703, + "logps/chosen": -72.94058227539062, + "logps/rejected": -323.32550048828125, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15159225463867188, + "rewards/margins": 1.5838005542755127, + "rewards/rejected": -1.4322082996368408, + "step": 3449 + }, + { + "epoch": 0.2, + "learning_rate": 9.25475858690872e-08, + "logits/chosen": -2.135030508041382, + "logits/rejected": -2.1256661415100098, + "logps/chosen": -2.5272247512475587e-05, + "logps/rejected": -85.46488189697266, + "loss": 0.6081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1920783293817294e-07, + "rewards/margins": 0.3338342010974884, + "rewards/rejected": -0.33383408188819885, + "step": 3450 + }, + { + "epoch": 0.2, + "learning_rate": 9.254263520204626e-08, + "logits/chosen": -2.085798740386963, + "logits/rejected": -2.0532045364379883, + "logps/chosen": -116.4393539428711, + "logps/rejected": -255.30191040039062, + "loss": 0.514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17712250351905823, + "rewards/margins": 0.7718521356582642, + "rewards/rejected": -0.5947296023368835, + "step": 3451 + }, + { + "epoch": 0.2, + "learning_rate": 9.253768302368282e-08, + "logits/chosen": -2.23740816116333, + "logits/rejected": -2.219613790512085, + "logps/chosen": -67.66758728027344, + "logps/rejected": -280.9950866699219, + "loss": 0.3353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3848815858364105, + "rewards/margins": 1.6046600341796875, + "rewards/rejected": -1.2197784185409546, + "step": 3452 + }, + { + "epoch": 0.2, + "learning_rate": 9.253272933417282e-08, + "logits/chosen": -2.1246495246887207, + "logits/rejected": -2.1105458736419678, + "logps/chosen": -0.0004251739592291415, + "logps/rejected": -73.97347259521484, + "loss": 0.6119, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.919990791066084e-05, + "rewards/margins": 0.3517243564128876, + "rewards/rejected": -0.3517051637172699, + "step": 3453 + }, + { + "epoch": 0.2, + "learning_rate": 9.252777413369222e-08, + "logits/chosen": -2.1136438846588135, + "logits/rejected": -2.11466383934021, + "logps/chosen": -80.77098083496094, + "logps/rejected": -216.82704162597656, + "loss": 0.6147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15840302407741547, + "rewards/margins": 0.611865222454071, + "rewards/rejected": -0.7702682614326477, + "step": 3454 + }, + { + "epoch": 0.2, + "learning_rate": 9.252281742241704e-08, + "logits/chosen": -1.9433159828186035, + "logits/rejected": -1.93953537940979, + "logps/chosen": -143.0892333984375, + "logps/rejected": -229.79443359375, + "loss": 0.5708, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8020858764648438, + "rewards/margins": -0.24255526065826416, + "rewards/rejected": 1.044641137123108, + "step": 3455 + }, + { + "epoch": 0.2, + "learning_rate": 9.251785920052339e-08, + "logits/chosen": -2.3422818183898926, + "logits/rejected": -2.335308074951172, + "logps/chosen": -0.004700176417827606, + "logps/rejected": -163.71730041503906, + "loss": 0.4037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014438654761761427, + "rewards/margins": 2.112915515899658, + "rewards/rejected": -2.1130599975585938, + "step": 3456 + }, + { + "epoch": 0.2, + "learning_rate": 9.251289946818741e-08, + "logits/chosen": -2.201338768005371, + "logits/rejected": -2.212286949157715, + "logps/chosen": -11.898150444030762, + "logps/rejected": -146.8436279296875, + "loss": 0.5699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024896622635424137, + "rewards/margins": 0.5420583486557007, + "rewards/rejected": -0.5445480346679688, + "step": 3457 + }, + { + "epoch": 0.2, + "learning_rate": 9.250793822558529e-08, + "logits/chosen": -2.1394271850585938, + "logits/rejected": -2.140495538711548, + "logps/chosen": -0.02065356820821762, + "logps/rejected": -63.21232604980469, + "loss": 0.6643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00022111181169748306, + "rewards/margins": 0.08023858070373535, + "rewards/rejected": -0.08001746982336044, + "step": 3458 + }, + { + "epoch": 0.2, + "learning_rate": 9.250297547289327e-08, + "logits/chosen": -2.2773478031158447, + "logits/rejected": -2.274282217025757, + "logps/chosen": -17.099502563476562, + "logps/rejected": -129.2144012451172, + "loss": 0.5396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004318428225815296, + "rewards/margins": 0.874318540096283, + "rewards/rejected": -0.8786369562149048, + "step": 3459 + }, + { + "epoch": 0.2, + "learning_rate": 9.249801121028767e-08, + "logits/chosen": -2.0273780822753906, + "logits/rejected": -2.0004262924194336, + "logps/chosen": -161.0190887451172, + "logps/rejected": -314.96533203125, + "loss": 0.3967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7305038571357727, + "rewards/margins": 0.48462677001953125, + "rewards/rejected": 0.24587707221508026, + "step": 3460 + }, + { + "epoch": 0.2, + "learning_rate": 9.249304543794482e-08, + "logits/chosen": -2.1284971237182617, + "logits/rejected": -2.1304354667663574, + "logps/chosen": -6.137602806091309, + "logps/rejected": -170.8354034423828, + "loss": 0.533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10571184009313583, + "rewards/margins": 1.0793544054031372, + "rewards/rejected": -1.1850662231445312, + "step": 3461 + }, + { + "epoch": 0.2, + "learning_rate": 9.248807815604114e-08, + "logits/chosen": -2.058051109313965, + "logits/rejected": -2.0575411319732666, + "logps/chosen": -0.6334872245788574, + "logps/rejected": -138.7482147216797, + "loss": 0.4562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03812514245510101, + "rewards/margins": 1.3793079853057861, + "rewards/rejected": -1.4174331426620483, + "step": 3462 + }, + { + "epoch": 0.2, + "learning_rate": 9.248310936475309e-08, + "logits/chosen": -1.9408890008926392, + "logits/rejected": -1.9414750337600708, + "logps/chosen": -0.08388014137744904, + "logps/rejected": -174.84967041015625, + "loss": 0.3941, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067156937206164e-05, + "rewards/margins": 2.305145740509033, + "rewards/rejected": -2.305206298828125, + "step": 3463 + }, + { + "epoch": 0.2, + "learning_rate": 9.24781390642572e-08, + "logits/chosen": -2.073798418045044, + "logits/rejected": -2.0674526691436768, + "logps/chosen": -7.727094650268555, + "logps/rejected": -114.44731140136719, + "loss": 0.5035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1361248940229416, + "rewards/margins": 0.7687504887580872, + "rewards/rejected": -0.6326255798339844, + "step": 3464 + }, + { + "epoch": 0.2, + "learning_rate": 9.247316725473001e-08, + "logits/chosen": -2.1534218788146973, + "logits/rejected": -2.098259449005127, + "logps/chosen": -155.31922912597656, + "logps/rejected": -311.7669677734375, + "loss": 0.5425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4102737605571747, + "rewards/margins": 0.26602935791015625, + "rewards/rejected": 0.14424438774585724, + "step": 3465 + }, + { + "epoch": 0.2, + "learning_rate": 9.246819393634817e-08, + "logits/chosen": -2.0584542751312256, + "logits/rejected": -2.0559332370758057, + "logps/chosen": -36.21995544433594, + "logps/rejected": -139.24659729003906, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32478904724121094, + "rewards/margins": 0.36731988191604614, + "rewards/rejected": -0.04253082349896431, + "step": 3466 + }, + { + "epoch": 0.2, + "learning_rate": 9.246321910928835e-08, + "logits/chosen": -2.114168882369995, + "logits/rejected": -2.0918447971343994, + "logps/chosen": -194.7333526611328, + "logps/rejected": -297.6759948730469, + "loss": 0.7142, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.469888299703598, + "rewards/margins": -0.5602325201034546, + "rewards/rejected": 1.030120849609375, + "step": 3467 + }, + { + "epoch": 0.2, + "learning_rate": 9.245824277372727e-08, + "logits/chosen": -2.0873332023620605, + "logits/rejected": -2.0883588790893555, + "logps/chosen": -74.78929138183594, + "logps/rejected": -230.2745361328125, + "loss": 0.5028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2189071625471115, + "rewards/margins": 0.6953567266464233, + "rewards/rejected": -0.476449579000473, + "step": 3468 + }, + { + "epoch": 0.2, + "learning_rate": 9.245326492984173e-08, + "logits/chosen": -1.9441055059432983, + "logits/rejected": -1.9152722358703613, + "logps/chosen": -170.62356567382812, + "logps/rejected": -315.2813720703125, + "loss": 0.4327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6280410885810852, + "rewards/margins": 0.22418060898780823, + "rewards/rejected": 0.403860479593277, + "step": 3469 + }, + { + "epoch": 0.2, + "learning_rate": 9.244828557780855e-08, + "logits/chosen": -1.9878472089767456, + "logits/rejected": -1.9782804250717163, + "logps/chosen": -18.600322723388672, + "logps/rejected": -74.56822204589844, + "loss": 0.5403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18476200103759766, + "rewards/margins": 0.4828340709209442, + "rewards/rejected": -0.29807206988334656, + "step": 3470 + }, + { + "epoch": 0.2, + "learning_rate": 9.244330471780463e-08, + "logits/chosen": -2.0920581817626953, + "logits/rejected": -2.0788803100585938, + "logps/chosen": -8.736698150634766, + "logps/rejected": -148.62046813964844, + "loss": 0.5052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040505122393369675, + "rewards/margins": 0.7656866908073425, + "rewards/rejected": -0.7251815795898438, + "step": 3471 + }, + { + "epoch": 0.2, + "learning_rate": 9.24383223500069e-08, + "logits/chosen": -2.139749526977539, + "logits/rejected": -2.1322011947631836, + "logps/chosen": -23.468477249145508, + "logps/rejected": -238.275390625, + "loss": 0.4491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04993152618408203, + "rewards/margins": 1.2620867490768433, + "rewards/rejected": -1.2121552228927612, + "step": 3472 + }, + { + "epoch": 0.2, + "learning_rate": 9.243333847459239e-08, + "logits/chosen": -2.15509295463562, + "logits/rejected": -2.153977870941162, + "logps/chosen": -19.314666748046875, + "logps/rejected": -124.68986511230469, + "loss": 0.5812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16059552133083344, + "rewards/margins": 0.3389677107334137, + "rewards/rejected": -0.17837218940258026, + "step": 3473 + }, + { + "epoch": 0.2, + "learning_rate": 9.242835309173814e-08, + "logits/chosen": -2.1461944580078125, + "logits/rejected": -2.1385281085968018, + "logps/chosen": -10.277270317077637, + "logps/rejected": -161.7346649169922, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037482645362615585, + "rewards/margins": 2.195629358291626, + "rewards/rejected": -2.158146619796753, + "step": 3474 + }, + { + "epoch": 0.2, + "learning_rate": 9.242336620162124e-08, + "logits/chosen": -2.134023666381836, + "logits/rejected": -2.0532617568969727, + "logps/chosen": -291.7017822265625, + "logps/rejected": -449.7716369628906, + "loss": 0.2273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.528717041015625, + "rewards/margins": 1.1415984630584717, + "rewards/rejected": 0.38711854815483093, + "step": 3475 + }, + { + "epoch": 0.2, + "learning_rate": 9.241837780441886e-08, + "logits/chosen": -2.233900785446167, + "logits/rejected": -2.2314438819885254, + "logps/chosen": -115.71073913574219, + "logps/rejected": -296.1058349609375, + "loss": 0.4651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31228333711624146, + "rewards/margins": 0.6507781744003296, + "rewards/rejected": -0.3384948670864105, + "step": 3476 + }, + { + "epoch": 0.2, + "learning_rate": 9.241338790030819e-08, + "logits/chosen": -2.0330564975738525, + "logits/rejected": -2.0269906520843506, + "logps/chosen": -95.67415618896484, + "logps/rejected": -232.01162719726562, + "loss": 0.4573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47801896929740906, + "rewards/margins": 0.5359413623809814, + "rewards/rejected": -0.05792236328125, + "step": 3477 + }, + { + "epoch": 0.2, + "learning_rate": 9.240839648946655e-08, + "logits/chosen": -2.0463385581970215, + "logits/rejected": -2.034332513809204, + "logps/chosen": -20.14132308959961, + "logps/rejected": -136.43385314941406, + "loss": 0.5455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.079686738550663, + "rewards/margins": 0.7725486755371094, + "rewards/rejected": -0.8522354364395142, + "step": 3478 + }, + { + "epoch": 0.2, + "learning_rate": 9.240340357207119e-08, + "logits/chosen": -2.13269305229187, + "logits/rejected": -2.1406900882720947, + "logps/chosen": -18.343996047973633, + "logps/rejected": -317.336181640625, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22531090676784515, + "rewards/margins": 1.9973934888839722, + "rewards/rejected": -1.7720825672149658, + "step": 3479 + }, + { + "epoch": 0.2, + "learning_rate": 9.239840914829954e-08, + "logits/chosen": -2.0314910411834717, + "logits/rejected": -2.0169668197631836, + "logps/chosen": -13.578383445739746, + "logps/rejected": -269.33935546875, + "loss": 0.4283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09421892464160919, + "rewards/margins": 2.071885108947754, + "rewards/rejected": -2.1661040782928467, + "step": 3480 + }, + { + "epoch": 0.2, + "learning_rate": 9.2393413218329e-08, + "logits/chosen": -2.0784194469451904, + "logits/rejected": -1.9996815919876099, + "logps/chosen": -171.7878875732422, + "logps/rejected": -285.1341857910156, + "loss": 0.4221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6430511474609375, + "rewards/margins": 0.585845947265625, + "rewards/rejected": 0.0572052001953125, + "step": 3481 + }, + { + "epoch": 0.2, + "learning_rate": 9.238841578233705e-08, + "logits/chosen": -2.205756902694702, + "logits/rejected": -2.1993017196655273, + "logps/chosen": -34.63591003417969, + "logps/rejected": -179.6968994140625, + "loss": 0.4127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2147369384765625, + "rewards/margins": 1.4717804193496704, + "rewards/rejected": -1.257043480873108, + "step": 3482 + }, + { + "epoch": 0.2, + "learning_rate": 9.238341684050124e-08, + "logits/chosen": -2.0648703575134277, + "logits/rejected": -1.973516821861267, + "logps/chosen": -237.7126922607422, + "logps/rejected": -357.7076110839844, + "loss": 0.4398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7513718008995056, + "rewards/margins": 0.13929295539855957, + "rewards/rejected": 0.612078845500946, + "step": 3483 + }, + { + "epoch": 0.2, + "learning_rate": 9.237841639299913e-08, + "logits/chosen": -2.0425117015838623, + "logits/rejected": -2.041105270385742, + "logps/chosen": -0.6727875471115112, + "logps/rejected": -9.646788597106934, + "loss": 0.7002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016475409269332886, + "rewards/margins": 0.02565038949251175, + "rewards/rejected": -0.042125798761844635, + "step": 3484 + }, + { + "epoch": 0.2, + "learning_rate": 9.237341444000837e-08, + "logits/chosen": -2.0684103965759277, + "logits/rejected": -2.043781042098999, + "logps/chosen": -263.3148193359375, + "logps/rejected": -387.26171875, + "loss": 0.1635, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9187134504318237, + "rewards/margins": 1.280242919921875, + "rewards/rejected": 0.638470470905304, + "step": 3485 + }, + { + "epoch": 0.2, + "learning_rate": 9.236841098170666e-08, + "logits/chosen": -1.5955536365509033, + "logits/rejected": -1.5588105916976929, + "logps/chosen": -215.1083984375, + "logps/rejected": -227.1945037841797, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8829620480537415, + "rewards/margins": 0.9551605582237244, + "rewards/rejected": -0.07219848781824112, + "step": 3486 + }, + { + "epoch": 0.2, + "learning_rate": 9.236340601827177e-08, + "logits/chosen": -2.1733198165893555, + "logits/rejected": -2.1593940258026123, + "logps/chosen": -65.06179809570312, + "logps/rejected": -274.7715759277344, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15081635117530823, + "rewards/margins": 1.6848434209823608, + "rewards/rejected": -1.534027099609375, + "step": 3487 + }, + { + "epoch": 0.2, + "learning_rate": 9.235839954988144e-08, + "logits/chosen": -2.0264904499053955, + "logits/rejected": -1.9781157970428467, + "logps/chosen": -250.95999145507812, + "logps/rejected": -625.7268676757812, + "loss": 0.2295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2279388904571533, + "rewards/margins": 1.0205719470977783, + "rewards/rejected": 0.207366943359375, + "step": 3488 + }, + { + "epoch": 0.2, + "learning_rate": 9.235339157671359e-08, + "logits/chosen": -1.976525068283081, + "logits/rejected": -2.038405179977417, + "logps/chosen": -221.62258911132812, + "logps/rejected": -217.4365692138672, + "loss": 0.2874, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.114160180091858, + "rewards/margins": 0.6373794674873352, + "rewards/rejected": 0.4767807126045227, + "step": 3489 + }, + { + "epoch": 0.2, + "learning_rate": 9.234838209894608e-08, + "logits/chosen": -2.099616765975952, + "logits/rejected": -2.107863187789917, + "logps/chosen": -174.4473876953125, + "logps/rejected": -334.701171875, + "loss": 0.2414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7771087884902954, + "rewards/margins": 1.3613983392715454, + "rewards/rejected": -0.58428955078125, + "step": 3490 + }, + { + "epoch": 0.2, + "learning_rate": 9.234337111675689e-08, + "logits/chosen": -2.0389938354492188, + "logits/rejected": -2.0862669944763184, + "logps/chosen": -249.37188720703125, + "logps/rejected": -307.3011474609375, + "loss": 0.4269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3142365217208862, + "rewards/margins": 0.0506439208984375, + "rewards/rejected": 1.2635926008224487, + "step": 3491 + }, + { + "epoch": 0.2, + "learning_rate": 9.233835863032406e-08, + "logits/chosen": -1.9699422121047974, + "logits/rejected": -1.9637506008148193, + "logps/chosen": -208.70712280273438, + "logps/rejected": -349.52923583984375, + "loss": 0.1867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.459742784500122, + "rewards/margins": 1.2330398559570312, + "rewards/rejected": 0.22670288383960724, + "step": 3492 + }, + { + "epoch": 0.2, + "learning_rate": 9.233334463982561e-08, + "logits/chosen": -2.0865378379821777, + "logits/rejected": -2.0659897327423096, + "logps/chosen": -196.76988220214844, + "logps/rejected": -499.74102783203125, + "loss": 0.2802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3790191411972046, + "rewards/margins": 0.8223815560340881, + "rewards/rejected": 0.5566375851631165, + "step": 3493 + }, + { + "epoch": 0.2, + "learning_rate": 9.232832914543969e-08, + "logits/chosen": -2.1041648387908936, + "logits/rejected": -2.107069969177246, + "logps/chosen": -12.794124603271484, + "logps/rejected": -34.43484878540039, + "loss": 0.7101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10096502304077148, + "rewards/margins": 0.02670411765575409, + "rewards/rejected": -0.12766914069652557, + "step": 3494 + }, + { + "epoch": 0.2, + "learning_rate": 9.232331214734446e-08, + "logits/chosen": -2.16483998298645, + "logits/rejected": -2.1613545417785645, + "logps/chosen": -232.96646118164062, + "logps/rejected": -413.956298828125, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.306634545326233, + "rewards/margins": 0.8874298334121704, + "rewards/rejected": 0.4192047119140625, + "step": 3495 + }, + { + "epoch": 0.2, + "learning_rate": 9.231829364571817e-08, + "logits/chosen": -2.0927770137786865, + "logits/rejected": -2.0735485553741455, + "logps/chosen": -275.6473693847656, + "logps/rejected": -348.44775390625, + "loss": 0.1433, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8036469221115112, + "rewards/margins": 1.6061676740646362, + "rewards/rejected": 0.197479248046875, + "step": 3496 + }, + { + "epoch": 0.2, + "learning_rate": 9.231327364073908e-08, + "logits/chosen": -2.0852086544036865, + "logits/rejected": -2.086198568344116, + "logps/chosen": -4.7325589548563585e-05, + "logps/rejected": -102.87371826171875, + "loss": 0.442, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9339101931545883e-07, + "rewards/margins": 1.5548778772354126, + "rewards/rejected": -1.5548782348632812, + "step": 3497 + }, + { + "epoch": 0.2, + "learning_rate": 9.230825213258553e-08, + "logits/chosen": -1.9936162233352661, + "logits/rejected": -1.9862011671066284, + "logps/chosen": -17.043594360351562, + "logps/rejected": -144.31002807617188, + "loss": 0.5062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13273067772388458, + "rewards/margins": 0.6873876452445984, + "rewards/rejected": -0.554656982421875, + "step": 3498 + }, + { + "epoch": 0.2, + "learning_rate": 9.230322912143591e-08, + "logits/chosen": -2.2268359661102295, + "logits/rejected": -2.2243082523345947, + "logps/chosen": -6.339359283447266, + "logps/rejected": -173.56874084472656, + "loss": 0.3608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13498187065124512, + "rewards/margins": 2.198755979537964, + "rewards/rejected": -2.0637741088867188, + "step": 3499 + }, + { + "epoch": 0.2, + "learning_rate": 9.229820460746867e-08, + "logits/chosen": -2.1392717361450195, + "logits/rejected": -2.136531352996826, + "logps/chosen": -16.077348709106445, + "logps/rejected": -83.45812225341797, + "loss": 0.6006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05954895168542862, + "rewards/margins": 0.3612327575683594, + "rewards/rejected": -0.30168381333351135, + "step": 3500 + }, + { + "epoch": 0.2, + "learning_rate": 9.22931785908623e-08, + "logits/chosen": -1.9958945512771606, + "logits/rejected": -1.960055947303772, + "logps/chosen": -290.55657958984375, + "logps/rejected": -426.15814208984375, + "loss": 0.2048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0719940662384033, + "rewards/margins": 1.6733673810958862, + "rewards/rejected": -0.6013733148574829, + "step": 3501 + }, + { + "epoch": 0.2, + "learning_rate": 9.228815107179535e-08, + "logits/chosen": -2.049194574356079, + "logits/rejected": -2.0530948638916016, + "logps/chosen": -95.29927062988281, + "logps/rejected": -159.37545776367188, + "loss": 0.7142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17766952514648438, + "rewards/margins": 0.061725616455078125, + "rewards/rejected": -0.2393951416015625, + "step": 3502 + }, + { + "epoch": 0.2, + "learning_rate": 9.228312205044641e-08, + "logits/chosen": -1.8641972541809082, + "logits/rejected": -1.851266622543335, + "logps/chosen": -280.6262512207031, + "logps/rejected": -293.97613525390625, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7743774652481079, + "rewards/margins": 0.9531189203262329, + "rewards/rejected": -0.178741455078125, + "step": 3503 + }, + { + "epoch": 0.2, + "learning_rate": 9.227809152699417e-08, + "logits/chosen": -2.2096598148345947, + "logits/rejected": -2.1836624145507812, + "logps/chosen": -59.296051025390625, + "logps/rejected": -243.50997924804688, + "loss": 0.3269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3836463987827301, + "rewards/margins": 1.6535484790802002, + "rewards/rejected": -1.2699021100997925, + "step": 3504 + }, + { + "epoch": 0.2, + "learning_rate": 9.22730595016173e-08, + "logits/chosen": -2.091130018234253, + "logits/rejected": -2.087329387664795, + "logps/chosen": -218.34231567382812, + "logps/rejected": -310.53167724609375, + "loss": 0.474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.724273681640625, + "rewards/margins": 0.11687010526657104, + "rewards/rejected": 0.607403576374054, + "step": 3505 + }, + { + "epoch": 0.2, + "learning_rate": 9.226802597449457e-08, + "logits/chosen": -2.054777145385742, + "logits/rejected": -2.0525407791137695, + "logps/chosen": -0.00245984410867095, + "logps/rejected": -129.2745361328125, + "loss": 0.4521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012891204096376896, + "rewards/margins": 1.4278298616409302, + "rewards/rejected": -1.4279587268829346, + "step": 3506 + }, + { + "epoch": 0.2, + "learning_rate": 9.226299094580481e-08, + "logits/chosen": -2.0941803455352783, + "logits/rejected": -2.0791409015655518, + "logps/chosen": -53.90351104736328, + "logps/rejected": -260.0167236328125, + "loss": 0.463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024089813232421875, + "rewards/margins": 1.0292977094650269, + "rewards/rejected": -1.0533875226974487, + "step": 3507 + }, + { + "epoch": 0.2, + "learning_rate": 9.225795441572688e-08, + "logits/chosen": -2.147613763809204, + "logits/rejected": -2.1532680988311768, + "logps/chosen": -16.97309112548828, + "logps/rejected": -290.42608642578125, + "loss": 0.3983, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36477890610694885, + "rewards/margins": 1.208565592765808, + "rewards/rejected": -0.8437866568565369, + "step": 3508 + }, + { + "epoch": 0.2, + "learning_rate": 9.22529163844397e-08, + "logits/chosen": -2.0021393299102783, + "logits/rejected": -2.000925064086914, + "logps/chosen": -420.59014892578125, + "logps/rejected": -566.1495361328125, + "loss": 0.1487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6270843744277954, + "rewards/margins": 1.9012908935546875, + "rewards/rejected": -0.2742065489292145, + "step": 3509 + }, + { + "epoch": 0.2, + "learning_rate": 9.224787685212226e-08, + "logits/chosen": -2.0557303428649902, + "logits/rejected": -2.0575108528137207, + "logps/chosen": -5.335013389587402, + "logps/rejected": -98.32332611083984, + "loss": 0.6176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06807126849889755, + "rewards/margins": 0.39272966980934143, + "rewards/rejected": -0.4608009457588196, + "step": 3510 + }, + { + "epoch": 0.2, + "learning_rate": 9.224283581895355e-08, + "logits/chosen": -2.2305331230163574, + "logits/rejected": -2.2378299236297607, + "logps/chosen": -5.081274032592773, + "logps/rejected": -97.21354675292969, + "loss": 0.5355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0439724437892437, + "rewards/margins": 0.43600964546203613, + "rewards/rejected": -0.39203721284866333, + "step": 3511 + }, + { + "epoch": 0.2, + "learning_rate": 9.223779328511269e-08, + "logits/chosen": -2.1233878135681152, + "logits/rejected": -2.0056517124176025, + "logps/chosen": -268.57244873046875, + "logps/rejected": -502.6990966796875, + "loss": 0.4981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34531861543655396, + "rewards/margins": 0.3349456787109375, + "rewards/rejected": 0.010372924618422985, + "step": 3512 + }, + { + "epoch": 0.2, + "learning_rate": 9.223274925077881e-08, + "logits/chosen": -2.0114359855651855, + "logits/rejected": -1.995754599571228, + "logps/chosen": -96.2233657836914, + "logps/rejected": -270.76580810546875, + "loss": 0.5259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1667129546403885, + "rewards/margins": 0.7683852910995483, + "rewards/rejected": -0.601672351360321, + "step": 3513 + }, + { + "epoch": 0.2, + "learning_rate": 9.222770371613112e-08, + "logits/chosen": -2.057870864868164, + "logits/rejected": -2.0493714809417725, + "logps/chosen": -2.6407549381256104, + "logps/rejected": -113.30943298339844, + "loss": 0.6798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012697768397629261, + "rewards/margins": 0.05163285881280899, + "rewards/rejected": -0.038935091346502304, + "step": 3514 + }, + { + "epoch": 0.2, + "learning_rate": 9.22226566813488e-08, + "logits/chosen": -1.9369087219238281, + "logits/rejected": -1.9212982654571533, + "logps/chosen": -125.54777526855469, + "logps/rejected": -197.98922729492188, + "loss": 0.586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3197677731513977, + "rewards/margins": 0.1387283354997635, + "rewards/rejected": 0.18103943765163422, + "step": 3515 + }, + { + "epoch": 0.2, + "learning_rate": 9.221760814661119e-08, + "logits/chosen": -1.9873442649841309, + "logits/rejected": -1.9843579530715942, + "logps/chosen": -1.279515266418457, + "logps/rejected": -159.39892578125, + "loss": 0.5408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017514824867248535, + "rewards/margins": 0.7195213437080383, + "rewards/rejected": -0.7370361685752869, + "step": 3516 + }, + { + "epoch": 0.2, + "learning_rate": 9.221255811209766e-08, + "logits/chosen": -2.140043020248413, + "logits/rejected": -2.140331983566284, + "logps/chosen": -10.31230640411377, + "logps/rejected": -176.9656982421875, + "loss": 0.5454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03751535341143608, + "rewards/margins": 0.7522490620613098, + "rewards/rejected": -0.789764404296875, + "step": 3517 + }, + { + "epoch": 0.2, + "learning_rate": 9.220750657798754e-08, + "logits/chosen": -2.3001813888549805, + "logits/rejected": -2.297344207763672, + "logps/chosen": -25.98741912841797, + "logps/rejected": -164.98052978515625, + "loss": 0.479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0889820083975792, + "rewards/margins": 1.0402843952178955, + "rewards/rejected": -1.129266381263733, + "step": 3518 + }, + { + "epoch": 0.2, + "learning_rate": 9.220245354446035e-08, + "logits/chosen": -2.1027679443359375, + "logits/rejected": -2.0980582237243652, + "logps/chosen": -247.7130889892578, + "logps/rejected": -335.8816833496094, + "loss": 0.5054, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5347061157226562, + "rewards/margins": -0.38610684871673584, + "rewards/rejected": 1.920812964439392, + "step": 3519 + }, + { + "epoch": 0.2, + "learning_rate": 9.219739901169556e-08, + "logits/chosen": -2.0346691608428955, + "logits/rejected": -2.0310845375061035, + "logps/chosen": -23.064651489257812, + "logps/rejected": -114.05753326416016, + "loss": 0.6026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17445336282253265, + "rewards/margins": 0.1448749601840973, + "rewards/rejected": 0.029578400775790215, + "step": 3520 + }, + { + "epoch": 0.2, + "learning_rate": 9.219234297987275e-08, + "logits/chosen": -2.1167361736297607, + "logits/rejected": -2.1109490394592285, + "logps/chosen": -14.742480278015137, + "logps/rejected": -151.7607421875, + "loss": 0.4905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06858339160680771, + "rewards/margins": 0.9456570744514465, + "rewards/rejected": -0.8770737051963806, + "step": 3521 + }, + { + "epoch": 0.2, + "learning_rate": 9.218728544917153e-08, + "logits/chosen": -2.1745800971984863, + "logits/rejected": -2.160159111022949, + "logps/chosen": -2.701580047607422, + "logps/rejected": -348.7655029296875, + "loss": 0.3914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022804928943514824, + "rewards/margins": 2.307753801345825, + "rewards/rejected": -2.2849488258361816, + "step": 3522 + }, + { + "epoch": 0.21, + "learning_rate": 9.218222641977159e-08, + "logits/chosen": -2.15920352935791, + "logits/rejected": -2.2209997177124023, + "logps/chosen": -172.4696044921875, + "logps/rejected": -310.399658203125, + "loss": 0.2236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.938446044921875, + "rewards/margins": 1.539239525794983, + "rewards/rejected": -0.6007934808731079, + "step": 3523 + }, + { + "epoch": 0.21, + "learning_rate": 9.217716589185263e-08, + "logits/chosen": -2.2526402473449707, + "logits/rejected": -2.2454886436462402, + "logps/chosen": -6.566941261291504, + "logps/rejected": -185.92059326171875, + "loss": 0.4484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026407719124108553, + "rewards/margins": 1.4044119119644165, + "rewards/rejected": -1.4070526361465454, + "step": 3524 + }, + { + "epoch": 0.21, + "learning_rate": 9.21721038655944e-08, + "logits/chosen": -2.154635429382324, + "logits/rejected": -2.134815216064453, + "logps/chosen": -53.488380432128906, + "logps/rejected": -237.64541625976562, + "loss": 0.4204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017223358154296875, + "rewards/margins": 1.6507866382598877, + "rewards/rejected": -1.6335632801055908, + "step": 3525 + }, + { + "epoch": 0.21, + "learning_rate": 9.216704034117678e-08, + "logits/chosen": -2.212038516998291, + "logits/rejected": -2.1937146186828613, + "logps/chosen": -8.84142780303955, + "logps/rejected": -339.77239990234375, + "loss": 0.372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031119346618652344, + "rewards/margins": 2.48447322845459, + "rewards/rejected": -2.4533538818359375, + "step": 3526 + }, + { + "epoch": 0.21, + "learning_rate": 9.216197531877962e-08, + "logits/chosen": -1.995337963104248, + "logits/rejected": -1.9803842306137085, + "logps/chosen": -53.22621536254883, + "logps/rejected": -216.2674102783203, + "loss": 0.424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2695060670375824, + "rewards/margins": 1.0271720886230469, + "rewards/rejected": -0.7576660513877869, + "step": 3527 + }, + { + "epoch": 0.21, + "learning_rate": 9.215690879858287e-08, + "logits/chosen": -2.1261372566223145, + "logits/rejected": -2.1310455799102783, + "logps/chosen": -1.2274528741836548, + "logps/rejected": -145.9524688720703, + "loss": 0.4563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018306827172636986, + "rewards/margins": 1.3920159339904785, + "rewards/rejected": -1.3737090826034546, + "step": 3528 + }, + { + "epoch": 0.21, + "learning_rate": 9.215184078076648e-08, + "logits/chosen": -2.0168747901916504, + "logits/rejected": -1.994934320449829, + "logps/chosen": -180.35264587402344, + "logps/rejected": -496.28338623046875, + "loss": 0.124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3973907232284546, + "rewards/margins": 2.821075439453125, + "rewards/rejected": -1.4236847162246704, + "step": 3529 + }, + { + "epoch": 0.21, + "learning_rate": 9.214677126551054e-08, + "logits/chosen": -1.9653035402297974, + "logits/rejected": -1.9654525518417358, + "logps/chosen": -174.2320098876953, + "logps/rejected": -260.1623840332031, + "loss": 0.2336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9824997186660767, + "rewards/margins": 1.0866806507110596, + "rewards/rejected": -0.10418090969324112, + "step": 3530 + }, + { + "epoch": 0.21, + "learning_rate": 9.214170025299511e-08, + "logits/chosen": -2.164720058441162, + "logits/rejected": -2.1552157402038574, + "logps/chosen": -42.566322326660156, + "logps/rejected": -229.42095947265625, + "loss": 0.4741, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16392020881175995, + "rewards/margins": 1.2814351320266724, + "rewards/rejected": -1.4453552961349487, + "step": 3531 + }, + { + "epoch": 0.21, + "learning_rate": 9.213662774340037e-08, + "logits/chosen": -2.1210176944732666, + "logits/rejected": -2.1166253089904785, + "logps/chosen": -5.505973815917969, + "logps/rejected": -102.08984375, + "loss": 0.6179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07475581020116806, + "rewards/margins": 0.2765975594520569, + "rewards/rejected": -0.20184174180030823, + "step": 3532 + }, + { + "epoch": 0.21, + "learning_rate": 9.213155373690648e-08, + "logits/chosen": -2.0469932556152344, + "logits/rejected": -2.0262961387634277, + "logps/chosen": -216.84222412109375, + "logps/rejected": -427.77716064453125, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7210189700126648, + "rewards/margins": 1.162358045578003, + "rewards/rejected": -0.4413391053676605, + "step": 3533 + }, + { + "epoch": 0.21, + "learning_rate": 9.212647823369372e-08, + "logits/chosen": -2.1938066482543945, + "logits/rejected": -2.2181665897369385, + "logps/chosen": -257.23651123046875, + "logps/rejected": -334.59088134765625, + "loss": 0.1246, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.634912133216858, + "rewards/margins": 1.9953736066818237, + "rewards/rejected": -0.36046144366264343, + "step": 3534 + }, + { + "epoch": 0.21, + "learning_rate": 9.212140123394238e-08, + "logits/chosen": -2.1629838943481445, + "logits/rejected": -2.1508901119232178, + "logps/chosen": -62.0955810546875, + "logps/rejected": -265.89422607421875, + "loss": 0.3271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4425857663154602, + "rewards/margins": 1.4144821166992188, + "rewards/rejected": -0.9718963503837585, + "step": 3535 + }, + { + "epoch": 0.21, + "learning_rate": 9.211632273783286e-08, + "logits/chosen": -2.016761541366577, + "logits/rejected": -2.0096287727355957, + "logps/chosen": -120.84889221191406, + "logps/rejected": -263.3841857910156, + "loss": 0.5585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08380737155675888, + "rewards/margins": 0.8356354236602783, + "rewards/rejected": -0.9194427728652954, + "step": 3536 + }, + { + "epoch": 0.21, + "learning_rate": 9.211124274554551e-08, + "logits/chosen": -1.9271160364151, + "logits/rejected": -1.9340612888336182, + "logps/chosen": -64.51783752441406, + "logps/rejected": -149.6035919189453, + "loss": 0.616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04947509989142418, + "rewards/margins": 0.35842132568359375, + "rewards/rejected": -0.4078964293003082, + "step": 3537 + }, + { + "epoch": 0.21, + "learning_rate": 9.210616125726085e-08, + "logits/chosen": -2.158470869064331, + "logits/rejected": -2.1516284942626953, + "logps/chosen": -20.715673446655273, + "logps/rejected": -127.19783020019531, + "loss": 0.636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16250400245189667, + "rewards/margins": 0.38139092922210693, + "rewards/rejected": -0.5438949465751648, + "step": 3538 + }, + { + "epoch": 0.21, + "learning_rate": 9.210107827315935e-08, + "logits/chosen": -1.9832121133804321, + "logits/rejected": -1.9751439094543457, + "logps/chosen": -63.636322021484375, + "logps/rejected": -191.02894592285156, + "loss": 0.6387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0883556380867958, + "rewards/margins": 0.3064029812812805, + "rewards/rejected": -0.3947586119174957, + "step": 3539 + }, + { + "epoch": 0.21, + "learning_rate": 9.209599379342165e-08, + "logits/chosen": -1.989678144454956, + "logits/rejected": -1.9986211061477661, + "logps/chosen": -19.298786163330078, + "logps/rejected": -135.58126831054688, + "loss": 0.6289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31109943985939026, + "rewards/margins": 0.6725584268569946, + "rewards/rejected": -0.9836578369140625, + "step": 3540 + }, + { + "epoch": 0.21, + "learning_rate": 9.209090781822832e-08, + "logits/chosen": -2.0624849796295166, + "logits/rejected": -2.047607660293579, + "logps/chosen": -0.20486950874328613, + "logps/rejected": -124.38101196289062, + "loss": 0.6874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004490685649216175, + "rewards/margins": 0.016632817685604095, + "rewards/rejected": -0.021123504266142845, + "step": 3541 + }, + { + "epoch": 0.21, + "learning_rate": 9.208582034776005e-08, + "logits/chosen": -2.167135000228882, + "logits/rejected": -2.159379720687866, + "logps/chosen": -196.6944580078125, + "logps/rejected": -398.5538024902344, + "loss": 0.2668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1369460821151733, + "rewards/margins": 0.9544997811317444, + "rewards/rejected": 0.18244628608226776, + "step": 3542 + }, + { + "epoch": 0.21, + "learning_rate": 9.208073138219759e-08, + "logits/chosen": -1.9088283777236938, + "logits/rejected": -1.928382158279419, + "logps/chosen": -239.53305053710938, + "logps/rejected": -518.89111328125, + "loss": 0.2235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7983917593955994, + "rewards/margins": 1.6722931861877441, + "rewards/rejected": -0.8739013671875, + "step": 3543 + }, + { + "epoch": 0.21, + "learning_rate": 9.207564092172169e-08, + "logits/chosen": -2.052917718887329, + "logits/rejected": -2.022019863128662, + "logps/chosen": -148.03189086914062, + "logps/rejected": -345.96875, + "loss": 0.2215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9867904782295227, + "rewards/margins": 1.4209884405136108, + "rewards/rejected": -0.4341979920864105, + "step": 3544 + }, + { + "epoch": 0.21, + "learning_rate": 9.207054896651322e-08, + "logits/chosen": -2.0504565238952637, + "logits/rejected": -2.066178321838379, + "logps/chosen": -252.2176513671875, + "logps/rejected": -434.76171875, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.390954613685608, + "rewards/margins": 1.9068787097930908, + "rewards/rejected": -0.5159240961074829, + "step": 3545 + }, + { + "epoch": 0.21, + "learning_rate": 9.206545551675308e-08, + "logits/chosen": -2.0272810459136963, + "logits/rejected": -2.0259721279144287, + "logps/chosen": -195.1143341064453, + "logps/rejected": -234.48345947265625, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2084823846817017, + "rewards/margins": 0.4792587161064148, + "rewards/rejected": 0.7292236685752869, + "step": 3546 + }, + { + "epoch": 0.21, + "learning_rate": 9.206036057262219e-08, + "logits/chosen": -2.229337215423584, + "logits/rejected": -2.224933385848999, + "logps/chosen": -50.860816955566406, + "logps/rejected": -124.82066345214844, + "loss": 0.7515, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.23044510185718536, + "rewards/margins": -0.041915133595466614, + "rewards/rejected": -0.18852996826171875, + "step": 3547 + }, + { + "epoch": 0.21, + "learning_rate": 9.205526413430154e-08, + "logits/chosen": -2.120640754699707, + "logits/rejected": -2.1228525638580322, + "logps/chosen": -28.986652374267578, + "logps/rejected": -140.79681396484375, + "loss": 0.7131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46010857820510864, + "rewards/margins": 0.47069281339645386, + "rewards/rejected": -0.9308013916015625, + "step": 3548 + }, + { + "epoch": 0.21, + "learning_rate": 9.205016620197219e-08, + "logits/chosen": -2.0118069648742676, + "logits/rejected": -2.0058536529541016, + "logps/chosen": -34.06648635864258, + "logps/rejected": -130.719482421875, + "loss": 0.3933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19489708542823792, + "rewards/margins": 1.3000667095184326, + "rewards/rejected": -1.105169653892517, + "step": 3549 + }, + { + "epoch": 0.21, + "learning_rate": 9.204506677581525e-08, + "logits/chosen": -2.086613416671753, + "logits/rejected": -2.0802111625671387, + "logps/chosen": -220.6708984375, + "logps/rejected": -341.2281494140625, + "loss": 0.4047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2437591552734375, + "rewards/margins": 0.2118438482284546, + "rewards/rejected": 1.031915307044983, + "step": 3550 + }, + { + "epoch": 0.21, + "learning_rate": 9.203996585601188e-08, + "logits/chosen": -2.2098636627197266, + "logits/rejected": -2.1993911266326904, + "logps/chosen": -1.1030000448226929, + "logps/rejected": -102.94470977783203, + "loss": 0.4992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.046376075595617294, + "rewards/margins": 1.0420951843261719, + "rewards/rejected": -1.0884712934494019, + "step": 3551 + }, + { + "epoch": 0.21, + "learning_rate": 9.203486344274328e-08, + "logits/chosen": -2.088737726211548, + "logits/rejected": -2.0850861072540283, + "logps/chosen": -29.296327590942383, + "logps/rejected": -134.9031219482422, + "loss": 0.5291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016193199902772903, + "rewards/margins": 0.755178689956665, + "rewards/rejected": -0.7389854788780212, + "step": 3552 + }, + { + "epoch": 0.21, + "learning_rate": 9.202975953619073e-08, + "logits/chosen": -1.901994228363037, + "logits/rejected": -1.9019496440887451, + "logps/chosen": -304.1365966796875, + "logps/rejected": -494.614501953125, + "loss": 0.1227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2959015369415283, + "rewards/margins": 1.8728790283203125, + "rewards/rejected": -0.576977550983429, + "step": 3553 + }, + { + "epoch": 0.21, + "learning_rate": 9.202465413653551e-08, + "logits/chosen": -1.9289089441299438, + "logits/rejected": -1.9168760776519775, + "logps/chosen": -27.25117301940918, + "logps/rejected": -198.166748046875, + "loss": 0.4667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048089027404785156, + "rewards/margins": 1.1127508878707886, + "rewards/rejected": -1.1608399152755737, + "step": 3554 + }, + { + "epoch": 0.21, + "learning_rate": 9.201954724395901e-08, + "logits/chosen": -2.0921947956085205, + "logits/rejected": -2.0921287536621094, + "logps/chosen": -16.981002807617188, + "logps/rejected": -288.5801086425781, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0057392120361328125, + "rewards/margins": 1.4195301532745361, + "rewards/rejected": -1.4137909412384033, + "step": 3555 + }, + { + "epoch": 0.21, + "learning_rate": 9.201443885864267e-08, + "logits/chosen": -2.090280771255493, + "logits/rejected": -2.07704758644104, + "logps/chosen": -29.390247344970703, + "logps/rejected": -297.51263427734375, + "loss": 0.3723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.060605239123106, + "rewards/margins": 2.032681703567505, + "rewards/rejected": -1.972076416015625, + "step": 3556 + }, + { + "epoch": 0.21, + "learning_rate": 9.200932898076795e-08, + "logits/chosen": -2.0100414752960205, + "logits/rejected": -2.0145370960235596, + "logps/chosen": -0.10040108114480972, + "logps/rejected": -78.66482543945312, + "loss": 0.5852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013175777858123183, + "rewards/margins": 0.4916588068008423, + "rewards/rejected": -0.4929763972759247, + "step": 3557 + }, + { + "epoch": 0.21, + "learning_rate": 9.200421761051635e-08, + "logits/chosen": -1.8461779356002808, + "logits/rejected": -1.8223448991775513, + "logps/chosen": -278.2036437988281, + "logps/rejected": -496.6825256347656, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9354004263877869, + "rewards/margins": 0.9920593500137329, + "rewards/rejected": -0.05665893480181694, + "step": 3558 + }, + { + "epoch": 0.21, + "learning_rate": 9.199910474806949e-08, + "logits/chosen": -2.1976544857025146, + "logits/rejected": -2.1913740634918213, + "logps/chosen": -161.57980346679688, + "logps/rejected": -280.7861328125, + "loss": 0.3646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8516952395439148, + "rewards/margins": 0.5021346807479858, + "rewards/rejected": 0.34956055879592896, + "step": 3559 + }, + { + "epoch": 0.21, + "learning_rate": 9.199399039360898e-08, + "logits/chosen": -2.2458550930023193, + "logits/rejected": -2.226287841796875, + "logps/chosen": -54.5859489440918, + "logps/rejected": -181.0509796142578, + "loss": 0.4033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.053896334022283554, + "rewards/margins": 1.6662086248397827, + "rewards/rejected": -1.6123123168945312, + "step": 3560 + }, + { + "epoch": 0.21, + "learning_rate": 9.198887454731653e-08, + "logits/chosen": -2.201343297958374, + "logits/rejected": -2.181885242462158, + "logps/chosen": -7.271583308465779e-05, + "logps/rejected": -175.525146484375, + "loss": 0.4185, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.225121064242558e-07, + "rewards/margins": 1.865862250328064, + "rewards/rejected": -1.8658630847930908, + "step": 3561 + }, + { + "epoch": 0.21, + "learning_rate": 9.198375720937386e-08, + "logits/chosen": -1.9131934642791748, + "logits/rejected": -1.909577488899231, + "logps/chosen": -13.827589988708496, + "logps/rejected": -178.91299438476562, + "loss": 0.4594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10640096664428711, + "rewards/margins": 1.2010482549667358, + "rewards/rejected": -1.0946472883224487, + "step": 3562 + }, + { + "epoch": 0.21, + "learning_rate": 9.197863837996277e-08, + "logits/chosen": -1.9060016870498657, + "logits/rejected": -1.9070875644683838, + "logps/chosen": -148.68997192382812, + "logps/rejected": -368.1614074707031, + "loss": 0.4663, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.02916419506073, + "rewards/margins": -0.03085470199584961, + "rewards/rejected": 1.0600188970565796, + "step": 3563 + }, + { + "epoch": 0.21, + "learning_rate": 9.19735180592651e-08, + "logits/chosen": -2.2308590412139893, + "logits/rejected": -2.211193799972534, + "logps/chosen": -2.5941972732543945, + "logps/rejected": -114.15086364746094, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014735198579728603, + "rewards/margins": 0.779157817363739, + "rewards/rejected": -0.764422595500946, + "step": 3564 + }, + { + "epoch": 0.21, + "learning_rate": 9.196839624746277e-08, + "logits/chosen": -2.052701711654663, + "logits/rejected": -2.0506174564361572, + "logps/chosen": -4.82791438116692e-05, + "logps/rejected": -91.4542007446289, + "loss": 0.6254, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1917291153906717e-08, + "rewards/margins": 0.2923629879951477, + "rewards/rejected": -0.2923629879951477, + "step": 3565 + }, + { + "epoch": 0.21, + "learning_rate": 9.196327294473769e-08, + "logits/chosen": -2.265476703643799, + "logits/rejected": -2.258363962173462, + "logps/chosen": -155.730224609375, + "logps/rejected": -328.01409912109375, + "loss": 0.4394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7336227297782898, + "rewards/margins": 0.2627822756767273, + "rewards/rejected": 0.4708404541015625, + "step": 3566 + }, + { + "epoch": 0.21, + "learning_rate": 9.195814815127192e-08, + "logits/chosen": -1.93458890914917, + "logits/rejected": -1.8272194862365723, + "logps/chosen": -254.70082092285156, + "logps/rejected": -456.4626159667969, + "loss": 0.2515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8336044549942017, + "rewards/margins": 1.3556534051895142, + "rewards/rejected": -0.5220489501953125, + "step": 3567 + }, + { + "epoch": 0.21, + "learning_rate": 9.195302186724747e-08, + "logits/chosen": -2.1669721603393555, + "logits/rejected": -2.169930934906006, + "logps/chosen": -2.16691255569458, + "logps/rejected": -45.910911560058594, + "loss": 0.7362, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.010243916884064674, + "rewards/margins": -0.1698315292596817, + "rewards/rejected": 0.18007545173168182, + "step": 3568 + }, + { + "epoch": 0.21, + "learning_rate": 9.194789409284647e-08, + "logits/chosen": -2.2675063610076904, + "logits/rejected": -2.2636706829071045, + "logps/chosen": -0.010900960303843021, + "logps/rejected": -119.0345230102539, + "loss": 0.5224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00012369696923997253, + "rewards/margins": 0.8639901280403137, + "rewards/rejected": -0.8638664484024048, + "step": 3569 + }, + { + "epoch": 0.21, + "learning_rate": 9.194276482825109e-08, + "logits/chosen": -2.0512280464172363, + "logits/rejected": -2.0097787380218506, + "logps/chosen": -149.67562866210938, + "logps/rejected": -579.4276123046875, + "loss": 0.3403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03568267822265625, + "rewards/margins": 2.6288223266601562, + "rewards/rejected": -2.5931396484375, + "step": 3570 + }, + { + "epoch": 0.21, + "learning_rate": 9.193763407364353e-08, + "logits/chosen": -2.190967321395874, + "logits/rejected": -2.1854262351989746, + "logps/chosen": -0.005721915513277054, + "logps/rejected": -220.6090850830078, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016302433505188674, + "rewards/margins": 3.6186282634735107, + "rewards/rejected": -3.618791341781616, + "step": 3571 + }, + { + "epoch": 0.21, + "learning_rate": 9.193250182920606e-08, + "logits/chosen": -2.2071685791015625, + "logits/rejected": -2.206841230392456, + "logps/chosen": -0.0025575452018529177, + "logps/rejected": -145.9675750732422, + "loss": 0.569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017966468294616789, + "rewards/margins": 0.5792866349220276, + "rewards/rejected": -0.5794662833213806, + "step": 3572 + }, + { + "epoch": 0.21, + "learning_rate": 9.192736809512103e-08, + "logits/chosen": -2.2222445011138916, + "logits/rejected": -2.1973984241485596, + "logps/chosen": -165.53109741210938, + "logps/rejected": -267.7536926269531, + "loss": 0.2059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.639623999595642, + "rewards/margins": 1.0843017101287842, + "rewards/rejected": 0.5553222894668579, + "step": 3573 + }, + { + "epoch": 0.21, + "learning_rate": 9.192223287157079e-08, + "logits/chosen": -2.047435998916626, + "logits/rejected": -2.045332193374634, + "logps/chosen": -9.291118621826172, + "logps/rejected": -63.467716217041016, + "loss": 0.5499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010883617214858532, + "rewards/margins": 0.7073239088058472, + "rewards/rejected": -0.7182075381278992, + "step": 3574 + }, + { + "epoch": 0.21, + "learning_rate": 9.191709615873778e-08, + "logits/chosen": -2.0144872665405273, + "logits/rejected": -2.012690782546997, + "logps/chosen": -27.054277420043945, + "logps/rejected": -82.58290100097656, + "loss": 0.9527, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5616623163223267, + "rewards/margins": -0.4098396301269531, + "rewards/rejected": -0.15182267129421234, + "step": 3575 + }, + { + "epoch": 0.21, + "learning_rate": 9.191195795680446e-08, + "logits/chosen": -2.0027451515197754, + "logits/rejected": -2.0182886123657227, + "logps/chosen": -189.71322631835938, + "logps/rejected": -415.55877685546875, + "loss": 0.1355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9929443597793579, + "rewards/margins": 2.9838593006134033, + "rewards/rejected": -1.9909149408340454, + "step": 3576 + }, + { + "epoch": 0.21, + "learning_rate": 9.19068182659534e-08, + "logits/chosen": -2.213496208190918, + "logits/rejected": -2.20339035987854, + "logps/chosen": -5.120773792266846, + "logps/rejected": -264.1562194824219, + "loss": 0.4261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026183176785707474, + "rewards/margins": 1.6862677335739136, + "rewards/rejected": -1.66008460521698, + "step": 3577 + }, + { + "epoch": 0.21, + "learning_rate": 9.190167708636715e-08, + "logits/chosen": -1.75858736038208, + "logits/rejected": -1.7424914836883545, + "logps/chosen": -315.0698547363281, + "logps/rejected": -585.6304321289062, + "loss": 0.3833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2370636463165283, + "rewards/margins": 0.2616302967071533, + "rewards/rejected": 0.975433349609375, + "step": 3578 + }, + { + "epoch": 0.21, + "learning_rate": 9.189653441822836e-08, + "logits/chosen": -2.043246030807495, + "logits/rejected": -2.0497078895568848, + "logps/chosen": -172.49911499023438, + "logps/rejected": -216.083251953125, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1826599836349487, + "rewards/margins": 0.43782657384872437, + "rewards/rejected": 0.7448334097862244, + "step": 3579 + }, + { + "epoch": 0.21, + "learning_rate": 9.189139026171974e-08, + "logits/chosen": -2.1478219032287598, + "logits/rejected": -2.143625020980835, + "logps/chosen": -52.93341064453125, + "logps/rejected": -142.09671020507812, + "loss": 0.7884, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.29486846923828125, + "rewards/margins": -0.24150390923023224, + "rewards/rejected": -0.05336456373333931, + "step": 3580 + }, + { + "epoch": 0.21, + "learning_rate": 9.188624461702403e-08, + "logits/chosen": -1.9132009744644165, + "logits/rejected": -1.9074112176895142, + "logps/chosen": -231.19265747070312, + "logps/rejected": -442.84625244140625, + "loss": 0.0964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6811676025390625, + "rewards/margins": 2.417651414871216, + "rewards/rejected": -0.7364837527275085, + "step": 3581 + }, + { + "epoch": 0.21, + "learning_rate": 9.188109748432402e-08, + "logits/chosen": -1.9994738101959229, + "logits/rejected": -1.9552918672561646, + "logps/chosen": -192.0817413330078, + "logps/rejected": -499.3063659667969, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1214783191680908, + "rewards/margins": 0.6869995594024658, + "rewards/rejected": 0.434478759765625, + "step": 3582 + }, + { + "epoch": 0.21, + "learning_rate": 9.187594886380256e-08, + "logits/chosen": -2.1396169662475586, + "logits/rejected": -2.1742916107177734, + "logps/chosen": -199.61094665527344, + "logps/rejected": -308.6976013183594, + "loss": 0.3742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0689910650253296, + "rewards/margins": 0.2900604009628296, + "rewards/rejected": 0.7789306640625, + "step": 3583 + }, + { + "epoch": 0.21, + "learning_rate": 9.187079875564258e-08, + "logits/chosen": -2.1060216426849365, + "logits/rejected": -2.0967416763305664, + "logps/chosen": -1.54109787940979, + "logps/rejected": -208.55303955078125, + "loss": 0.3659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041671108454465866, + "rewards/margins": 2.7613306045532227, + "rewards/rejected": -2.7196595668792725, + "step": 3584 + }, + { + "epoch": 0.21, + "learning_rate": 9.186564716002701e-08, + "logits/chosen": -2.190793752670288, + "logits/rejected": -2.187084436416626, + "logps/chosen": -37.41998291015625, + "logps/rejected": -209.91799926757812, + "loss": 0.4639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.043415069580078125, + "rewards/margins": 1.410382866859436, + "rewards/rejected": -1.4537979364395142, + "step": 3585 + }, + { + "epoch": 0.21, + "learning_rate": 9.186049407713885e-08, + "logits/chosen": -1.9498372077941895, + "logits/rejected": -1.9565238952636719, + "logps/chosen": -52.93617630004883, + "logps/rejected": -219.04017639160156, + "loss": 0.5947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1971077024936676, + "rewards/margins": 0.7740806341171265, + "rewards/rejected": -0.9711883664131165, + "step": 3586 + }, + { + "epoch": 0.21, + "learning_rate": 9.185533950716117e-08, + "logits/chosen": -2.1620993614196777, + "logits/rejected": -2.1637303829193115, + "logps/chosen": -2.9276087284088135, + "logps/rejected": -69.24588775634766, + "loss": 0.582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025608135387301445, + "rewards/margins": 0.3825890123844147, + "rewards/rejected": -0.3569808900356293, + "step": 3587 + }, + { + "epoch": 0.21, + "learning_rate": 9.185018345027712e-08, + "logits/chosen": -2.010237455368042, + "logits/rejected": -2.0086052417755127, + "logps/chosen": -2.5793886184692383, + "logps/rejected": -23.329395294189453, + "loss": 0.7242, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04519329220056534, + "rewards/margins": -0.076573945581913, + "rewards/rejected": 0.031380653381347656, + "step": 3588 + }, + { + "epoch": 0.21, + "learning_rate": 9.184502590666982e-08, + "logits/chosen": -1.8259060382843018, + "logits/rejected": -1.8236883878707886, + "logps/chosen": -8.462600708007812, + "logps/rejected": -336.7404479980469, + "loss": 0.368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03320446237921715, + "rewards/margins": 2.7609846591949463, + "rewards/rejected": -2.7277801036834717, + "step": 3589 + }, + { + "epoch": 0.21, + "learning_rate": 9.183986687652253e-08, + "logits/chosen": -2.234001636505127, + "logits/rejected": -2.2222683429718018, + "logps/chosen": -4.213201522827148, + "logps/rejected": -165.1432647705078, + "loss": 0.4963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011517715640366077, + "rewards/margins": 0.9670124053955078, + "rewards/rejected": -0.9554947018623352, + "step": 3590 + }, + { + "epoch": 0.21, + "learning_rate": 9.183470636001851e-08, + "logits/chosen": -2.034438371658325, + "logits/rejected": -2.0327930450439453, + "logps/chosen": -12.530713081359863, + "logps/rejected": -200.48837280273438, + "loss": 0.3831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0945964828133583, + "rewards/margins": 1.9122921228408813, + "rewards/rejected": -1.8176956176757812, + "step": 3591 + }, + { + "epoch": 0.21, + "learning_rate": 9.182954435734107e-08, + "logits/chosen": -2.1714630126953125, + "logits/rejected": -2.162102222442627, + "logps/chosen": -51.335697174072266, + "logps/rejected": -120.74183654785156, + "loss": 0.5011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19269981980323792, + "rewards/margins": 0.6393260955810547, + "rewards/rejected": -0.4466262757778168, + "step": 3592 + }, + { + "epoch": 0.21, + "learning_rate": 9.182438086867361e-08, + "logits/chosen": -1.942118525505066, + "logits/rejected": -1.9404253959655762, + "logps/chosen": -193.48536682128906, + "logps/rejected": -384.1141357421875, + "loss": 0.2086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6077362298965454, + "rewards/margins": 1.1592864990234375, + "rewards/rejected": 0.4484497010707855, + "step": 3593 + }, + { + "epoch": 0.21, + "learning_rate": 9.181921589419956e-08, + "logits/chosen": -1.9654759168624878, + "logits/rejected": -1.999690055847168, + "logps/chosen": -207.31236267089844, + "logps/rejected": -389.22882080078125, + "loss": 0.167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2138794660568237, + "rewards/margins": 1.6447968482971191, + "rewards/rejected": -0.430917352437973, + "step": 3594 + }, + { + "epoch": 0.21, + "learning_rate": 9.18140494341024e-08, + "logits/chosen": -1.9716178178787231, + "logits/rejected": -1.9398165941238403, + "logps/chosen": -294.2451171875, + "logps/rejected": -365.05548095703125, + "loss": 0.3782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7388977408409119, + "rewards/margins": 0.59130859375, + "rewards/rejected": 0.14758911728858948, + "step": 3595 + }, + { + "epoch": 0.21, + "learning_rate": 9.180888148856567e-08, + "logits/chosen": -2.097938060760498, + "logits/rejected": -2.11971378326416, + "logps/chosen": -164.12704467773438, + "logps/rejected": -311.5984191894531, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16463623940944672, + "rewards/margins": 1.0593109130859375, + "rewards/rejected": -1.2239471673965454, + "step": 3596 + }, + { + "epoch": 0.21, + "learning_rate": 9.180371205777295e-08, + "logits/chosen": -2.066380023956299, + "logits/rejected": -2.060875654220581, + "logps/chosen": -241.35751342773438, + "logps/rejected": -424.7025146484375, + "loss": 0.2172, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2494186162948608, + "rewards/margins": 1.2397750616073608, + "rewards/rejected": 0.0096435546875, + "step": 3597 + }, + { + "epoch": 0.21, + "learning_rate": 9.17985411419079e-08, + "logits/chosen": -1.9909054040908813, + "logits/rejected": -2.0143299102783203, + "logps/chosen": -187.2188720703125, + "logps/rejected": -324.1822204589844, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.032379150390625, + "rewards/margins": 0.5409881472587585, + "rewards/rejected": 0.49139100313186646, + "step": 3598 + }, + { + "epoch": 0.21, + "learning_rate": 9.179336874115422e-08, + "logits/chosen": -2.004566192626953, + "logits/rejected": -2.0059421062469482, + "logps/chosen": -12.107784271240234, + "logps/rejected": -140.22865295410156, + "loss": 0.5264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021120930090546608, + "rewards/margins": 0.7074383497238159, + "rewards/rejected": -0.6863174438476562, + "step": 3599 + }, + { + "epoch": 0.21, + "learning_rate": 9.178819485569565e-08, + "logits/chosen": -2.081573724746704, + "logits/rejected": -2.077890157699585, + "logps/chosen": -2.092128276824951, + "logps/rejected": -128.24993896484375, + "loss": 0.5428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006575155537575483, + "rewards/margins": 0.7186174392700195, + "rewards/rejected": -0.7120422720909119, + "step": 3600 + }, + { + "epoch": 0.21, + "learning_rate": 9.178301948571597e-08, + "logits/chosen": -2.116459846496582, + "logits/rejected": -2.068725824356079, + "logps/chosen": -153.7520751953125, + "logps/rejected": -301.7461853027344, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9943863153457642, + "rewards/margins": 0.6642379760742188, + "rewards/rejected": 0.330148309469223, + "step": 3601 + }, + { + "epoch": 0.21, + "learning_rate": 9.177784263139907e-08, + "logits/chosen": -2.1365628242492676, + "logits/rejected": -2.1157214641571045, + "logps/chosen": -138.64190673828125, + "logps/rejected": -339.0487060546875, + "loss": 0.2255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7272445559501648, + "rewards/margins": 1.535252332687378, + "rewards/rejected": -0.8080078363418579, + "step": 3602 + }, + { + "epoch": 0.21, + "learning_rate": 9.177266429292883e-08, + "logits/chosen": -1.9996176958084106, + "logits/rejected": -1.9444758892059326, + "logps/chosen": -226.52206420898438, + "logps/rejected": -407.33465576171875, + "loss": 0.4069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3134002685546875, + "rewards/margins": 1.38018798828125, + "rewards/rejected": -1.0667877197265625, + "step": 3603 + }, + { + "epoch": 0.21, + "learning_rate": 9.176748447048923e-08, + "logits/chosen": -2.0368642807006836, + "logits/rejected": -1.9736021757125854, + "logps/chosen": -182.84445190429688, + "logps/rejected": -414.44976806640625, + "loss": 0.4618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32142335176467896, + "rewards/margins": 0.6928558349609375, + "rewards/rejected": -0.37143251299858093, + "step": 3604 + }, + { + "epoch": 0.21, + "learning_rate": 9.176230316426428e-08, + "logits/chosen": -1.8568038940429688, + "logits/rejected": -1.8027101755142212, + "logps/chosen": -214.3831329345703, + "logps/rejected": -436.40020751953125, + "loss": 0.2483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9637512564659119, + "rewards/margins": 1.1230378150939941, + "rewards/rejected": -0.1592864990234375, + "step": 3605 + }, + { + "epoch": 0.21, + "learning_rate": 9.175712037443804e-08, + "logits/chosen": -2.044881820678711, + "logits/rejected": -2.0430827140808105, + "logps/chosen": -38.13243865966797, + "logps/rejected": -290.96636962890625, + "loss": 0.4935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1822277158498764, + "rewards/margins": 1.2361927032470703, + "rewards/rejected": -1.418420433998108, + "step": 3606 + }, + { + "epoch": 0.21, + "learning_rate": 9.175193610119462e-08, + "logits/chosen": -2.0684876441955566, + "logits/rejected": -2.034963369369507, + "logps/chosen": -245.55453491210938, + "logps/rejected": -346.7918395996094, + "loss": 0.4272, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0927826166152954, + "rewards/margins": -0.00517582893371582, + "rewards/rejected": 1.0979584455490112, + "step": 3607 + }, + { + "epoch": 0.21, + "learning_rate": 9.17467503447182e-08, + "logits/chosen": -2.182007312774658, + "logits/rejected": -2.185037136077881, + "logps/chosen": -7.753886699676514, + "logps/rejected": -126.93313598632812, + "loss": 0.4359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025028562173247337, + "rewards/margins": 1.5344455242156982, + "rewards/rejected": -1.509416937828064, + "step": 3608 + }, + { + "epoch": 0.21, + "learning_rate": 9.174156310519301e-08, + "logits/chosen": -2.098850727081299, + "logits/rejected": -2.0890913009643555, + "logps/chosen": -4.708720734925009e-05, + "logps/rejected": -121.37417602539062, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.126013888911984e-07, + "rewards/margins": 0.6877680420875549, + "rewards/rejected": -0.6877685785293579, + "step": 3609 + }, + { + "epoch": 0.21, + "learning_rate": 9.173637438280332e-08, + "logits/chosen": -2.02004337310791, + "logits/rejected": -1.9930895566940308, + "logps/chosen": -157.1554412841797, + "logps/rejected": -288.7978515625, + "loss": 0.3817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9127869009971619, + "rewards/margins": 0.32870179414749146, + "rewards/rejected": 0.5840851068496704, + "step": 3610 + }, + { + "epoch": 0.21, + "learning_rate": 9.173118417773345e-08, + "logits/chosen": -2.290269374847412, + "logits/rejected": -2.290085554122925, + "logps/chosen": -2.021328926086426, + "logps/rejected": -183.0972137451172, + "loss": 0.443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031583260744810104, + "rewards/margins": 1.640895962715149, + "rewards/rejected": -1.672479271888733, + "step": 3611 + }, + { + "epoch": 0.21, + "learning_rate": 9.172599249016778e-08, + "logits/chosen": -2.0426199436187744, + "logits/rejected": -2.0434513092041016, + "logps/chosen": -260.23785400390625, + "logps/rejected": -302.17620849609375, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5067077875137329, + "rewards/margins": 0.7441132068634033, + "rewards/rejected": -0.23740540444850922, + "step": 3612 + }, + { + "epoch": 0.21, + "learning_rate": 9.172079932029077e-08, + "logits/chosen": -2.229121685028076, + "logits/rejected": -2.2273497581481934, + "logps/chosen": -43.32917404174805, + "logps/rejected": -154.47288513183594, + "loss": 0.8642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6738567352294922, + "rewards/margins": 0.09643900394439697, + "rewards/rejected": -0.7702957391738892, + "step": 3613 + }, + { + "epoch": 0.21, + "learning_rate": 9.171560466828687e-08, + "logits/chosen": -1.9376575946807861, + "logits/rejected": -1.8191381692886353, + "logps/chosen": -313.1064147949219, + "logps/rejected": -878.3572387695312, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7263396978378296, + "rewards/margins": 4.563711643218994, + "rewards/rejected": -2.837371826171875, + "step": 3614 + }, + { + "epoch": 0.21, + "learning_rate": 9.171040853434067e-08, + "logits/chosen": -2.1341118812561035, + "logits/rejected": -2.1355159282684326, + "logps/chosen": -43.03199768066406, + "logps/rejected": -262.70416259765625, + "loss": 0.3652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08990173786878586, + "rewards/margins": 2.4021592140197754, + "rewards/rejected": -2.3122575283050537, + "step": 3615 + }, + { + "epoch": 0.21, + "learning_rate": 9.17052109186367e-08, + "logits/chosen": -2.322662591934204, + "logits/rejected": -2.324885368347168, + "logps/chosen": -25.11471176147461, + "logps/rejected": -112.0169448852539, + "loss": 0.5304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11909713596105576, + "rewards/margins": 1.0145530700683594, + "rewards/rejected": -1.1336501836776733, + "step": 3616 + }, + { + "epoch": 0.21, + "learning_rate": 9.170001182135965e-08, + "logits/chosen": -1.9822286367416382, + "logits/rejected": -1.9678899049758911, + "logps/chosen": -176.71987915039062, + "logps/rejected": -316.7105712890625, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6123580932617188, + "rewards/margins": 0.36617887020111084, + "rewards/rejected": 0.24617920815944672, + "step": 3617 + }, + { + "epoch": 0.21, + "learning_rate": 9.16948112426942e-08, + "logits/chosen": -1.987448811531067, + "logits/rejected": -1.9272230863571167, + "logps/chosen": -277.50775146484375, + "logps/rejected": -443.20904541015625, + "loss": 0.1115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.520867943763733, + "rewards/margins": 2.492431640625, + "rewards/rejected": -0.9715637564659119, + "step": 3618 + }, + { + "epoch": 0.21, + "learning_rate": 9.168960918282511e-08, + "logits/chosen": -1.9592362642288208, + "logits/rejected": -1.9334790706634521, + "logps/chosen": -195.0528564453125, + "logps/rejected": -317.64642333984375, + "loss": 0.2627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7802352905273438, + "rewards/margins": 1.2368637323379517, + "rewards/rejected": -0.4566284120082855, + "step": 3619 + }, + { + "epoch": 0.21, + "learning_rate": 9.168440564193717e-08, + "logits/chosen": -2.0063588619232178, + "logits/rejected": -1.9778739213943481, + "logps/chosen": -199.72317504882812, + "logps/rejected": -390.2457275390625, + "loss": 0.4134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5948486328125, + "rewards/margins": 0.61871337890625, + "rewards/rejected": -0.02386474609375, + "step": 3620 + }, + { + "epoch": 0.21, + "learning_rate": 9.167920062021524e-08, + "logits/chosen": -2.182384729385376, + "logits/rejected": -2.1784214973449707, + "logps/chosen": -41.12318801879883, + "logps/rejected": -172.241455078125, + "loss": 0.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2848072052001953, + "rewards/margins": 0.9380084872245789, + "rewards/rejected": -0.6532012820243835, + "step": 3621 + }, + { + "epoch": 0.21, + "learning_rate": 9.167399411784422e-08, + "logits/chosen": -2.0584802627563477, + "logits/rejected": -2.0575811862945557, + "logps/chosen": -58.228607177734375, + "logps/rejected": -315.1042175292969, + "loss": 0.3658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08372726291418076, + "rewards/margins": 2.9813759326934814, + "rewards/rejected": -3.06510329246521, + "step": 3622 + }, + { + "epoch": 0.21, + "learning_rate": 9.16687861350091e-08, + "logits/chosen": -1.516311526298523, + "logits/rejected": -1.4638407230377197, + "logps/chosen": -390.5761413574219, + "logps/rejected": -543.8179931640625, + "loss": 0.3981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17816162109375, + "rewards/margins": 0.744982898235321, + "rewards/rejected": -0.566821277141571, + "step": 3623 + }, + { + "epoch": 0.21, + "learning_rate": 9.166357667189486e-08, + "logits/chosen": -2.1931991577148438, + "logits/rejected": -2.191148042678833, + "logps/chosen": -29.8121280670166, + "logps/rejected": -113.7468490600586, + "loss": 0.5588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.032137680798769, + "rewards/margins": 0.6330494284629822, + "rewards/rejected": -0.6009117364883423, + "step": 3624 + }, + { + "epoch": 0.21, + "learning_rate": 9.165836572868658e-08, + "logits/chosen": -2.04252552986145, + "logits/rejected": -2.0405850410461426, + "logps/chosen": -11.05565357208252, + "logps/rejected": -68.83011627197266, + "loss": 0.7058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08223428577184677, + "rewards/margins": 0.2012602686882019, + "rewards/rejected": -0.2834945619106293, + "step": 3625 + }, + { + "epoch": 0.21, + "learning_rate": 9.165315330556938e-08, + "logits/chosen": -2.004448175430298, + "logits/rejected": -1.9949337244033813, + "logps/chosen": -16.81739616394043, + "logps/rejected": -115.96847534179688, + "loss": 0.4647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02076873742043972, + "rewards/margins": 1.2738388776779175, + "rewards/rejected": -1.2530701160430908, + "step": 3626 + }, + { + "epoch": 0.21, + "learning_rate": 9.164793940272842e-08, + "logits/chosen": -1.9835708141326904, + "logits/rejected": -1.9931936264038086, + "logps/chosen": -28.357433319091797, + "logps/rejected": -158.47442626953125, + "loss": 0.4618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.045523643493652344, + "rewards/margins": 1.2343664169311523, + "rewards/rejected": -1.1888427734375, + "step": 3627 + }, + { + "epoch": 0.21, + "learning_rate": 9.164272402034893e-08, + "logits/chosen": -2.0841383934020996, + "logits/rejected": -2.0883331298828125, + "logps/chosen": -16.999582290649414, + "logps/rejected": -118.01221466064453, + "loss": 0.5917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0634307861328125, + "rewards/margins": 0.6184722781181335, + "rewards/rejected": -0.681903064250946, + "step": 3628 + }, + { + "epoch": 0.21, + "learning_rate": 9.16375071586162e-08, + "logits/chosen": -2.0952234268188477, + "logits/rejected": -2.093012571334839, + "logps/chosen": -3.774148464202881, + "logps/rejected": -232.29400634765625, + "loss": 0.4017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012195611372590065, + "rewards/margins": 2.209087371826172, + "rewards/rejected": -2.221282958984375, + "step": 3629 + }, + { + "epoch": 0.21, + "learning_rate": 9.163228881771553e-08, + "logits/chosen": -2.044478416442871, + "logits/rejected": -2.0504093170166016, + "logps/chosen": -158.9556884765625, + "logps/rejected": -333.5353088378906, + "loss": 0.2175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6420623660087585, + "rewards/margins": 1.480676293373108, + "rewards/rejected": -0.8386139273643494, + "step": 3630 + }, + { + "epoch": 0.21, + "learning_rate": 9.162706899783233e-08, + "logits/chosen": -1.9221932888031006, + "logits/rejected": -1.907197117805481, + "logps/chosen": -179.1857452392578, + "logps/rejected": -248.12100219726562, + "loss": 0.4809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7924591302871704, + "rewards/margins": 0.031094372272491455, + "rewards/rejected": 0.761364758014679, + "step": 3631 + }, + { + "epoch": 0.21, + "learning_rate": 9.1621847699152e-08, + "logits/chosen": -1.9608088731765747, + "logits/rejected": -1.957579255104065, + "logps/chosen": -6.993679046630859, + "logps/rejected": -69.76734161376953, + "loss": 0.5678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04761829599738121, + "rewards/margins": 0.4092065989971161, + "rewards/rejected": -0.3615882992744446, + "step": 3632 + }, + { + "epoch": 0.21, + "learning_rate": 9.161662492186007e-08, + "logits/chosen": -2.1790809631347656, + "logits/rejected": -2.186405897140503, + "logps/chosen": -55.50909423828125, + "logps/rejected": -197.71353149414062, + "loss": 0.6294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3323982357978821, + "rewards/margins": 0.8431938290596008, + "rewards/rejected": -1.175592064857483, + "step": 3633 + }, + { + "epoch": 0.21, + "learning_rate": 9.161140066614203e-08, + "logits/chosen": -1.9715487957000732, + "logits/rejected": -1.9480783939361572, + "logps/chosen": -195.9683837890625, + "logps/rejected": -308.055419921875, + "loss": 0.3616, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1457321643829346, + "rewards/margins": 0.3868027329444885, + "rewards/rejected": 0.758929431438446, + "step": 3634 + }, + { + "epoch": 0.21, + "learning_rate": 9.16061749321835e-08, + "logits/chosen": -2.043024778366089, + "logits/rejected": -2.017704963684082, + "logps/chosen": -131.7624053955078, + "logps/rejected": -353.8726806640625, + "loss": 0.1891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6283844113349915, + "rewards/margins": 1.8182709217071533, + "rewards/rejected": -1.189886450767517, + "step": 3635 + }, + { + "epoch": 0.21, + "learning_rate": 9.160094772017013e-08, + "logits/chosen": -2.1231415271759033, + "logits/rejected": -2.116179943084717, + "logps/chosen": -1.3504220247268677, + "logps/rejected": -183.0065155029297, + "loss": 0.403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028168832883238792, + "rewards/margins": 1.8952021598815918, + "rewards/rejected": -1.8670333623886108, + "step": 3636 + }, + { + "epoch": 0.21, + "learning_rate": 9.159571903028759e-08, + "logits/chosen": -2.0276029109954834, + "logits/rejected": -2.024080276489258, + "logps/chosen": -138.19775390625, + "logps/rejected": -220.69479370117188, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2937484681606293, + "rewards/margins": 0.17685851454734802, + "rewards/rejected": 0.11688995361328125, + "step": 3637 + }, + { + "epoch": 0.21, + "learning_rate": 9.159048886272165e-08, + "logits/chosen": -2.2169814109802246, + "logits/rejected": -2.2091779708862305, + "logps/chosen": -38.54393005371094, + "logps/rejected": -205.25282287597656, + "loss": 0.4958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04445800930261612, + "rewards/margins": 1.0712814331054688, + "rewards/rejected": -1.0268234014511108, + "step": 3638 + }, + { + "epoch": 0.21, + "learning_rate": 9.15852572176581e-08, + "logits/chosen": -2.3305132389068604, + "logits/rejected": -2.3290131092071533, + "logps/chosen": -0.560394287109375, + "logps/rejected": -131.7098388671875, + "loss": 0.4806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007772255223244429, + "rewards/margins": 1.1608926057815552, + "rewards/rejected": -1.1531203985214233, + "step": 3639 + }, + { + "epoch": 0.21, + "learning_rate": 9.15800240952828e-08, + "logits/chosen": -1.8504644632339478, + "logits/rejected": -1.8445435762405396, + "logps/chosen": -0.6496104598045349, + "logps/rejected": -180.4899444580078, + "loss": 0.4311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03340747579932213, + "rewards/margins": 1.5898191928863525, + "rewards/rejected": -1.5564117431640625, + "step": 3640 + }, + { + "epoch": 0.21, + "learning_rate": 9.157478949578167e-08, + "logits/chosen": -1.8745695352554321, + "logits/rejected": -1.872571587562561, + "logps/chosen": -244.91212463378906, + "logps/rejected": -381.4154052734375, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1098190546035767, + "rewards/margins": 0.8189376592636108, + "rewards/rejected": 0.29088136553764343, + "step": 3641 + }, + { + "epoch": 0.21, + "learning_rate": 9.156955341934063e-08, + "logits/chosen": -2.0579335689544678, + "logits/rejected": -2.0649642944335938, + "logps/chosen": -5.555025927606039e-05, + "logps/rejected": -150.56349182128906, + "loss": 0.4114, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.105868118946091e-07, + "rewards/margins": 1.977776288986206, + "rewards/rejected": -1.977777123451233, + "step": 3642 + }, + { + "epoch": 0.21, + "learning_rate": 9.156431586614572e-08, + "logits/chosen": -1.8052712678909302, + "logits/rejected": -1.7668191194534302, + "logps/chosen": -372.3719482421875, + "logps/rejected": -496.8249206542969, + "loss": 0.2332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.983746349811554, + "rewards/margins": 1.5690094232559204, + "rewards/rejected": -0.5852630734443665, + "step": 3643 + }, + { + "epoch": 0.21, + "learning_rate": 9.155907683638302e-08, + "logits/chosen": -2.168792963027954, + "logits/rejected": -2.164815664291382, + "logps/chosen": -80.49465942382812, + "logps/rejected": -277.7970275878906, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.353005975484848, + "rewards/margins": 1.7384705543518066, + "rewards/rejected": -1.3854645490646362, + "step": 3644 + }, + { + "epoch": 0.21, + "learning_rate": 9.15538363302386e-08, + "logits/chosen": -2.1174187660217285, + "logits/rejected": -2.110046863555908, + "logps/chosen": -60.937564849853516, + "logps/rejected": -99.33240509033203, + "loss": 0.5781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22229576110839844, + "rewards/margins": 0.3070354461669922, + "rewards/rejected": -0.08473968505859375, + "step": 3645 + }, + { + "epoch": 0.21, + "learning_rate": 9.154859434789866e-08, + "logits/chosen": -2.148391008377075, + "logits/rejected": -2.1412079334259033, + "logps/chosen": -237.99545288085938, + "logps/rejected": -365.1567077636719, + "loss": 0.1995, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4677765369415283, + "rewards/margins": 1.3275513648986816, + "rewards/rejected": 0.14022521674633026, + "step": 3646 + }, + { + "epoch": 0.21, + "learning_rate": 9.154335088954941e-08, + "logits/chosen": -1.8649404048919678, + "logits/rejected": -1.8733850717544556, + "logps/chosen": -36.03070068359375, + "logps/rejected": -179.05313110351562, + "loss": 0.6097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005920028779655695, + "rewards/margins": 0.38600045442581177, + "rewards/rejected": -0.3919204771518707, + "step": 3647 + }, + { + "epoch": 0.21, + "learning_rate": 9.153810595537712e-08, + "logits/chosen": -2.144598960876465, + "logits/rejected": -2.153585433959961, + "logps/chosen": -18.939537048339844, + "logps/rejected": -198.075439453125, + "loss": 0.4449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09297313541173935, + "rewards/margins": 1.6149981021881104, + "rewards/rejected": -1.707971215248108, + "step": 3648 + }, + { + "epoch": 0.21, + "learning_rate": 9.153285954556813e-08, + "logits/chosen": -2.0784504413604736, + "logits/rejected": -2.081974744796753, + "logps/chosen": -13.658101081848145, + "logps/rejected": -148.62709045410156, + "loss": 0.5144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017264842987060547, + "rewards/margins": 0.8989354372024536, + "rewards/rejected": -0.9162002801895142, + "step": 3649 + }, + { + "epoch": 0.21, + "learning_rate": 9.15276116603088e-08, + "logits/chosen": -2.0643973350524902, + "logits/rejected": -2.0661518573760986, + "logps/chosen": -44.55842590332031, + "logps/rejected": -149.80572509765625, + "loss": 0.7515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5593910217285156, + "rewards/margins": 0.44405901432037354, + "rewards/rejected": -1.0034500360488892, + "step": 3650 + }, + { + "epoch": 0.21, + "learning_rate": 9.152236229978559e-08, + "logits/chosen": -1.912336826324463, + "logits/rejected": -1.7595207691192627, + "logps/chosen": -335.4947509765625, + "logps/rejected": -924.36376953125, + "loss": 0.1513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0930206775665283, + "rewards/margins": 2.134573459625244, + "rewards/rejected": -1.0415527820587158, + "step": 3651 + }, + { + "epoch": 0.21, + "learning_rate": 9.151711146418495e-08, + "logits/chosen": -1.9191542863845825, + "logits/rejected": -1.911737322807312, + "logps/chosen": -58.90814208984375, + "logps/rejected": -251.47372436523438, + "loss": 0.3497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09069214016199112, + "rewards/margins": 3.047537088394165, + "rewards/rejected": -2.9568450450897217, + "step": 3652 + }, + { + "epoch": 0.21, + "learning_rate": 9.151185915369341e-08, + "logits/chosen": -1.9520885944366455, + "logits/rejected": -1.9835443496704102, + "logps/chosen": -245.73587036132812, + "logps/rejected": -420.59442138671875, + "loss": 0.1898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8786697387695312, + "rewards/margins": 2.6849746704101562, + "rewards/rejected": -1.806304931640625, + "step": 3653 + }, + { + "epoch": 0.21, + "learning_rate": 9.150660536849762e-08, + "logits/chosen": -2.1973321437835693, + "logits/rejected": -2.191209316253662, + "logps/chosen": -22.908376693725586, + "logps/rejected": -178.40753173828125, + "loss": 0.4602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03117084503173828, + "rewards/margins": 1.2855958938598633, + "rewards/rejected": -1.254425048828125, + "step": 3654 + }, + { + "epoch": 0.21, + "learning_rate": 9.150135010878414e-08, + "logits/chosen": -2.286734104156494, + "logits/rejected": -2.281031847000122, + "logps/chosen": -9.195035934448242, + "logps/rejected": -245.95828247070312, + "loss": 0.4258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056555937975645065, + "rewards/margins": 2.2060844898223877, + "rewards/rejected": -2.2626404762268066, + "step": 3655 + }, + { + "epoch": 0.21, + "learning_rate": 9.149609337473972e-08, + "logits/chosen": -2.032503128051758, + "logits/rejected": -2.0325939655303955, + "logps/chosen": -54.72691345214844, + "logps/rejected": -249.65159606933594, + "loss": 0.408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08126373589038849, + "rewards/margins": 1.7815719842910767, + "rewards/rejected": -1.8628357648849487, + "step": 3656 + }, + { + "epoch": 0.21, + "learning_rate": 9.149083516655106e-08, + "logits/chosen": -2.0945706367492676, + "logits/rejected": -2.059602737426758, + "logps/chosen": -180.63742065429688, + "logps/rejected": -362.5274658203125, + "loss": 1.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2802886962890625, + "rewards/margins": 1.64786696434021, + "rewards/rejected": -3.9281556606292725, + "step": 3657 + }, + { + "epoch": 0.21, + "learning_rate": 9.1485575484405e-08, + "logits/chosen": -2.2017507553100586, + "logits/rejected": -2.197059154510498, + "logps/chosen": -178.53570556640625, + "logps/rejected": -206.2536163330078, + "loss": 0.5258, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4615722596645355, + "rewards/margins": -0.0003006160259246826, + "rewards/rejected": 0.4618728756904602, + "step": 3658 + }, + { + "epoch": 0.21, + "learning_rate": 9.148031432848837e-08, + "logits/chosen": -2.158639907836914, + "logits/rejected": -2.161555767059326, + "logps/chosen": -21.583505630493164, + "logps/rejected": -161.21517944335938, + "loss": 0.5638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1611049622297287, + "rewards/margins": 0.8522892594337463, + "rewards/rejected": -1.0133942365646362, + "step": 3659 + }, + { + "epoch": 0.21, + "learning_rate": 9.147505169898806e-08, + "logits/chosen": -2.1162877082824707, + "logits/rejected": -2.1131319999694824, + "logps/chosen": -203.8585205078125, + "logps/rejected": -250.97625732421875, + "loss": 0.4013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7287079095840454, + "rewards/margins": 0.3759308159351349, + "rewards/rejected": 0.3527770936489105, + "step": 3660 + }, + { + "epoch": 0.21, + "learning_rate": 9.146978759609105e-08, + "logits/chosen": -2.0533745288848877, + "logits/rejected": -2.006700277328491, + "logps/chosen": -210.4755096435547, + "logps/rejected": -428.4443359375, + "loss": 0.0867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.931829810142517, + "rewards/margins": 2.488818407058716, + "rewards/rejected": -0.556988537311554, + "step": 3661 + }, + { + "epoch": 0.21, + "learning_rate": 9.146452201998433e-08, + "logits/chosen": -2.1334328651428223, + "logits/rejected": -2.1143336296081543, + "logps/chosen": -155.39437866210938, + "logps/rejected": -281.0358581542969, + "loss": 0.3664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2931793928146362, + "rewards/margins": 0.23958134651184082, + "rewards/rejected": 1.0535980463027954, + "step": 3662 + }, + { + "epoch": 0.21, + "learning_rate": 9.145925497085497e-08, + "logits/chosen": -2.197699785232544, + "logits/rejected": -2.1733860969543457, + "logps/chosen": -216.12539672851562, + "logps/rejected": -496.2309265136719, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6875625848770142, + "rewards/margins": 1.5902878046035767, + "rewards/rejected": -0.9027252197265625, + "step": 3663 + }, + { + "epoch": 0.21, + "learning_rate": 9.145398644889007e-08, + "logits/chosen": -2.072145700454712, + "logits/rejected": -2.0225882530212402, + "logps/chosen": -184.08023071289062, + "logps/rejected": -285.6382141113281, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2568085193634033, + "rewards/margins": 1.160369873046875, + "rewards/rejected": 0.09643860161304474, + "step": 3664 + }, + { + "epoch": 0.21, + "learning_rate": 9.144871645427678e-08, + "logits/chosen": -2.1167874336242676, + "logits/rejected": -2.048271894454956, + "logps/chosen": -128.59228515625, + "logps/rejected": -319.6649169921875, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9709731936454773, + "rewards/margins": 0.8865585327148438, + "rewards/rejected": 0.08441467583179474, + "step": 3665 + }, + { + "epoch": 0.21, + "learning_rate": 9.144344498720237e-08, + "logits/chosen": -2.0745604038238525, + "logits/rejected": -2.080500364303589, + "logps/chosen": -6.210651918081567e-05, + "logps/rejected": -156.3643035888672, + "loss": 0.4751, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.675385861854011e-07, + "rewards/margins": 1.2258706092834473, + "rewards/rejected": -1.2258713245391846, + "step": 3666 + }, + { + "epoch": 0.21, + "learning_rate": 9.143817204785404e-08, + "logits/chosen": -2.119412422180176, + "logits/rejected": -2.1016416549682617, + "logps/chosen": -0.0263072457164526, + "logps/rejected": -252.54623413085938, + "loss": 0.391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00034503210918046534, + "rewards/margins": 2.303396701812744, + "rewards/rejected": -2.303051710128784, + "step": 3667 + }, + { + "epoch": 0.21, + "learning_rate": 9.143289763641918e-08, + "logits/chosen": -2.1364519596099854, + "logits/rejected": -2.142209768295288, + "logps/chosen": -9.226348876953125, + "logps/rejected": -140.08737182617188, + "loss": 0.4388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04078684002161026, + "rewards/margins": 1.4829378128051758, + "rewards/rejected": -1.4421509504318237, + "step": 3668 + }, + { + "epoch": 0.21, + "learning_rate": 9.14276217530851e-08, + "logits/chosen": -2.1120266914367676, + "logits/rejected": -2.1557631492614746, + "logps/chosen": -175.4410400390625, + "logps/rejected": -225.61856079101562, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9869751334190369, + "rewards/margins": 0.7422882318496704, + "rewards/rejected": 0.24468688666820526, + "step": 3669 + }, + { + "epoch": 0.21, + "learning_rate": 9.142234439803924e-08, + "logits/chosen": -2.0507824420928955, + "logits/rejected": -2.039034605026245, + "logps/chosen": -79.10968017578125, + "logps/rejected": -244.09228515625, + "loss": 0.486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3121704161167145, + "rewards/margins": 0.6228424310684204, + "rewards/rejected": -0.31067201495170593, + "step": 3670 + }, + { + "epoch": 0.21, + "learning_rate": 9.14170655714691e-08, + "logits/chosen": -1.9651789665222168, + "logits/rejected": -1.9377814531326294, + "logps/chosen": -367.0740966796875, + "logps/rejected": -571.02978515625, + "loss": 0.1464, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3151733875274658, + "rewards/margins": 2.407580614089966, + "rewards/rejected": -1.0924072265625, + "step": 3671 + }, + { + "epoch": 0.21, + "learning_rate": 9.14117852735622e-08, + "logits/chosen": -2.1435859203338623, + "logits/rejected": -2.1210134029388428, + "logps/chosen": -144.9417266845703, + "logps/rejected": -260.1941833496094, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3879501521587372, + "rewards/margins": 0.12902987003326416, + "rewards/rejected": 0.258920282125473, + "step": 3672 + }, + { + "epoch": 0.21, + "learning_rate": 9.140650350450611e-08, + "logits/chosen": -1.968517541885376, + "logits/rejected": -1.9674793481826782, + "logps/chosen": -73.04689025878906, + "logps/rejected": -225.27459716796875, + "loss": 0.3935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5979141592979431, + "rewards/margins": 0.5993621945381165, + "rewards/rejected": -0.0014480591053143144, + "step": 3673 + }, + { + "epoch": 0.21, + "learning_rate": 9.140122026448849e-08, + "logits/chosen": -2.0159199237823486, + "logits/rejected": -1.9767301082611084, + "logps/chosen": -163.48306274414062, + "logps/rejected": -358.79010009765625, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9438018798828125, + "rewards/margins": 0.9343048334121704, + "rewards/rejected": 0.00949707068502903, + "step": 3674 + }, + { + "epoch": 0.21, + "learning_rate": 9.1395935553697e-08, + "logits/chosen": -2.119837522506714, + "logits/rejected": -2.107198476791382, + "logps/chosen": -0.8797746300697327, + "logps/rejected": -154.30809020996094, + "loss": 0.4656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07675435394048691, + "rewards/margins": 1.120148777961731, + "rewards/rejected": -1.0433944463729858, + "step": 3675 + }, + { + "epoch": 0.21, + "learning_rate": 9.139064937231939e-08, + "logits/chosen": -1.902056336402893, + "logits/rejected": -1.9001492261886597, + "logps/chosen": -203.45596313476562, + "logps/rejected": -404.90167236328125, + "loss": 0.2061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9867767691612244, + "rewards/margins": 1.6287262439727783, + "rewards/rejected": -0.641949474811554, + "step": 3676 + }, + { + "epoch": 0.21, + "learning_rate": 9.138536172054346e-08, + "logits/chosen": -2.012251615524292, + "logits/rejected": -2.0102555751800537, + "logps/chosen": -30.19374656677246, + "logps/rejected": -141.48983764648438, + "loss": 0.5015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11166878044605255, + "rewards/margins": 1.3884731531143188, + "rewards/rejected": -1.500141978263855, + "step": 3677 + }, + { + "epoch": 0.21, + "learning_rate": 9.138007259855705e-08, + "logits/chosen": -1.9665008783340454, + "logits/rejected": -1.9153177738189697, + "logps/chosen": -197.43319702148438, + "logps/rejected": -366.66558837890625, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0965759754180908, + "rewards/margins": 0.9158569574356079, + "rewards/rejected": 0.18071900308132172, + "step": 3678 + }, + { + "epoch": 0.21, + "learning_rate": 9.137478200654803e-08, + "logits/chosen": -2.1778202056884766, + "logits/rejected": -2.1707284450531006, + "logps/chosen": -121.77008056640625, + "logps/rejected": -217.13674926757812, + "loss": 0.5318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16890870034694672, + "rewards/margins": 1.1869704723358154, + "rewards/rejected": -1.3558791875839233, + "step": 3679 + }, + { + "epoch": 0.21, + "learning_rate": 9.136948994470437e-08, + "logits/chosen": -2.1146912574768066, + "logits/rejected": -2.114089250564575, + "logps/chosen": -26.78959083557129, + "logps/rejected": -144.16268920898438, + "loss": 0.84, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6371246576309204, + "rewards/margins": 0.11950987577438354, + "rewards/rejected": -0.756634533405304, + "step": 3680 + }, + { + "epoch": 0.21, + "learning_rate": 9.136419641321408e-08, + "logits/chosen": -1.921945333480835, + "logits/rejected": -1.918426513671875, + "logps/chosen": -12.854957580566406, + "logps/rejected": -67.50291442871094, + "loss": 0.6191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1328880339860916, + "rewards/margins": 0.09489670395851135, + "rewards/rejected": 0.03799133375287056, + "step": 3681 + }, + { + "epoch": 0.21, + "learning_rate": 9.135890141226519e-08, + "logits/chosen": -2.0694198608398438, + "logits/rejected": -2.0716614723205566, + "logps/chosen": -138.68362426757812, + "logps/rejected": -236.84100341796875, + "loss": 0.5559, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8530044555664062, + "rewards/margins": -0.34052741527557373, + "rewards/rejected": 1.19353187084198, + "step": 3682 + }, + { + "epoch": 0.21, + "learning_rate": 9.135360494204582e-08, + "logits/chosen": -2.0599169731140137, + "logits/rejected": -2.077516555786133, + "logps/chosen": -257.6141052246094, + "logps/rejected": -365.84912109375, + "loss": 0.2311, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1038665771484375, + "rewards/margins": 0.7283416986465454, + "rewards/rejected": 1.375524878501892, + "step": 3683 + }, + { + "epoch": 0.21, + "learning_rate": 9.13483070027441e-08, + "logits/chosen": -1.9031269550323486, + "logits/rejected": -1.9022084474563599, + "logps/chosen": -34.77318572998047, + "logps/rejected": -201.25274658203125, + "loss": 0.6399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3451009690761566, + "rewards/margins": 0.6056283712387085, + "rewards/rejected": -0.9507293701171875, + "step": 3684 + }, + { + "epoch": 0.21, + "learning_rate": 9.134300759454828e-08, + "logits/chosen": -2.06941819190979, + "logits/rejected": -2.050858736038208, + "logps/chosen": -17.555538177490234, + "logps/rejected": -146.95986938476562, + "loss": 0.4709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05454540252685547, + "rewards/margins": 1.21249520778656, + "rewards/rejected": -1.1579498052597046, + "step": 3685 + }, + { + "epoch": 0.21, + "learning_rate": 9.13377067176466e-08, + "logits/chosen": -2.1387383937835693, + "logits/rejected": -2.128016471862793, + "logps/chosen": -0.0024173883721232414, + "logps/rejected": -279.7489013671875, + "loss": 0.3827, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7748912796378136e-05, + "rewards/margins": 2.5914008617401123, + "rewards/rejected": -2.591418504714966, + "step": 3686 + }, + { + "epoch": 0.21, + "learning_rate": 9.133240437222737e-08, + "logits/chosen": -2.097177267074585, + "logits/rejected": -2.0947585105895996, + "logps/chosen": -49.06221008300781, + "logps/rejected": -145.2440185546875, + "loss": 0.5475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11828231811523438, + "rewards/margins": 0.5920646786689758, + "rewards/rejected": -0.47378236055374146, + "step": 3687 + }, + { + "epoch": 0.21, + "learning_rate": 9.132710055847896e-08, + "logits/chosen": -1.9835412502288818, + "logits/rejected": -1.9347202777862549, + "logps/chosen": -153.99264526367188, + "logps/rejected": -328.8932800292969, + "loss": 0.4981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30841371417045593, + "rewards/margins": 0.563488781452179, + "rewards/rejected": -0.255075067281723, + "step": 3688 + }, + { + "epoch": 0.21, + "learning_rate": 9.132179527658977e-08, + "logits/chosen": -2.0874369144439697, + "logits/rejected": -2.0727431774139404, + "logps/chosen": -288.30511474609375, + "logps/rejected": -293.8619079589844, + "loss": 0.0879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7252014875411987, + "rewards/margins": 2.9824249744415283, + "rewards/rejected": -1.2572234869003296, + "step": 3689 + }, + { + "epoch": 0.21, + "learning_rate": 9.131648852674829e-08, + "logits/chosen": -2.2418293952941895, + "logits/rejected": -2.241237163543701, + "logps/chosen": -0.0009662151569500566, + "logps/rejected": -97.9827880859375, + "loss": 0.4586, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.44489056058228e-05, + "rewards/margins": 1.3777680397033691, + "rewards/rejected": -1.3777436017990112, + "step": 3690 + }, + { + "epoch": 0.21, + "learning_rate": 9.131118030914304e-08, + "logits/chosen": -1.9177906513214111, + "logits/rejected": -1.9121637344360352, + "logps/chosen": -51.2703857421875, + "logps/rejected": -293.01141357421875, + "loss": 0.3172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4542808532714844, + "rewards/margins": 1.597115397453308, + "rewards/rejected": -1.1428345441818237, + "step": 3691 + }, + { + "epoch": 0.21, + "learning_rate": 9.13058706239626e-08, + "logits/chosen": -2.207900047302246, + "logits/rejected": -2.192687749862671, + "logps/chosen": -124.66954040527344, + "logps/rejected": -225.84844970703125, + "loss": 0.4931, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25305938720703125, + "rewards/margins": 0.2743728756904602, + "rewards/rejected": -0.02131347730755806, + "step": 3692 + }, + { + "epoch": 0.21, + "learning_rate": 9.130055947139559e-08, + "logits/chosen": -2.1655490398406982, + "logits/rejected": -2.1519384384155273, + "logps/chosen": -36.67673873901367, + "logps/rejected": -167.42152404785156, + "loss": 0.4089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18839608132839203, + "rewards/margins": 1.4526371955871582, + "rewards/rejected": -1.264241099357605, + "step": 3693 + }, + { + "epoch": 0.21, + "learning_rate": 9.129524685163066e-08, + "logits/chosen": -1.9937610626220703, + "logits/rejected": -1.9874159097671509, + "logps/chosen": -76.22357177734375, + "logps/rejected": -190.0743865966797, + "loss": 0.4481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06902465969324112, + "rewards/margins": 1.1092697381973267, + "rewards/rejected": -1.1782944202423096, + "step": 3694 + }, + { + "epoch": 0.22, + "learning_rate": 9.128993276485659e-08, + "logits/chosen": -2.066869020462036, + "logits/rejected": -2.056757926940918, + "logps/chosen": -269.117431640625, + "logps/rejected": -513.4720458984375, + "loss": 0.1134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2179383039474487, + "rewards/margins": 2.7684967517852783, + "rewards/rejected": -1.5505584478378296, + "step": 3695 + }, + { + "epoch": 0.22, + "learning_rate": 9.128461721126213e-08, + "logits/chosen": -2.201636552810669, + "logits/rejected": -2.1938560009002686, + "logps/chosen": -262.451416015625, + "logps/rejected": -360.1075439453125, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.049310326576233, + "rewards/margins": 0.75140380859375, + "rewards/rejected": 0.2979064881801605, + "step": 3696 + }, + { + "epoch": 0.22, + "learning_rate": 9.127930019103611e-08, + "logits/chosen": -1.8856815099716187, + "logits/rejected": -1.8866329193115234, + "logps/chosen": -2.991260051727295, + "logps/rejected": -89.98004150390625, + "loss": 0.6665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01625356636941433, + "rewards/margins": 0.0968291312456131, + "rewards/rejected": -0.08057556301355362, + "step": 3697 + }, + { + "epoch": 0.22, + "learning_rate": 9.127398170436744e-08, + "logits/chosen": -2.0928103923797607, + "logits/rejected": -2.101266860961914, + "logps/chosen": -21.92072296142578, + "logps/rejected": -107.7128677368164, + "loss": 0.5685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08421669155359268, + "rewards/margins": 0.5060535669326782, + "rewards/rejected": -0.42183685302734375, + "step": 3698 + }, + { + "epoch": 0.22, + "learning_rate": 9.126866175144504e-08, + "logits/chosen": -1.9311504364013672, + "logits/rejected": -1.9280693531036377, + "logps/chosen": -15.57850170135498, + "logps/rejected": -134.2400665283203, + "loss": 0.5409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.054601576179265976, + "rewards/margins": 0.576710045337677, + "rewards/rejected": -0.5221084952354431, + "step": 3699 + }, + { + "epoch": 0.22, + "learning_rate": 9.126334033245791e-08, + "logits/chosen": -2.0684540271759033, + "logits/rejected": -2.0549912452697754, + "logps/chosen": -12.715132713317871, + "logps/rejected": -207.86138916015625, + "loss": 0.3791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023512840270996094, + "rewards/margins": 2.3302435874938965, + "rewards/rejected": -2.3067307472229004, + "step": 3700 + }, + { + "epoch": 0.22, + "learning_rate": 9.12580174475951e-08, + "logits/chosen": -1.915088415145874, + "logits/rejected": -1.9198981523513794, + "logps/chosen": -0.001454679062590003, + "logps/rejected": -191.09710693359375, + "loss": 0.4396, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.753439033171162e-05, + "rewards/margins": 1.533118724822998, + "rewards/rejected": -1.5332062244415283, + "step": 3701 + }, + { + "epoch": 0.22, + "learning_rate": 9.125269309704569e-08, + "logits/chosen": -1.986879587173462, + "logits/rejected": -1.969909906387329, + "logps/chosen": -222.1624755859375, + "logps/rejected": -350.82550048828125, + "loss": 0.2547, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2278488874435425, + "rewards/margins": 0.9503341913223267, + "rewards/rejected": 0.27751466631889343, + "step": 3702 + }, + { + "epoch": 0.22, + "learning_rate": 9.124736728099882e-08, + "logits/chosen": -1.949196219444275, + "logits/rejected": -1.9437074661254883, + "logps/chosen": -2.124915361404419, + "logps/rejected": -63.78245162963867, + "loss": 0.6088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005867529194802046, + "rewards/margins": 0.3071496784687042, + "rewards/rejected": -0.30128213763237, + "step": 3703 + }, + { + "epoch": 0.22, + "learning_rate": 9.124203999964371e-08, + "logits/chosen": -2.288802146911621, + "logits/rejected": -2.2822182178497314, + "logps/chosen": -33.921974182128906, + "logps/rejected": -212.27670288085938, + "loss": 0.3839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23804818093776703, + "rewards/margins": 1.3918933868408203, + "rewards/rejected": -1.153845191001892, + "step": 3704 + }, + { + "epoch": 0.22, + "learning_rate": 9.123671125316961e-08, + "logits/chosen": -1.9345039129257202, + "logits/rejected": -1.9328742027282715, + "logps/chosen": -0.00015461034490726888, + "logps/rejected": -98.0859146118164, + "loss": 0.6538, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.835750367717992e-06, + "rewards/margins": 0.16275985538959503, + "rewards/rejected": -0.16276168823242188, + "step": 3705 + }, + { + "epoch": 0.22, + "learning_rate": 9.12313810417658e-08, + "logits/chosen": -1.9713923931121826, + "logits/rejected": -1.9623920917510986, + "logps/chosen": -335.78350830078125, + "logps/rejected": -493.24212646484375, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.384472608566284, + "rewards/margins": 0.335479736328125, + "rewards/rejected": 2.048992872238159, + "step": 3706 + }, + { + "epoch": 0.22, + "learning_rate": 9.122604936562166e-08, + "logits/chosen": -2.134647846221924, + "logits/rejected": -2.133495330810547, + "logps/chosen": -41.40985107421875, + "logps/rejected": -174.205078125, + "loss": 0.7047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2176612913608551, + "rewards/margins": 0.0360771119594574, + "rewards/rejected": -0.2537384033203125, + "step": 3707 + }, + { + "epoch": 0.22, + "learning_rate": 9.12207162249266e-08, + "logits/chosen": -2.05536150932312, + "logits/rejected": -2.04718017578125, + "logps/chosen": -165.00344848632812, + "logps/rejected": -321.02532958984375, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0189971923828125, + "rewards/margins": 0.9021698236465454, + "rewards/rejected": 0.11682739108800888, + "step": 3708 + }, + { + "epoch": 0.22, + "learning_rate": 9.121538161987005e-08, + "logits/chosen": -2.079171657562256, + "logits/rejected": -2.0755231380462646, + "logps/chosen": -265.0633544921875, + "logps/rejected": -388.2770690917969, + "loss": 0.1784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.904443383216858, + "rewards/margins": 1.1235992908477783, + "rewards/rejected": 0.7808441519737244, + "step": 3709 + }, + { + "epoch": 0.22, + "learning_rate": 9.121004555064153e-08, + "logits/chosen": -2.156801223754883, + "logits/rejected": -2.1498379707336426, + "logps/chosen": -66.16597747802734, + "logps/rejected": -172.5418701171875, + "loss": 0.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7955421805381775, + "rewards/margins": 0.5424355268478394, + "rewards/rejected": 0.2531066834926605, + "step": 3710 + }, + { + "epoch": 0.22, + "learning_rate": 9.120470801743063e-08, + "logits/chosen": -1.8803437948226929, + "logits/rejected": -1.8511794805526733, + "logps/chosen": -149.03921508789062, + "logps/rejected": -209.6813507080078, + "loss": 0.5425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3643295466899872, + "rewards/margins": 0.3202560544013977, + "rewards/rejected": 0.04407348856329918, + "step": 3711 + }, + { + "epoch": 0.22, + "learning_rate": 9.119936902042695e-08, + "logits/chosen": -2.079416036605835, + "logits/rejected": -2.0669431686401367, + "logps/chosen": -13.296710968017578, + "logps/rejected": -227.25369262695312, + "loss": 0.3793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08363266289234161, + "rewards/margins": 1.8993477821350098, + "rewards/rejected": -1.8157150745391846, + "step": 3712 + }, + { + "epoch": 0.22, + "learning_rate": 9.119402855982014e-08, + "logits/chosen": -2.183509588241577, + "logits/rejected": -2.1818318367004395, + "logps/chosen": -1.9057811498641968, + "logps/rejected": -177.62139892578125, + "loss": 0.4252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010036981664597988, + "rewards/margins": 1.699407696723938, + "rewards/rejected": -1.6893707513809204, + "step": 3713 + }, + { + "epoch": 0.22, + "learning_rate": 9.118868663579995e-08, + "logits/chosen": -2.111915349960327, + "logits/rejected": -2.0956008434295654, + "logps/chosen": -161.82415771484375, + "logps/rejected": -264.478271484375, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1439087390899658, + "rewards/margins": 0.4045349359512329, + "rewards/rejected": 0.7393738031387329, + "step": 3714 + }, + { + "epoch": 0.22, + "learning_rate": 9.118334324855612e-08, + "logits/chosen": -2.187988758087158, + "logits/rejected": -2.185124635696411, + "logps/chosen": -4.51568078994751, + "logps/rejected": -103.74441528320312, + "loss": 0.4614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024738360196352005, + "rewards/margins": 1.4098389148712158, + "rewards/rejected": -1.434577226638794, + "step": 3715 + }, + { + "epoch": 0.22, + "learning_rate": 9.117799839827851e-08, + "logits/chosen": -2.0433902740478516, + "logits/rejected": -2.072983980178833, + "logps/chosen": -250.01425170898438, + "logps/rejected": -265.35833740234375, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0565948486328125, + "rewards/margins": 0.921307384967804, + "rewards/rejected": 0.13528747856616974, + "step": 3716 + }, + { + "epoch": 0.22, + "learning_rate": 9.117265208515695e-08, + "logits/chosen": -2.0350778102874756, + "logits/rejected": -2.033935785293579, + "logps/chosen": -0.0012315138010308146, + "logps/rejected": -134.1399383544922, + "loss": 0.5249, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3137306268617976e-05, + "rewards/margins": 0.8260435461997986, + "rewards/rejected": -0.8260566592216492, + "step": 3717 + }, + { + "epoch": 0.22, + "learning_rate": 9.11673043093814e-08, + "logits/chosen": -1.9369508028030396, + "logits/rejected": -1.9400235414505005, + "logps/chosen": -229.94515991210938, + "logps/rejected": -348.5085754394531, + "loss": 0.3137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7761566042900085, + "rewards/margins": 0.8889373540878296, + "rewards/rejected": -0.11278076469898224, + "step": 3718 + }, + { + "epoch": 0.22, + "learning_rate": 9.116195507114184e-08, + "logits/chosen": -2.2918479442596436, + "logits/rejected": -2.2768948078155518, + "logps/chosen": -9.875411987304688, + "logps/rejected": -303.9809265136719, + "loss": 0.3707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024991989135742188, + "rewards/margins": 2.765110492706299, + "rewards/rejected": -2.7401185035705566, + "step": 3719 + }, + { + "epoch": 0.22, + "learning_rate": 9.115660437062826e-08, + "logits/chosen": -1.9939799308776855, + "logits/rejected": -1.9825197458267212, + "logps/chosen": -9.989396494347602e-05, + "logps/rejected": -209.2670135498047, + "loss": 0.4152, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.495248842635192e-06, + "rewards/margins": 1.788521409034729, + "rewards/rejected": -1.7885268926620483, + "step": 3720 + }, + { + "epoch": 0.22, + "learning_rate": 9.115125220803081e-08, + "logits/chosen": -1.9522318840026855, + "logits/rejected": -1.9506727457046509, + "logps/chosen": -29.22525405883789, + "logps/rejected": -251.73648071289062, + "loss": 0.3852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11499805748462677, + "rewards/margins": 4.265954494476318, + "rewards/rejected": -4.38095235824585, + "step": 3721 + }, + { + "epoch": 0.22, + "learning_rate": 9.114589858353957e-08, + "logits/chosen": -1.8974080085754395, + "logits/rejected": -1.8433074951171875, + "logps/chosen": -222.26345825195312, + "logps/rejected": -374.8673095703125, + "loss": 0.2006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1254242658615112, + "rewards/margins": 1.9141693115234375, + "rewards/rejected": -0.788745105266571, + "step": 3722 + }, + { + "epoch": 0.22, + "learning_rate": 9.114054349734474e-08, + "logits/chosen": -1.9834469556808472, + "logits/rejected": -1.9580639600753784, + "logps/chosen": -241.93112182617188, + "logps/rejected": -376.3322448730469, + "loss": 0.1435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6230010986328125, + "rewards/margins": 1.575842261314392, + "rewards/rejected": 0.04715881496667862, + "step": 3723 + }, + { + "epoch": 0.22, + "learning_rate": 9.113518694963658e-08, + "logits/chosen": -2.0741450786590576, + "logits/rejected": -2.0709891319274902, + "logps/chosen": -59.38993453979492, + "logps/rejected": -231.017578125, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10110397636890411, + "rewards/margins": 1.5184776782989502, + "rewards/rejected": -1.4173736572265625, + "step": 3724 + }, + { + "epoch": 0.22, + "learning_rate": 9.112982894060537e-08, + "logits/chosen": -1.8256040811538696, + "logits/rejected": -1.8542064428329468, + "logps/chosen": -165.77737426757812, + "logps/rejected": -419.0748596191406, + "loss": 0.2111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8855499625205994, + "rewards/margins": 1.331140160560608, + "rewards/rejected": -0.44559022784233093, + "step": 3725 + }, + { + "epoch": 0.22, + "learning_rate": 9.112446947044143e-08, + "logits/chosen": -1.9662342071533203, + "logits/rejected": -1.920210361480713, + "logps/chosen": -218.02145385742188, + "logps/rejected": -471.60235595703125, + "loss": 0.1631, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4034241437911987, + "rewards/margins": 1.6854188442230225, + "rewards/rejected": -0.28199464082717896, + "step": 3726 + }, + { + "epoch": 0.22, + "learning_rate": 9.111910853933517e-08, + "logits/chosen": -1.873917818069458, + "logits/rejected": -1.816031575202942, + "logps/chosen": -240.72743225097656, + "logps/rejected": -584.3663330078125, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1463806629180908, + "rewards/margins": 0.8928284049034119, + "rewards/rejected": 0.25355225801467896, + "step": 3727 + }, + { + "epoch": 0.22, + "learning_rate": 9.111374614747706e-08, + "logits/chosen": -1.9696496725082397, + "logits/rejected": -1.9695531129837036, + "logps/chosen": -27.0555362701416, + "logps/rejected": -272.59063720703125, + "loss": 0.4337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2877119183540344, + "rewards/margins": 2.2701189517974854, + "rewards/rejected": -2.557830810546875, + "step": 3728 + }, + { + "epoch": 0.22, + "learning_rate": 9.110838229505757e-08, + "logits/chosen": -2.1009833812713623, + "logits/rejected": -2.1060385704040527, + "logps/chosen": -148.19528198242188, + "logps/rejected": -411.19659423828125, + "loss": 0.2577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9270599484443665, + "rewards/margins": 1.2846587896347046, + "rewards/rejected": -0.3575988709926605, + "step": 3729 + }, + { + "epoch": 0.22, + "learning_rate": 9.110301698226726e-08, + "logits/chosen": -1.9962857961654663, + "logits/rejected": -1.9972896575927734, + "logps/chosen": -13.314569473266602, + "logps/rejected": -141.3563995361328, + "loss": 0.426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026242733001708984, + "rewards/margins": 1.8538850545883179, + "rewards/rejected": -1.8801277875900269, + "step": 3730 + }, + { + "epoch": 0.22, + "learning_rate": 9.109765020929673e-08, + "logits/chosen": -2.1402060985565186, + "logits/rejected": -2.12870717048645, + "logps/chosen": -24.087421417236328, + "logps/rejected": -372.31658935546875, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5331188440322876, + "rewards/margins": 2.1892712116241455, + "rewards/rejected": -1.656152367591858, + "step": 3731 + }, + { + "epoch": 0.22, + "learning_rate": 9.109228197633662e-08, + "logits/chosen": -2.0530850887298584, + "logits/rejected": -2.044238567352295, + "logps/chosen": -182.06317138671875, + "logps/rejected": -306.74444580078125, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.13683021068573, + "rewards/margins": 0.7162430286407471, + "rewards/rejected": 0.4205871522426605, + "step": 3732 + }, + { + "epoch": 0.22, + "learning_rate": 9.108691228357767e-08, + "logits/chosen": -2.17622447013855, + "logits/rejected": -2.1612868309020996, + "logps/chosen": -0.2619304358959198, + "logps/rejected": -200.95892333984375, + "loss": 0.384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017521535977721214, + "rewards/margins": 2.4465651512145996, + "rewards/rejected": -2.4290435314178467, + "step": 3733 + }, + { + "epoch": 0.22, + "learning_rate": 9.10815411312106e-08, + "logits/chosen": -2.0796279907226562, + "logits/rejected": -2.1060540676116943, + "logps/chosen": -234.23123168945312, + "logps/rejected": -205.18130493164062, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7793442010879517, + "rewards/margins": 0.8770187497138977, + "rewards/rejected": -0.09767456352710724, + "step": 3734 + }, + { + "epoch": 0.22, + "learning_rate": 9.107616851942625e-08, + "logits/chosen": -2.0724098682403564, + "logits/rejected": -2.073922634124756, + "logps/chosen": -0.049644555896520615, + "logps/rejected": -191.05921936035156, + "loss": 0.4504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010932412697002292, + "rewards/margins": 1.4675438404083252, + "rewards/rejected": -1.4686371088027954, + "step": 3735 + }, + { + "epoch": 0.22, + "learning_rate": 9.107079444841547e-08, + "logits/chosen": -2.1052496433258057, + "logits/rejected": -2.106920003890991, + "logps/chosen": -171.9210205078125, + "logps/rejected": -217.73367309570312, + "loss": 0.5186, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7766357660293579, + "rewards/margins": -0.10767513513565063, + "rewards/rejected": 0.8843109011650085, + "step": 3736 + }, + { + "epoch": 0.22, + "learning_rate": 9.106541891836916e-08, + "logits/chosen": -1.9150242805480957, + "logits/rejected": -1.9198293685913086, + "logps/chosen": -262.47137451171875, + "logps/rejected": -441.85748291015625, + "loss": 0.1357, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.073797583580017, + "rewards/margins": 3.0527985095977783, + "rewards/rejected": -1.9790009260177612, + "step": 3737 + }, + { + "epoch": 0.22, + "learning_rate": 9.106004192947832e-08, + "logits/chosen": -2.0506269931793213, + "logits/rejected": -2.037386655807495, + "logps/chosen": -22.57571029663086, + "logps/rejected": -137.70941162109375, + "loss": 0.6291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12866249680519104, + "rewards/margins": 0.40873983502388, + "rewards/rejected": -0.537402331829071, + "step": 3738 + }, + { + "epoch": 0.22, + "learning_rate": 9.105466348193391e-08, + "logits/chosen": -2.102818250656128, + "logits/rejected": -2.0553340911865234, + "logps/chosen": -150.7921142578125, + "logps/rejected": -424.4034729003906, + "loss": 0.1624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3042633533477783, + "rewards/margins": 1.723907470703125, + "rewards/rejected": -0.41964417695999146, + "step": 3739 + }, + { + "epoch": 0.22, + "learning_rate": 9.104928357592705e-08, + "logits/chosen": -2.160099506378174, + "logits/rejected": -2.1572558879852295, + "logps/chosen": -4.248475074768066, + "logps/rejected": -57.78386688232422, + "loss": 0.526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01247186679393053, + "rewards/margins": 0.8691084980964661, + "rewards/rejected": -0.8815803527832031, + "step": 3740 + }, + { + "epoch": 0.22, + "learning_rate": 9.104390221164886e-08, + "logits/chosen": -2.153578996658325, + "logits/rejected": -2.1458566188812256, + "logps/chosen": -0.0009299429366365075, + "logps/rejected": -188.12686157226562, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5653509257826954e-05, + "rewards/margins": 3.4282233715057373, + "rewards/rejected": -3.428187608718872, + "step": 3741 + }, + { + "epoch": 0.22, + "learning_rate": 9.103851938929048e-08, + "logits/chosen": -2.0691680908203125, + "logits/rejected": -2.059854507446289, + "logps/chosen": -199.49575805664062, + "logps/rejected": -366.0000915527344, + "loss": 0.2292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5584930181503296, + "rewards/margins": 0.9425109624862671, + "rewards/rejected": 0.6159820556640625, + "step": 3742 + }, + { + "epoch": 0.22, + "learning_rate": 9.103313510904315e-08, + "logits/chosen": -1.9893054962158203, + "logits/rejected": -1.9040237665176392, + "logps/chosen": -230.32571411132812, + "logps/rejected": -416.03106689453125, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4319183826446533, + "rewards/margins": 0.6820068955421448, + "rewards/rejected": 0.7499114871025085, + "step": 3743 + }, + { + "epoch": 0.22, + "learning_rate": 9.102774937109813e-08, + "logits/chosen": -2.001120090484619, + "logits/rejected": -1.9651868343353271, + "logps/chosen": -228.19522094726562, + "logps/rejected": -481.07391357421875, + "loss": 0.2022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4215484857559204, + "rewards/margins": 1.2093108892440796, + "rewards/rejected": 0.21223755180835724, + "step": 3744 + }, + { + "epoch": 0.22, + "learning_rate": 9.10223621756468e-08, + "logits/chosen": -2.0577163696289062, + "logits/rejected": -2.047851800918579, + "logps/chosen": -65.72663879394531, + "logps/rejected": -116.95732116699219, + "loss": 0.7203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20184364914894104, + "rewards/margins": 0.08088952302932739, + "rewards/rejected": -0.28273317217826843, + "step": 3745 + }, + { + "epoch": 0.22, + "learning_rate": 9.101697352288047e-08, + "logits/chosen": -2.2167294025421143, + "logits/rejected": -2.2081165313720703, + "logps/chosen": -50.07516098022461, + "logps/rejected": -283.8327941894531, + "loss": 0.458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07287903130054474, + "rewards/margins": 1.5044128894805908, + "rewards/rejected": -1.4315338134765625, + "step": 3746 + }, + { + "epoch": 0.22, + "learning_rate": 9.101158341299064e-08, + "logits/chosen": -2.1184232234954834, + "logits/rejected": -2.0666027069091797, + "logps/chosen": -72.78433227539062, + "logps/rejected": -293.3971862792969, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4717918336391449, + "rewards/margins": 1.332811713218689, + "rewards/rejected": -0.8610199093818665, + "step": 3747 + }, + { + "epoch": 0.22, + "learning_rate": 9.100619184616873e-08, + "logits/chosen": -2.1555135250091553, + "logits/rejected": -2.152296781539917, + "logps/chosen": -71.8601303100586, + "logps/rejected": -268.5780029296875, + "loss": 0.3928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1118873581290245, + "rewards/margins": 2.468459367752075, + "rewards/rejected": -2.5803468227386475, + "step": 3748 + }, + { + "epoch": 0.22, + "learning_rate": 9.100079882260632e-08, + "logits/chosen": -2.0151631832122803, + "logits/rejected": -1.9814553260803223, + "logps/chosen": -199.5104217529297, + "logps/rejected": -420.34503173828125, + "loss": 0.1387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3623322248458862, + "rewards/margins": 1.7289551496505737, + "rewards/rejected": -0.3666229248046875, + "step": 3749 + }, + { + "epoch": 0.22, + "learning_rate": 9.099540434249496e-08, + "logits/chosen": -2.252866744995117, + "logits/rejected": -2.248605728149414, + "logps/chosen": -65.6123275756836, + "logps/rejected": -201.466796875, + "loss": 0.6404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18763123452663422, + "rewards/margins": 0.34539180994033813, + "rewards/rejected": -0.5330230593681335, + "step": 3750 + }, + { + "epoch": 0.22, + "learning_rate": 9.099000840602631e-08, + "logits/chosen": -2.134045124053955, + "logits/rejected": -2.1337368488311768, + "logps/chosen": -5.900798714719713e-05, + "logps/rejected": -252.4265899658203, + "loss": 0.3662, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.961192073300481e-08, + "rewards/margins": 3.3366684913635254, + "rewards/rejected": -3.3366684913635254, + "step": 3751 + }, + { + "epoch": 0.22, + "learning_rate": 9.098461101339208e-08, + "logits/chosen": -2.154980182647705, + "logits/rejected": -2.1546437740325928, + "logps/chosen": -21.307476043701172, + "logps/rejected": -121.33454895019531, + "loss": 0.5761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2497226744890213, + "rewards/margins": 0.9150897860527039, + "rewards/rejected": -1.164812445640564, + "step": 3752 + }, + { + "epoch": 0.22, + "learning_rate": 9.097921216478397e-08, + "logits/chosen": -1.99942946434021, + "logits/rejected": -1.9956493377685547, + "logps/chosen": -0.7256412506103516, + "logps/rejected": -217.68898010253906, + "loss": 0.3931, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004276073072105646, + "rewards/margins": 2.322129249572754, + "rewards/rejected": -2.3264052867889404, + "step": 3753 + }, + { + "epoch": 0.22, + "learning_rate": 9.097381186039379e-08, + "logits/chosen": -2.0397160053253174, + "logits/rejected": -2.0527284145355225, + "logps/chosen": -12.763046264648438, + "logps/rejected": -133.07369995117188, + "loss": 0.5858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23596201837062836, + "rewards/margins": 0.8790113925933838, + "rewards/rejected": -1.1149734258651733, + "step": 3754 + }, + { + "epoch": 0.22, + "learning_rate": 9.09684101004134e-08, + "logits/chosen": -2.168297052383423, + "logits/rejected": -2.1726090908050537, + "logps/chosen": -0.008503258228302002, + "logps/rejected": -47.238182067871094, + "loss": 0.6853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024998877779580653, + "rewards/margins": 0.031633250415325165, + "rewards/rejected": -0.03188323974609375, + "step": 3755 + }, + { + "epoch": 0.22, + "learning_rate": 9.096300688503468e-08, + "logits/chosen": -2.183849573135376, + "logits/rejected": -2.180450201034546, + "logps/chosen": -0.8997331261634827, + "logps/rejected": -33.712520599365234, + "loss": 0.6532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023829419165849686, + "rewards/margins": 0.1413545161485672, + "rewards/rejected": -0.11752510070800781, + "step": 3756 + }, + { + "epoch": 0.22, + "learning_rate": 9.095760221444959e-08, + "logits/chosen": -2.215559959411621, + "logits/rejected": -2.1980583667755127, + "logps/chosen": -87.27130126953125, + "logps/rejected": -374.736572265625, + "loss": 0.2758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4400016963481903, + "rewards/margins": 2.47672963142395, + "rewards/rejected": -2.0367279052734375, + "step": 3757 + }, + { + "epoch": 0.22, + "learning_rate": 9.095219608885012e-08, + "logits/chosen": -2.1795215606689453, + "logits/rejected": -2.2081966400146484, + "logps/chosen": -235.66241455078125, + "logps/rejected": -373.451416015625, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5343170166015625, + "rewards/margins": 1.1634979248046875, + "rewards/rejected": -0.629180908203125, + "step": 3758 + }, + { + "epoch": 0.22, + "learning_rate": 9.094678850842832e-08, + "logits/chosen": -1.9900861978530884, + "logits/rejected": -1.9719488620758057, + "logps/chosen": -238.10650634765625, + "logps/rejected": -370.3580322265625, + "loss": 0.4171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6217758059501648, + "rewards/margins": 0.6807357668876648, + "rewards/rejected": -0.0589599609375, + "step": 3759 + }, + { + "epoch": 0.22, + "learning_rate": 9.09413794733763e-08, + "logits/chosen": -2.0160276889801025, + "logits/rejected": -1.9994888305664062, + "logps/chosen": -354.0743713378906, + "logps/rejected": -473.2085876464844, + "loss": 0.3764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.339263916015625, + "rewards/margins": 0.2332855463027954, + "rewards/rejected": 1.1059783697128296, + "step": 3760 + }, + { + "epoch": 0.22, + "learning_rate": 9.093596898388623e-08, + "logits/chosen": -1.986743688583374, + "logits/rejected": -1.9255965948104858, + "logps/chosen": -236.34132385253906, + "logps/rejected": -422.24493408203125, + "loss": 0.3631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4640975892543793, + "rewards/margins": 1.1786636114120483, + "rewards/rejected": -0.7145660519599915, + "step": 3761 + }, + { + "epoch": 0.22, + "learning_rate": 9.093055704015029e-08, + "logits/chosen": -1.987151026725769, + "logits/rejected": -1.9920161962509155, + "logps/chosen": -16.060083389282227, + "logps/rejected": -37.5533561706543, + "loss": 0.6565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029871368780732155, + "rewards/margins": 0.09901390224695206, + "rewards/rejected": -0.06914253532886505, + "step": 3762 + }, + { + "epoch": 0.22, + "learning_rate": 9.092514364236074e-08, + "logits/chosen": -2.124077796936035, + "logits/rejected": -2.1061339378356934, + "logps/chosen": -21.768449783325195, + "logps/rejected": -216.8168182373047, + "loss": 0.3598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030281066428869963, + "rewards/margins": 2.9397621154785156, + "rewards/rejected": -2.9427902698516846, + "step": 3763 + }, + { + "epoch": 0.22, + "learning_rate": 9.091972879070993e-08, + "logits/chosen": -1.8477479219436646, + "logits/rejected": -1.824491262435913, + "logps/chosen": -134.67086791992188, + "logps/rejected": -358.0728759765625, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8871399164199829, + "rewards/margins": 0.87152099609375, + "rewards/rejected": 0.015618897043168545, + "step": 3764 + }, + { + "epoch": 0.22, + "learning_rate": 9.091431248539016e-08, + "logits/chosen": -2.179996967315674, + "logits/rejected": -2.1729297637939453, + "logps/chosen": -11.251964569091797, + "logps/rejected": -237.62136840820312, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014726447872817516, + "rewards/margins": 2.302320957183838, + "rewards/rejected": -2.2875945568084717, + "step": 3765 + }, + { + "epoch": 0.22, + "learning_rate": 9.09088947265939e-08, + "logits/chosen": -2.069167375564575, + "logits/rejected": -2.0806424617767334, + "logps/chosen": -268.26617431640625, + "logps/rejected": -430.3680725097656, + "loss": 0.1519, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9471282958984375, + "rewards/margins": 1.3693573474884033, + "rewards/rejected": 0.577771008014679, + "step": 3766 + }, + { + "epoch": 0.22, + "learning_rate": 9.090347551451359e-08, + "logits/chosen": -2.103705644607544, + "logits/rejected": -2.125664472579956, + "logps/chosen": -234.18368530273438, + "logps/rejected": -412.96868896484375, + "loss": 0.2079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4684158563613892, + "rewards/margins": 1.205113172531128, + "rewards/rejected": 0.26330262422561646, + "step": 3767 + }, + { + "epoch": 0.22, + "learning_rate": 9.089805484934174e-08, + "logits/chosen": -1.9733006954193115, + "logits/rejected": -1.951996922492981, + "logps/chosen": -273.16619873046875, + "logps/rejected": -487.00653076171875, + "loss": 0.0836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.883331298828125, + "rewards/margins": 2.513702392578125, + "rewards/rejected": -0.63037109375, + "step": 3768 + }, + { + "epoch": 0.22, + "learning_rate": 9.089263273127093e-08, + "logits/chosen": -2.0178616046905518, + "logits/rejected": -1.9933267831802368, + "logps/chosen": -196.0766143798828, + "logps/rejected": -220.4959716796875, + "loss": 0.3341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8235992789268494, + "rewards/margins": 1.0794235467910767, + "rewards/rejected": -0.2558242976665497, + "step": 3769 + }, + { + "epoch": 0.22, + "learning_rate": 9.08872091604938e-08, + "logits/chosen": -2.1351194381713867, + "logits/rejected": -2.134847640991211, + "logps/chosen": -17.627655029296875, + "logps/rejected": -81.39241027832031, + "loss": 0.622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037493135780096054, + "rewards/margins": 0.26542359590530396, + "rewards/rejected": -0.2279304563999176, + "step": 3770 + }, + { + "epoch": 0.22, + "learning_rate": 9.088178413720298e-08, + "logits/chosen": -2.232759714126587, + "logits/rejected": -2.224268913269043, + "logps/chosen": -51.733787536621094, + "logps/rejected": -196.63351440429688, + "loss": 0.3758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04680824279785156, + "rewards/margins": 3.1646878719329834, + "rewards/rejected": -3.211496114730835, + "step": 3771 + }, + { + "epoch": 0.22, + "learning_rate": 9.087635766159121e-08, + "logits/chosen": -1.9740087985992432, + "logits/rejected": -1.963545322418213, + "logps/chosen": -8.120158195495605, + "logps/rejected": -259.7391662597656, + "loss": 0.4151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03649110719561577, + "rewards/margins": 1.553285002708435, + "rewards/rejected": -1.5167938470840454, + "step": 3772 + }, + { + "epoch": 0.22, + "learning_rate": 9.087092973385127e-08, + "logits/chosen": -2.187831163406372, + "logits/rejected": -2.2457423210144043, + "logps/chosen": -218.59910583496094, + "logps/rejected": -316.7466735839844, + "loss": 0.2774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8508865237236023, + "rewards/margins": 0.9866927862167358, + "rewards/rejected": -0.13580627739429474, + "step": 3773 + }, + { + "epoch": 0.22, + "learning_rate": 9.086550035417599e-08, + "logits/chosen": -2.0109145641326904, + "logits/rejected": -1.9841364622116089, + "logps/chosen": -136.7513427734375, + "logps/rejected": -291.37664794921875, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1170837879180908, + "rewards/margins": 0.7469848990440369, + "rewards/rejected": 0.37009888887405396, + "step": 3774 + }, + { + "epoch": 0.22, + "learning_rate": 9.086006952275824e-08, + "logits/chosen": -2.055532455444336, + "logits/rejected": -2.0915794372558594, + "logps/chosen": -190.06063842773438, + "logps/rejected": -332.64990234375, + "loss": 0.2302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9917816519737244, + "rewards/margins": 1.2528473138809204, + "rewards/rejected": -0.26106569170951843, + "step": 3775 + }, + { + "epoch": 0.22, + "learning_rate": 9.085463723979095e-08, + "logits/chosen": -2.047722816467285, + "logits/rejected": -2.0331308841705322, + "logps/chosen": -183.51971435546875, + "logps/rejected": -283.00762939453125, + "loss": 0.4701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.302267462015152, + "rewards/margins": 0.609387218952179, + "rewards/rejected": -0.307119756937027, + "step": 3776 + }, + { + "epoch": 0.22, + "learning_rate": 9.08492035054671e-08, + "logits/chosen": -1.9204403162002563, + "logits/rejected": -1.9273183345794678, + "logps/chosen": -215.0382080078125, + "logps/rejected": -347.50732421875, + "loss": 0.3838, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.49382483959198, + "rewards/margins": 0.07143104076385498, + "rewards/rejected": 1.422393798828125, + "step": 3777 + }, + { + "epoch": 0.22, + "learning_rate": 9.084376831997973e-08, + "logits/chosen": -2.054163932800293, + "logits/rejected": -1.984655499458313, + "logps/chosen": -285.46990966796875, + "logps/rejected": -502.9599914550781, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5082184076309204, + "rewards/margins": 0.1946350336074829, + "rewards/rejected": 1.3135833740234375, + "step": 3778 + }, + { + "epoch": 0.22, + "learning_rate": 9.083833168352192e-08, + "logits/chosen": -1.7941539287567139, + "logits/rejected": -1.8291242122650146, + "logps/chosen": -287.7789306640625, + "logps/rejected": -281.244140625, + "loss": 0.2547, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.627655029296875, + "rewards/margins": 0.7966216802597046, + "rewards/rejected": 0.8310333490371704, + "step": 3779 + }, + { + "epoch": 0.22, + "learning_rate": 9.08328935962868e-08, + "logits/chosen": -1.8533941507339478, + "logits/rejected": -1.7928647994995117, + "logps/chosen": -260.93353271484375, + "logps/rejected": -274.138916015625, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9673523306846619, + "rewards/margins": 0.9701629877090454, + "rewards/rejected": -0.0028106688987463713, + "step": 3780 + }, + { + "epoch": 0.22, + "learning_rate": 9.082745405846759e-08, + "logits/chosen": -2.111086130142212, + "logits/rejected": -2.1087729930877686, + "logps/chosen": -45.192928314208984, + "logps/rejected": -160.38400268554688, + "loss": 0.5721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4018509089946747, + "rewards/margins": 1.2014037370681763, + "rewards/rejected": -1.6032546758651733, + "step": 3781 + }, + { + "epoch": 0.22, + "learning_rate": 9.082201307025748e-08, + "logits/chosen": -2.0403265953063965, + "logits/rejected": -2.03460431098938, + "logps/chosen": -1.1470412015914917, + "logps/rejected": -113.96150970458984, + "loss": 0.5448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11009716242551804, + "rewards/margins": 0.5322445034980774, + "rewards/rejected": -0.42214736342430115, + "step": 3782 + }, + { + "epoch": 0.22, + "learning_rate": 9.08165706318498e-08, + "logits/chosen": -2.1903750896453857, + "logits/rejected": -2.1902120113372803, + "logps/chosen": -209.47921752929688, + "logps/rejected": -351.00689697265625, + "loss": 0.1887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.454010009765625, + "rewards/margins": 1.184851050376892, + "rewards/rejected": 0.2691589295864105, + "step": 3783 + }, + { + "epoch": 0.22, + "learning_rate": 9.081112674343785e-08, + "logits/chosen": -2.134122610092163, + "logits/rejected": -2.1377415657043457, + "logps/chosen": -3.700550079345703, + "logps/rejected": -126.41676330566406, + "loss": 0.5207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034951258450746536, + "rewards/margins": 0.9910252690315247, + "rewards/rejected": -1.025976538658142, + "step": 3784 + }, + { + "epoch": 0.22, + "learning_rate": 9.080568140521508e-08, + "logits/chosen": -1.9005481004714966, + "logits/rejected": -1.8958338499069214, + "logps/chosen": -38.06025314331055, + "logps/rejected": -202.6575927734375, + "loss": 0.3892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.138916015625, + "rewards/margins": 1.6735016107559204, + "rewards/rejected": -1.5345855951309204, + "step": 3785 + }, + { + "epoch": 0.22, + "learning_rate": 9.080023461737487e-08, + "logits/chosen": -2.06168794631958, + "logits/rejected": -2.0649209022521973, + "logps/chosen": -233.36383056640625, + "logps/rejected": -452.6229553222656, + "loss": 0.197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.353413462638855, + "rewards/margins": 1.2945023775100708, + "rewards/rejected": 0.05891113355755806, + "step": 3786 + }, + { + "epoch": 0.22, + "learning_rate": 9.079478638011078e-08, + "logits/chosen": -1.7793062925338745, + "logits/rejected": -1.7628273963928223, + "logps/chosen": -1.912028193473816, + "logps/rejected": -231.57672119140625, + "loss": 0.3896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03094165399670601, + "rewards/margins": 2.6158173084259033, + "rewards/rejected": -2.646759033203125, + "step": 3787 + }, + { + "epoch": 0.22, + "learning_rate": 9.078933669361632e-08, + "logits/chosen": -2.2529594898223877, + "logits/rejected": -2.2362146377563477, + "logps/chosen": -67.95283508300781, + "logps/rejected": -162.97018432617188, + "loss": 0.3709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5038596987724304, + "rewards/margins": 1.1536506414413452, + "rewards/rejected": -0.6497909426689148, + "step": 3788 + }, + { + "epoch": 0.22, + "learning_rate": 9.07838855580851e-08, + "logits/chosen": -2.086854934692383, + "logits/rejected": -2.0611937046051025, + "logps/chosen": -223.20767211914062, + "logps/rejected": -359.6700439453125, + "loss": 0.3858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.54937744140625, + "rewards/margins": 0.0833282470703125, + "rewards/rejected": 1.4660491943359375, + "step": 3789 + }, + { + "epoch": 0.22, + "learning_rate": 9.077843297371077e-08, + "logits/chosen": -2.026697874069214, + "logits/rejected": -2.0213463306427, + "logps/chosen": -11.15166187286377, + "logps/rejected": -229.42984008789062, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14581070840358734, + "rewards/margins": 1.760108232498169, + "rewards/rejected": -1.6142975091934204, + "step": 3790 + }, + { + "epoch": 0.22, + "learning_rate": 9.077297894068703e-08, + "logits/chosen": -1.9887123107910156, + "logits/rejected": -1.9893876314163208, + "logps/chosen": -9.626484870910645, + "logps/rejected": -82.39796447753906, + "loss": 0.5761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05003204569220543, + "rewards/margins": 0.47198259830474854, + "rewards/rejected": -0.4219505488872528, + "step": 3791 + }, + { + "epoch": 0.22, + "learning_rate": 9.076752345920762e-08, + "logits/chosen": -2.1167140007019043, + "logits/rejected": -2.0951359272003174, + "logps/chosen": -142.24371337890625, + "logps/rejected": -301.12103271484375, + "loss": 0.561, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.20594178140163422, + "rewards/margins": -0.062438949942588806, + "rewards/rejected": 0.268380731344223, + "step": 3792 + }, + { + "epoch": 0.22, + "learning_rate": 9.076206652946639e-08, + "logits/chosen": -1.7845754623413086, + "logits/rejected": -1.7599949836730957, + "logps/chosen": -179.81686401367188, + "logps/rejected": -324.181640625, + "loss": 0.1851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5996185541152954, + "rewards/margins": 2.661242961883545, + "rewards/rejected": -2.06162428855896, + "step": 3793 + }, + { + "epoch": 0.22, + "learning_rate": 9.075660815165715e-08, + "logits/chosen": -2.167996406555176, + "logits/rejected": -2.1558244228363037, + "logps/chosen": -0.0003649813588708639, + "logps/rejected": -106.15855407714844, + "loss": 0.5588, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1023099432350136e-05, + "rewards/margins": 0.6360149383544922, + "rewards/rejected": -0.6360039114952087, + "step": 3794 + }, + { + "epoch": 0.22, + "learning_rate": 9.075114832597384e-08, + "logits/chosen": -1.9861433506011963, + "logits/rejected": -1.956063985824585, + "logps/chosen": -293.18609619140625, + "logps/rejected": -449.0289001464844, + "loss": 0.2602, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0572174787521362, + "rewards/margins": 1.251083493232727, + "rewards/rejected": -0.19386596977710724, + "step": 3795 + }, + { + "epoch": 0.22, + "learning_rate": 9.074568705261038e-08, + "logits/chosen": -2.214907169342041, + "logits/rejected": -2.2164077758789062, + "logps/chosen": -0.6915751099586487, + "logps/rejected": -145.4713897705078, + "loss": 0.483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015936005860567093, + "rewards/margins": 1.127588152885437, + "rewards/rejected": -1.143524169921875, + "step": 3796 + }, + { + "epoch": 0.22, + "learning_rate": 9.074022433176083e-08, + "logits/chosen": -2.032733201980591, + "logits/rejected": -2.02388596534729, + "logps/chosen": -39.72524642944336, + "logps/rejected": -210.31661987304688, + "loss": 0.3905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06413078308105469, + "rewards/margins": 1.7967052459716797, + "rewards/rejected": -1.732574462890625, + "step": 3797 + }, + { + "epoch": 0.22, + "learning_rate": 9.073476016361922e-08, + "logits/chosen": -2.0597074031829834, + "logits/rejected": -2.0634765625, + "logps/chosen": -4.089664459228516, + "logps/rejected": -120.02055358886719, + "loss": 0.4336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031559254974126816, + "rewards/margins": 1.6564809083938599, + "rewards/rejected": -1.6880401372909546, + "step": 3798 + }, + { + "epoch": 0.22, + "learning_rate": 9.072929454837968e-08, + "logits/chosen": -2.1868879795074463, + "logits/rejected": -2.1816844940185547, + "logps/chosen": -0.004098052624613047, + "logps/rejected": -27.275346755981445, + "loss": 0.6477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021519236906897277, + "rewards/margins": 0.15438133478164673, + "rewards/rejected": -0.15459652245044708, + "step": 3799 + }, + { + "epoch": 0.22, + "learning_rate": 9.072382748623637e-08, + "logits/chosen": -1.8940774202346802, + "logits/rejected": -1.8617039918899536, + "logps/chosen": -238.68338012695312, + "logps/rejected": -576.3548583984375, + "loss": 0.0963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4786376953125, + "rewards/margins": 3.422381639480591, + "rewards/rejected": -1.9437439441680908, + "step": 3800 + }, + { + "epoch": 0.22, + "learning_rate": 9.071835897738349e-08, + "logits/chosen": -1.9935415983200073, + "logits/rejected": -1.970526099205017, + "logps/chosen": -225.0538330078125, + "logps/rejected": -431.3870849609375, + "loss": 0.1337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.538305640220642, + "rewards/margins": 2.08758544921875, + "rewards/rejected": -0.5492798089981079, + "step": 3801 + }, + { + "epoch": 0.22, + "learning_rate": 9.071288902201535e-08, + "logits/chosen": -1.9055304527282715, + "logits/rejected": -1.910460114479065, + "logps/chosen": -16.2436466217041, + "logps/rejected": -133.48690795898438, + "loss": 0.4626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07379608601331711, + "rewards/margins": 1.1726096868515015, + "rewards/rejected": -1.0988136529922485, + "step": 3802 + }, + { + "epoch": 0.22, + "learning_rate": 9.070741762032623e-08, + "logits/chosen": -2.1917080879211426, + "logits/rejected": -2.181272268295288, + "logps/chosen": -1.5085499286651611, + "logps/rejected": -140.00564575195312, + "loss": 0.4231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0407484769821167, + "rewards/margins": 1.6640280485153198, + "rewards/rejected": -1.6232795715332031, + "step": 3803 + }, + { + "epoch": 0.22, + "learning_rate": 9.070194477251051e-08, + "logits/chosen": -2.0270485877990723, + "logits/rejected": -2.0540285110473633, + "logps/chosen": -218.62619018554688, + "logps/rejected": -350.3172607421875, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0804046392440796, + "rewards/margins": 0.8637603521347046, + "rewards/rejected": 0.216644287109375, + "step": 3804 + }, + { + "epoch": 0.22, + "learning_rate": 9.069647047876263e-08, + "logits/chosen": -2.2177629470825195, + "logits/rejected": -2.208782196044922, + "logps/chosen": -0.000573876139242202, + "logps/rejected": -195.31475830078125, + "loss": 0.3857, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.722959137230646e-06, + "rewards/margins": 2.5420749187469482, + "rewards/rejected": -2.542083740234375, + "step": 3805 + }, + { + "epoch": 0.22, + "learning_rate": 9.069099473927703e-08, + "logits/chosen": -1.835227608680725, + "logits/rejected": -1.7734689712524414, + "logps/chosen": -180.9904327392578, + "logps/rejected": -394.1173400878906, + "loss": 0.1543, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3315261602401733, + "rewards/margins": 1.7213027477264404, + "rewards/rejected": -0.3897766172885895, + "step": 3806 + }, + { + "epoch": 0.22, + "learning_rate": 9.068551755424827e-08, + "logits/chosen": -1.9885706901550293, + "logits/rejected": -1.9808399677276611, + "logps/chosen": -5.984226299915463e-05, + "logps/rejected": -161.86717224121094, + "loss": 0.3793, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.576395215532102e-07, + "rewards/margins": 2.6000235080718994, + "rewards/rejected": -2.600023031234741, + "step": 3807 + }, + { + "epoch": 0.22, + "learning_rate": 9.06800389238709e-08, + "logits/chosen": -2.106604814529419, + "logits/rejected": -2.082301378250122, + "logps/chosen": -161.36366271972656, + "logps/rejected": -278.28472900390625, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4605697691440582, + "rewards/margins": 0.4423721432685852, + "rewards/rejected": 0.01819763146340847, + "step": 3808 + }, + { + "epoch": 0.22, + "learning_rate": 9.067455884833958e-08, + "logits/chosen": -1.9043956995010376, + "logits/rejected": -1.8737969398498535, + "logps/chosen": -245.05589294433594, + "logps/rejected": -446.7712707519531, + "loss": 0.1104, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0081710815429688, + "rewards/margins": 1.7945754528045654, + "rewards/rejected": 0.21359558403491974, + "step": 3809 + }, + { + "epoch": 0.22, + "learning_rate": 9.066907732784896e-08, + "logits/chosen": -1.900090217590332, + "logits/rejected": -1.903220534324646, + "logps/chosen": -4.105663299560547, + "logps/rejected": -163.66079711914062, + "loss": 0.4189, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024033833295106888, + "rewards/margins": 1.7865185737609863, + "rewards/rejected": -1.7624847888946533, + "step": 3810 + }, + { + "epoch": 0.22, + "learning_rate": 9.066359436259378e-08, + "logits/chosen": -2.170640230178833, + "logits/rejected": -2.1111536026000977, + "logps/chosen": -258.8048400878906, + "logps/rejected": -406.7073669433594, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.252844214439392, + "rewards/margins": 0.35415953397750854, + "rewards/rejected": 0.8986846804618835, + "step": 3811 + }, + { + "epoch": 0.22, + "learning_rate": 9.065810995276881e-08, + "logits/chosen": -1.732009768486023, + "logits/rejected": -1.6713166236877441, + "logps/chosen": -247.23965454101562, + "logps/rejected": -456.44232177734375, + "loss": 0.336, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0976990461349487, + "rewards/margins": 0.7615540027618408, + "rewards/rejected": 0.3361450135707855, + "step": 3812 + }, + { + "epoch": 0.22, + "learning_rate": 9.06526240985689e-08, + "logits/chosen": -2.178537368774414, + "logits/rejected": -2.1687333583831787, + "logps/chosen": -6.190428256988525, + "logps/rejected": -144.82249450683594, + "loss": 0.384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0433867946267128, + "rewards/margins": 2.464456081390381, + "rewards/rejected": -2.421069383621216, + "step": 3813 + }, + { + "epoch": 0.22, + "learning_rate": 9.064713680018894e-08, + "logits/chosen": -2.1800246238708496, + "logits/rejected": -2.181870698928833, + "logps/chosen": -266.7557373046875, + "logps/rejected": -255.55917358398438, + "loss": 0.4621, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0256531238555908, + "rewards/margins": -0.03927910327911377, + "rewards/rejected": 1.0649322271347046, + "step": 3814 + }, + { + "epoch": 0.22, + "learning_rate": 9.064164805782385e-08, + "logits/chosen": -2.2867953777313232, + "logits/rejected": -2.287302017211914, + "logps/chosen": -53.79969024658203, + "logps/rejected": -109.87919616699219, + "loss": 0.6764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48570939898490906, + "rewards/margins": 0.5649367570877075, + "rewards/rejected": -1.050646185874939, + "step": 3815 + }, + { + "epoch": 0.22, + "learning_rate": 9.063615787166861e-08, + "logits/chosen": -2.0819265842437744, + "logits/rejected": -2.0431222915649414, + "logps/chosen": -215.8865203857422, + "logps/rejected": -280.4429626464844, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9742019772529602, + "rewards/margins": 1.0044113397598267, + "rewards/rejected": -0.03020935133099556, + "step": 3816 + }, + { + "epoch": 0.22, + "learning_rate": 9.063066624191828e-08, + "logits/chosen": -2.0562868118286133, + "logits/rejected": -2.0453224182128906, + "logps/chosen": -3.564850330352783, + "logps/rejected": -143.91748046875, + "loss": 0.4638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040192581713199615, + "rewards/margins": 1.475345492362976, + "rewards/rejected": -1.5155380964279175, + "step": 3817 + }, + { + "epoch": 0.22, + "learning_rate": 9.062517316876794e-08, + "logits/chosen": -2.0796163082122803, + "logits/rejected": -2.074028968811035, + "logps/chosen": -65.06877899169922, + "logps/rejected": -177.88519287109375, + "loss": 0.4167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27797776460647583, + "rewards/margins": 1.1061789989471436, + "rewards/rejected": -0.8282012939453125, + "step": 3818 + }, + { + "epoch": 0.22, + "learning_rate": 9.061967865241272e-08, + "logits/chosen": -1.905466079711914, + "logits/rejected": -1.8780351877212524, + "logps/chosen": -258.59063720703125, + "logps/rejected": -281.797607421875, + "loss": 0.194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.573358178138733, + "rewards/margins": 1.1327728033065796, + "rewards/rejected": 0.44058534502983093, + "step": 3819 + }, + { + "epoch": 0.22, + "learning_rate": 9.061418269304784e-08, + "logits/chosen": -2.1546239852905273, + "logits/rejected": -2.155217170715332, + "logps/chosen": -3.7012882232666016, + "logps/rejected": -172.25387573242188, + "loss": 0.5803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05976405367255211, + "rewards/margins": 0.6415955424308777, + "rewards/rejected": -0.7013595700263977, + "step": 3820 + }, + { + "epoch": 0.22, + "learning_rate": 9.060868529086852e-08, + "logits/chosen": -1.8821171522140503, + "logits/rejected": -1.7566121816635132, + "logps/chosen": -188.72271728515625, + "logps/rejected": -520.5574951171875, + "loss": 0.4048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8030334711074829, + "rewards/margins": 0.5375610589981079, + "rewards/rejected": 0.265472412109375, + "step": 3821 + }, + { + "epoch": 0.22, + "learning_rate": 9.060318644607003e-08, + "logits/chosen": -2.237919569015503, + "logits/rejected": -2.2362399101257324, + "logps/chosen": -2.4903781414031982, + "logps/rejected": -213.82879638671875, + "loss": 0.5074, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00048210620298050344, + "rewards/margins": 1.003250002861023, + "rewards/rejected": -1.0027679204940796, + "step": 3822 + }, + { + "epoch": 0.22, + "learning_rate": 9.059768615884777e-08, + "logits/chosen": -2.169982671737671, + "logits/rejected": -2.147909641265869, + "logps/chosen": -42.929264068603516, + "logps/rejected": -338.5340576171875, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1846664398908615, + "rewards/margins": 2.1350693702697754, + "rewards/rejected": -1.950402855873108, + "step": 3823 + }, + { + "epoch": 0.22, + "learning_rate": 9.05921844293971e-08, + "logits/chosen": -1.934674859046936, + "logits/rejected": -1.936821699142456, + "logps/chosen": -221.48028564453125, + "logps/rejected": -323.6947326660156, + "loss": 0.4132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.997271716594696, + "rewards/margins": 0.11729121208190918, + "rewards/rejected": 0.8799805045127869, + "step": 3824 + }, + { + "epoch": 0.22, + "learning_rate": 9.058668125791351e-08, + "logits/chosen": -1.9264904260635376, + "logits/rejected": -1.937221646308899, + "logps/chosen": -90.93820190429688, + "logps/rejected": -281.1826477050781, + "loss": 0.4515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4070877134799957, + "rewards/margins": 0.7154678106307983, + "rewards/rejected": -0.308380126953125, + "step": 3825 + }, + { + "epoch": 0.22, + "learning_rate": 9.058117664459246e-08, + "logits/chosen": -2.043043613433838, + "logits/rejected": -2.0313398838043213, + "logps/chosen": -65.63427734375, + "logps/rejected": -320.9391784667969, + "loss": 0.3992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2873893678188324, + "rewards/margins": 1.2248375415802002, + "rewards/rejected": -0.9374481439590454, + "step": 3826 + }, + { + "epoch": 0.22, + "learning_rate": 9.057567058962949e-08, + "logits/chosen": -2.2499101161956787, + "logits/rejected": -2.234363079071045, + "logps/chosen": -71.8192138671875, + "logps/rejected": -224.22254943847656, + "loss": 0.4995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19564056396484375, + "rewards/margins": 1.47246253490448, + "rewards/rejected": -1.6681030988693237, + "step": 3827 + }, + { + "epoch": 0.22, + "learning_rate": 9.057016309322026e-08, + "logits/chosen": -2.0890674591064453, + "logits/rejected": -2.0826451778411865, + "logps/chosen": -69.91714477539062, + "logps/rejected": -329.4223327636719, + "loss": 0.4401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3418922424316406, + "rewards/margins": 3.645449161529541, + "rewards/rejected": -3.9873414039611816, + "step": 3828 + }, + { + "epoch": 0.22, + "learning_rate": 9.056465415556035e-08, + "logits/chosen": -2.0160908699035645, + "logits/rejected": -1.9841736555099487, + "logps/chosen": -230.5299072265625, + "logps/rejected": -428.15106201171875, + "loss": 0.3507, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4324554204940796, + "rewards/margins": 0.33436882495880127, + "rewards/rejected": 1.0980865955352783, + "step": 3829 + }, + { + "epoch": 0.22, + "learning_rate": 9.055914377684552e-08, + "logits/chosen": -2.08042311668396, + "logits/rejected": -2.0738918781280518, + "logps/chosen": -27.776639938354492, + "logps/rejected": -285.2278747558594, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007981300354003906, + "rewards/margins": 2.8821756839752197, + "rewards/rejected": -2.874194383621216, + "step": 3830 + }, + { + "epoch": 0.22, + "learning_rate": 9.055363195727151e-08, + "logits/chosen": -2.0563597679138184, + "logits/rejected": -2.032029151916504, + "logps/chosen": -159.54269409179688, + "logps/rejected": -256.56097412109375, + "loss": 0.4104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6452301144599915, + "rewards/margins": 0.6893035769462585, + "rewards/rejected": -0.04407348856329918, + "step": 3831 + }, + { + "epoch": 0.22, + "learning_rate": 9.054811869703411e-08, + "logits/chosen": -2.0312371253967285, + "logits/rejected": -2.026543378829956, + "logps/chosen": -215.30288696289062, + "logps/rejected": -383.58782958984375, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1277847290039062, + "rewards/margins": 0.6488296389579773, + "rewards/rejected": 0.47895509004592896, + "step": 3832 + }, + { + "epoch": 0.22, + "learning_rate": 9.054260399632921e-08, + "logits/chosen": -1.9632540941238403, + "logits/rejected": -1.9448881149291992, + "logps/chosen": -216.46986389160156, + "logps/rejected": -317.3753967285156, + "loss": 0.4388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5445083975791931, + "rewards/margins": 0.5424728989601135, + "rewards/rejected": 0.002035522600635886, + "step": 3833 + }, + { + "epoch": 0.22, + "learning_rate": 9.053708785535267e-08, + "logits/chosen": -2.028639793395996, + "logits/rejected": -2.0259718894958496, + "logps/chosen": -0.00021634838776662946, + "logps/rejected": -118.57625579833984, + "loss": 0.553, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.598377022877685e-06, + "rewards/margins": 0.6713055372238159, + "rewards/rejected": -0.671308159828186, + "step": 3834 + }, + { + "epoch": 0.22, + "learning_rate": 9.053157027430051e-08, + "logits/chosen": -2.2043333053588867, + "logits/rejected": -2.20123291015625, + "logps/chosen": -0.057231441140174866, + "logps/rejected": -79.58824157714844, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006797887617722154, + "rewards/margins": 0.8873146176338196, + "rewards/rejected": -0.8866348266601562, + "step": 3835 + }, + { + "epoch": 0.22, + "learning_rate": 9.052605125336869e-08, + "logits/chosen": -2.002037286758423, + "logits/rejected": -1.9979121685028076, + "logps/chosen": -45.01959991455078, + "logps/rejected": -91.95174407958984, + "loss": 0.7368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18426170945167542, + "rewards/margins": 0.018766403198242188, + "rewards/rejected": -0.2030281126499176, + "step": 3836 + }, + { + "epoch": 0.22, + "learning_rate": 9.052053079275333e-08, + "logits/chosen": -2.034593105316162, + "logits/rejected": -2.0220859050750732, + "logps/chosen": -57.775970458984375, + "logps/rejected": -180.872314453125, + "loss": 0.4342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021166611462831497, + "rewards/margins": 1.5995357036590576, + "rewards/rejected": -1.578369140625, + "step": 3837 + }, + { + "epoch": 0.22, + "learning_rate": 9.051500889265048e-08, + "logits/chosen": -2.0914804935455322, + "logits/rejected": -2.073896884918213, + "logps/chosen": -150.93429565429688, + "logps/rejected": -219.19790649414062, + "loss": 0.5175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.370736688375473, + "rewards/margins": 0.3596557676792145, + "rewards/rejected": 0.011080932803452015, + "step": 3838 + }, + { + "epoch": 0.22, + "learning_rate": 9.050948555325634e-08, + "logits/chosen": -2.021223306655884, + "logits/rejected": -2.0146021842956543, + "logps/chosen": -0.5797192454338074, + "logps/rejected": -214.56654357910156, + "loss": 0.4039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018798217177391052, + "rewards/margins": 2.1326498985290527, + "rewards/rejected": -2.1514480113983154, + "step": 3839 + }, + { + "epoch": 0.22, + "learning_rate": 9.050396077476712e-08, + "logits/chosen": -2.0664215087890625, + "logits/rejected": -2.0642776489257812, + "logps/chosen": -19.297876358032227, + "logps/rejected": -129.3434295654297, + "loss": 0.6463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37902402877807617, + "rewards/margins": 0.3825192451477051, + "rewards/rejected": -0.7615432739257812, + "step": 3840 + }, + { + "epoch": 0.22, + "learning_rate": 9.049843455737909e-08, + "logits/chosen": -2.0339057445526123, + "logits/rejected": -2.0276288986206055, + "logps/chosen": -0.014523068442940712, + "logps/rejected": -93.61553955078125, + "loss": 0.542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005656943540088832, + "rewards/margins": 0.725210964679718, + "rewards/rejected": -0.7257766723632812, + "step": 3841 + }, + { + "epoch": 0.22, + "learning_rate": 9.049290690128856e-08, + "logits/chosen": -2.338024377822876, + "logits/rejected": -2.3189353942871094, + "logps/chosen": -6.095966815948486, + "logps/rejected": -351.9507751464844, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01956024207174778, + "rewards/margins": 3.2122604846954346, + "rewards/rejected": -3.192700147628784, + "step": 3842 + }, + { + "epoch": 0.22, + "learning_rate": 9.048737780669192e-08, + "logits/chosen": -2.0143258571624756, + "logits/rejected": -2.0126595497131348, + "logps/chosen": -89.37288665771484, + "logps/rejected": -253.25914001464844, + "loss": 0.3999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022141266614198685, + "rewards/margins": 2.137187957763672, + "rewards/rejected": -2.1593291759490967, + "step": 3843 + }, + { + "epoch": 0.22, + "learning_rate": 9.048184727378555e-08, + "logits/chosen": -2.2326483726501465, + "logits/rejected": -2.2251665592193604, + "logps/chosen": -14.286046981811523, + "logps/rejected": -289.53643798828125, + "loss": 0.3636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05868091806769371, + "rewards/margins": 3.373914957046509, + "rewards/rejected": -3.43259596824646, + "step": 3844 + }, + { + "epoch": 0.22, + "learning_rate": 9.047631530276596e-08, + "logits/chosen": -1.9515454769134521, + "logits/rejected": -1.949459433555603, + "logps/chosen": -0.005558055825531483, + "logps/rejected": -132.505615234375, + "loss": 0.4667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018055188411381096, + "rewards/margins": 1.3161356449127197, + "rewards/rejected": -1.3163162469863892, + "step": 3845 + }, + { + "epoch": 0.22, + "learning_rate": 9.047078189382967e-08, + "logits/chosen": -1.9786839485168457, + "logits/rejected": -1.9337612390518188, + "logps/chosen": -180.357421875, + "logps/rejected": -527.63525390625, + "loss": 0.1006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.354943871498108, + "rewards/margins": 2.6248779296875, + "rewards/rejected": -1.269934058189392, + "step": 3846 + }, + { + "epoch": 0.22, + "learning_rate": 9.046524704717322e-08, + "logits/chosen": -1.896578073501587, + "logits/rejected": -1.8949238061904907, + "logps/chosen": -47.56996154785156, + "logps/rejected": -265.12481689453125, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19881057739257812, + "rewards/margins": 2.702146291732788, + "rewards/rejected": -2.50333571434021, + "step": 3847 + }, + { + "epoch": 0.22, + "learning_rate": 9.045971076299327e-08, + "logits/chosen": -2.1981937885284424, + "logits/rejected": -2.1928975582122803, + "logps/chosen": -26.901531219482422, + "logps/rejected": -160.60079956054688, + "loss": 0.5285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24433593451976776, + "rewards/margins": 0.36970824003219604, + "rewards/rejected": -0.12537232041358948, + "step": 3848 + }, + { + "epoch": 0.22, + "learning_rate": 9.045417304148649e-08, + "logits/chosen": -2.090425968170166, + "logits/rejected": -2.0646510124206543, + "logps/chosen": -205.15383911132812, + "logps/rejected": -333.97064208984375, + "loss": 0.1991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5599075555801392, + "rewards/margins": 1.1000869274139404, + "rewards/rejected": 0.45982056856155396, + "step": 3849 + }, + { + "epoch": 0.22, + "learning_rate": 9.044863388284959e-08, + "logits/chosen": -1.9074149131774902, + "logits/rejected": -1.8991332054138184, + "logps/chosen": -0.16192995011806488, + "logps/rejected": -224.3415069580078, + "loss": 0.4097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0059860325418412685, + "rewards/margins": 1.995086669921875, + "rewards/rejected": -2.001072645187378, + "step": 3850 + }, + { + "epoch": 0.22, + "learning_rate": 9.044309328727937e-08, + "logits/chosen": -2.124781847000122, + "logits/rejected": -2.112896203994751, + "logps/chosen": -62.05193328857422, + "logps/rejected": -275.5185241699219, + "loss": 0.5083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0938648208975792, + "rewards/margins": 1.1560070514678955, + "rewards/rejected": -1.249871850013733, + "step": 3851 + }, + { + "epoch": 0.22, + "learning_rate": 9.043755125497263e-08, + "logits/chosen": -2.184138774871826, + "logits/rejected": -2.189846992492676, + "logps/chosen": -6.509047985076904, + "logps/rejected": -109.18490600585938, + "loss": 0.6611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012473726645112038, + "rewards/margins": 0.1445750743150711, + "rewards/rejected": -0.1570488065481186, + "step": 3852 + }, + { + "epoch": 0.22, + "learning_rate": 9.043200778612629e-08, + "logits/chosen": -1.9736146926879883, + "logits/rejected": -1.9326744079589844, + "logps/chosen": -266.1051330566406, + "logps/rejected": -433.20513916015625, + "loss": 0.1031, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7170623540878296, + "rewards/margins": 2.626150608062744, + "rewards/rejected": -0.909088134765625, + "step": 3853 + }, + { + "epoch": 0.22, + "learning_rate": 9.042646288093725e-08, + "logits/chosen": -2.0707924365997314, + "logits/rejected": -2.0739986896514893, + "logps/chosen": -60.18285369873047, + "logps/rejected": -319.2113952636719, + "loss": 0.2438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44275131821632385, + "rewards/margins": 2.8708336353302, + "rewards/rejected": -2.428082227706909, + "step": 3854 + }, + { + "epoch": 0.22, + "learning_rate": 9.042091653960248e-08, + "logits/chosen": -2.0510127544403076, + "logits/rejected": -2.05527925491333, + "logps/chosen": -21.390531539916992, + "logps/rejected": -194.56021118164062, + "loss": 0.4982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0013473511207848787, + "rewards/margins": 0.9359329342842102, + "rewards/rejected": -0.9345855712890625, + "step": 3855 + }, + { + "epoch": 0.22, + "learning_rate": 9.041536876231907e-08, + "logits/chosen": -2.09814190864563, + "logits/rejected": -2.0946502685546875, + "logps/chosen": -33.61724090576172, + "logps/rejected": -374.6953125, + "loss": 0.336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18166962265968323, + "rewards/margins": 2.8561081886291504, + "rewards/rejected": -2.6744384765625, + "step": 3856 + }, + { + "epoch": 0.22, + "learning_rate": 9.040981954928405e-08, + "logits/chosen": -2.071866035461426, + "logits/rejected": -2.0549986362457275, + "logps/chosen": -191.37991333007812, + "logps/rejected": -243.98992919921875, + "loss": 0.2432, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2854584455490112, + "rewards/margins": 0.9079957008361816, + "rewards/rejected": 0.377462774515152, + "step": 3857 + }, + { + "epoch": 0.22, + "learning_rate": 9.040426890069458e-08, + "logits/chosen": -1.9932644367218018, + "logits/rejected": -1.991126537322998, + "logps/chosen": -0.006965972948819399, + "logps/rejected": -89.62670135498047, + "loss": 0.5161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002761636860668659, + "rewards/margins": 0.9671822190284729, + "rewards/rejected": -0.9674583673477173, + "step": 3858 + }, + { + "epoch": 0.22, + "learning_rate": 9.039871681674783e-08, + "logits/chosen": -2.0315210819244385, + "logits/rejected": -2.028135061264038, + "logps/chosen": -37.75379180908203, + "logps/rejected": -277.69024658203125, + "loss": 0.3369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3674522340297699, + "rewards/margins": 1.917855143547058, + "rewards/rejected": -1.5504028797149658, + "step": 3859 + }, + { + "epoch": 0.22, + "learning_rate": 9.039316329764107e-08, + "logits/chosen": -2.1736507415771484, + "logits/rejected": -2.1583142280578613, + "logps/chosen": -57.689613342285156, + "logps/rejected": -200.02871704101562, + "loss": 0.4293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33262863755226135, + "rewards/margins": 0.7573814392089844, + "rewards/rejected": -0.424752801656723, + "step": 3860 + }, + { + "epoch": 0.22, + "learning_rate": 9.038760834357153e-08, + "logits/chosen": -2.119208335876465, + "logits/rejected": -2.0541770458221436, + "logps/chosen": -266.46234130859375, + "logps/rejected": -446.04852294921875, + "loss": 0.1058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5214874744415283, + "rewards/margins": 2.295590400695801, + "rewards/rejected": -0.7741028070449829, + "step": 3861 + }, + { + "epoch": 0.22, + "learning_rate": 9.038205195473661e-08, + "logits/chosen": -2.257751941680908, + "logits/rejected": -2.2600653171539307, + "logps/chosen": -7.080846262397245e-05, + "logps/rejected": -265.96697998046875, + "loss": 0.3927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7418064973971923e-07, + "rewards/margins": 2.299722194671631, + "rewards/rejected": -2.29972243309021, + "step": 3862 + }, + { + "epoch": 0.22, + "learning_rate": 9.037649413133366e-08, + "logits/chosen": -2.22318696975708, + "logits/rejected": -2.213473320007324, + "logps/chosen": -85.04731750488281, + "logps/rejected": -204.0230712890625, + "loss": 0.5914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.220469668507576, + "rewards/margins": 0.8015182614326477, + "rewards/rejected": -1.0219879150390625, + "step": 3863 + }, + { + "epoch": 0.22, + "learning_rate": 9.037093487356016e-08, + "logits/chosen": -2.3161582946777344, + "logits/rejected": -2.311203956604004, + "logps/chosen": -8.046977996826172, + "logps/rejected": -134.79344177246094, + "loss": 0.4148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02988109551370144, + "rewards/margins": 1.6611158847808838, + "rewards/rejected": -1.6312347650527954, + "step": 3864 + }, + { + "epoch": 0.22, + "learning_rate": 9.036537418161355e-08, + "logits/chosen": -1.816057801246643, + "logits/rejected": -1.8201100826263428, + "logps/chosen": -0.003803282044827938, + "logps/rejected": -36.07612991333008, + "loss": 0.6481, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.249451689654961e-05, + "rewards/margins": 0.18910376727581024, + "rewards/rejected": -0.18916626274585724, + "step": 3865 + }, + { + "epoch": 0.22, + "learning_rate": 9.035981205569141e-08, + "logits/chosen": -2.0817511081695557, + "logits/rejected": -1.998811960220337, + "logps/chosen": -270.56695556640625, + "logps/rejected": -573.440673828125, + "loss": 0.2202, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.145288109779358, + "rewards/margins": 1.17584228515625, + "rewards/rejected": -0.03055419959127903, + "step": 3866 + }, + { + "epoch": 0.23, + "learning_rate": 9.035424849599134e-08, + "logits/chosen": -2.01057505607605, + "logits/rejected": -2.007624387741089, + "logps/chosen": -7.437777519226074, + "logps/rejected": -187.779541015625, + "loss": 0.483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06359272450208664, + "rewards/margins": 1.0466279983520508, + "rewards/rejected": -0.9830352663993835, + "step": 3867 + }, + { + "epoch": 0.23, + "learning_rate": 9.034868350271094e-08, + "logits/chosen": -1.9593935012817383, + "logits/rejected": -1.9586703777313232, + "logps/chosen": -2.187002182006836, + "logps/rejected": -77.32264709472656, + "loss": 0.6504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03617978096008301, + "rewards/margins": 0.205295130610466, + "rewards/rejected": -0.241474911570549, + "step": 3868 + }, + { + "epoch": 0.23, + "learning_rate": 9.034311707604796e-08, + "logits/chosen": -2.068573474884033, + "logits/rejected": -2.0581161975860596, + "logps/chosen": -3.1681902408599854, + "logps/rejected": -104.56295013427734, + "loss": 0.6278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048326801508665085, + "rewards/margins": 0.22634118795394897, + "rewards/rejected": -0.1780143827199936, + "step": 3869 + }, + { + "epoch": 0.23, + "learning_rate": 9.033754921620012e-08, + "logits/chosen": -2.1580471992492676, + "logits/rejected": -2.140378475189209, + "logps/chosen": -27.752817153930664, + "logps/rejected": -243.03819274902344, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13587819039821625, + "rewards/margins": 2.5389511585235596, + "rewards/rejected": -2.4030730724334717, + "step": 3870 + }, + { + "epoch": 0.23, + "learning_rate": 9.033197992336521e-08, + "logits/chosen": -1.9975138902664185, + "logits/rejected": -1.9966782331466675, + "logps/chosen": -42.32441329956055, + "logps/rejected": -164.27587890625, + "loss": 0.6459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03627128526568413, + "rewards/margins": 0.2540394067764282, + "rewards/rejected": -0.29031068086624146, + "step": 3871 + }, + { + "epoch": 0.23, + "learning_rate": 9.032640919774108e-08, + "logits/chosen": -2.0278141498565674, + "logits/rejected": -2.0006003379821777, + "logps/chosen": -238.77825927734375, + "logps/rejected": -433.140869140625, + "loss": 0.211, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.078948974609375, + "rewards/margins": 1.4419463872909546, + "rewards/rejected": -0.362997442483902, + "step": 3872 + }, + { + "epoch": 0.23, + "learning_rate": 9.032083703952565e-08, + "logits/chosen": -1.7729768753051758, + "logits/rejected": -1.7731270790100098, + "logps/chosen": -215.06512451171875, + "logps/rejected": -360.2082214355469, + "loss": 0.2604, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1496918201446533, + "rewards/margins": 0.8277100324630737, + "rewards/rejected": 0.321981817483902, + "step": 3873 + }, + { + "epoch": 0.23, + "learning_rate": 9.031526344891687e-08, + "logits/chosen": -2.294194221496582, + "logits/rejected": -2.2999725341796875, + "logps/chosen": -32.75385284423828, + "logps/rejected": -123.21443939208984, + "loss": 0.5581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015046692453324795, + "rewards/margins": 0.5478859543800354, + "rewards/rejected": -0.5328392386436462, + "step": 3874 + }, + { + "epoch": 0.23, + "learning_rate": 9.030968842611271e-08, + "logits/chosen": -2.0613064765930176, + "logits/rejected": -2.07236647605896, + "logps/chosen": -168.08306884765625, + "logps/rejected": -449.450927734375, + "loss": 0.4796, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8047119379043579, + "rewards/margins": -0.15236812829971313, + "rewards/rejected": 0.957080066204071, + "step": 3875 + }, + { + "epoch": 0.23, + "learning_rate": 9.030411197131125e-08, + "logits/chosen": -1.9341059923171997, + "logits/rejected": -1.9086500406265259, + "logps/chosen": -188.83506774902344, + "logps/rejected": -279.482421875, + "loss": 0.4452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6456772089004517, + "rewards/margins": 0.5067214965820312, + "rewards/rejected": 0.13895569741725922, + "step": 3876 + }, + { + "epoch": 0.23, + "learning_rate": 9.029853408471057e-08, + "logits/chosen": -2.09338116645813, + "logits/rejected": -2.084527015686035, + "logps/chosen": -93.57071685791016, + "logps/rejected": -247.531982421875, + "loss": 0.515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26582643389701843, + "rewards/margins": 2.2283661365509033, + "rewards/rejected": -2.494192600250244, + "step": 3877 + }, + { + "epoch": 0.23, + "learning_rate": 9.029295476650885e-08, + "logits/chosen": -2.1946158409118652, + "logits/rejected": -2.1786751747131348, + "logps/chosen": -0.0003191021387465298, + "logps/rejected": -215.87393188476562, + "loss": 0.4134, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.784712008491624e-06, + "rewards/margins": 1.9450321197509766, + "rewards/rejected": -1.9450409412384033, + "step": 3878 + }, + { + "epoch": 0.23, + "learning_rate": 9.028737401690429e-08, + "logits/chosen": -1.9226112365722656, + "logits/rejected": -1.9314519166946411, + "logps/chosen": -212.21231079101562, + "logps/rejected": -368.89642333984375, + "loss": 0.1972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9099854230880737, + "rewards/margins": 1.064208984375, + "rewards/rejected": 0.845776379108429, + "step": 3879 + }, + { + "epoch": 0.23, + "learning_rate": 9.028179183609513e-08, + "logits/chosen": -1.9730792045593262, + "logits/rejected": -1.973668098449707, + "logps/chosen": -0.0002480582916177809, + "logps/rejected": -155.865966796875, + "loss": 0.4337, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0071986253024079e-05, + "rewards/margins": 1.5646659135818481, + "rewards/rejected": -1.5646759271621704, + "step": 3880 + }, + { + "epoch": 0.23, + "learning_rate": 9.027620822427968e-08, + "logits/chosen": -2.1372923851013184, + "logits/rejected": -2.1411757469177246, + "logps/chosen": -2.073296308517456, + "logps/rejected": -83.97393798828125, + "loss": 0.5173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022376347333192825, + "rewards/margins": 0.831344485282898, + "rewards/rejected": -0.8537208437919617, + "step": 3881 + }, + { + "epoch": 0.23, + "learning_rate": 9.02706231816563e-08, + "logits/chosen": -1.9441492557525635, + "logits/rejected": -1.9450148344039917, + "logps/chosen": -100.39645385742188, + "logps/rejected": -385.4826965332031, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08950576931238174, + "rewards/margins": 3.8549554347991943, + "rewards/rejected": -3.9444611072540283, + "step": 3882 + }, + { + "epoch": 0.23, + "learning_rate": 9.02650367084234e-08, + "logits/chosen": -1.7592278718948364, + "logits/rejected": -1.740909457206726, + "logps/chosen": -307.74639892578125, + "logps/rejected": -407.30694580078125, + "loss": 0.1088, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6461578607559204, + "rewards/margins": 2.174487352371216, + "rewards/rejected": -0.5283294916152954, + "step": 3883 + }, + { + "epoch": 0.23, + "learning_rate": 9.025944880477945e-08, + "logits/chosen": -2.0671396255493164, + "logits/rejected": -2.059133768081665, + "logps/chosen": -254.84402465820312, + "logps/rejected": -440.25506591796875, + "loss": 0.1036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6197082996368408, + "rewards/margins": 2.569873094558716, + "rewards/rejected": -0.950164794921875, + "step": 3884 + }, + { + "epoch": 0.23, + "learning_rate": 9.025385947092295e-08, + "logits/chosen": -2.204226493835449, + "logits/rejected": -2.1971020698547363, + "logps/chosen": -208.7667236328125, + "logps/rejected": -330.0500183105469, + "loss": 0.5193, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8552032709121704, + "rewards/margins": -0.012332141399383545, + "rewards/rejected": 0.867535412311554, + "step": 3885 + }, + { + "epoch": 0.23, + "learning_rate": 9.024826870705245e-08, + "logits/chosen": -2.012573719024658, + "logits/rejected": -1.9703335762023926, + "logps/chosen": -230.73248291015625, + "logps/rejected": -402.2081604003906, + "loss": 0.1119, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.14176344871521, + "rewards/margins": 1.7150239944458008, + "rewards/rejected": 0.42673951387405396, + "step": 3886 + }, + { + "epoch": 0.23, + "learning_rate": 9.024267651336656e-08, + "logits/chosen": -2.0328359603881836, + "logits/rejected": -2.0288078784942627, + "logps/chosen": -261.5451354980469, + "logps/rejected": -358.6324462890625, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9973175525665283, + "rewards/margins": 0.15124821662902832, + "rewards/rejected": 1.8460693359375, + "step": 3887 + }, + { + "epoch": 0.23, + "learning_rate": 9.023708289006396e-08, + "logits/chosen": -2.1594014167785645, + "logits/rejected": -2.1646344661712646, + "logps/chosen": -121.64334106445312, + "logps/rejected": -218.22012329101562, + "loss": 0.3608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5632873773574829, + "rewards/margins": 1.2080962657928467, + "rewards/rejected": -0.6448089480400085, + "step": 3888 + }, + { + "epoch": 0.23, + "learning_rate": 9.023148783734336e-08, + "logits/chosen": -2.1239113807678223, + "logits/rejected": -1.9959133863449097, + "logps/chosen": -172.12335205078125, + "logps/rejected": -435.03289794921875, + "loss": 0.4487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20379029214382172, + "rewards/margins": 0.8145416378974915, + "rewards/rejected": -0.6107513308525085, + "step": 3889 + }, + { + "epoch": 0.23, + "learning_rate": 9.022589135540352e-08, + "logits/chosen": -2.2123117446899414, + "logits/rejected": -2.216801643371582, + "logps/chosen": -8.818014144897461, + "logps/rejected": -98.71131896972656, + "loss": 0.4648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18701468408107758, + "rewards/margins": 1.0229957103729248, + "rewards/rejected": -0.835981011390686, + "step": 3890 + }, + { + "epoch": 0.23, + "learning_rate": 9.022029344444324e-08, + "logits/chosen": -2.231926679611206, + "logits/rejected": -2.2294559478759766, + "logps/chosen": -13.503594398498535, + "logps/rejected": -149.78457641601562, + "loss": 0.5709, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07499837875366211, + "rewards/margins": 0.6772553324699402, + "rewards/rejected": -0.7522537112236023, + "step": 3891 + }, + { + "epoch": 0.23, + "learning_rate": 9.021469410466142e-08, + "logits/chosen": -2.0238277912139893, + "logits/rejected": -2.0090219974517822, + "logps/chosen": -233.5036163330078, + "logps/rejected": -352.0626220703125, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0718567371368408, + "rewards/margins": 0.6022095084190369, + "rewards/rejected": 0.46964722871780396, + "step": 3892 + }, + { + "epoch": 0.23, + "learning_rate": 9.020909333625693e-08, + "logits/chosen": -2.1530213356018066, + "logits/rejected": -2.1323862075805664, + "logps/chosen": -37.145240783691406, + "logps/rejected": -338.9165344238281, + "loss": 0.3294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06250496208667755, + "rewards/margins": 4.620433330535889, + "rewards/rejected": -4.557928562164307, + "step": 3893 + }, + { + "epoch": 0.23, + "learning_rate": 9.020349113942878e-08, + "logits/chosen": -2.062527894973755, + "logits/rejected": -1.9981166124343872, + "logps/chosen": -169.38540649414062, + "logps/rejected": -248.17030334472656, + "loss": 0.5348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3447280824184418, + "rewards/margins": 0.29483336210250854, + "rewards/rejected": 0.04989471659064293, + "step": 3894 + }, + { + "epoch": 0.23, + "learning_rate": 9.019788751437596e-08, + "logits/chosen": -2.0204973220825195, + "logits/rejected": -2.0108747482299805, + "logps/chosen": -64.93177795410156, + "logps/rejected": -248.71182250976562, + "loss": 0.3966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10045929253101349, + "rewards/margins": 2.8280348777770996, + "rewards/rejected": -2.9284942150115967, + "step": 3895 + }, + { + "epoch": 0.23, + "learning_rate": 9.019228246129754e-08, + "logits/chosen": -2.1175827980041504, + "logits/rejected": -2.1082494258880615, + "logps/chosen": -34.606178283691406, + "logps/rejected": -244.95599365234375, + "loss": 0.4123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14332543313503265, + "rewards/margins": 1.4852261543273926, + "rewards/rejected": -1.3419007062911987, + "step": 3896 + }, + { + "epoch": 0.23, + "learning_rate": 9.018667598039266e-08, + "logits/chosen": -2.1101276874542236, + "logits/rejected": -2.105459213256836, + "logps/chosen": -52.45360565185547, + "logps/rejected": -302.918701171875, + "loss": 0.4754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2049858123064041, + "rewards/margins": 0.8165214657783508, + "rewards/rejected": -0.6115356683731079, + "step": 3897 + }, + { + "epoch": 0.23, + "learning_rate": 9.018106807186045e-08, + "logits/chosen": -2.029003143310547, + "logits/rejected": -2.038959264755249, + "logps/chosen": -234.2406005859375, + "logps/rejected": -336.5359802246094, + "loss": 0.3732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.893611192703247, + "rewards/margins": 0.02753150463104248, + "rewards/rejected": 1.8660796880722046, + "step": 3898 + }, + { + "epoch": 0.23, + "learning_rate": 9.017545873590018e-08, + "logits/chosen": -1.9656033515930176, + "logits/rejected": -1.9600462913513184, + "logps/chosen": -276.795654296875, + "logps/rejected": -360.8323669433594, + "loss": 0.2661, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0617706775665283, + "rewards/margins": 1.000634789466858, + "rewards/rejected": 0.06113586574792862, + "step": 3899 + }, + { + "epoch": 0.23, + "learning_rate": 9.01698479727111e-08, + "logits/chosen": -2.1545565128326416, + "logits/rejected": -2.152730703353882, + "logps/chosen": -41.663307189941406, + "logps/rejected": -100.7566909790039, + "loss": 0.6164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00540504464879632, + "rewards/margins": 0.30933037400245667, + "rewards/rejected": -0.31473541259765625, + "step": 3900 + }, + { + "epoch": 0.23, + "learning_rate": 9.016423578249251e-08, + "logits/chosen": -2.063600778579712, + "logits/rejected": -2.0782783031463623, + "logps/chosen": -293.754150390625, + "logps/rejected": -541.0332641601562, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10509949177503586, + "rewards/margins": 1.4836394786834717, + "rewards/rejected": -1.3785400390625, + "step": 3901 + }, + { + "epoch": 0.23, + "learning_rate": 9.015862216544381e-08, + "logits/chosen": -2.077475070953369, + "logits/rejected": -2.120495319366455, + "logps/chosen": -197.77896118164062, + "logps/rejected": -314.2933654785156, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9093475341796875, + "rewards/margins": 0.6223052740097046, + "rewards/rejected": 0.2870422303676605, + "step": 3902 + }, + { + "epoch": 0.23, + "learning_rate": 9.015300712176441e-08, + "logits/chosen": -2.2551722526550293, + "logits/rejected": -2.251302719116211, + "logps/chosen": -28.68299674987793, + "logps/rejected": -104.84309387207031, + "loss": 0.5468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2065582275390625, + "rewards/margins": 0.3886215090751648, + "rewards/rejected": -0.1820632964372635, + "step": 3903 + }, + { + "epoch": 0.23, + "learning_rate": 9.014739065165379e-08, + "logits/chosen": -1.8759833574295044, + "logits/rejected": -1.8752365112304688, + "logps/chosen": -94.30610656738281, + "logps/rejected": -210.76962280273438, + "loss": 0.8395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7979095578193665, + "rewards/margins": 0.4819595217704773, + "rewards/rejected": -1.2798690795898438, + "step": 3904 + }, + { + "epoch": 0.23, + "learning_rate": 9.014177275531146e-08, + "logits/chosen": -1.8907767534255981, + "logits/rejected": -1.8294124603271484, + "logps/chosen": -272.966064453125, + "logps/rejected": -488.3935546875, + "loss": 0.1654, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5393860340118408, + "rewards/margins": 1.1920228004455566, + "rewards/rejected": 0.34736329317092896, + "step": 3905 + }, + { + "epoch": 0.23, + "learning_rate": 9.013615343293701e-08, + "logits/chosen": -1.8606233596801758, + "logits/rejected": -1.873488187789917, + "logps/chosen": -171.55906677246094, + "logps/rejected": -277.0879211425781, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8123825192451477, + "rewards/margins": 0.7860061526298523, + "rewards/rejected": 0.02637634240090847, + "step": 3906 + }, + { + "epoch": 0.23, + "learning_rate": 9.013053268473008e-08, + "logits/chosen": -2.1645944118499756, + "logits/rejected": -2.1624646186828613, + "logps/chosen": -83.3219985961914, + "logps/rejected": -278.156494140625, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4211387634277344, + "rewards/margins": 1.3305107355117798, + "rewards/rejected": -0.9093719720840454, + "step": 3907 + }, + { + "epoch": 0.23, + "learning_rate": 9.012491051089032e-08, + "logits/chosen": -1.9710856676101685, + "logits/rejected": -1.9049311876296997, + "logps/chosen": -357.62896728515625, + "logps/rejected": -496.0426330566406, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0660400390625, + "rewards/margins": 0.49767452478408813, + "rewards/rejected": 0.5683655142784119, + "step": 3908 + }, + { + "epoch": 0.23, + "learning_rate": 9.011928691161746e-08, + "logits/chosen": -1.9175200462341309, + "logits/rejected": -1.7998847961425781, + "logps/chosen": -328.6599426269531, + "logps/rejected": -744.20361328125, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8688201904296875, + "rewards/margins": 0.7099701166152954, + "rewards/rejected": 0.15885010361671448, + "step": 3909 + }, + { + "epoch": 0.23, + "learning_rate": 9.011366188711129e-08, + "logits/chosen": -2.1027660369873047, + "logits/rejected": -2.0786237716674805, + "logps/chosen": -36.90361785888672, + "logps/rejected": -191.3604278564453, + "loss": 0.4592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2545303404331207, + "rewards/margins": 0.7860993146896362, + "rewards/rejected": -0.5315689444541931, + "step": 3910 + }, + { + "epoch": 0.23, + "learning_rate": 9.010803543757165e-08, + "logits/chosen": -1.9592331647872925, + "logits/rejected": -1.9275660514831543, + "logps/chosen": -266.47943115234375, + "logps/rejected": -318.13580322265625, + "loss": 0.401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8945862054824829, + "rewards/margins": 0.27760010957717896, + "rewards/rejected": 0.616986095905304, + "step": 3911 + }, + { + "epoch": 0.23, + "learning_rate": 9.010240756319836e-08, + "logits/chosen": -2.001124382019043, + "logits/rejected": -1.9394725561141968, + "logps/chosen": -248.07693481445312, + "logps/rejected": -645.0526123046875, + "loss": 0.1947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8923859000205994, + "rewards/margins": 2.8267974853515625, + "rewards/rejected": -1.934411644935608, + "step": 3912 + }, + { + "epoch": 0.23, + "learning_rate": 9.009677826419142e-08, + "logits/chosen": -2.1696972846984863, + "logits/rejected": -2.132941246032715, + "logps/chosen": -247.67784118652344, + "logps/rejected": -461.11517333984375, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6239089965820312, + "rewards/margins": 0.19158172607421875, + "rewards/rejected": 1.4323272705078125, + "step": 3913 + }, + { + "epoch": 0.23, + "learning_rate": 9.009114754075078e-08, + "logits/chosen": -2.1278274059295654, + "logits/rejected": -2.121162176132202, + "logps/chosen": -0.2949872612953186, + "logps/rejected": -126.97099304199219, + "loss": 0.4505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005078643443994224, + "rewards/margins": 1.4481457471847534, + "rewards/rejected": -1.447637915611267, + "step": 3914 + }, + { + "epoch": 0.23, + "learning_rate": 9.008551539307647e-08, + "logits/chosen": -1.9815160036087036, + "logits/rejected": -1.9841277599334717, + "logps/chosen": -4.4138288497924805, + "logps/rejected": -315.566650390625, + "loss": 0.4413, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06370172649621964, + "rewards/margins": 1.739652156829834, + "rewards/rejected": -1.8033539056777954, + "step": 3915 + }, + { + "epoch": 0.23, + "learning_rate": 9.007988182136857e-08, + "logits/chosen": -2.013637065887451, + "logits/rejected": -1.985960841178894, + "logps/chosen": -223.4360809326172, + "logps/rejected": -316.29144287109375, + "loss": 0.3483, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3065003156661987, + "rewards/margins": 0.23972177505493164, + "rewards/rejected": 1.066778540611267, + "step": 3916 + }, + { + "epoch": 0.23, + "learning_rate": 9.007424682582722e-08, + "logits/chosen": -2.1209161281585693, + "logits/rejected": -2.111574411392212, + "logps/chosen": -250.03033447265625, + "logps/rejected": -433.0115966796875, + "loss": 0.2217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.757128894329071, + "rewards/margins": 1.6595337390899658, + "rewards/rejected": -0.90240478515625, + "step": 3917 + }, + { + "epoch": 0.23, + "learning_rate": 9.006861040665259e-08, + "logits/chosen": -1.973399043083191, + "logits/rejected": -1.9736056327819824, + "logps/chosen": -148.089111328125, + "logps/rejected": -319.61810302734375, + "loss": 0.2302, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.035121202468872, + "rewards/margins": 1.0272263288497925, + "rewards/rejected": 0.00789489783346653, + "step": 3918 + }, + { + "epoch": 0.23, + "learning_rate": 9.006297256404494e-08, + "logits/chosen": -2.217475414276123, + "logits/rejected": -2.218503952026367, + "logps/chosen": -24.97377586364746, + "logps/rejected": -126.50650024414062, + "loss": 0.7259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4834577739238739, + "rewards/margins": 0.536758542060852, + "rewards/rejected": -1.0202163457870483, + "step": 3919 + }, + { + "epoch": 0.23, + "learning_rate": 9.005733329820453e-08, + "logits/chosen": -2.023829936981201, + "logits/rejected": -2.0458314418792725, + "logps/chosen": -205.63986206054688, + "logps/rejected": -431.9466857910156, + "loss": 0.1825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2359603643417358, + "rewards/margins": 1.517970323562622, + "rewards/rejected": -0.28200989961624146, + "step": 3920 + }, + { + "epoch": 0.23, + "learning_rate": 9.005169260933169e-08, + "logits/chosen": -2.1117708683013916, + "logits/rejected": -2.109419345855713, + "logps/chosen": -15.613333702087402, + "logps/rejected": -147.27127075195312, + "loss": 0.5393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030712319537997246, + "rewards/margins": 0.6945486068725586, + "rewards/rejected": -0.7252609133720398, + "step": 3921 + }, + { + "epoch": 0.23, + "learning_rate": 9.004605049762681e-08, + "logits/chosen": -1.9705941677093506, + "logits/rejected": -1.9738842248916626, + "logps/chosen": -314.4552307128906, + "logps/rejected": -424.98443603515625, + "loss": 0.166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9753021001815796, + "rewards/margins": 1.2487335205078125, + "rewards/rejected": 0.7265686392784119, + "step": 3922 + }, + { + "epoch": 0.23, + "learning_rate": 9.004040696329034e-08, + "logits/chosen": -2.1473140716552734, + "logits/rejected": -2.143958806991577, + "logps/chosen": -13.79163646697998, + "logps/rejected": -135.7344207763672, + "loss": 0.5321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1832815259695053, + "rewards/margins": 0.5638524889945984, + "rewards/rejected": -0.3805709779262543, + "step": 3923 + }, + { + "epoch": 0.23, + "learning_rate": 9.003476200652276e-08, + "logits/chosen": -2.0140631198883057, + "logits/rejected": -1.9995390176773071, + "logps/chosen": -70.17668151855469, + "logps/rejected": -252.8489532470703, + "loss": 0.4058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07847137749195099, + "rewards/margins": 2.2507872581481934, + "rewards/rejected": -2.329258680343628, + "step": 3924 + }, + { + "epoch": 0.23, + "learning_rate": 9.00291156275246e-08, + "logits/chosen": -2.0933291912078857, + "logits/rejected": -2.089376211166382, + "logps/chosen": -12.740224838256836, + "logps/rejected": -134.67556762695312, + "loss": 0.4879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.271394819021225, + "rewards/margins": 0.7464131116867065, + "rewards/rejected": -0.47501832246780396, + "step": 3925 + }, + { + "epoch": 0.23, + "learning_rate": 9.002346782649643e-08, + "logits/chosen": -2.118464469909668, + "logits/rejected": -2.0867905616760254, + "logps/chosen": -65.07709503173828, + "logps/rejected": -226.84230041503906, + "loss": 0.4291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32220458984375, + "rewards/margins": 0.9007522463798523, + "rewards/rejected": -0.5785476565361023, + "step": 3926 + }, + { + "epoch": 0.23, + "learning_rate": 9.001781860363892e-08, + "logits/chosen": -2.0919454097747803, + "logits/rejected": -2.039360761642456, + "logps/chosen": -190.08627319335938, + "logps/rejected": -420.5928039550781, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0724700689315796, + "rewards/margins": 0.8209228515625, + "rewards/rejected": 0.251547247171402, + "step": 3927 + }, + { + "epoch": 0.23, + "learning_rate": 9.001216795915276e-08, + "logits/chosen": -2.0802574157714844, + "logits/rejected": -2.077085018157959, + "logps/chosen": -19.232803344726562, + "logps/rejected": -82.50967407226562, + "loss": 0.7324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38736963272094727, + "rewards/margins": 0.3813781142234802, + "rewards/rejected": -0.7687477469444275, + "step": 3928 + }, + { + "epoch": 0.23, + "learning_rate": 9.000651589323865e-08, + "logits/chosen": -1.9511195421218872, + "logits/rejected": -1.929872989654541, + "logps/chosen": -229.72979736328125, + "logps/rejected": -401.6511535644531, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9628921747207642, + "rewards/margins": 1.2067307233810425, + "rewards/rejected": -0.24383850395679474, + "step": 3929 + }, + { + "epoch": 0.23, + "learning_rate": 9.000086240609743e-08, + "logits/chosen": -2.0323619842529297, + "logits/rejected": -1.924963116645813, + "logps/chosen": -219.97418212890625, + "logps/rejected": -354.2469482421875, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.285369873046875, + "rewards/margins": 0.6725494265556335, + "rewards/rejected": 0.6128204464912415, + "step": 3930 + }, + { + "epoch": 0.23, + "learning_rate": 8.999520749792988e-08, + "logits/chosen": -1.9541949033737183, + "logits/rejected": -1.9690332412719727, + "logps/chosen": -334.7078857421875, + "logps/rejected": -402.412353515625, + "loss": 0.481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7328430414199829, + "rewards/margins": 0.029040515422821045, + "rewards/rejected": 0.7038025259971619, + "step": 3931 + }, + { + "epoch": 0.23, + "learning_rate": 8.998955116893694e-08, + "logits/chosen": -2.089545965194702, + "logits/rejected": -2.086052417755127, + "logps/chosen": -39.51319122314453, + "logps/rejected": -212.70433044433594, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07645530998706818, + "rewards/margins": 0.3940548300743103, + "rewards/rejected": -0.31759950518608093, + "step": 3932 + }, + { + "epoch": 0.23, + "learning_rate": 8.998389341931953e-08, + "logits/chosen": -2.102917432785034, + "logits/rejected": -2.0981833934783936, + "logps/chosen": -2.540879487991333, + "logps/rejected": -124.55162811279297, + "loss": 0.5395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018653630977496505, + "rewards/margins": 0.7061981558799744, + "rewards/rejected": -0.708063542842865, + "step": 3933 + }, + { + "epoch": 0.23, + "learning_rate": 8.997823424927864e-08, + "logits/chosen": -2.099377155303955, + "logits/rejected": -2.126450300216675, + "logps/chosen": -199.82058715820312, + "logps/rejected": -141.87962341308594, + "loss": 0.563, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4086761474609375, + "rewards/margins": -0.0629623532295227, + "rewards/rejected": 0.4716385006904602, + "step": 3934 + }, + { + "epoch": 0.23, + "learning_rate": 8.997257365901533e-08, + "logits/chosen": -2.136916160583496, + "logits/rejected": -2.115715980529785, + "logps/chosen": -43.590335845947266, + "logps/rejected": -204.4893035888672, + "loss": 0.4675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04422569274902344, + "rewards/margins": 1.0255444049835205, + "rewards/rejected": -0.9813186526298523, + "step": 3935 + }, + { + "epoch": 0.23, + "learning_rate": 8.996691164873066e-08, + "logits/chosen": -2.132067918777466, + "logits/rejected": -2.116856575012207, + "logps/chosen": -57.1392707824707, + "logps/rejected": -270.4578857421875, + "loss": 0.5997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5240635275840759, + "rewards/margins": 1.3310039043426514, + "rewards/rejected": -1.855067491531372, + "step": 3936 + }, + { + "epoch": 0.23, + "learning_rate": 8.996124821862579e-08, + "logits/chosen": -2.0917727947235107, + "logits/rejected": -2.0301570892333984, + "logps/chosen": -209.3621826171875, + "logps/rejected": -524.7110595703125, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7626678347587585, + "rewards/margins": 0.8443450927734375, + "rewards/rejected": -0.08167725056409836, + "step": 3937 + }, + { + "epoch": 0.23, + "learning_rate": 8.995558336890192e-08, + "logits/chosen": -1.9962278604507446, + "logits/rejected": -1.9986090660095215, + "logps/chosen": -5.390965938568115, + "logps/rejected": -152.28271484375, + "loss": 0.4236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07467212527990341, + "rewards/margins": 1.5505602359771729, + "rewards/rejected": -1.4758881330490112, + "step": 3938 + }, + { + "epoch": 0.23, + "learning_rate": 8.994991709976027e-08, + "logits/chosen": -2.044219493865967, + "logits/rejected": -2.0453317165374756, + "logps/chosen": -200.18768310546875, + "logps/rejected": -415.87457275390625, + "loss": 0.22, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0291045904159546, + "rewards/margins": 1.3820769786834717, + "rewards/rejected": -0.3529724180698395, + "step": 3939 + }, + { + "epoch": 0.23, + "learning_rate": 8.994424941140217e-08, + "logits/chosen": -1.917620062828064, + "logits/rejected": -1.9246693849563599, + "logps/chosen": -226.8818359375, + "logps/rejected": -506.716064453125, + "loss": 0.1604, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2707947492599487, + "rewards/margins": 1.709396481513977, + "rewards/rejected": -0.43860170245170593, + "step": 3940 + }, + { + "epoch": 0.23, + "learning_rate": 8.993858030402892e-08, + "logits/chosen": -2.144886016845703, + "logits/rejected": -2.122846841812134, + "logps/chosen": -72.13809967041016, + "logps/rejected": -191.1493682861328, + "loss": 0.7948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.541760265827179, + "rewards/margins": 0.1554824709892273, + "rewards/rejected": -0.6972427368164062, + "step": 3941 + }, + { + "epoch": 0.23, + "learning_rate": 8.993290977784197e-08, + "logits/chosen": -2.040386915206909, + "logits/rejected": -2.036731004714966, + "logps/chosen": -40.595733642578125, + "logps/rejected": -145.31163024902344, + "loss": 0.5981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13315124809741974, + "rewards/margins": 0.33619993925094604, + "rewards/rejected": -0.2030487060546875, + "step": 3942 + }, + { + "epoch": 0.23, + "learning_rate": 8.99272378330427e-08, + "logits/chosen": -2.136425495147705, + "logits/rejected": -2.073786735534668, + "logps/chosen": -210.58572387695312, + "logps/rejected": -482.31964111328125, + "loss": 0.2387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6085373163223267, + "rewards/margins": 0.9817428588867188, + "rewards/rejected": 0.6267944574356079, + "step": 3943 + }, + { + "epoch": 0.23, + "learning_rate": 8.992156446983264e-08, + "logits/chosen": -2.1587631702423096, + "logits/rejected": -2.1506502628326416, + "logps/chosen": -30.200971603393555, + "logps/rejected": -194.04071044921875, + "loss": 0.3322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2331399917602539, + "rewards/margins": 1.5310983657836914, + "rewards/rejected": -1.2979583740234375, + "step": 3944 + }, + { + "epoch": 0.23, + "learning_rate": 8.991588968841335e-08, + "logits/chosen": -2.0174543857574463, + "logits/rejected": -2.0133168697357178, + "logps/chosen": -223.48020935058594, + "logps/rejected": -393.85455322265625, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2204726934432983, + "rewards/margins": 2.576124668121338, + "rewards/rejected": -1.35565185546875, + "step": 3945 + }, + { + "epoch": 0.23, + "learning_rate": 8.991021348898638e-08, + "logits/chosen": -1.8705278635025024, + "logits/rejected": -1.887420415878296, + "logps/chosen": -147.97003173828125, + "logps/rejected": -278.38275146484375, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8907455801963806, + "rewards/margins": 0.8001022934913635, + "rewards/rejected": 0.09064330905675888, + "step": 3946 + }, + { + "epoch": 0.23, + "learning_rate": 8.990453587175342e-08, + "logits/chosen": -2.011547088623047, + "logits/rejected": -2.0002377033233643, + "logps/chosen": -27.559656143188477, + "logps/rejected": -312.1697692871094, + "loss": 0.4253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22346821427345276, + "rewards/margins": 1.3438050746917725, + "rewards/rejected": -1.120336890220642, + "step": 3947 + }, + { + "epoch": 0.23, + "learning_rate": 8.989885683691615e-08, + "logits/chosen": -1.8890866041183472, + "logits/rejected": -1.9088224172592163, + "logps/chosen": -367.4887390136719, + "logps/rejected": -308.22991943359375, + "loss": 0.3669, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0844024419784546, + "rewards/margins": 0.5605559945106506, + "rewards/rejected": 0.523846447467804, + "step": 3948 + }, + { + "epoch": 0.23, + "learning_rate": 8.989317638467633e-08, + "logits/chosen": -2.194286346435547, + "logits/rejected": -2.176088333129883, + "logps/chosen": -4.9587812423706055, + "logps/rejected": -92.53990173339844, + "loss": 0.5718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10227866470813751, + "rewards/margins": 0.35939842462539673, + "rewards/rejected": -0.257119745016098, + "step": 3949 + }, + { + "epoch": 0.23, + "learning_rate": 8.988749451523574e-08, + "logits/chosen": -2.109119415283203, + "logits/rejected": -2.0955452919006348, + "logps/chosen": -174.37599182128906, + "logps/rejected": -239.06494140625, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8118850588798523, + "rewards/margins": 0.4671783149242401, + "rewards/rejected": 0.3447067439556122, + "step": 3950 + }, + { + "epoch": 0.23, + "learning_rate": 8.988181122879624e-08, + "logits/chosen": -2.325221300125122, + "logits/rejected": -2.321044683456421, + "logps/chosen": -10.095870971679688, + "logps/rejected": -239.31668090820312, + "loss": 0.3747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2532198131084442, + "rewards/margins": 1.8132952451705933, + "rewards/rejected": -1.5600754022598267, + "step": 3951 + }, + { + "epoch": 0.23, + "learning_rate": 8.987612652555971e-08, + "logits/chosen": -2.1414127349853516, + "logits/rejected": -2.1440014839172363, + "logps/chosen": -15.515664100646973, + "logps/rejected": -73.47891998291016, + "loss": 0.5033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01228113193064928, + "rewards/margins": 0.9308045506477356, + "rewards/rejected": -0.9185234308242798, + "step": 3952 + }, + { + "epoch": 0.23, + "learning_rate": 8.987044040572812e-08, + "logits/chosen": -2.286283254623413, + "logits/rejected": -2.2772998809814453, + "logps/chosen": -57.616153717041016, + "logps/rejected": -402.53436279296875, + "loss": 0.4076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12921638786792755, + "rewards/margins": 3.4278454780578613, + "rewards/rejected": -3.5570619106292725, + "step": 3953 + }, + { + "epoch": 0.23, + "learning_rate": 8.986475286950345e-08, + "logits/chosen": -2.0560407638549805, + "logits/rejected": -2.0602564811706543, + "logps/chosen": -51.8022575378418, + "logps/rejected": -109.73106384277344, + "loss": 0.5129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07895012199878693, + "rewards/margins": 0.6978985071182251, + "rewards/rejected": -0.6189484000205994, + "step": 3954 + }, + { + "epoch": 0.23, + "learning_rate": 8.985906391708777e-08, + "logits/chosen": -2.1966583728790283, + "logits/rejected": -2.1852314472198486, + "logps/chosen": -7.658987522125244, + "logps/rejected": -193.26234436035156, + "loss": 0.4705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02974724769592285, + "rewards/margins": 1.2184308767318726, + "rewards/rejected": -1.2481781244277954, + "step": 3955 + }, + { + "epoch": 0.23, + "learning_rate": 8.985337354868316e-08, + "logits/chosen": -1.9638091325759888, + "logits/rejected": -1.9603536128997803, + "logps/chosen": -248.15090942382812, + "logps/rejected": -363.8904724121094, + "loss": 0.1121, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8240387439727783, + "rewards/margins": 1.9726074934005737, + "rewards/rejected": -0.14856873452663422, + "step": 3956 + }, + { + "epoch": 0.23, + "learning_rate": 8.984768176449178e-08, + "logits/chosen": -2.0004220008850098, + "logits/rejected": -2.016838550567627, + "logps/chosen": -232.461669921875, + "logps/rejected": -286.2878723144531, + "loss": 0.1149, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6291077136993408, + "rewards/margins": 2.045858860015869, + "rewards/rejected": -0.41675111651420593, + "step": 3957 + }, + { + "epoch": 0.23, + "learning_rate": 8.984198856471583e-08, + "logits/chosen": -2.105506181716919, + "logits/rejected": -2.1011478900909424, + "logps/chosen": -34.799339294433594, + "logps/rejected": -120.39784240722656, + "loss": 0.6492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29762575030326843, + "rewards/margins": 0.34863051772117615, + "rewards/rejected": -0.6462562680244446, + "step": 3958 + }, + { + "epoch": 0.23, + "learning_rate": 8.983629394955755e-08, + "logits/chosen": -2.0442705154418945, + "logits/rejected": -2.041968822479248, + "logps/chosen": -0.0019863557536154985, + "logps/rejected": -123.54391479492188, + "loss": 0.5737, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.392112406203523e-05, + "rewards/margins": 0.5476218461990356, + "rewards/rejected": -0.5477157831192017, + "step": 3959 + }, + { + "epoch": 0.23, + "learning_rate": 8.983059791921925e-08, + "logits/chosen": -2.064035415649414, + "logits/rejected": -2.0601589679718018, + "logps/chosen": -5.960397538729012e-05, + "logps/rejected": -65.96400451660156, + "loss": 0.5074, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.536451983649386e-08, + "rewards/margins": 0.9554350972175598, + "rewards/rejected": -0.9554352164268494, + "step": 3960 + }, + { + "epoch": 0.23, + "learning_rate": 8.982490047390328e-08, + "logits/chosen": -2.0253989696502686, + "logits/rejected": -2.0131278038024902, + "logps/chosen": -65.13999938964844, + "logps/rejected": -247.30259704589844, + "loss": 0.4547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.194041445851326, + "rewards/margins": 1.0561646223068237, + "rewards/rejected": -0.8621231317520142, + "step": 3961 + }, + { + "epoch": 0.23, + "learning_rate": 8.981920161381204e-08, + "logits/chosen": -2.1245944499969482, + "logits/rejected": -2.12870454788208, + "logps/chosen": -0.0928061455488205, + "logps/rejected": -223.58731079101562, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0037683770060539246, + "rewards/margins": 3.0717432498931885, + "rewards/rejected": -3.067974805831909, + "step": 3962 + }, + { + "epoch": 0.23, + "learning_rate": 8.9813501339148e-08, + "logits/chosen": -2.094525098800659, + "logits/rejected": -2.0883066654205322, + "logps/chosen": -229.85447692871094, + "logps/rejected": -401.538330078125, + "loss": 0.1783, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4102783203125, + "rewards/margins": 1.390966773033142, + "rewards/rejected": 0.01931152306497097, + "step": 3963 + }, + { + "epoch": 0.23, + "learning_rate": 8.980779965011362e-08, + "logits/chosen": -1.8756808042526245, + "logits/rejected": -1.8770270347595215, + "logps/chosen": -40.0634765625, + "logps/rejected": -62.68058776855469, + "loss": 0.7267, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1381816864013672, + "rewards/margins": -0.009602740406990051, + "rewards/rejected": -0.12857894599437714, + "step": 3964 + }, + { + "epoch": 0.23, + "learning_rate": 8.980209654691148e-08, + "logits/chosen": -2.0824506282806396, + "logits/rejected": -2.062685489654541, + "logps/chosen": -206.65182495117188, + "logps/rejected": -280.04217529296875, + "loss": 0.3729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8086288571357727, + "rewards/margins": 0.6503463983535767, + "rewards/rejected": 0.15828247368335724, + "step": 3965 + }, + { + "epoch": 0.23, + "learning_rate": 8.979639202974418e-08, + "logits/chosen": -2.1152403354644775, + "logits/rejected": -2.109174966812134, + "logps/chosen": -53.51015853881836, + "logps/rejected": -314.2261962890625, + "loss": 0.418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039093781262636185, + "rewards/margins": 1.242902398109436, + "rewards/rejected": -1.2038086652755737, + "step": 3966 + }, + { + "epoch": 0.23, + "learning_rate": 8.979068609881439e-08, + "logits/chosen": -1.8559824228286743, + "logits/rejected": -1.8264720439910889, + "logps/chosen": -257.2699279785156, + "logps/rejected": -443.07879638671875, + "loss": 0.155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.389929175376892, + "rewards/margins": 1.7369292974472046, + "rewards/rejected": -0.3470001220703125, + "step": 3967 + }, + { + "epoch": 0.23, + "learning_rate": 8.978497875432476e-08, + "logits/chosen": -1.9769216775894165, + "logits/rejected": -1.9542618989944458, + "logps/chosen": -105.19137573242188, + "logps/rejected": -295.8101806640625, + "loss": 0.5235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6000396609306335, + "rewards/margins": 1.5068817138671875, + "rewards/rejected": -2.106921434402466, + "step": 3968 + }, + { + "epoch": 0.23, + "learning_rate": 8.977926999647809e-08, + "logits/chosen": -2.1201584339141846, + "logits/rejected": -2.111459732055664, + "logps/chosen": -3.0875053198542446e-05, + "logps/rejected": -150.6307373046875, + "loss": 0.4833, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0265470368485694e-07, + "rewards/margins": 1.1928831338882446, + "rewards/rejected": -1.1928833723068237, + "step": 3969 + }, + { + "epoch": 0.23, + "learning_rate": 8.977355982547716e-08, + "logits/chosen": -2.1202681064605713, + "logits/rejected": -2.114283561706543, + "logps/chosen": -39.739662170410156, + "logps/rejected": -246.26116943359375, + "loss": 0.6053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.378042608499527, + "rewards/margins": 0.9533524513244629, + "rewards/rejected": -1.3313950300216675, + "step": 3970 + }, + { + "epoch": 0.23, + "learning_rate": 8.976784824152486e-08, + "logits/chosen": -2.1968915462493896, + "logits/rejected": -2.1929495334625244, + "logps/chosen": -175.14405822753906, + "logps/rejected": -390.0744323730469, + "loss": 0.1453, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4453048706054688, + "rewards/margins": 1.841151475906372, + "rewards/rejected": -0.39584657549858093, + "step": 3971 + }, + { + "epoch": 0.23, + "learning_rate": 8.976213524482403e-08, + "logits/chosen": -1.8742692470550537, + "logits/rejected": -1.8695893287658691, + "logps/chosen": -20.802988052368164, + "logps/rejected": -288.88592529296875, + "loss": 0.4598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07455310970544815, + "rewards/margins": 0.9979296326637268, + "rewards/rejected": -0.9233765006065369, + "step": 3972 + }, + { + "epoch": 0.23, + "learning_rate": 8.975642083557769e-08, + "logits/chosen": -1.9143238067626953, + "logits/rejected": -1.8829225301742554, + "logps/chosen": -322.17120361328125, + "logps/rejected": -486.72943115234375, + "loss": 0.7103, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.33831483125686646, + "rewards/margins": -0.2829986810684204, + "rewards/rejected": 0.6213135123252869, + "step": 3973 + }, + { + "epoch": 0.23, + "learning_rate": 8.975070501398878e-08, + "logits/chosen": -2.1292314529418945, + "logits/rejected": -2.1172430515289307, + "logps/chosen": -2.941047430038452, + "logps/rejected": -270.641357421875, + "loss": 0.3922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.170706644654274, + "rewards/margins": 3.283493995666504, + "rewards/rejected": -3.4542007446289062, + "step": 3974 + }, + { + "epoch": 0.23, + "learning_rate": 8.974498778026041e-08, + "logits/chosen": -2.0932958126068115, + "logits/rejected": -2.0979902744293213, + "logps/chosen": -0.024911627173423767, + "logps/rejected": -40.09678268432617, + "loss": 0.6503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005940910778008401, + "rewards/margins": 0.16462846100330353, + "rewards/rejected": -0.16522255539894104, + "step": 3975 + }, + { + "epoch": 0.23, + "learning_rate": 8.973926913459564e-08, + "logits/chosen": -1.9223932027816772, + "logits/rejected": -1.9217146635055542, + "logps/chosen": -298.607421875, + "logps/rejected": -397.66497802734375, + "loss": 0.3864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.726306140422821, + "rewards/margins": 0.680560290813446, + "rewards/rejected": 0.045745849609375, + "step": 3976 + }, + { + "epoch": 0.23, + "learning_rate": 8.973354907719766e-08, + "logits/chosen": -2.0907440185546875, + "logits/rejected": -2.1274302005767822, + "logps/chosen": -195.49374389648438, + "logps/rejected": -272.8067626953125, + "loss": 0.4714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19954834878444672, + "rewards/margins": 0.6517120599746704, + "rewards/rejected": -0.4521636962890625, + "step": 3977 + }, + { + "epoch": 0.23, + "learning_rate": 8.972782760826965e-08, + "logits/chosen": -1.814728856086731, + "logits/rejected": -1.7710093259811401, + "logps/chosen": -307.9123229980469, + "logps/rejected": -313.992919921875, + "loss": 0.2433, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1707671880722046, + "rewards/margins": 1.1790008544921875, + "rewards/rejected": -0.008233643136918545, + "step": 3978 + }, + { + "epoch": 0.23, + "learning_rate": 8.972210472801489e-08, + "logits/chosen": -2.0851359367370605, + "logits/rejected": -2.161816120147705, + "logps/chosen": -183.48904418945312, + "logps/rejected": -365.52935791015625, + "loss": 0.3899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8319671750068665, + "rewards/margins": 0.416342169046402, + "rewards/rejected": 0.4156250059604645, + "step": 3979 + }, + { + "epoch": 0.23, + "learning_rate": 8.971638043663665e-08, + "logits/chosen": -1.8596305847167969, + "logits/rejected": -1.8736131191253662, + "logps/chosen": -249.08493041992188, + "logps/rejected": -470.0887145996094, + "loss": 0.1241, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6340973377227783, + "rewards/margins": 1.8683899641036987, + "rewards/rejected": -0.23429261147975922, + "step": 3980 + }, + { + "epoch": 0.23, + "learning_rate": 8.971065473433831e-08, + "logits/chosen": -1.8983075618743896, + "logits/rejected": -1.8995038270950317, + "logps/chosen": -7.265500068664551, + "logps/rejected": -111.41584777832031, + "loss": 0.6004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047440530033782125, + "rewards/margins": 0.43345123529434204, + "rewards/rejected": -0.4339256286621094, + "step": 3981 + }, + { + "epoch": 0.23, + "learning_rate": 8.970492762132327e-08, + "logits/chosen": -1.8615195751190186, + "logits/rejected": -1.9174610376358032, + "logps/chosen": -223.11685180664062, + "logps/rejected": -352.078857421875, + "loss": 0.3973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0434387922286987, + "rewards/margins": 0.2948181629180908, + "rewards/rejected": 0.7486206293106079, + "step": 3982 + }, + { + "epoch": 0.23, + "learning_rate": 8.969919909779498e-08, + "logits/chosen": -1.986469030380249, + "logits/rejected": -1.9949127435684204, + "logps/chosen": -48.747764587402344, + "logps/rejected": -251.47409057617188, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33338966965675354, + "rewards/margins": 2.6713125705718994, + "rewards/rejected": -2.3379228115081787, + "step": 3983 + }, + { + "epoch": 0.23, + "learning_rate": 8.969346916395695e-08, + "logits/chosen": -2.0906691551208496, + "logits/rejected": -2.0485353469848633, + "logps/chosen": -193.63543701171875, + "logps/rejected": -438.6356506347656, + "loss": 0.2423, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1519012451171875, + "rewards/margins": 0.6582610607147217, + "rewards/rejected": 1.4936401844024658, + "step": 3984 + }, + { + "epoch": 0.23, + "learning_rate": 8.968773782001273e-08, + "logits/chosen": -2.0276472568511963, + "logits/rejected": -2.019827127456665, + "logps/chosen": -11.795260429382324, + "logps/rejected": -85.16096496582031, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08518142998218536, + "rewards/margins": 0.13773269951343536, + "rewards/rejected": -0.22291412949562073, + "step": 3985 + }, + { + "epoch": 0.23, + "learning_rate": 8.968200506616594e-08, + "logits/chosen": -1.9443777799606323, + "logits/rejected": -1.938912034034729, + "logps/chosen": -1.065889596939087, + "logps/rejected": -341.31256103515625, + "loss": 0.3706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010078364983201027, + "rewards/margins": 3.1257035732269287, + "rewards/rejected": -3.13578200340271, + "step": 3986 + }, + { + "epoch": 0.23, + "learning_rate": 8.96762709026202e-08, + "logits/chosen": -1.9924705028533936, + "logits/rejected": -1.9859226942062378, + "logps/chosen": -0.2073890119791031, + "logps/rejected": -364.0379943847656, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0023472921457141638, + "rewards/margins": 6.085419654846191, + "rewards/rejected": -6.083072185516357, + "step": 3987 + }, + { + "epoch": 0.23, + "learning_rate": 8.967053532957927e-08, + "logits/chosen": -2.060701608657837, + "logits/rejected": -2.046211004257202, + "logps/chosen": -247.35020446777344, + "logps/rejected": -339.25665283203125, + "loss": 0.4929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5270339846611023, + "rewards/margins": 0.28057098388671875, + "rewards/rejected": 0.24646301567554474, + "step": 3988 + }, + { + "epoch": 0.23, + "learning_rate": 8.966479834724685e-08, + "logits/chosen": -2.0297675132751465, + "logits/rejected": -2.0159478187561035, + "logps/chosen": -181.58494567871094, + "logps/rejected": -243.61370849609375, + "loss": 0.5321, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6507583856582642, + "rewards/margins": -0.18097072839736938, + "rewards/rejected": 0.8317291140556335, + "step": 3989 + }, + { + "epoch": 0.23, + "learning_rate": 8.965905995582678e-08, + "logits/chosen": -2.150940418243408, + "logits/rejected": -2.1433188915252686, + "logps/chosen": -112.94078063964844, + "logps/rejected": -296.2431335449219, + "loss": 0.249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42394790053367615, + "rewards/margins": 3.2625739574432373, + "rewards/rejected": -2.8386261463165283, + "step": 3990 + }, + { + "epoch": 0.23, + "learning_rate": 8.965332015552291e-08, + "logits/chosen": -2.0437071323394775, + "logits/rejected": -2.034846305847168, + "logps/chosen": -1.6391805410385132, + "logps/rejected": -223.75875854492188, + "loss": 0.4415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021431291475892067, + "rewards/margins": 1.611043930053711, + "rewards/rejected": -1.6324752569198608, + "step": 3991 + }, + { + "epoch": 0.23, + "learning_rate": 8.964757894653915e-08, + "logits/chosen": -2.0777409076690674, + "logits/rejected": -2.07313871383667, + "logps/chosen": -40.30453872680664, + "logps/rejected": -100.22327423095703, + "loss": 0.7156, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07652206718921661, + "rewards/margins": -0.049652099609375, + "rewards/rejected": 0.1261741667985916, + "step": 3992 + }, + { + "epoch": 0.23, + "learning_rate": 8.964183632907945e-08, + "logits/chosen": -1.8480088710784912, + "logits/rejected": -1.7992395162582397, + "logps/chosen": -162.35400390625, + "logps/rejected": -283.6559753417969, + "loss": 0.1847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3068832159042358, + "rewards/margins": 1.8436203002929688, + "rewards/rejected": -0.5367370843887329, + "step": 3993 + }, + { + "epoch": 0.23, + "learning_rate": 8.963609230334779e-08, + "logits/chosen": -2.070016384124756, + "logits/rejected": -2.0512635707855225, + "logps/chosen": -187.8499755859375, + "logps/rejected": -481.32958984375, + "loss": 0.7061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1295166015625, + "rewards/margins": 3.710257053375244, + "rewards/rejected": -4.839773654937744, + "step": 3994 + }, + { + "epoch": 0.23, + "learning_rate": 8.963034686954826e-08, + "logits/chosen": -2.190894365310669, + "logits/rejected": -2.1948330402374268, + "logps/chosen": -181.95138549804688, + "logps/rejected": -459.09393310546875, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6481841802597046, + "rewards/margins": 2.307690382003784, + "rewards/rejected": -0.6595062613487244, + "step": 3995 + }, + { + "epoch": 0.23, + "learning_rate": 8.962460002788496e-08, + "logits/chosen": -2.0675175189971924, + "logits/rejected": -2.060615062713623, + "logps/chosen": -0.004522367846220732, + "logps/rejected": -182.34930419921875, + "loss": 0.4211, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.548945334041491e-05, + "rewards/margins": 1.9595551490783691, + "rewards/rejected": -1.9594696760177612, + "step": 3996 + }, + { + "epoch": 0.23, + "learning_rate": 8.961885177856205e-08, + "logits/chosen": -2.1204190254211426, + "logits/rejected": -2.12857985496521, + "logps/chosen": -169.46792602539062, + "logps/rejected": -241.80169677734375, + "loss": 0.2443, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1585830450057983, + "rewards/margins": 0.9768462777137756, + "rewards/rejected": 0.1817367523908615, + "step": 3997 + }, + { + "epoch": 0.23, + "learning_rate": 8.961310212178372e-08, + "logits/chosen": -1.9801061153411865, + "logits/rejected": -2.0069501399993896, + "logps/chosen": -151.47178649902344, + "logps/rejected": -347.98626708984375, + "loss": 0.5897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.690075695514679, + "rewards/margins": 2.814511299133301, + "rewards/rejected": -3.504586935043335, + "step": 3998 + }, + { + "epoch": 0.23, + "learning_rate": 8.960735105775425e-08, + "logits/chosen": -2.0028319358825684, + "logits/rejected": -2.014679193496704, + "logps/chosen": -235.02725219726562, + "logps/rejected": -384.9045104980469, + "loss": 0.1766, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5729522705078125, + "rewards/margins": 1.4265778064727783, + "rewards/rejected": 0.14637450873851776, + "step": 3999 + }, + { + "epoch": 0.23, + "learning_rate": 8.960159858667791e-08, + "logits/chosen": -2.210862636566162, + "logits/rejected": -2.228980302810669, + "logps/chosen": -237.37863159179688, + "logps/rejected": -201.34547424316406, + "loss": 0.2913, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3986634016036987, + "rewards/margins": 0.6141709089279175, + "rewards/rejected": 0.7844924926757812, + "step": 4000 + }, + { + "epoch": 0.23, + "learning_rate": 8.959584470875909e-08, + "logits/chosen": -2.160327434539795, + "logits/rejected": -2.1558525562286377, + "logps/chosen": -31.041929244995117, + "logps/rejected": -120.93669891357422, + "loss": 0.6979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37908193469047546, + "rewards/margins": 0.16763970255851746, + "rewards/rejected": -0.5467216372489929, + "step": 4001 + }, + { + "epoch": 0.23, + "learning_rate": 8.959008942420216e-08, + "logits/chosen": -2.049103021621704, + "logits/rejected": -2.0462934970855713, + "logps/chosen": -13.760799407958984, + "logps/rejected": -203.99606323242188, + "loss": 0.4561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.044770050793886185, + "rewards/margins": 1.2754188776016235, + "rewards/rejected": -1.2306488752365112, + "step": 4002 + }, + { + "epoch": 0.23, + "learning_rate": 8.958433273321161e-08, + "logits/chosen": -2.045973062515259, + "logits/rejected": -2.025844097137451, + "logps/chosen": -15.048783302307129, + "logps/rejected": -204.21188354492188, + "loss": 0.3945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09696836769580841, + "rewards/margins": 2.269996404647827, + "rewards/rejected": -2.366964817047119, + "step": 4003 + }, + { + "epoch": 0.23, + "learning_rate": 8.957857463599195e-08, + "logits/chosen": -1.9278435707092285, + "logits/rejected": -1.90078604221344, + "logps/chosen": -267.4656982421875, + "logps/rejected": -321.01629638671875, + "loss": 0.413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9544464349746704, + "rewards/margins": 0.17543643712997437, + "rewards/rejected": 0.779009997844696, + "step": 4004 + }, + { + "epoch": 0.23, + "learning_rate": 8.957281513274769e-08, + "logits/chosen": -2.2068254947662354, + "logits/rejected": -2.1930289268493652, + "logps/chosen": -16.03424644470215, + "logps/rejected": -230.30810546875, + "loss": 0.41, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026346206665039062, + "rewards/margins": 2.097019672393799, + "rewards/rejected": -2.123365879058838, + "step": 4005 + }, + { + "epoch": 0.23, + "learning_rate": 8.956705422368349e-08, + "logits/chosen": -2.112401247024536, + "logits/rejected": -2.1154632568359375, + "logps/chosen": -16.809534072875977, + "logps/rejected": -183.85760498046875, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10269918292760849, + "rewards/margins": 2.126849412918091, + "rewards/rejected": -2.229548692703247, + "step": 4006 + }, + { + "epoch": 0.23, + "learning_rate": 8.956129190900397e-08, + "logits/chosen": -2.038593053817749, + "logits/rejected": -2.030369281768799, + "logps/chosen": -51.72951889038086, + "logps/rejected": -224.9512939453125, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43100854754447937, + "rewards/margins": 1.5662106275558472, + "rewards/rejected": -1.1352020502090454, + "step": 4007 + }, + { + "epoch": 0.23, + "learning_rate": 8.955552818891383e-08, + "logits/chosen": -2.003739356994629, + "logits/rejected": -1.9828076362609863, + "logps/chosen": -209.3475341796875, + "logps/rejected": -335.5985412597656, + "loss": 0.1683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7380478382110596, + "rewards/margins": 1.2255264520645142, + "rewards/rejected": 0.5125213861465454, + "step": 4008 + }, + { + "epoch": 0.23, + "learning_rate": 8.954976306361786e-08, + "logits/chosen": -2.074296236038208, + "logits/rejected": -2.056022882461548, + "logps/chosen": -94.10762023925781, + "logps/rejected": -221.24484252929688, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35661086440086365, + "rewards/margins": 0.4878791570663452, + "rewards/rejected": -0.13126830756664276, + "step": 4009 + }, + { + "epoch": 0.23, + "learning_rate": 8.954399653332086e-08, + "logits/chosen": -1.9726133346557617, + "logits/rejected": -1.9906798601150513, + "logps/chosen": -176.57223510742188, + "logps/rejected": -260.53143310546875, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1436386108398438, + "rewards/margins": 0.8911056518554688, + "rewards/rejected": 0.252532958984375, + "step": 4010 + }, + { + "epoch": 0.23, + "learning_rate": 8.953822859822764e-08, + "logits/chosen": -2.102365493774414, + "logits/rejected": -2.104598045349121, + "logps/chosen": -0.0008277191664092243, + "logps/rejected": -193.18951416015625, + "loss": 0.4386, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.485609290190041e-05, + "rewards/margins": 1.6462724208831787, + "rewards/rejected": -1.646327257156372, + "step": 4011 + }, + { + "epoch": 0.23, + "learning_rate": 8.953245925854316e-08, + "logits/chosen": -2.0880916118621826, + "logits/rejected": -2.054025650024414, + "logps/chosen": -335.1008605957031, + "logps/rejected": -491.1622009277344, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.962780773639679, + "rewards/margins": 1.3987823724746704, + "rewards/rejected": -0.43600159883499146, + "step": 4012 + }, + { + "epoch": 0.23, + "learning_rate": 8.952668851447234e-08, + "logits/chosen": -1.9985008239746094, + "logits/rejected": -1.9815452098846436, + "logps/chosen": -58.87910079956055, + "logps/rejected": -247.04742431640625, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31279221177101135, + "rewards/margins": 2.59926700592041, + "rewards/rejected": -2.2864747047424316, + "step": 4013 + }, + { + "epoch": 0.23, + "learning_rate": 8.952091636622021e-08, + "logits/chosen": -2.0465035438537598, + "logits/rejected": -2.041325092315674, + "logps/chosen": -50.15616226196289, + "logps/rejected": -185.95521545410156, + "loss": 0.4183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18605689704418182, + "rewards/margins": 1.4503238201141357, + "rewards/rejected": -1.2642669677734375, + "step": 4014 + }, + { + "epoch": 0.23, + "learning_rate": 8.951514281399182e-08, + "logits/chosen": -2.032467842102051, + "logits/rejected": -2.047562837600708, + "logps/chosen": -198.90408325195312, + "logps/rejected": -442.34442138671875, + "loss": 0.0643, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.020782470703125, + "rewards/margins": 3.158194065093994, + "rewards/rejected": -1.1374114751815796, + "step": 4015 + }, + { + "epoch": 0.23, + "learning_rate": 8.950936785799225e-08, + "logits/chosen": -1.9472500085830688, + "logits/rejected": -1.9438748359680176, + "logps/chosen": -9.054487228393555, + "logps/rejected": -173.29530334472656, + "loss": 0.4988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27510973811149597, + "rewards/margins": 1.278700351715088, + "rewards/rejected": -1.5538101196289062, + "step": 4016 + }, + { + "epoch": 0.23, + "learning_rate": 8.950359149842668e-08, + "logits/chosen": -1.9330214262008667, + "logits/rejected": -1.9321300983428955, + "logps/chosen": -77.61249542236328, + "logps/rejected": -429.4147033691406, + "loss": 0.4133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2163383513689041, + "rewards/margins": 3.1705360412597656, + "rewards/rejected": -3.3868744373321533, + "step": 4017 + }, + { + "epoch": 0.23, + "learning_rate": 8.949781373550029e-08, + "logits/chosen": -2.1292998790740967, + "logits/rejected": -2.128774881362915, + "logps/chosen": -0.0014293510466814041, + "logps/rejected": -121.96487426757812, + "loss": 0.5313, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1888025003136136e-05, + "rewards/margins": 0.7817533016204834, + "rewards/rejected": -0.7817314267158508, + "step": 4018 + }, + { + "epoch": 0.23, + "learning_rate": 8.949203456941838e-08, + "logits/chosen": -2.1409692764282227, + "logits/rejected": -2.114818572998047, + "logps/chosen": -139.18310546875, + "logps/rejected": -348.6098327636719, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6154510378837585, + "rewards/margins": 1.0734832286834717, + "rewards/rejected": -0.4580322206020355, + "step": 4019 + }, + { + "epoch": 0.23, + "learning_rate": 8.948625400038621e-08, + "logits/chosen": -2.2150769233703613, + "logits/rejected": -2.216198444366455, + "logps/chosen": -23.371746063232422, + "logps/rejected": -43.65290832519531, + "loss": 0.6615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016891861334443092, + "rewards/margins": 0.09146156162023544, + "rewards/rejected": -0.0745697021484375, + "step": 4020 + }, + { + "epoch": 0.23, + "learning_rate": 8.948047202860916e-08, + "logits/chosen": -2.151569366455078, + "logits/rejected": -2.1458487510681152, + "logps/chosen": -6.233321189880371, + "logps/rejected": -88.02085876464844, + "loss": 0.4897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03493843227624893, + "rewards/margins": 1.0333569049835205, + "rewards/rejected": -1.0682953596115112, + "step": 4021 + }, + { + "epoch": 0.23, + "learning_rate": 8.947468865429262e-08, + "logits/chosen": -2.0836527347564697, + "logits/rejected": -2.0939347743988037, + "logps/chosen": -220.0587158203125, + "logps/rejected": -355.81463623046875, + "loss": 0.2222, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4665848016738892, + "rewards/margins": 1.0172653198242188, + "rewards/rejected": 0.449319452047348, + "step": 4022 + }, + { + "epoch": 0.23, + "learning_rate": 8.946890387764203e-08, + "logits/chosen": -2.0722646713256836, + "logits/rejected": -2.0367374420166016, + "logps/chosen": -227.9844970703125, + "logps/rejected": -305.29248046875, + "loss": 0.2741, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4932937622070312, + "rewards/margins": 0.6388381719589233, + "rewards/rejected": 0.8544555902481079, + "step": 4023 + }, + { + "epoch": 0.23, + "learning_rate": 8.946311769886292e-08, + "logits/chosen": -2.2093374729156494, + "logits/rejected": -2.2003531455993652, + "logps/chosen": -34.75117492675781, + "logps/rejected": -178.01222229003906, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23122063279151917, + "rewards/margins": 2.6330015659332275, + "rewards/rejected": -2.401780843734741, + "step": 4024 + }, + { + "epoch": 0.23, + "learning_rate": 8.945733011816086e-08, + "logits/chosen": -1.9611701965332031, + "logits/rejected": -1.9764801263809204, + "logps/chosen": -228.58216857910156, + "logps/rejected": -334.8411560058594, + "loss": 0.4027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6393997073173523, + "rewards/margins": 0.6969131231307983, + "rewards/rejected": -0.05751342698931694, + "step": 4025 + }, + { + "epoch": 0.23, + "learning_rate": 8.94515411357414e-08, + "logits/chosen": -2.0642309188842773, + "logits/rejected": -2.0113813877105713, + "logps/chosen": -308.0826110839844, + "logps/rejected": -645.924560546875, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5345094203948975, + "rewards/margins": 3.547107219696045, + "rewards/rejected": -1.012597680091858, + "step": 4026 + }, + { + "epoch": 0.23, + "learning_rate": 8.944575075181022e-08, + "logits/chosen": -2.0476291179656982, + "logits/rejected": -2.054734706878662, + "logps/chosen": -204.1866455078125, + "logps/rejected": -374.63055419921875, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5243713855743408, + "rewards/margins": 0.9842835068702698, + "rewards/rejected": 0.540087878704071, + "step": 4027 + }, + { + "epoch": 0.23, + "learning_rate": 8.943995896657303e-08, + "logits/chosen": -1.9923189878463745, + "logits/rejected": -1.9512239694595337, + "logps/chosen": -319.89520263671875, + "logps/rejected": -448.5430908203125, + "loss": 0.0687, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2383058071136475, + "rewards/margins": 2.604767084121704, + "rewards/rejected": -0.3664611876010895, + "step": 4028 + }, + { + "epoch": 0.23, + "learning_rate": 8.943416578023557e-08, + "logits/chosen": -1.7916514873504639, + "logits/rejected": -1.795529842376709, + "logps/chosen": -76.00103759765625, + "logps/rejected": -261.64886474609375, + "loss": 0.3765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.268960565328598, + "rewards/margins": 1.5903900861740112, + "rewards/rejected": -1.3214294910430908, + "step": 4029 + }, + { + "epoch": 0.23, + "learning_rate": 8.942837119300366e-08, + "logits/chosen": -2.0350701808929443, + "logits/rejected": -2.0376574993133545, + "logps/chosen": -12.411324501037598, + "logps/rejected": -102.01152038574219, + "loss": 0.5323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08729629963636398, + "rewards/margins": 0.6641700863838196, + "rewards/rejected": -0.576873779296875, + "step": 4030 + }, + { + "epoch": 0.23, + "learning_rate": 8.942257520508314e-08, + "logits/chosen": -2.0793936252593994, + "logits/rejected": -2.0770583152770996, + "logps/chosen": -126.59532165527344, + "logps/rejected": -258.66845703125, + "loss": 0.4079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08019409328699112, + "rewards/margins": 1.3907990455627441, + "rewards/rejected": -1.3106049299240112, + "step": 4031 + }, + { + "epoch": 0.23, + "learning_rate": 8.94167778166799e-08, + "logits/chosen": -2.1697838306427, + "logits/rejected": -2.1298253536224365, + "logps/chosen": -325.8828125, + "logps/rejected": -411.47607421875, + "loss": 0.0798, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.017138719558716, + "rewards/margins": 3.0390563011169434, + "rewards/rejected": -1.021917700767517, + "step": 4032 + }, + { + "epoch": 0.23, + "learning_rate": 8.941097902799991e-08, + "logits/chosen": -1.9676692485809326, + "logits/rejected": -1.947672963142395, + "logps/chosen": -187.06027221679688, + "logps/rejected": -289.49591064453125, + "loss": 0.3727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9075866937637329, + "rewards/margins": 0.39068603515625, + "rewards/rejected": 0.5169006586074829, + "step": 4033 + }, + { + "epoch": 0.23, + "learning_rate": 8.940517883924916e-08, + "logits/chosen": -2.1536970138549805, + "logits/rejected": -2.1446115970611572, + "logps/chosen": -62.36060333251953, + "logps/rejected": -228.8769989013672, + "loss": 0.3864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38106536865234375, + "rewards/margins": 1.3021087646484375, + "rewards/rejected": -0.9210433959960938, + "step": 4034 + }, + { + "epoch": 0.23, + "learning_rate": 8.939937725063372e-08, + "logits/chosen": -1.9754289388656616, + "logits/rejected": -1.9666097164154053, + "logps/chosen": -152.898681640625, + "logps/rejected": -262.5728759765625, + "loss": 0.4561, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.062493920326233, + "rewards/margins": 0.04708254337310791, + "rewards/rejected": 1.015411376953125, + "step": 4035 + }, + { + "epoch": 0.23, + "learning_rate": 8.939357426235967e-08, + "logits/chosen": -2.1390228271484375, + "logits/rejected": -2.1376640796661377, + "logps/chosen": -6.540865898132324, + "logps/rejected": -323.6346740722656, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14689092338085175, + "rewards/margins": 2.550607919692993, + "rewards/rejected": -2.403717041015625, + "step": 4036 + }, + { + "epoch": 0.23, + "learning_rate": 8.938776987463317e-08, + "logits/chosen": -1.8621606826782227, + "logits/rejected": -1.871625542640686, + "logps/chosen": -243.0994873046875, + "logps/rejected": -288.05126953125, + "loss": 0.198, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7246429920196533, + "rewards/margins": 1.0602295398712158, + "rewards/rejected": 0.6644134521484375, + "step": 4037 + }, + { + "epoch": 0.23, + "learning_rate": 8.938196408766041e-08, + "logits/chosen": -1.9292781352996826, + "logits/rejected": -1.9471489191055298, + "logps/chosen": -256.994140625, + "logps/rejected": -352.6945495605469, + "loss": 0.2052, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3717286586761475, + "rewards/margins": 0.86252760887146, + "rewards/rejected": 1.5092010498046875, + "step": 4038 + }, + { + "epoch": 0.24, + "learning_rate": 8.937615690164765e-08, + "logits/chosen": -1.920720100402832, + "logits/rejected": -1.9215786457061768, + "logps/chosen": -4.291584014892578, + "logps/rejected": -147.9872283935547, + "loss": 0.5445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04576706886291504, + "rewards/margins": 0.7777467370033264, + "rewards/rejected": -0.8235138058662415, + "step": 4039 + }, + { + "epoch": 0.24, + "learning_rate": 8.937034831680119e-08, + "logits/chosen": -2.0291800498962402, + "logits/rejected": -2.0423290729522705, + "logps/chosen": -205.16616821289062, + "logps/rejected": -281.1581115722656, + "loss": 0.2757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9773941040039062, + "rewards/margins": 0.5132675170898438, + "rewards/rejected": 1.4641265869140625, + "step": 4040 + }, + { + "epoch": 0.24, + "learning_rate": 8.936453833332738e-08, + "logits/chosen": -1.9711885452270508, + "logits/rejected": -1.9470268487930298, + "logps/chosen": -222.52645874023438, + "logps/rejected": -431.8173828125, + "loss": 0.2415, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3775055408477783, + "rewards/margins": 0.921600341796875, + "rewards/rejected": 0.45590516924858093, + "step": 4041 + }, + { + "epoch": 0.24, + "learning_rate": 8.93587269514326e-08, + "logits/chosen": -2.059150218963623, + "logits/rejected": -2.054924249649048, + "logps/chosen": -378.92547607421875, + "logps/rejected": -500.55810546875, + "loss": 0.1664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2881287336349487, + "rewards/margins": 1.6530762910842896, + "rewards/rejected": -0.36494752764701843, + "step": 4042 + }, + { + "epoch": 0.24, + "learning_rate": 8.935291417132333e-08, + "logits/chosen": -1.9391542673110962, + "logits/rejected": -1.9521617889404297, + "logps/chosen": -196.29196166992188, + "logps/rejected": -374.4561767578125, + "loss": 0.2258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3356720209121704, + "rewards/margins": 1.1620819568634033, + "rewards/rejected": 0.17359009385108948, + "step": 4043 + }, + { + "epoch": 0.24, + "learning_rate": 8.934709999320605e-08, + "logits/chosen": -1.9294646978378296, + "logits/rejected": -1.9104516506195068, + "logps/chosen": -267.9031982421875, + "logps/rejected": -403.7046813964844, + "loss": 0.1732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9425842761993408, + "rewards/margins": 1.203271508216858, + "rewards/rejected": 0.7393127679824829, + "step": 4044 + }, + { + "epoch": 0.24, + "learning_rate": 8.93412844172873e-08, + "logits/chosen": -1.9465150833129883, + "logits/rejected": -1.9426672458648682, + "logps/chosen": -13.290779113769531, + "logps/rejected": -226.07154846191406, + "loss": 0.4063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17943410575389862, + "rewards/margins": 1.5396071672439575, + "rewards/rejected": -1.3601731061935425, + "step": 4045 + }, + { + "epoch": 0.24, + "learning_rate": 8.933546744377369e-08, + "logits/chosen": -2.104914903640747, + "logits/rejected": -2.1035351753234863, + "logps/chosen": -15.754287719726562, + "logps/rejected": -120.11007690429688, + "loss": 0.6796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48877963423728943, + "rewards/margins": 0.5320135354995728, + "rewards/rejected": -1.0207931995391846, + "step": 4046 + }, + { + "epoch": 0.24, + "learning_rate": 8.932964907287188e-08, + "logits/chosen": -2.0703399181365967, + "logits/rejected": -2.043706178665161, + "logps/chosen": -121.17345428466797, + "logps/rejected": -373.2054443359375, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.417938232421875, + "rewards/margins": 2.4045653343200684, + "rewards/rejected": -1.986627221107483, + "step": 4047 + }, + { + "epoch": 0.24, + "learning_rate": 8.932382930478854e-08, + "logits/chosen": -1.9484621286392212, + "logits/rejected": -1.95095694065094, + "logps/chosen": -126.68121337890625, + "logps/rejected": -502.8865661621094, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3562377989292145, + "rewards/margins": 5.416540622711182, + "rewards/rejected": -5.772778511047363, + "step": 4048 + }, + { + "epoch": 0.24, + "learning_rate": 8.931800813973045e-08, + "logits/chosen": -2.139530897140503, + "logits/rejected": -2.1339824199676514, + "logps/chosen": -94.08406066894531, + "logps/rejected": -329.1752014160156, + "loss": 0.2743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6227577328681946, + "rewards/margins": 1.6197152137756348, + "rewards/rejected": -0.9969574213027954, + "step": 4049 + }, + { + "epoch": 0.24, + "learning_rate": 8.931218557790437e-08, + "logits/chosen": -1.8935259580612183, + "logits/rejected": -1.896774172782898, + "logps/chosen": -0.000636781333014369, + "logps/rejected": -163.3016357421875, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.502391675487161e-06, + "rewards/margins": 1.760380506515503, + "rewards/rejected": -1.7603759765625, + "step": 4050 + }, + { + "epoch": 0.24, + "learning_rate": 8.930636161951716e-08, + "logits/chosen": -2.0334811210632324, + "logits/rejected": -2.009805679321289, + "logps/chosen": -59.066734313964844, + "logps/rejected": -269.74932861328125, + "loss": 0.5706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26480409502983093, + "rewards/margins": 1.154687523841858, + "rewards/rejected": -1.4194916486740112, + "step": 4051 + }, + { + "epoch": 0.24, + "learning_rate": 8.930053626477572e-08, + "logits/chosen": -2.050395965576172, + "logits/rejected": -2.06789231300354, + "logps/chosen": -200.53016662597656, + "logps/rejected": -265.4050598144531, + "loss": 0.2056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7024582624435425, + "rewards/margins": 1.0850541591644287, + "rewards/rejected": 0.6174041628837585, + "step": 4052 + }, + { + "epoch": 0.24, + "learning_rate": 8.929470951388698e-08, + "logits/chosen": -1.8021528720855713, + "logits/rejected": -1.7856202125549316, + "logps/chosen": -60.84263610839844, + "logps/rejected": -284.4924011230469, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4746665954589844, + "rewards/margins": 2.8277275562286377, + "rewards/rejected": -2.3530609607696533, + "step": 4053 + }, + { + "epoch": 0.24, + "learning_rate": 8.928888136705796e-08, + "logits/chosen": -1.9318029880523682, + "logits/rejected": -1.8970489501953125, + "logps/chosen": -210.06689453125, + "logps/rejected": -362.3267822265625, + "loss": 0.372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0880218744277954, + "rewards/margins": 0.32187503576278687, + "rewards/rejected": 0.7661468386650085, + "step": 4054 + }, + { + "epoch": 0.24, + "learning_rate": 8.92830518244957e-08, + "logits/chosen": -2.071582078933716, + "logits/rejected": -2.076185464859009, + "logps/chosen": -3.484076499938965, + "logps/rejected": -83.76349639892578, + "loss": 0.5446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03669116646051407, + "rewards/margins": 0.5799185633659363, + "rewards/rejected": -0.5432273745536804, + "step": 4055 + }, + { + "epoch": 0.24, + "learning_rate": 8.927722088640726e-08, + "logits/chosen": -2.0152344703674316, + "logits/rejected": -1.9989302158355713, + "logps/chosen": -127.64344787597656, + "logps/rejected": -329.9542236328125, + "loss": 0.4466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08631744235754013, + "rewards/margins": 0.7924270629882812, + "rewards/rejected": -0.7061096429824829, + "step": 4056 + }, + { + "epoch": 0.24, + "learning_rate": 8.927138855299983e-08, + "logits/chosen": -1.995887279510498, + "logits/rejected": -1.9954217672348022, + "logps/chosen": -5.961761951446533, + "logps/rejected": -14.966756820678711, + "loss": 0.703, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01588764227926731, + "rewards/margins": -0.0025214198976755142, + "rewards/rejected": -0.013366222381591797, + "step": 4057 + }, + { + "epoch": 0.24, + "learning_rate": 8.926555482448057e-08, + "logits/chosen": -2.0889551639556885, + "logits/rejected": -2.0780580043792725, + "logps/chosen": -89.80521392822266, + "logps/rejected": -323.7073059082031, + "loss": 0.4535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029418183490633965, + "rewards/margins": 1.1676018238067627, + "rewards/rejected": -1.13818359375, + "step": 4058 + }, + { + "epoch": 0.24, + "learning_rate": 8.925971970105675e-08, + "logits/chosen": -2.104243278503418, + "logits/rejected": -2.082197904586792, + "logps/chosen": -17.526199340820312, + "logps/rejected": -454.68682861328125, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26758918166160583, + "rewards/margins": 4.344328880310059, + "rewards/rejected": -4.07673978805542, + "step": 4059 + }, + { + "epoch": 0.24, + "learning_rate": 8.925388318293562e-08, + "logits/chosen": -1.8969073295593262, + "logits/rejected": -1.8966683149337769, + "logps/chosen": -0.005302727688103914, + "logps/rejected": -52.06948471069336, + "loss": 0.5309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016235027578659356, + "rewards/margins": 0.794169008731842, + "rewards/rejected": -0.7943313717842102, + "step": 4060 + }, + { + "epoch": 0.24, + "learning_rate": 8.924804527032457e-08, + "logits/chosen": -2.101686477661133, + "logits/rejected": -2.0944504737854004, + "logps/chosen": -104.371826171875, + "logps/rejected": -285.3134460449219, + "loss": 0.6391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.214152529835701, + "rewards/margins": 1.0134903192520142, + "rewards/rejected": -1.2276428937911987, + "step": 4061 + }, + { + "epoch": 0.24, + "learning_rate": 8.924220596343096e-08, + "logits/chosen": -2.1867189407348633, + "logits/rejected": -2.1922104358673096, + "logps/chosen": -83.43373107910156, + "logps/rejected": -191.13201904296875, + "loss": 0.4022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18673478066921234, + "rewards/margins": 1.0409828424453735, + "rewards/rejected": -0.854248046875, + "step": 4062 + }, + { + "epoch": 0.24, + "learning_rate": 8.923636526246224e-08, + "logits/chosen": -2.1715641021728516, + "logits/rejected": -2.0964972972869873, + "logps/chosen": -202.223388671875, + "logps/rejected": -331.66046142578125, + "loss": 0.4207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.769946277141571, + "rewards/margins": 0.5308929085731506, + "rewards/rejected": 0.23905335366725922, + "step": 4063 + }, + { + "epoch": 0.24, + "learning_rate": 8.92305231676259e-08, + "logits/chosen": -2.0170538425445557, + "logits/rejected": -2.01292085647583, + "logps/chosen": -51.279998779296875, + "logps/rejected": -124.09516143798828, + "loss": 0.7108, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.16478538513183594, + "rewards/margins": -0.009965896606445312, + "rewards/rejected": -0.15481948852539062, + "step": 4064 + }, + { + "epoch": 0.24, + "learning_rate": 8.922467967912948e-08, + "logits/chosen": -1.9749003648757935, + "logits/rejected": -1.9835559129714966, + "logps/chosen": -240.77597045898438, + "logps/rejected": -259.9002990722656, + "loss": 0.2507, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2374267578125, + "rewards/margins": 1.2097686529159546, + "rewards/rejected": 0.02765808068215847, + "step": 4065 + }, + { + "epoch": 0.24, + "learning_rate": 8.921883479718058e-08, + "logits/chosen": -1.95004141330719, + "logits/rejected": -1.9433932304382324, + "logps/chosen": -14.976056098937988, + "logps/rejected": -132.28109741210938, + "loss": 0.5283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003231048583984375, + "rewards/margins": 0.8162574768066406, + "rewards/rejected": -0.819488525390625, + "step": 4066 + }, + { + "epoch": 0.24, + "learning_rate": 8.921298852198682e-08, + "logits/chosen": -2.0607681274414062, + "logits/rejected": -2.0490925312042236, + "logps/chosen": -0.0011550365015864372, + "logps/rejected": -76.43282318115234, + "loss": 0.6076, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7022284686827334e-06, + "rewards/margins": 0.36710116267204285, + "rewards/rejected": -0.3670974671840668, + "step": 4067 + }, + { + "epoch": 0.24, + "learning_rate": 8.920714085375589e-08, + "logits/chosen": -2.1746997833251953, + "logits/rejected": -2.1762616634368896, + "logps/chosen": -31.523174285888672, + "logps/rejected": -159.43402099609375, + "loss": 0.5541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09056740254163742, + "rewards/margins": 0.4436054527759552, + "rewards/rejected": -0.3530380427837372, + "step": 4068 + }, + { + "epoch": 0.24, + "learning_rate": 8.920129179269553e-08, + "logits/chosen": -2.288257598876953, + "logits/rejected": -2.2995314598083496, + "logps/chosen": -31.39280128479004, + "logps/rejected": -89.58952331542969, + "loss": 0.6161, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.42898160219192505, + "rewards/margins": -0.02417925000190735, + "rewards/rejected": 0.4531608521938324, + "step": 4069 + }, + { + "epoch": 0.24, + "learning_rate": 8.919544133901354e-08, + "logits/chosen": -2.045199155807495, + "logits/rejected": -2.0415456295013428, + "logps/chosen": -0.002274902770295739, + "logps/rejected": -161.96401977539062, + "loss": 0.4802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1590309867415272e-07, + "rewards/margins": 1.2047303915023804, + "rewards/rejected": -1.2047302722930908, + "step": 4070 + }, + { + "epoch": 0.24, + "learning_rate": 8.918958949291775e-08, + "logits/chosen": -1.9203230142593384, + "logits/rejected": -1.9332847595214844, + "logps/chosen": -283.32586669921875, + "logps/rejected": -311.1142272949219, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2945802211761475, + "rewards/margins": 2.564969062805176, + "rewards/rejected": -0.27038881182670593, + "step": 4071 + }, + { + "epoch": 0.24, + "learning_rate": 8.918373625461603e-08, + "logits/chosen": -1.9283826351165771, + "logits/rejected": -1.8670815229415894, + "logps/chosen": -213.16937255859375, + "logps/rejected": -599.0069580078125, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4015320539474487, + "rewards/margins": 1.8827149868011475, + "rewards/rejected": -0.48118287324905396, + "step": 4072 + }, + { + "epoch": 0.24, + "learning_rate": 8.917788162431635e-08, + "logits/chosen": -1.8969968557357788, + "logits/rejected": -1.8876057863235474, + "logps/chosen": -172.19688415527344, + "logps/rejected": -296.0353088378906, + "loss": 0.4257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6580215692520142, + "rewards/margins": 0.2743270993232727, + "rewards/rejected": 0.38369446992874146, + "step": 4073 + }, + { + "epoch": 0.24, + "learning_rate": 8.917202560222667e-08, + "logits/chosen": -1.8074464797973633, + "logits/rejected": -1.774322748184204, + "logps/chosen": -254.743408203125, + "logps/rejected": -366.5771484375, + "loss": 0.2148, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.130346655845642, + "rewards/margins": 1.5416290760040283, + "rewards/rejected": -0.41128236055374146, + "step": 4074 + }, + { + "epoch": 0.24, + "learning_rate": 8.916616818855502e-08, + "logits/chosen": -2.123598575592041, + "logits/rejected": -2.1284124851226807, + "logps/chosen": -258.3324279785156, + "logps/rejected": -483.78704833984375, + "loss": 0.1171, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.646722435951233, + "rewards/margins": 1.8218170404434204, + "rewards/rejected": -0.1750946044921875, + "step": 4075 + }, + { + "epoch": 0.24, + "learning_rate": 8.916030938350951e-08, + "logits/chosen": -2.023198366165161, + "logits/rejected": -2.0255985260009766, + "logps/chosen": -1.4786338806152344, + "logps/rejected": -49.301910400390625, + "loss": 0.595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07704650610685349, + "rewards/margins": 0.41422584652900696, + "rewards/rejected": -0.49127236008644104, + "step": 4076 + }, + { + "epoch": 0.24, + "learning_rate": 8.915444918729825e-08, + "logits/chosen": -2.1029937267303467, + "logits/rejected": -2.096468448638916, + "logps/chosen": -248.7884521484375, + "logps/rejected": -360.25665283203125, + "loss": 0.1885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.902459740638733, + "rewards/margins": 1.038360595703125, + "rewards/rejected": 0.8640991449356079, + "step": 4077 + }, + { + "epoch": 0.24, + "learning_rate": 8.914858760012942e-08, + "logits/chosen": -1.9311541318893433, + "logits/rejected": -1.9294562339782715, + "logps/chosen": -30.103769302368164, + "logps/rejected": -162.14840698242188, + "loss": 0.5321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19286365807056427, + "rewards/margins": 0.4829027056694031, + "rewards/rejected": -0.2900390625, + "step": 4078 + }, + { + "epoch": 0.24, + "learning_rate": 8.914272462221126e-08, + "logits/chosen": -1.9966145753860474, + "logits/rejected": -1.9922113418579102, + "logps/chosen": -49.83526611328125, + "logps/rejected": -223.4486541748047, + "loss": 0.3458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.142375186085701, + "rewards/margins": 2.239523410797119, + "rewards/rejected": -2.0971481800079346, + "step": 4079 + }, + { + "epoch": 0.24, + "learning_rate": 8.913686025375206e-08, + "logits/chosen": -1.9897223711013794, + "logits/rejected": -1.9872628450393677, + "logps/chosen": -2.962944269180298, + "logps/rejected": -40.47101974487305, + "loss": 0.7067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07299225777387619, + "rewards/margins": 0.009200356900691986, + "rewards/rejected": -0.08219261467456818, + "step": 4080 + }, + { + "epoch": 0.24, + "learning_rate": 8.913099449496015e-08, + "logits/chosen": -1.8854074478149414, + "logits/rejected": -1.8810392618179321, + "logps/chosen": -221.2354736328125, + "logps/rejected": -338.3192443847656, + "loss": 0.1813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5870789289474487, + "rewards/margins": 1.2487092018127441, + "rewards/rejected": 0.338369756937027, + "step": 4081 + }, + { + "epoch": 0.24, + "learning_rate": 8.912512734604391e-08, + "logits/chosen": -2.118161201477051, + "logits/rejected": -2.1126747131347656, + "logps/chosen": -3.075587301282212e-05, + "logps/rejected": -153.1004180908203, + "loss": 0.3669, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410718190683838e-07, + "rewards/margins": 2.994720697402954, + "rewards/rejected": -2.9947211742401123, + "step": 4082 + }, + { + "epoch": 0.24, + "learning_rate": 8.911925880721176e-08, + "logits/chosen": -2.031010150909424, + "logits/rejected": -2.029517650604248, + "logps/chosen": -171.5832061767578, + "logps/rejected": -271.89892578125, + "loss": 0.7556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6330307126045227, + "rewards/margins": 0.24511563777923584, + "rewards/rejected": -0.8781463503837585, + "step": 4083 + }, + { + "epoch": 0.24, + "learning_rate": 8.911338887867219e-08, + "logits/chosen": -2.1023998260498047, + "logits/rejected": -2.087050199508667, + "logps/chosen": -5.628442764282227, + "logps/rejected": -296.963623046875, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1916169673204422, + "rewards/margins": 3.0646884441375732, + "rewards/rejected": -2.8730714321136475, + "step": 4084 + }, + { + "epoch": 0.24, + "learning_rate": 8.91075175606337e-08, + "logits/chosen": -2.109804391860962, + "logits/rejected": -2.0686991214752197, + "logps/chosen": -216.24404907226562, + "logps/rejected": -394.4367370605469, + "loss": 0.2043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8415848016738892, + "rewards/margins": 1.0909347534179688, + "rewards/rejected": 0.7506500482559204, + "step": 4085 + }, + { + "epoch": 0.24, + "learning_rate": 8.910164485330491e-08, + "logits/chosen": -1.9891855716705322, + "logits/rejected": -1.986253023147583, + "logps/chosen": -45.70231246948242, + "logps/rejected": -131.08053588867188, + "loss": 0.7013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5149189233779907, + "rewards/margins": 0.4796329140663147, + "rewards/rejected": -0.9945518374443054, + "step": 4086 + }, + { + "epoch": 0.24, + "learning_rate": 8.909577075689443e-08, + "logits/chosen": -2.087179183959961, + "logits/rejected": -2.0852367877960205, + "logps/chosen": -83.88682556152344, + "logps/rejected": -185.12936401367188, + "loss": 0.8221, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.07289200276136398, + "rewards/margins": -0.08180923759937286, + "rewards/rejected": 0.00891723670065403, + "step": 4087 + }, + { + "epoch": 0.24, + "learning_rate": 8.908989527161092e-08, + "logits/chosen": -1.9933152198791504, + "logits/rejected": -1.9918252229690552, + "logps/chosen": -7.319304131669924e-05, + "logps/rejected": -189.78451538085938, + "loss": 0.3654, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.887646696261072e-07, + "rewards/margins": 2.96649432182312, + "rewards/rejected": -2.9664947986602783, + "step": 4088 + }, + { + "epoch": 0.24, + "learning_rate": 8.908401839766313e-08, + "logits/chosen": -2.0580050945281982, + "logits/rejected": -2.058091163635254, + "logps/chosen": -198.62451171875, + "logps/rejected": -295.3096008300781, + "loss": 0.4394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05764465406537056, + "rewards/margins": 0.809539794921875, + "rewards/rejected": -0.7518951296806335, + "step": 4089 + }, + { + "epoch": 0.24, + "learning_rate": 8.907814013525982e-08, + "logits/chosen": -2.0608608722686768, + "logits/rejected": -2.0539052486419678, + "logps/chosen": -123.3894271850586, + "logps/rejected": -219.01947021484375, + "loss": 0.5103, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9462318420410156, + "rewards/margins": -0.12173998355865479, + "rewards/rejected": 1.0679718255996704, + "step": 4090 + }, + { + "epoch": 0.24, + "learning_rate": 8.907226048460983e-08, + "logits/chosen": -2.0577290058135986, + "logits/rejected": -2.0385475158691406, + "logps/chosen": -128.7187042236328, + "logps/rejected": -188.07447814941406, + "loss": 0.4132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6737319827079773, + "rewards/margins": 0.567761242389679, + "rewards/rejected": 0.10597076267004013, + "step": 4091 + }, + { + "epoch": 0.24, + "learning_rate": 8.906637944592201e-08, + "logits/chosen": -2.13993763923645, + "logits/rejected": -2.1454050540924072, + "logps/chosen": -126.62525939941406, + "logps/rejected": -294.72418212890625, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8158111572265625, + "rewards/margins": 0.7464050054550171, + "rewards/rejected": 0.06940612941980362, + "step": 4092 + }, + { + "epoch": 0.24, + "learning_rate": 8.906049701940531e-08, + "logits/chosen": -2.240177869796753, + "logits/rejected": -2.214360475540161, + "logps/chosen": -76.06336975097656, + "logps/rejected": -254.12103271484375, + "loss": 0.5645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45666810870170593, + "rewards/margins": 1.5191924571990967, + "rewards/rejected": -1.975860595703125, + "step": 4093 + }, + { + "epoch": 0.24, + "learning_rate": 8.905461320526868e-08, + "logits/chosen": -2.127812147140503, + "logits/rejected": -2.1262760162353516, + "logps/chosen": -82.81365203857422, + "logps/rejected": -301.643798828125, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29579851031303406, + "rewards/margins": 2.119788408279419, + "rewards/rejected": -1.8239898681640625, + "step": 4094 + }, + { + "epoch": 0.24, + "learning_rate": 8.904872800372116e-08, + "logits/chosen": -2.1262290477752686, + "logits/rejected": -2.097332239151001, + "logps/chosen": -163.81875610351562, + "logps/rejected": -354.52117919921875, + "loss": 0.6387, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3032349348068237, + "rewards/margins": -0.7187317609786987, + "rewards/rejected": 2.0219666957855225, + "step": 4095 + }, + { + "epoch": 0.24, + "learning_rate": 8.904284141497182e-08, + "logits/chosen": -2.111786127090454, + "logits/rejected": -2.101698637008667, + "logps/chosen": -219.02767944335938, + "logps/rejected": -373.43792724609375, + "loss": 0.2673, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.800013780593872, + "rewards/margins": 0.5836746692657471, + "rewards/rejected": 1.216339111328125, + "step": 4096 + }, + { + "epoch": 0.24, + "learning_rate": 8.903695343922976e-08, + "logits/chosen": -1.8863075971603394, + "logits/rejected": -1.8851248025894165, + "logps/chosen": -277.8609619140625, + "logps/rejected": -473.8173828125, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7069427967071533, + "rewards/margins": 2.4960052967071533, + "rewards/rejected": -0.7890625, + "step": 4097 + }, + { + "epoch": 0.24, + "learning_rate": 8.903106407670417e-08, + "logits/chosen": -2.0810699462890625, + "logits/rejected": -2.0625762939453125, + "logps/chosen": -250.08245849609375, + "logps/rejected": -352.9135437011719, + "loss": 0.3453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.822906494140625, + "rewards/margins": 0.42111510038375854, + "rewards/rejected": 0.40179139375686646, + "step": 4098 + }, + { + "epoch": 0.24, + "learning_rate": 8.902517332760427e-08, + "logits/chosen": -1.9382362365722656, + "logits/rejected": -1.9329400062561035, + "logps/chosen": -169.46620178222656, + "logps/rejected": -233.65869140625, + "loss": 0.2069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4058853387832642, + "rewards/margins": 1.2278518676757812, + "rewards/rejected": 0.17803345620632172, + "step": 4099 + }, + { + "epoch": 0.24, + "learning_rate": 8.901928119213931e-08, + "logits/chosen": -2.124542713165283, + "logits/rejected": -2.127347469329834, + "logps/chosen": -297.51251220703125, + "logps/rejected": -499.1878662109375, + "loss": 0.0772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.590576171875, + "rewards/margins": 3.4172394275665283, + "rewards/rejected": -1.8266632556915283, + "step": 4100 + }, + { + "epoch": 0.24, + "learning_rate": 8.901338767051862e-08, + "logits/chosen": -1.9636393785476685, + "logits/rejected": -1.909451961517334, + "logps/chosen": -250.15428161621094, + "logps/rejected": -347.9764099121094, + "loss": 0.2137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4181381464004517, + "rewards/margins": 1.3308639526367188, + "rewards/rejected": 0.08727417141199112, + "step": 4101 + }, + { + "epoch": 0.24, + "learning_rate": 8.900749276295157e-08, + "logits/chosen": -1.8818669319152832, + "logits/rejected": -1.863702654838562, + "logps/chosen": -281.1456604003906, + "logps/rejected": -489.07684326171875, + "loss": 0.1055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.785864233970642, + "rewards/margins": 2.2558655738830566, + "rewards/rejected": -0.470001220703125, + "step": 4102 + }, + { + "epoch": 0.24, + "learning_rate": 8.900159646964756e-08, + "logits/chosen": -2.037703275680542, + "logits/rejected": -2.0536856651306152, + "logps/chosen": -186.49472045898438, + "logps/rejected": -297.2249755859375, + "loss": 0.2304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2911423444747925, + "rewards/margins": 0.926918089389801, + "rewards/rejected": 0.36422425508499146, + "step": 4103 + }, + { + "epoch": 0.24, + "learning_rate": 8.899569879081607e-08, + "logits/chosen": -2.1979963779449463, + "logits/rejected": -2.184882402420044, + "logps/chosen": -179.01483154296875, + "logps/rejected": -240.600830078125, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6137313842773438, + "rewards/margins": 0.21799468994140625, + "rewards/rejected": 1.3957366943359375, + "step": 4104 + }, + { + "epoch": 0.24, + "learning_rate": 8.898979972666663e-08, + "logits/chosen": -2.25718355178833, + "logits/rejected": -2.2582974433898926, + "logps/chosen": -14.423333168029785, + "logps/rejected": -199.7100372314453, + "loss": 0.4092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17143011093139648, + "rewards/margins": 1.497423529624939, + "rewards/rejected": -1.3259934186935425, + "step": 4105 + }, + { + "epoch": 0.24, + "learning_rate": 8.898389927740876e-08, + "logits/chosen": -2.1578688621520996, + "logits/rejected": -2.1546080112457275, + "logps/chosen": -0.03843298181891441, + "logps/rejected": -148.531494140625, + "loss": 0.5279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008514527580700815, + "rewards/margins": 0.7846298217773438, + "rewards/rejected": -0.7854812741279602, + "step": 4106 + }, + { + "epoch": 0.24, + "learning_rate": 8.89779974432521e-08, + "logits/chosen": -1.9817734956741333, + "logits/rejected": -1.986764669418335, + "logps/chosen": -197.30743408203125, + "logps/rejected": -266.9241943359375, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8728790283203125, + "rewards/margins": 0.6683197021484375, + "rewards/rejected": 0.204559326171875, + "step": 4107 + }, + { + "epoch": 0.24, + "learning_rate": 8.897209422440631e-08, + "logits/chosen": -1.9477968215942383, + "logits/rejected": -1.9497486352920532, + "logps/chosen": -203.65670776367188, + "logps/rejected": -245.74386596679688, + "loss": 0.4266, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6762726306915283, + "rewards/margins": -0.09188234806060791, + "rewards/rejected": 1.7681549787521362, + "step": 4108 + }, + { + "epoch": 0.24, + "learning_rate": 8.896618962108111e-08, + "logits/chosen": -1.8885974884033203, + "logits/rejected": -1.9028682708740234, + "logps/chosen": -5.596909523010254, + "logps/rejected": -210.2476806640625, + "loss": 0.397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024031640496104956, + "rewards/margins": 2.1474533081054688, + "rewards/rejected": -2.1498565673828125, + "step": 4109 + }, + { + "epoch": 0.24, + "learning_rate": 8.896028363348624e-08, + "logits/chosen": -1.9933667182922363, + "logits/rejected": -2.0723540782928467, + "logps/chosen": -297.98370361328125, + "logps/rejected": -215.81903076171875, + "loss": 0.1712, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.12921142578125, + "rewards/margins": 1.1157439947128296, + "rewards/rejected": 1.0134674310684204, + "step": 4110 + }, + { + "epoch": 0.24, + "learning_rate": 8.895437626183152e-08, + "logits/chosen": -2.1654999256134033, + "logits/rejected": -2.1591174602508545, + "logps/chosen": -44.34165954589844, + "logps/rejected": -381.3507385253906, + "loss": 0.2149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40943604707717896, + "rewards/margins": 2.4455902576446533, + "rewards/rejected": -2.036154270172119, + "step": 4111 + }, + { + "epoch": 0.24, + "learning_rate": 8.89484675063268e-08, + "logits/chosen": -2.0725691318511963, + "logits/rejected": -2.0818662643432617, + "logps/chosen": -183.82867431640625, + "logps/rejected": -220.54986572265625, + "loss": 0.3979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9480560421943665, + "rewards/margins": 0.1900634765625, + "rewards/rejected": 0.7579925656318665, + "step": 4112 + }, + { + "epoch": 0.24, + "learning_rate": 8.894255736718203e-08, + "logits/chosen": -1.9702115058898926, + "logits/rejected": -1.95585036277771, + "logps/chosen": -288.9813537597656, + "logps/rejected": -444.33599853515625, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0875519514083862, + "rewards/margins": 0.8390656113624573, + "rewards/rejected": 0.24848632514476776, + "step": 4113 + }, + { + "epoch": 0.24, + "learning_rate": 8.893664584460709e-08, + "logits/chosen": -2.044374704360962, + "logits/rejected": -1.995707631111145, + "logps/chosen": -218.84619140625, + "logps/rejected": -332.27947998046875, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6021759510040283, + "rewards/margins": 0.6981415152549744, + "rewards/rejected": 0.904034435749054, + "step": 4114 + }, + { + "epoch": 0.24, + "learning_rate": 8.893073293881206e-08, + "logits/chosen": -2.0831139087677, + "logits/rejected": -2.072338104248047, + "logps/chosen": -0.664482593536377, + "logps/rejected": -65.51519775390625, + "loss": 0.7152, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0004739224968943745, + "rewards/margins": -0.09634413570165634, + "rewards/rejected": 0.09587021172046661, + "step": 4115 + }, + { + "epoch": 0.24, + "learning_rate": 8.892481865000695e-08, + "logits/chosen": -1.8664556741714478, + "logits/rejected": -1.8599185943603516, + "logps/chosen": -179.25747680664062, + "logps/rejected": -293.13800048828125, + "loss": 0.5306, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9556609988212585, + "rewards/margins": -0.08706969022750854, + "rewards/rejected": 1.042730689048767, + "step": 4116 + }, + { + "epoch": 0.24, + "learning_rate": 8.89189029784019e-08, + "logits/chosen": -2.0010881423950195, + "logits/rejected": -2.0009303092956543, + "logps/chosen": -15.840691566467285, + "logps/rejected": -70.18771362304688, + "loss": 0.6429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002986908075399697, + "rewards/margins": 0.1945262998342514, + "rewards/rejected": -0.19422760605812073, + "step": 4117 + }, + { + "epoch": 0.24, + "learning_rate": 8.891298592420703e-08, + "logits/chosen": -2.2611567974090576, + "logits/rejected": -2.258319139480591, + "logps/chosen": -55.919822692871094, + "logps/rejected": -196.7661895751953, + "loss": 0.4363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2895774841308594, + "rewards/margins": 0.7903480529785156, + "rewards/rejected": -0.5007705688476562, + "step": 4118 + }, + { + "epoch": 0.24, + "learning_rate": 8.890706748763255e-08, + "logits/chosen": -1.9515111446380615, + "logits/rejected": -1.8883150815963745, + "logps/chosen": -204.80213928222656, + "logps/rejected": -273.56829833984375, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9056671261787415, + "rewards/margins": 0.7180450558662415, + "rewards/rejected": 0.1876220703125, + "step": 4119 + }, + { + "epoch": 0.24, + "learning_rate": 8.890114766888872e-08, + "logits/chosen": -1.9856773614883423, + "logits/rejected": -1.9860912561416626, + "logps/chosen": -118.17401885986328, + "logps/rejected": -201.03350830078125, + "loss": 0.4174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39097291231155396, + "rewards/margins": 0.6449340581893921, + "rewards/rejected": -0.2539611756801605, + "step": 4120 + }, + { + "epoch": 0.24, + "learning_rate": 8.889522646818584e-08, + "logits/chosen": -2.177227735519409, + "logits/rejected": -2.1751677989959717, + "logps/chosen": -34.50229263305664, + "logps/rejected": -156.0879364013672, + "loss": 0.4579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3378433287143707, + "rewards/margins": 0.7763229608535767, + "rewards/rejected": -0.43847963213920593, + "step": 4121 + }, + { + "epoch": 0.24, + "learning_rate": 8.888930388573428e-08, + "logits/chosen": -2.0011839866638184, + "logits/rejected": -2.024721622467041, + "logps/chosen": -198.99412536621094, + "logps/rejected": -357.9315185546875, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9211746454238892, + "rewards/margins": 0.8462509512901306, + "rewards/rejected": 0.07492370903491974, + "step": 4122 + }, + { + "epoch": 0.24, + "learning_rate": 8.888337992174439e-08, + "logits/chosen": -1.9792953729629517, + "logits/rejected": -1.9765106439590454, + "logps/chosen": -23.646085739135742, + "logps/rejected": -175.4580078125, + "loss": 0.4559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3615293502807617, + "rewards/margins": 0.6258894205093384, + "rewards/rejected": -0.2643600404262543, + "step": 4123 + }, + { + "epoch": 0.24, + "learning_rate": 8.887745457642666e-08, + "logits/chosen": -2.011815071105957, + "logits/rejected": -2.0010108947753906, + "logps/chosen": -192.35879516601562, + "logps/rejected": -258.0422058105469, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.439723253250122, + "rewards/margins": 0.8095810413360596, + "rewards/rejected": 0.6301422119140625, + "step": 4124 + }, + { + "epoch": 0.24, + "learning_rate": 8.887152784999156e-08, + "logits/chosen": -1.9042854309082031, + "logits/rejected": -1.893082857131958, + "logps/chosen": -0.4425186216831207, + "logps/rejected": -127.19033813476562, + "loss": 0.4595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008067667367868125, + "rewards/margins": 1.3272221088409424, + "rewards/rejected": -1.328028917312622, + "step": 4125 + }, + { + "epoch": 0.24, + "learning_rate": 8.886559974264966e-08, + "logits/chosen": -2.0419929027557373, + "logits/rejected": -2.0376853942871094, + "logps/chosen": -40.16442108154297, + "logps/rejected": -175.22561645507812, + "loss": 0.5598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22331582009792328, + "rewards/margins": 1.1329494714736938, + "rewards/rejected": -1.3562653064727783, + "step": 4126 + }, + { + "epoch": 0.24, + "learning_rate": 8.885967025461154e-08, + "logits/chosen": -2.0216221809387207, + "logits/rejected": -2.0064737796783447, + "logps/chosen": -4.886188983917236, + "logps/rejected": -300.9678039550781, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08702116459608078, + "rewards/margins": 4.039825916290283, + "rewards/rejected": -3.9528045654296875, + "step": 4127 + }, + { + "epoch": 0.24, + "learning_rate": 8.885373938608786e-08, + "logits/chosen": -1.8869167566299438, + "logits/rejected": -1.8738303184509277, + "logps/chosen": -111.17426300048828, + "logps/rejected": -339.6894836425781, + "loss": 0.4293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2620300352573395, + "rewards/margins": 1.6077667474746704, + "rewards/rejected": -1.3457367420196533, + "step": 4128 + }, + { + "epoch": 0.24, + "learning_rate": 8.88478071372893e-08, + "logits/chosen": -1.743311882019043, + "logits/rejected": -1.7447350025177002, + "logps/chosen": -85.06739807128906, + "logps/rejected": -284.7451477050781, + "loss": 0.5503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.342965692281723, + "rewards/margins": 1.2887359857559204, + "rewards/rejected": -1.6317017078399658, + "step": 4129 + }, + { + "epoch": 0.24, + "learning_rate": 8.884187350842659e-08, + "logits/chosen": -2.020024061203003, + "logits/rejected": -2.0101449489593506, + "logps/chosen": -51.244171142578125, + "logps/rejected": -245.10293579101562, + "loss": 0.3638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3737381100654602, + "rewards/margins": 1.211714267730713, + "rewards/rejected": -0.8379760980606079, + "step": 4130 + }, + { + "epoch": 0.24, + "learning_rate": 8.883593849971054e-08, + "logits/chosen": -2.0660338401794434, + "logits/rejected": -2.041492462158203, + "logps/chosen": -193.869873046875, + "logps/rejected": -394.7563781738281, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5156646966934204, + "rewards/margins": 4.216027736663818, + "rewards/rejected": -2.7003631591796875, + "step": 4131 + }, + { + "epoch": 0.24, + "learning_rate": 8.883000211135201e-08, + "logits/chosen": -2.1759166717529297, + "logits/rejected": -2.1459765434265137, + "logps/chosen": -251.0311279296875, + "logps/rejected": -351.66265869140625, + "loss": 0.3932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2671631574630737, + "rewards/margins": 0.1862945556640625, + "rewards/rejected": 1.0808686017990112, + "step": 4132 + }, + { + "epoch": 0.24, + "learning_rate": 8.882406434356184e-08, + "logits/chosen": -2.059659957885742, + "logits/rejected": -2.0253093242645264, + "logps/chosen": -1.049047589302063, + "logps/rejected": -443.6643371582031, + "loss": 0.3677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02979700081050396, + "rewards/margins": 3.3804049491882324, + "rewards/rejected": -3.4102020263671875, + "step": 4133 + }, + { + "epoch": 0.24, + "learning_rate": 8.8818125196551e-08, + "logits/chosen": -2.2158703804016113, + "logits/rejected": -2.217303514480591, + "logps/chosen": -53.78510284423828, + "logps/rejected": -148.1216278076172, + "loss": 0.6448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013581848703324795, + "rewards/margins": 0.12548217177391052, + "rewards/rejected": -0.11190032958984375, + "step": 4134 + }, + { + "epoch": 0.24, + "learning_rate": 8.881218467053049e-08, + "logits/chosen": -2.1966166496276855, + "logits/rejected": -2.187150239944458, + "logps/chosen": -29.522476196289062, + "logps/rejected": -179.40499877929688, + "loss": 0.4519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18903522193431854, + "rewards/margins": 1.1277132034301758, + "rewards/rejected": -0.938677966594696, + "step": 4135 + }, + { + "epoch": 0.24, + "learning_rate": 8.880624276571131e-08, + "logits/chosen": -2.0299899578094482, + "logits/rejected": -2.0056729316711426, + "logps/chosen": -24.91091537475586, + "logps/rejected": -220.1949920654297, + "loss": 0.4701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17910099029541016, + "rewards/margins": 0.7836252450942993, + "rewards/rejected": -0.6045242547988892, + "step": 4136 + }, + { + "epoch": 0.24, + "learning_rate": 8.880029948230458e-08, + "logits/chosen": -2.0715548992156982, + "logits/rejected": -2.0677578449249268, + "logps/chosen": -0.40974920988082886, + "logps/rejected": -181.31414794921875, + "loss": 0.4035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017123322933912277, + "rewards/margins": 2.1973116397857666, + "rewards/rejected": -2.214434862136841, + "step": 4137 + }, + { + "epoch": 0.24, + "learning_rate": 8.879435482052142e-08, + "logits/chosen": -2.1103663444519043, + "logits/rejected": -2.106663942337036, + "logps/chosen": -59.99204635620117, + "logps/rejected": -176.03738403320312, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18133239448070526, + "rewards/margins": 0.20099182426929474, + "rewards/rejected": -0.01965942420065403, + "step": 4138 + }, + { + "epoch": 0.24, + "learning_rate": 8.878840878057301e-08, + "logits/chosen": -2.0326180458068848, + "logits/rejected": -2.0308849811553955, + "logps/chosen": -21.42190933227539, + "logps/rejected": -91.03729248046875, + "loss": 0.4893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14289093017578125, + "rewards/margins": 0.7621063590049744, + "rewards/rejected": -0.6192154288291931, + "step": 4139 + }, + { + "epoch": 0.24, + "learning_rate": 8.878246136267058e-08, + "logits/chosen": -1.9034730195999146, + "logits/rejected": -1.8976739645004272, + "logps/chosen": -343.697265625, + "logps/rejected": -404.6409606933594, + "loss": 0.4987, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.202642798423767, + "rewards/margins": -0.3103882074356079, + "rewards/rejected": 1.513031005859375, + "step": 4140 + }, + { + "epoch": 0.24, + "learning_rate": 8.877651256702542e-08, + "logits/chosen": -2.041006088256836, + "logits/rejected": -2.002281904220581, + "logps/chosen": -251.25125122070312, + "logps/rejected": -432.91363525390625, + "loss": 0.3775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6225525140762329, + "rewards/margins": 0.8503296375274658, + "rewards/rejected": -0.22777710855007172, + "step": 4141 + }, + { + "epoch": 0.24, + "learning_rate": 8.877056239384886e-08, + "logits/chosen": -2.019192934036255, + "logits/rejected": -2.016899824142456, + "logps/chosen": -209.54559326171875, + "logps/rejected": -188.39682006835938, + "loss": 1.3104, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7641265988349915, + "rewards/margins": -1.2116806507110596, + "rewards/rejected": 0.4475540220737457, + "step": 4142 + }, + { + "epoch": 0.24, + "learning_rate": 8.876461084335227e-08, + "logits/chosen": -2.019878387451172, + "logits/rejected": -2.0146749019622803, + "logps/chosen": -33.57606887817383, + "logps/rejected": -102.97021484375, + "loss": 0.6158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04247245937585831, + "rewards/margins": 0.23439446091651917, + "rewards/rejected": -0.19192199409008026, + "step": 4143 + }, + { + "epoch": 0.24, + "learning_rate": 8.875865791574709e-08, + "logits/chosen": -1.935611367225647, + "logits/rejected": -1.9152272939682007, + "logps/chosen": -198.08016967773438, + "logps/rejected": -305.1046142578125, + "loss": 0.459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8752182126045227, + "rewards/margins": 0.1912582516670227, + "rewards/rejected": 0.6839599609375, + "step": 4144 + }, + { + "epoch": 0.24, + "learning_rate": 8.875270361124478e-08, + "logits/chosen": -1.882843255996704, + "logits/rejected": -1.8783705234527588, + "logps/chosen": -141.59100341796875, + "logps/rejected": -214.16049194335938, + "loss": 0.4927, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6740967035293579, + "rewards/margins": -0.0014175176620483398, + "rewards/rejected": 0.6755142211914062, + "step": 4145 + }, + { + "epoch": 0.24, + "learning_rate": 8.87467479300569e-08, + "logits/chosen": -2.0194170475006104, + "logits/rejected": -1.9844813346862793, + "logps/chosen": -65.552978515625, + "logps/rejected": -328.2747802734375, + "loss": 0.2857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27229005098342896, + "rewards/margins": 2.758892774581909, + "rewards/rejected": -2.486602783203125, + "step": 4146 + }, + { + "epoch": 0.24, + "learning_rate": 8.874079087239499e-08, + "logits/chosen": -2.010193109512329, + "logits/rejected": -2.0140883922576904, + "logps/chosen": -1.5026280879974365, + "logps/rejected": -78.24293518066406, + "loss": 0.4519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019925212487578392, + "rewards/margins": 1.40016508102417, + "rewards/rejected": -1.3802398443222046, + "step": 4147 + }, + { + "epoch": 0.24, + "learning_rate": 8.873483243847069e-08, + "logits/chosen": -2.0837013721466064, + "logits/rejected": -2.094686985015869, + "logps/chosen": -1.321065068244934, + "logps/rejected": -78.1016845703125, + "loss": 0.5562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026252806186676025, + "rewards/margins": 0.6869513988494873, + "rewards/rejected": -0.7132042050361633, + "step": 4148 + }, + { + "epoch": 0.24, + "learning_rate": 8.872887262849567e-08, + "logits/chosen": -2.238128900527954, + "logits/rejected": -2.205350875854492, + "logps/chosen": -0.1129823550581932, + "logps/rejected": -249.1659698486328, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.617473784193862e-06, + "rewards/margins": 3.136911153793335, + "rewards/rejected": -3.136918783187866, + "step": 4149 + }, + { + "epoch": 0.24, + "learning_rate": 8.872291144268164e-08, + "logits/chosen": -1.9564223289489746, + "logits/rejected": -1.9403586387634277, + "logps/chosen": -4.51336145401001, + "logps/rejected": -178.86990356445312, + "loss": 0.4505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049512576311826706, + "rewards/margins": 1.3405526876449585, + "rewards/rejected": -1.291040062904358, + "step": 4150 + }, + { + "epoch": 0.24, + "learning_rate": 8.87169488812404e-08, + "logits/chosen": -2.07353138923645, + "logits/rejected": -2.0941574573516846, + "logps/chosen": -158.03585815429688, + "logps/rejected": -197.69662475585938, + "loss": 0.225, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6904449462890625, + "rewards/margins": 0.8475723266601562, + "rewards/rejected": 0.8428726196289062, + "step": 4151 + }, + { + "epoch": 0.24, + "learning_rate": 8.871098494438373e-08, + "logits/chosen": -2.057725667953491, + "logits/rejected": -2.0487520694732666, + "logps/chosen": -249.15802001953125, + "logps/rejected": -342.6678466796875, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5336395502090454, + "rewards/margins": 2.2781829833984375, + "rewards/rejected": -0.7445434927940369, + "step": 4152 + }, + { + "epoch": 0.24, + "learning_rate": 8.870501963232353e-08, + "logits/chosen": -2.0667970180511475, + "logits/rejected": -2.0622339248657227, + "logps/chosen": -7.094517230987549, + "logps/rejected": -248.09353637695312, + "loss": 0.3994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04705810546875, + "rewards/margins": 2.5869691371917725, + "rewards/rejected": -2.6340272426605225, + "step": 4153 + }, + { + "epoch": 0.24, + "learning_rate": 8.869905294527169e-08, + "logits/chosen": -2.0370535850524902, + "logits/rejected": -2.023101568222046, + "logps/chosen": -235.24560546875, + "logps/rejected": -228.80368041992188, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9122421145439148, + "rewards/margins": 0.7502731084823608, + "rewards/rejected": 0.16196899116039276, + "step": 4154 + }, + { + "epoch": 0.24, + "learning_rate": 8.869308488344021e-08, + "logits/chosen": -2.0240917205810547, + "logits/rejected": -2.0023140907287598, + "logps/chosen": -63.56261444091797, + "logps/rejected": -324.2400817871094, + "loss": 0.4955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4002113342285156, + "rewards/margins": 2.3216025829315186, + "rewards/rejected": -2.721813917160034, + "step": 4155 + }, + { + "epoch": 0.24, + "learning_rate": 8.868711544704108e-08, + "logits/chosen": -2.1154770851135254, + "logits/rejected": -2.1156258583068848, + "logps/chosen": -0.011363783851265907, + "logps/rejected": -241.6143341064453, + "loss": 0.3838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002523187722545117, + "rewards/margins": 2.44140887260437, + "rewards/rejected": -2.4416611194610596, + "step": 4156 + }, + { + "epoch": 0.24, + "learning_rate": 8.868114463628638e-08, + "logits/chosen": -2.15165376663208, + "logits/rejected": -2.1163768768310547, + "logps/chosen": -259.30975341796875, + "logps/rejected": -415.1805419921875, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.856311023235321, + "rewards/margins": 1.2134063243865967, + "rewards/rejected": -0.357095330953598, + "step": 4157 + }, + { + "epoch": 0.24, + "learning_rate": 8.86751724513882e-08, + "logits/chosen": -2.1929476261138916, + "logits/rejected": -2.190560817718506, + "logps/chosen": -0.18606193363666534, + "logps/rejected": -128.52456665039062, + "loss": 0.4442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007914862595498562, + "rewards/margins": 1.55422043800354, + "rewards/rejected": -1.5621353387832642, + "step": 4158 + }, + { + "epoch": 0.24, + "learning_rate": 8.866919889255873e-08, + "logits/chosen": -1.893119215965271, + "logits/rejected": -1.8959202766418457, + "logps/chosen": -12.189047813415527, + "logps/rejected": -156.29745483398438, + "loss": 0.4925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04028301313519478, + "rewards/margins": 1.182556390762329, + "rewards/rejected": -1.22283935546875, + "step": 4159 + }, + { + "epoch": 0.24, + "learning_rate": 8.866322396001014e-08, + "logits/chosen": -2.087991714477539, + "logits/rejected": -2.086378812789917, + "logps/chosen": -46.15825653076172, + "logps/rejected": -79.49729919433594, + "loss": 0.7266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23016738891601562, + "rewards/margins": 0.1146644651889801, + "rewards/rejected": -0.3448318541049957, + "step": 4160 + }, + { + "epoch": 0.24, + "learning_rate": 8.865724765395472e-08, + "logits/chosen": -2.0812830924987793, + "logits/rejected": -2.0843505859375, + "logps/chosen": -106.32125091552734, + "logps/rejected": -156.47055053710938, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5328254699707031, + "rewards/margins": 1.0762016773223877, + "rewards/rejected": -0.5433761477470398, + "step": 4161 + }, + { + "epoch": 0.24, + "learning_rate": 8.865126997460478e-08, + "logits/chosen": -2.1230061054229736, + "logits/rejected": -2.125147581100464, + "logps/chosen": -47.89124298095703, + "logps/rejected": -253.205078125, + "loss": 0.3782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04379463195800781, + "rewards/margins": 1.9513691663742065, + "rewards/rejected": -1.9075745344161987, + "step": 4162 + }, + { + "epoch": 0.24, + "learning_rate": 8.864529092217266e-08, + "logits/chosen": -2.0406270027160645, + "logits/rejected": -2.018733263015747, + "logps/chosen": -131.51390075683594, + "logps/rejected": -294.2590026855469, + "loss": 0.1624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3081588745117188, + "rewards/margins": 2.0876388549804688, + "rewards/rejected": -0.77947998046875, + "step": 4163 + }, + { + "epoch": 0.24, + "learning_rate": 8.863931049687077e-08, + "logits/chosen": -2.0326662063598633, + "logits/rejected": -2.0440683364868164, + "logps/chosen": -0.08997706323862076, + "logps/rejected": -229.446533203125, + "loss": 0.4765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024865062441676855, + "rewards/margins": 1.2192100286483765, + "rewards/rejected": -1.2216964960098267, + "step": 4164 + }, + { + "epoch": 0.24, + "learning_rate": 8.863332869891158e-08, + "logits/chosen": -2.1549603939056396, + "logits/rejected": -2.1601903438568115, + "logps/chosen": -9.083971977233887, + "logps/rejected": -146.27166748046875, + "loss": 0.4441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016172219067811966, + "rewards/margins": 1.6477835178375244, + "rewards/rejected": -1.6639556884765625, + "step": 4165 + }, + { + "epoch": 0.24, + "learning_rate": 8.862734552850757e-08, + "logits/chosen": -2.0130257606506348, + "logits/rejected": -1.9584218263626099, + "logps/chosen": -142.55140686035156, + "logps/rejected": -309.86090087890625, + "loss": 0.4333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4894210994243622, + "rewards/margins": 0.6559677124023438, + "rewards/rejected": -0.16654662787914276, + "step": 4166 + }, + { + "epoch": 0.24, + "learning_rate": 8.86213609858713e-08, + "logits/chosen": -2.1055400371551514, + "logits/rejected": -2.106605291366577, + "logps/chosen": -2.4325509071350098, + "logps/rejected": -137.97488403320312, + "loss": 0.4783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08050087839365005, + "rewards/margins": 1.1599845886230469, + "rewards/rejected": -1.240485429763794, + "step": 4167 + }, + { + "epoch": 0.24, + "learning_rate": 8.861537507121537e-08, + "logits/chosen": -2.125488519668579, + "logits/rejected": -2.1173691749572754, + "logps/chosen": -40.705169677734375, + "logps/rejected": -219.27499389648438, + "loss": 0.3923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03320007398724556, + "rewards/margins": 2.0237977504730225, + "rewards/rejected": -1.9905976057052612, + "step": 4168 + }, + { + "epoch": 0.24, + "learning_rate": 8.860938778475243e-08, + "logits/chosen": -2.1880195140838623, + "logits/rejected": -2.1868703365325928, + "logps/chosen": -95.41337585449219, + "logps/rejected": -249.6134033203125, + "loss": 0.4497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21682052314281464, + "rewards/margins": 2.413743019104004, + "rewards/rejected": -2.630563497543335, + "step": 4169 + }, + { + "epoch": 0.24, + "learning_rate": 8.860339912669517e-08, + "logits/chosen": -2.0303351879119873, + "logits/rejected": -2.0282037258148193, + "logps/chosen": -56.64391326904297, + "logps/rejected": -191.43112182617188, + "loss": 0.457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27334481477737427, + "rewards/margins": 0.8716816306114197, + "rewards/rejected": -0.5983368158340454, + "step": 4170 + }, + { + "epoch": 0.24, + "learning_rate": 8.859740909725635e-08, + "logits/chosen": -2.0064826011657715, + "logits/rejected": -2.0092451572418213, + "logps/chosen": -108.51133728027344, + "logps/rejected": -128.2952880859375, + "loss": 0.5846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15582580864429474, + "rewards/margins": 0.7252769470214844, + "rewards/rejected": -0.8811027407646179, + "step": 4171 + }, + { + "epoch": 0.24, + "learning_rate": 8.859141769664875e-08, + "logits/chosen": -1.8368712663650513, + "logits/rejected": -1.870252251625061, + "logps/chosen": -201.2571258544922, + "logps/rejected": -191.0185546875, + "loss": 0.326, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2294098138809204, + "rewards/margins": 0.39730072021484375, + "rewards/rejected": 0.8321090936660767, + "step": 4172 + }, + { + "epoch": 0.24, + "learning_rate": 8.858542492508523e-08, + "logits/chosen": -2.2816412448883057, + "logits/rejected": -2.2777791023254395, + "logps/chosen": -0.0003515041316859424, + "logps/rejected": -29.451387405395508, + "loss": 0.6983, + "rewards/accuracies": 0.0, + "rewards/chosen": 4.135232302360237e-06, + "rewards/margins": -0.0207773819565773, + "rewards/rejected": 0.020781517028808594, + "step": 4173 + }, + { + "epoch": 0.24, + "learning_rate": 8.857943078277867e-08, + "logits/chosen": -2.12270450592041, + "logits/rejected": -2.12245774269104, + "logps/chosen": -4.029470920562744, + "logps/rejected": -174.89456176757812, + "loss": 0.4027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004283046815544367, + "rewards/margins": 2.247706890106201, + "rewards/rejected": -2.2519898414611816, + "step": 4174 + }, + { + "epoch": 0.24, + "learning_rate": 8.857343526994203e-08, + "logits/chosen": -2.211667060852051, + "logits/rejected": -2.2088675498962402, + "logps/chosen": -70.10673522949219, + "logps/rejected": -223.55963134765625, + "loss": 0.8417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8320808410644531, + "rewards/margins": 0.2589057683944702, + "rewards/rejected": -1.0909866094589233, + "step": 4175 + }, + { + "epoch": 0.24, + "learning_rate": 8.856743838678827e-08, + "logits/chosen": -2.0490455627441406, + "logits/rejected": -2.047943592071533, + "logps/chosen": -21.664207458496094, + "logps/rejected": -226.87850952148438, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19122830033302307, + "rewards/margins": 3.1376547813415527, + "rewards/rejected": -2.9464263916015625, + "step": 4176 + }, + { + "epoch": 0.24, + "learning_rate": 8.856144013353046e-08, + "logits/chosen": -2.11521315574646, + "logits/rejected": -2.116727590560913, + "logps/chosen": -15.500277519226074, + "logps/rejected": -27.910993576049805, + "loss": 0.6466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024990558624267578, + "rewards/margins": 0.16814032196998596, + "rewards/rejected": -0.14314976334571838, + "step": 4177 + }, + { + "epoch": 0.24, + "learning_rate": 8.855544051038166e-08, + "logits/chosen": -2.1099636554718018, + "logits/rejected": -2.1140389442443848, + "logps/chosen": -238.58474731445312, + "logps/rejected": -275.4434814453125, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3067290782928467, + "rewards/margins": 3.4503448009490967, + "rewards/rejected": -1.14361572265625, + "step": 4178 + }, + { + "epoch": 0.24, + "learning_rate": 8.854943951755502e-08, + "logits/chosen": -2.1429851055145264, + "logits/rejected": -2.1347262859344482, + "logps/chosen": -40.4700927734375, + "logps/rejected": -171.19485473632812, + "loss": 0.7162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5626198053359985, + "rewards/margins": 0.6913245916366577, + "rewards/rejected": -1.2539443969726562, + "step": 4179 + }, + { + "epoch": 0.24, + "learning_rate": 8.854343715526372e-08, + "logits/chosen": -2.0856435298919678, + "logits/rejected": -2.026606798171997, + "logps/chosen": -168.99713134765625, + "logps/rejected": -427.384765625, + "loss": 0.1498, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2593872547149658, + "rewards/margins": 2.1428589820861816, + "rewards/rejected": -0.883471667766571, + "step": 4180 + }, + { + "epoch": 0.24, + "learning_rate": 8.8537433423721e-08, + "logits/chosen": -2.0422234535217285, + "logits/rejected": -2.00529146194458, + "logps/chosen": -157.69097900390625, + "logps/rejected": -402.2411193847656, + "loss": 0.2108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3414276838302612, + "rewards/margins": 1.2882142066955566, + "rewards/rejected": 0.05321350321173668, + "step": 4181 + }, + { + "epoch": 0.24, + "learning_rate": 8.853142832314015e-08, + "logits/chosen": -1.9321012496948242, + "logits/rejected": -1.9414052963256836, + "logps/chosen": -283.7438659667969, + "logps/rejected": -366.8032531738281, + "loss": 0.1814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8535979986190796, + "rewards/margins": 1.3488585948944092, + "rewards/rejected": 0.5047394037246704, + "step": 4182 + }, + { + "epoch": 0.24, + "learning_rate": 8.852542185373447e-08, + "logits/chosen": -1.974390983581543, + "logits/rejected": -1.9748907089233398, + "logps/chosen": -29.977773666381836, + "logps/rejected": -197.9732208251953, + "loss": 0.3835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21521663665771484, + "rewards/margins": 1.425856590270996, + "rewards/rejected": -1.2106399536132812, + "step": 4183 + }, + { + "epoch": 0.24, + "learning_rate": 8.851941401571736e-08, + "logits/chosen": -2.0264670848846436, + "logits/rejected": -2.023341178894043, + "logps/chosen": -47.79008102416992, + "logps/rejected": -182.1667022705078, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25215911865234375, + "rewards/margins": 2.465402364730835, + "rewards/rejected": -2.213243246078491, + "step": 4184 + }, + { + "epoch": 0.24, + "learning_rate": 8.851340480930225e-08, + "logits/chosen": -2.017021894454956, + "logits/rejected": -2.0093791484832764, + "logps/chosen": -180.8625946044922, + "logps/rejected": -366.9915771484375, + "loss": 0.2631, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1905441284179688, + "rewards/margins": 0.8968307375907898, + "rewards/rejected": 0.29371339082717896, + "step": 4185 + }, + { + "epoch": 0.24, + "learning_rate": 8.850739423470261e-08, + "logits/chosen": -2.104386806488037, + "logits/rejected": -2.097024917602539, + "logps/chosen": -66.87321472167969, + "logps/rejected": -197.64894104003906, + "loss": 0.5546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35426026582717896, + "rewards/margins": 0.1302536129951477, + "rewards/rejected": 0.22400665283203125, + "step": 4186 + }, + { + "epoch": 0.24, + "learning_rate": 8.850138229213198e-08, + "logits/chosen": -2.028827667236328, + "logits/rejected": -2.005739212036133, + "logps/chosen": -191.23513793945312, + "logps/rejected": -352.31195068359375, + "loss": 0.1555, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.850286841392517, + "rewards/margins": 1.342779517173767, + "rewards/rejected": 0.50750732421875, + "step": 4187 + }, + { + "epoch": 0.24, + "learning_rate": 8.84953689818039e-08, + "logits/chosen": -2.0108015537261963, + "logits/rejected": -1.9998420476913452, + "logps/chosen": -238.90371704101562, + "logps/rejected": -286.0639343261719, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8064255118370056, + "rewards/margins": 1.4993667602539062, + "rewards/rejected": -0.6929413080215454, + "step": 4188 + }, + { + "epoch": 0.24, + "learning_rate": 8.848935430393202e-08, + "logits/chosen": -2.0379559993743896, + "logits/rejected": -2.037611722946167, + "logps/chosen": -29.27580451965332, + "logps/rejected": -90.83998107910156, + "loss": 0.6226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01760539971292019, + "rewards/margins": 0.3667837083339691, + "rewards/rejected": -0.3491783142089844, + "step": 4189 + }, + { + "epoch": 0.24, + "learning_rate": 8.848333825873e-08, + "logits/chosen": -2.1216461658477783, + "logits/rejected": -2.1059610843658447, + "logps/chosen": -30.30743408203125, + "logps/rejected": -218.18173217773438, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1479816436767578, + "rewards/margins": 2.372804641723633, + "rewards/rejected": -2.224822998046875, + "step": 4190 + }, + { + "epoch": 0.24, + "learning_rate": 8.847732084641157e-08, + "logits/chosen": -1.9894405603408813, + "logits/rejected": -1.9636049270629883, + "logps/chosen": -90.4224624633789, + "logps/rejected": -381.1052551269531, + "loss": 0.4622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3926078975200653, + "rewards/margins": 2.95991587638855, + "rewards/rejected": -3.3525238037109375, + "step": 4191 + }, + { + "epoch": 0.24, + "learning_rate": 8.847130206719049e-08, + "logits/chosen": -2.0038695335388184, + "logits/rejected": -2.015744686126709, + "logps/chosen": -248.10433959960938, + "logps/rejected": -474.2284851074219, + "loss": 0.0972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7531158924102783, + "rewards/margins": 2.0804474353790283, + "rewards/rejected": -0.32733154296875, + "step": 4192 + }, + { + "epoch": 0.24, + "learning_rate": 8.846528192128058e-08, + "logits/chosen": -1.7766474485397339, + "logits/rejected": -1.7786656618118286, + "logps/chosen": -1.2441967725753784, + "logps/rejected": -224.7303466796875, + "loss": 0.4214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0610586516559124, + "rewards/margins": 1.942897915840149, + "rewards/rejected": -2.0039565563201904, + "step": 4193 + }, + { + "epoch": 0.24, + "learning_rate": 8.845926040889568e-08, + "logits/chosen": -1.9856359958648682, + "logits/rejected": -1.9821851253509521, + "logps/chosen": -36.14057540893555, + "logps/rejected": -92.18035125732422, + "loss": 0.56, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33159753680229187, + "rewards/margins": 0.2771778404712677, + "rewards/rejected": 0.054419707506895065, + "step": 4194 + }, + { + "epoch": 0.24, + "learning_rate": 8.845323753024976e-08, + "logits/chosen": -2.259498357772827, + "logits/rejected": -2.2566983699798584, + "logps/chosen": -6.056857109069824, + "logps/rejected": -141.61752319335938, + "loss": 0.4007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20326080918312073, + "rewards/margins": 1.3260955810546875, + "rewards/rejected": -1.1228348016738892, + "step": 4195 + }, + { + "epoch": 0.24, + "learning_rate": 8.844721328555673e-08, + "logits/chosen": -2.0565555095672607, + "logits/rejected": -2.0364906787872314, + "logps/chosen": -101.06161499023438, + "logps/rejected": -193.46156311035156, + "loss": 0.5692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39802780747413635, + "rewards/margins": 1.4331597089767456, + "rewards/rejected": -1.8311874866485596, + "step": 4196 + }, + { + "epoch": 0.24, + "learning_rate": 8.844118767503062e-08, + "logits/chosen": -2.035104990005493, + "logits/rejected": -2.0337414741516113, + "logps/chosen": -25.91703987121582, + "logps/rejected": -77.21559143066406, + "loss": 0.6504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24051551520824432, + "rewards/margins": 0.40172386169433594, + "rewards/rejected": -0.6422393918037415, + "step": 4197 + }, + { + "epoch": 0.24, + "learning_rate": 8.843516069888548e-08, + "logits/chosen": -1.927408218383789, + "logits/rejected": -1.9333304166793823, + "logps/chosen": -0.00022779205755796283, + "logps/rejected": -125.93704986572266, + "loss": 0.4433, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.066348760214169e-06, + "rewards/margins": 1.5346190929412842, + "rewards/rejected": -1.534613013267517, + "step": 4198 + }, + { + "epoch": 0.24, + "learning_rate": 8.842913235733543e-08, + "logits/chosen": -1.9900394678115845, + "logits/rejected": -1.983624815940857, + "logps/chosen": -29.87529182434082, + "logps/rejected": -106.04640197753906, + "loss": 0.4848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08124752342700958, + "rewards/margins": 0.9462270736694336, + "rewards/rejected": -0.8649795651435852, + "step": 4199 + }, + { + "epoch": 0.24, + "learning_rate": 8.842310265059462e-08, + "logits/chosen": -2.0041558742523193, + "logits/rejected": -2.0742475986480713, + "logps/chosen": -309.34429931640625, + "logps/rejected": -445.9875183105469, + "loss": 0.0947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4116028547286987, + "rewards/margins": 3.063342332839966, + "rewards/rejected": -1.651739478111267, + "step": 4200 + }, + { + "epoch": 0.24, + "learning_rate": 8.841707157887725e-08, + "logits/chosen": -1.99383544921875, + "logits/rejected": -1.9985430240631104, + "logps/chosen": -6.532572297146544e-05, + "logps/rejected": -222.01589965820312, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8728779852826847e-06, + "rewards/margins": 3.2329912185668945, + "rewards/rejected": -3.2329940795898438, + "step": 4201 + }, + { + "epoch": 0.24, + "learning_rate": 8.84110391423976e-08, + "logits/chosen": -2.1074154376983643, + "logits/rejected": -2.056398391723633, + "logps/chosen": -215.3243408203125, + "logps/rejected": -346.660400390625, + "loss": 0.3294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7565964460372925, + "rewards/margins": 0.25652623176574707, + "rewards/rejected": 1.5000702142715454, + "step": 4202 + }, + { + "epoch": 0.24, + "learning_rate": 8.840500534136994e-08, + "logits/chosen": -2.0590126514434814, + "logits/rejected": -2.0611939430236816, + "logps/chosen": -54.00105285644531, + "logps/rejected": -106.37494659423828, + "loss": 0.5905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1301429718732834, + "rewards/margins": 0.5134735107421875, + "rewards/rejected": -0.6436164975166321, + "step": 4203 + }, + { + "epoch": 0.24, + "learning_rate": 8.839897017600864e-08, + "logits/chosen": -1.9712650775909424, + "logits/rejected": -1.958727240562439, + "logps/chosen": -205.58920288085938, + "logps/rejected": -310.9195556640625, + "loss": 0.4565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8335174918174744, + "rewards/margins": 0.18502503633499146, + "rewards/rejected": 0.6484924554824829, + "step": 4204 + }, + { + "epoch": 0.24, + "learning_rate": 8.839293364652807e-08, + "logits/chosen": -2.0497207641601562, + "logits/rejected": -2.0378799438476562, + "logps/chosen": -69.82633209228516, + "logps/rejected": -162.4429168701172, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7029396295547485, + "rewards/margins": 1.012276530265808, + "rewards/rejected": -0.3093368709087372, + "step": 4205 + }, + { + "epoch": 0.24, + "learning_rate": 8.83868957531427e-08, + "logits/chosen": -2.0675885677337646, + "logits/rejected": -2.0624496936798096, + "logps/chosen": -174.03817749023438, + "logps/rejected": -288.2379150390625, + "loss": 0.4507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.627551257610321, + "rewards/margins": 0.4400482177734375, + "rewards/rejected": 0.18750305473804474, + "step": 4206 + }, + { + "epoch": 0.24, + "learning_rate": 8.838085649606704e-08, + "logits/chosen": -1.8257267475128174, + "logits/rejected": -1.8327736854553223, + "logps/chosen": -50.195919036865234, + "logps/rejected": -187.6907196044922, + "loss": 0.6244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13796959817409515, + "rewards/margins": 0.5465289950370789, + "rewards/rejected": -0.6844986081123352, + "step": 4207 + }, + { + "epoch": 0.24, + "learning_rate": 8.837481587551561e-08, + "logits/chosen": -2.1309783458709717, + "logits/rejected": -2.0901365280151367, + "logps/chosen": -181.7342529296875, + "logps/rejected": -322.17822265625, + "loss": 0.5137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4164672791957855, + "rewards/margins": 0.135101318359375, + "rewards/rejected": 0.2813659608364105, + "step": 4208 + }, + { + "epoch": 0.24, + "learning_rate": 8.8368773891703e-08, + "logits/chosen": -1.9694294929504395, + "logits/rejected": -1.8630188703536987, + "logps/chosen": -393.56884765625, + "logps/rejected": -696.267822265625, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3929595947265625, + "rewards/margins": 1.5613921880722046, + "rewards/rejected": -1.168432593345642, + "step": 4209 + }, + { + "epoch": 0.24, + "learning_rate": 8.836273054484387e-08, + "logits/chosen": -2.134370803833008, + "logits/rejected": -2.1255369186401367, + "logps/chosen": -2.7894688173546456e-05, + "logps/rejected": -120.25166320800781, + "loss": 0.4757, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1457199750329892e-07, + "rewards/margins": 1.2035731077194214, + "rewards/rejected": -1.2035728693008423, + "step": 4210 + }, + { + "epoch": 0.25, + "learning_rate": 8.835668583515288e-08, + "logits/chosen": -1.9597623348236084, + "logits/rejected": -1.9520442485809326, + "logps/chosen": -44.938720703125, + "logps/rejected": -260.9400634765625, + "loss": 0.5013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3551323115825653, + "rewards/margins": 1.8998007774353027, + "rewards/rejected": -2.2549331188201904, + "step": 4211 + }, + { + "epoch": 0.25, + "learning_rate": 8.83506397628448e-08, + "logits/chosen": -2.175799608230591, + "logits/rejected": -2.166147232055664, + "logps/chosen": -48.71994400024414, + "logps/rejected": -177.95187377929688, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3977096676826477, + "rewards/margins": 2.101214647293091, + "rewards/rejected": -1.7035049200057983, + "step": 4212 + }, + { + "epoch": 0.25, + "learning_rate": 8.83445923281344e-08, + "logits/chosen": -2.0998587608337402, + "logits/rejected": -2.0923712253570557, + "logps/chosen": -2.6106679797521792e-05, + "logps/rejected": -90.54122161865234, + "loss": 0.7372, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.1722779542396893e-07, + "rewards/margins": -0.17144207656383514, + "rewards/rejected": 0.17144165933132172, + "step": 4213 + }, + { + "epoch": 0.25, + "learning_rate": 8.83385435312365e-08, + "logits/chosen": -2.1262779235839844, + "logits/rejected": -2.1243090629577637, + "logps/chosen": -8.320893287658691, + "logps/rejected": -58.110107421875, + "loss": 0.663, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08934793621301651, + "rewards/margins": -0.036132343113422394, + "rewards/rejected": 0.1254802793264389, + "step": 4214 + }, + { + "epoch": 0.25, + "learning_rate": 8.833249337236603e-08, + "logits/chosen": -2.0559568405151367, + "logits/rejected": -2.011631727218628, + "logps/chosen": -189.5521240234375, + "logps/rejected": -321.1125793457031, + "loss": 0.3039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6374969482421875, + "rewards/margins": 1.3983216285705566, + "rewards/rejected": -0.7608246207237244, + "step": 4215 + }, + { + "epoch": 0.25, + "learning_rate": 8.832644185173788e-08, + "logits/chosen": -2.0536887645721436, + "logits/rejected": -2.0116329193115234, + "logps/chosen": -194.9098358154297, + "logps/rejected": -413.0777587890625, + "loss": 0.2542, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.405146837234497, + "rewards/margins": 0.746254026889801, + "rewards/rejected": 0.658892810344696, + "step": 4216 + }, + { + "epoch": 0.25, + "learning_rate": 8.832038896956704e-08, + "logits/chosen": -1.9251618385314941, + "logits/rejected": -1.9018840789794922, + "logps/chosen": -265.6654052734375, + "logps/rejected": -490.64874267578125, + "loss": 0.1315, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.149688720703125, + "rewards/margins": 2.015368700027466, + "rewards/rejected": -0.865679919719696, + "step": 4217 + }, + { + "epoch": 0.25, + "learning_rate": 8.831433472606853e-08, + "logits/chosen": -2.0189082622528076, + "logits/rejected": -2.0304696559906006, + "logps/chosen": -48.19126510620117, + "logps/rejected": -195.4128875732422, + "loss": 0.452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23573532700538635, + "rewards/margins": 2.5696158409118652, + "rewards/rejected": -2.8053512573242188, + "step": 4218 + }, + { + "epoch": 0.25, + "learning_rate": 8.830827912145744e-08, + "logits/chosen": -2.0948336124420166, + "logits/rejected": -2.08894419670105, + "logps/chosen": -49.734710693359375, + "logps/rejected": -223.52294921875, + "loss": 0.5889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15903282165527344, + "rewards/margins": 0.13189658522605896, + "rewards/rejected": 0.02713623084127903, + "step": 4219 + }, + { + "epoch": 0.25, + "learning_rate": 8.83022221559489e-08, + "logits/chosen": -2.015629529953003, + "logits/rejected": -2.0052125453948975, + "logps/chosen": -33.02724075317383, + "logps/rejected": -241.11659240722656, + "loss": 0.3933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3392658233642578, + "rewards/margins": 1.2952580451965332, + "rewards/rejected": -0.9559921622276306, + "step": 4220 + }, + { + "epoch": 0.25, + "learning_rate": 8.829616382975806e-08, + "logits/chosen": -2.2595560550689697, + "logits/rejected": -2.2487356662750244, + "logps/chosen": -0.4788571000099182, + "logps/rejected": -106.02435302734375, + "loss": 0.6619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008665687404572964, + "rewards/margins": 0.07413962483406067, + "rewards/rejected": -0.06547393649816513, + "step": 4221 + }, + { + "epoch": 0.25, + "learning_rate": 8.829010414310017e-08, + "logits/chosen": -1.9155765771865845, + "logits/rejected": -1.9137539863586426, + "logps/chosen": -148.1734619140625, + "logps/rejected": -268.24224853515625, + "loss": 0.4079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9819290041923523, + "rewards/margins": 0.24600064754486084, + "rewards/rejected": 0.7359283566474915, + "step": 4222 + }, + { + "epoch": 0.25, + "learning_rate": 8.828404309619046e-08, + "logits/chosen": -2.0277884006500244, + "logits/rejected": -2.026245355606079, + "logps/chosen": -34.43082809448242, + "logps/rejected": -177.61404418945312, + "loss": 0.5035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20069657266139984, + "rewards/margins": 1.3610649108886719, + "rewards/rejected": -1.561761498451233, + "step": 4223 + }, + { + "epoch": 0.25, + "learning_rate": 8.827798068924429e-08, + "logits/chosen": -2.0464024543762207, + "logits/rejected": -2.0618085861206055, + "logps/chosen": -91.23760986328125, + "logps/rejected": -271.31707763671875, + "loss": 0.2387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6791191101074219, + "rewards/margins": 2.20988392829895, + "rewards/rejected": -1.5307648181915283, + "step": 4224 + }, + { + "epoch": 0.25, + "learning_rate": 8.827191692247699e-08, + "logits/chosen": -1.9786546230316162, + "logits/rejected": -1.9624416828155518, + "logps/chosen": -168.18063354492188, + "logps/rejected": -253.3795623779297, + "loss": 0.4774, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0071487426757812, + "rewards/margins": 0.00632631778717041, + "rewards/rejected": 1.0008224248886108, + "step": 4225 + }, + { + "epoch": 0.25, + "learning_rate": 8.826585179610401e-08, + "logits/chosen": -2.0717196464538574, + "logits/rejected": -2.052931070327759, + "logps/chosen": -168.76513671875, + "logps/rejected": -293.5430908203125, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4283294677734375, + "rewards/margins": 0.5148559212684631, + "rewards/rejected": 0.9134735465049744, + "step": 4226 + }, + { + "epoch": 0.25, + "learning_rate": 8.82597853103408e-08, + "logits/chosen": -1.866127848625183, + "logits/rejected": -1.8639650344848633, + "logps/chosen": -38.950191497802734, + "logps/rejected": -101.69280242919922, + "loss": 0.4903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16840477287769318, + "rewards/margins": 0.8097980618476868, + "rewards/rejected": -0.6413933038711548, + "step": 4227 + }, + { + "epoch": 0.25, + "learning_rate": 8.825371746540284e-08, + "logits/chosen": -2.151895761489868, + "logits/rejected": -2.1510488986968994, + "logps/chosen": -13.007810592651367, + "logps/rejected": -122.39288330078125, + "loss": 0.6143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05006609112024307, + "rewards/margins": 0.3820408880710602, + "rewards/rejected": -0.3319748044013977, + "step": 4228 + }, + { + "epoch": 0.25, + "learning_rate": 8.824764826150574e-08, + "logits/chosen": -2.1298673152923584, + "logits/rejected": -2.1053059101104736, + "logps/chosen": -68.29505157470703, + "logps/rejected": -285.04388427734375, + "loss": 0.4292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17089156806468964, + "rewards/margins": 3.572141408920288, + "rewards/rejected": -3.743032932281494, + "step": 4229 + }, + { + "epoch": 0.25, + "learning_rate": 8.824157769886509e-08, + "logits/chosen": -2.124662399291992, + "logits/rejected": -2.1257379055023193, + "logps/chosen": -11.06705379486084, + "logps/rejected": -68.92681121826172, + "loss": 0.5853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12617187201976776, + "rewards/margins": 0.3611289858818054, + "rewards/rejected": -0.23495712876319885, + "step": 4230 + }, + { + "epoch": 0.25, + "learning_rate": 8.823550577769651e-08, + "logits/chosen": -1.852554440498352, + "logits/rejected": -1.838375210762024, + "logps/chosen": -224.8169403076172, + "logps/rejected": -377.22149658203125, + "loss": 0.0947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.564324975013733, + "rewards/margins": 2.635730028152466, + "rewards/rejected": -1.071405053138733, + "step": 4231 + }, + { + "epoch": 0.25, + "learning_rate": 8.822943249821576e-08, + "logits/chosen": -1.994977355003357, + "logits/rejected": -1.997962474822998, + "logps/chosen": -2.8901684284210205, + "logps/rejected": -256.78961181640625, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014063906855881214, + "rewards/margins": 2.8132855892181396, + "rewards/rejected": -2.7992217540740967, + "step": 4232 + }, + { + "epoch": 0.25, + "learning_rate": 8.822335786063855e-08, + "logits/chosen": -2.034292459487915, + "logits/rejected": -2.0165183544158936, + "logps/chosen": -8.782602310180664, + "logps/rejected": -221.34945678710938, + "loss": 0.3748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.190754696726799, + "rewards/margins": 1.8830764293670654, + "rewards/rejected": -1.69232177734375, + "step": 4233 + }, + { + "epoch": 0.25, + "learning_rate": 8.821728186518071e-08, + "logits/chosen": -1.8483303785324097, + "logits/rejected": -1.8042819499969482, + "logps/chosen": -177.99740600585938, + "logps/rejected": -247.70281982421875, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7822815179824829, + "rewards/margins": 0.9152008295059204, + "rewards/rejected": -0.1329193115234375, + "step": 4234 + }, + { + "epoch": 0.25, + "learning_rate": 8.821120451205808e-08, + "logits/chosen": -2.019257068634033, + "logits/rejected": -2.023885488510132, + "logps/chosen": -20.853351593017578, + "logps/rejected": -147.62997436523438, + "loss": 0.5406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1537197083234787, + "rewards/margins": 0.47590601444244385, + "rewards/rejected": -0.32218629121780396, + "step": 4235 + }, + { + "epoch": 0.25, + "learning_rate": 8.820512580148656e-08, + "logits/chosen": -2.0091378688812256, + "logits/rejected": -1.9847513437271118, + "logps/chosen": -258.9022216796875, + "logps/rejected": -468.1782531738281, + "loss": 0.0855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4551910161972046, + "rewards/margins": 2.867480516433716, + "rewards/rejected": -1.4122895002365112, + "step": 4236 + }, + { + "epoch": 0.25, + "learning_rate": 8.819904573368207e-08, + "logits/chosen": -2.0299289226531982, + "logits/rejected": -2.024829387664795, + "logps/chosen": -33.803993225097656, + "logps/rejected": -326.50543212890625, + "loss": 0.2593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37809792160987854, + "rewards/margins": 4.100445747375488, + "rewards/rejected": -3.7223479747772217, + "step": 4237 + }, + { + "epoch": 0.25, + "learning_rate": 8.819296430886063e-08, + "logits/chosen": -1.9588561058044434, + "logits/rejected": -1.9506456851959229, + "logps/chosen": -135.39317321777344, + "logps/rejected": -268.8063049316406, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2007354497909546, + "rewards/margins": 0.4411376714706421, + "rewards/rejected": 0.7595977783203125, + "step": 4238 + }, + { + "epoch": 0.25, + "learning_rate": 8.818688152723829e-08, + "logits/chosen": -2.0477333068847656, + "logits/rejected": -2.054915428161621, + "logps/chosen": -207.7776336669922, + "logps/rejected": -329.33038330078125, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.796107530593872, + "rewards/margins": 4.3050217628479, + "rewards/rejected": -2.5089142322540283, + "step": 4239 + }, + { + "epoch": 0.25, + "learning_rate": 8.818079738903111e-08, + "logits/chosen": -2.1020119190216064, + "logits/rejected": -2.09867262840271, + "logps/chosen": -0.003076172899454832, + "logps/rejected": -175.89239501953125, + "loss": 0.4368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015406122838612646, + "rewards/margins": 1.6208223104476929, + "rewards/rejected": -1.6209763288497925, + "step": 4240 + }, + { + "epoch": 0.25, + "learning_rate": 8.817471189445524e-08, + "logits/chosen": -1.8826894760131836, + "logits/rejected": -1.8722947835922241, + "logps/chosen": -36.26972579956055, + "logps/rejected": -279.54461669921875, + "loss": 0.3232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1702709197998047, + "rewards/margins": 2.75144624710083, + "rewards/rejected": -2.5811753273010254, + "step": 4241 + }, + { + "epoch": 0.25, + "learning_rate": 8.816862504372689e-08, + "logits/chosen": -2.2335970401763916, + "logits/rejected": -2.2208011150360107, + "logps/chosen": -7.0361647605896, + "logps/rejected": -290.0146179199219, + "loss": 0.4063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14415636658668518, + "rewards/margins": 3.340657949447632, + "rewards/rejected": -3.484814405441284, + "step": 4242 + }, + { + "epoch": 0.25, + "learning_rate": 8.816253683706228e-08, + "logits/chosen": -2.040968418121338, + "logits/rejected": -2.0166015625, + "logps/chosen": -160.79212951660156, + "logps/rejected": -428.14764404296875, + "loss": 0.4428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48593902587890625, + "rewards/margins": 0.6378433108329773, + "rewards/rejected": -0.15190429985523224, + "step": 4243 + }, + { + "epoch": 0.25, + "learning_rate": 8.815644727467766e-08, + "logits/chosen": -2.0886499881744385, + "logits/rejected": -2.0826148986816406, + "logps/chosen": -1.7470530271530151, + "logps/rejected": -209.47042846679688, + "loss": 0.4238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02931903675198555, + "rewards/margins": 1.8393332958221436, + "rewards/rejected": -1.86865234375, + "step": 4244 + }, + { + "epoch": 0.25, + "learning_rate": 8.81503563567894e-08, + "logits/chosen": -1.9438607692718506, + "logits/rejected": -1.9457569122314453, + "logps/chosen": -0.016490761190652847, + "logps/rejected": -103.9532470703125, + "loss": 0.4454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009613089496269822, + "rewards/margins": 1.5008255243301392, + "rewards/rejected": -1.5017868280410767, + "step": 4245 + }, + { + "epoch": 0.25, + "learning_rate": 8.814426408361388e-08, + "logits/chosen": -1.9636814594268799, + "logits/rejected": -1.9433116912841797, + "logps/chosen": -240.35342407226562, + "logps/rejected": -452.41326904296875, + "loss": 0.2048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7712005376815796, + "rewards/margins": 0.9411132335662842, + "rewards/rejected": 0.8300873041152954, + "step": 4246 + }, + { + "epoch": 0.25, + "learning_rate": 8.81381704553675e-08, + "logits/chosen": -2.1919541358947754, + "logits/rejected": -2.1897640228271484, + "logps/chosen": -37.85674285888672, + "logps/rejected": -139.23660278320312, + "loss": 0.4179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2409217804670334, + "rewards/margins": 1.4809173345565796, + "rewards/rejected": -1.2399955987930298, + "step": 4247 + }, + { + "epoch": 0.25, + "learning_rate": 8.813207547226677e-08, + "logits/chosen": -2.1390128135681152, + "logits/rejected": -2.131390333175659, + "logps/chosen": -81.19501495361328, + "logps/rejected": -234.22506713867188, + "loss": 0.4885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025342559441924095, + "rewards/margins": 1.238532304763794, + "rewards/rejected": -1.213189721107483, + "step": 4248 + }, + { + "epoch": 0.25, + "learning_rate": 8.812597913452819e-08, + "logits/chosen": -2.2028071880340576, + "logits/rejected": -2.1817710399627686, + "logps/chosen": -228.96771240234375, + "logps/rejected": -291.5164794921875, + "loss": 0.4026, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.0646729469299316, + "rewards/margins": -0.07109975814819336, + "rewards/rejected": 2.135772705078125, + "step": 4249 + }, + { + "epoch": 0.25, + "learning_rate": 8.811988144236833e-08, + "logits/chosen": -2.0475385189056396, + "logits/rejected": -2.0452661514282227, + "logps/chosen": -0.00012659643834922463, + "logps/rejected": -28.128782272338867, + "loss": 0.6236, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.505876404437004e-06, + "rewards/margins": 0.30877885222435, + "rewards/rejected": -0.3087833523750305, + "step": 4250 + }, + { + "epoch": 0.25, + "learning_rate": 8.811378239600382e-08, + "logits/chosen": -2.0385634899139404, + "logits/rejected": -2.0367040634155273, + "logps/chosen": -73.66773986816406, + "logps/rejected": -228.10638427734375, + "loss": 0.5032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3288131654262543, + "rewards/margins": 0.4521285891532898, + "rewards/rejected": -0.12331543117761612, + "step": 4251 + }, + { + "epoch": 0.25, + "learning_rate": 8.810768199565131e-08, + "logits/chosen": -2.1996941566467285, + "logits/rejected": -2.1922426223754883, + "logps/chosen": -93.69133758544922, + "logps/rejected": -274.2013244628906, + "loss": 0.5148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12108764797449112, + "rewards/margins": 0.9722656011581421, + "rewards/rejected": -1.093353271484375, + "step": 4252 + }, + { + "epoch": 0.25, + "learning_rate": 8.810158024152756e-08, + "logits/chosen": -2.0766043663024902, + "logits/rejected": -2.0629382133483887, + "logps/chosen": -0.29936492443084717, + "logps/rejected": -524.1210327148438, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00849259551614523, + "rewards/margins": 5.2000908851623535, + "rewards/rejected": -5.191598415374756, + "step": 4253 + }, + { + "epoch": 0.25, + "learning_rate": 8.809547713384928e-08, + "logits/chosen": -2.0334296226501465, + "logits/rejected": -2.0363574028015137, + "logps/chosen": -9.875429153442383, + "logps/rejected": -175.33624267578125, + "loss": 0.4229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07499074935913086, + "rewards/margins": 1.568090796470642, + "rewards/rejected": -1.4931000471115112, + "step": 4254 + }, + { + "epoch": 0.25, + "learning_rate": 8.808937267283332e-08, + "logits/chosen": -2.028514862060547, + "logits/rejected": -2.0056533813476562, + "logps/chosen": -178.83834838867188, + "logps/rejected": -285.11444091796875, + "loss": 0.429, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.90533447265625, + "rewards/margins": -0.13782954216003418, + "rewards/rejected": 2.043164014816284, + "step": 4255 + }, + { + "epoch": 0.25, + "learning_rate": 8.808326685869652e-08, + "logits/chosen": -1.9681814908981323, + "logits/rejected": -1.9666671752929688, + "logps/chosen": -251.42921447753906, + "logps/rejected": -450.1543884277344, + "loss": 0.2126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4455918073654175, + "rewards/margins": 1.1095962524414062, + "rewards/rejected": 0.33599549531936646, + "step": 4256 + }, + { + "epoch": 0.25, + "learning_rate": 8.80771596916558e-08, + "logits/chosen": -2.1847972869873047, + "logits/rejected": -2.170550584793091, + "logps/chosen": -0.00028048455715179443, + "logps/rejected": -200.09439086914062, + "loss": 0.3666, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1110598683881108e-06, + "rewards/margins": 3.1943907737731934, + "rewards/rejected": -3.1943938732147217, + "step": 4257 + }, + { + "epoch": 0.25, + "learning_rate": 8.80710511719281e-08, + "logits/chosen": -2.162388801574707, + "logits/rejected": -2.164515256881714, + "logps/chosen": -41.519989013671875, + "logps/rejected": -227.6747283935547, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.412771612405777, + "rewards/margins": 2.042524814605713, + "rewards/rejected": -1.6297531127929688, + "step": 4258 + }, + { + "epoch": 0.25, + "learning_rate": 8.806494129973046e-08, + "logits/chosen": -2.230792760848999, + "logits/rejected": -2.227407693862915, + "logps/chosen": -249.08111572265625, + "logps/rejected": -285.04705810546875, + "loss": 0.33, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46370241045951843, + "rewards/margins": 1.4513367414474487, + "rewards/rejected": -0.9876343011856079, + "step": 4259 + }, + { + "epoch": 0.25, + "learning_rate": 8.80588300752799e-08, + "logits/chosen": -1.9466806650161743, + "logits/rejected": -1.9061232805252075, + "logps/chosen": -245.1396484375, + "logps/rejected": -558.9509887695312, + "loss": 0.1541, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.894067406654358, + "rewards/margins": 1.377923607826233, + "rewards/rejected": 0.516143798828125, + "step": 4260 + }, + { + "epoch": 0.25, + "learning_rate": 8.805271749879353e-08, + "logits/chosen": -2.017796277999878, + "logits/rejected": -2.0152900218963623, + "logps/chosen": -8.962677001953125, + "logps/rejected": -67.86044311523438, + "loss": 0.5341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10626397281885147, + "rewards/margins": 0.9035146236419678, + "rewards/rejected": -1.009778618812561, + "step": 4261 + }, + { + "epoch": 0.25, + "learning_rate": 8.804660357048848e-08, + "logits/chosen": -2.0865488052368164, + "logits/rejected": -2.075908899307251, + "logps/chosen": -27.78708839416504, + "logps/rejected": -222.0883026123047, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0018712998135015368, + "rewards/margins": 2.5240774154663086, + "rewards/rejected": -2.5222060680389404, + "step": 4262 + }, + { + "epoch": 0.25, + "learning_rate": 8.804048829058198e-08, + "logits/chosen": -2.020055055618286, + "logits/rejected": -2.026338815689087, + "logps/chosen": -8.34615707397461, + "logps/rejected": -88.75355529785156, + "loss": 0.5681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012267875485122204, + "rewards/margins": 0.5137056112289429, + "rewards/rejected": -0.5259734988212585, + "step": 4263 + }, + { + "epoch": 0.25, + "learning_rate": 8.803437165929126e-08, + "logits/chosen": -2.159045934677124, + "logits/rejected": -2.153752326965332, + "logps/chosen": -192.2050018310547, + "logps/rejected": -278.0408020019531, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5547516345977783, + "rewards/margins": 0.6444153189659119, + "rewards/rejected": 0.9103363156318665, + "step": 4264 + }, + { + "epoch": 0.25, + "learning_rate": 8.802825367683362e-08, + "logits/chosen": -2.0533998012542725, + "logits/rejected": -2.0461626052856445, + "logps/chosen": -142.20654296875, + "logps/rejected": -259.6479187011719, + "loss": 0.3683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3276077508926392, + "rewards/margins": 0.25051116943359375, + "rewards/rejected": 1.0770965814590454, + "step": 4265 + }, + { + "epoch": 0.25, + "learning_rate": 8.802213434342637e-08, + "logits/chosen": -2.1578452587127686, + "logits/rejected": -2.159567356109619, + "logps/chosen": -21.914134979248047, + "logps/rejected": -149.19747924804688, + "loss": 0.4364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24326877295970917, + "rewards/margins": 1.0949792861938477, + "rewards/rejected": -0.8517104983329773, + "step": 4266 + }, + { + "epoch": 0.25, + "learning_rate": 8.801601365928695e-08, + "logits/chosen": -2.0381596088409424, + "logits/rejected": -2.0520248413085938, + "logps/chosen": -201.54490661621094, + "logps/rejected": -401.9229736328125, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9229645133018494, + "rewards/margins": 1.3010650873184204, + "rewards/rejected": -0.37810060381889343, + "step": 4267 + }, + { + "epoch": 0.25, + "learning_rate": 8.800989162463275e-08, + "logits/chosen": -2.06599760055542, + "logits/rejected": -2.1040163040161133, + "logps/chosen": -337.9496765136719, + "logps/rejected": -492.9561767578125, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0287139415740967, + "rewards/margins": 3.3804502487182617, + "rewards/rejected": -1.3517364263534546, + "step": 4268 + }, + { + "epoch": 0.25, + "learning_rate": 8.80037682396813e-08, + "logits/chosen": -1.9920334815979004, + "logits/rejected": -1.9484115839004517, + "logps/chosen": -241.396728515625, + "logps/rejected": -441.10723876953125, + "loss": 0.1268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485498070716858, + "rewards/margins": 2.1384124755859375, + "rewards/rejected": -0.6529144644737244, + "step": 4269 + }, + { + "epoch": 0.25, + "learning_rate": 8.799764350465007e-08, + "logits/chosen": -2.110924005508423, + "logits/rejected": -2.085035562515259, + "logps/chosen": -205.62916564941406, + "logps/rejected": -385.7052001953125, + "loss": 0.3812, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3024215698242188, + "rewards/margins": 0.16045987606048584, + "rewards/rejected": 1.141961693763733, + "step": 4270 + }, + { + "epoch": 0.25, + "learning_rate": 8.79915174197567e-08, + "logits/chosen": -2.1134629249572754, + "logits/rejected": -2.0989739894866943, + "logps/chosen": -41.010963439941406, + "logps/rejected": -279.63958740234375, + "loss": 0.489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13696061074733734, + "rewards/margins": 0.740045964717865, + "rewards/rejected": -0.6030853390693665, + "step": 4271 + }, + { + "epoch": 0.25, + "learning_rate": 8.798538998521879e-08, + "logits/chosen": -1.9927467107772827, + "logits/rejected": -1.99066162109375, + "logps/chosen": -26.348939895629883, + "logps/rejected": -69.08810424804688, + "loss": 0.5673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2755354046821594, + "rewards/margins": 0.23173962533473969, + "rewards/rejected": 0.04379577562212944, + "step": 4272 + }, + { + "epoch": 0.25, + "learning_rate": 8.797926120125401e-08, + "logits/chosen": -2.0390822887420654, + "logits/rejected": -2.0479283332824707, + "logps/chosen": -26.002023696899414, + "logps/rejected": -98.3953857421875, + "loss": 0.5514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08337803184986115, + "rewards/margins": 0.8915334939956665, + "rewards/rejected": -0.9749115109443665, + "step": 4273 + }, + { + "epoch": 0.25, + "learning_rate": 8.79731310680801e-08, + "logits/chosen": -1.6115294694900513, + "logits/rejected": -1.5945430994033813, + "logps/chosen": -168.10829162597656, + "logps/rejected": -287.4114685058594, + "loss": 0.4367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7523452639579773, + "rewards/margins": 0.36497342586517334, + "rewards/rejected": 0.38737183809280396, + "step": 4274 + }, + { + "epoch": 0.25, + "learning_rate": 8.796699958591482e-08, + "logits/chosen": -1.8676419258117676, + "logits/rejected": -1.868841290473938, + "logps/chosen": -58.394752502441406, + "logps/rejected": -253.97781372070312, + "loss": 0.5054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10813026875257492, + "rewards/margins": 0.7946720123291016, + "rewards/rejected": -0.686541736125946, + "step": 4275 + }, + { + "epoch": 0.25, + "learning_rate": 8.796086675497601e-08, + "logits/chosen": -2.1037988662719727, + "logits/rejected": -2.0781099796295166, + "logps/chosen": -115.76209259033203, + "logps/rejected": -409.9439392089844, + "loss": 0.2787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4936210811138153, + "rewards/margins": 2.0914268493652344, + "rewards/rejected": -1.5978058576583862, + "step": 4276 + }, + { + "epoch": 0.25, + "learning_rate": 8.795473257548153e-08, + "logits/chosen": -2.084204912185669, + "logits/rejected": -2.0852155685424805, + "logps/chosen": -29.50522804260254, + "logps/rejected": -59.06568145751953, + "loss": 0.7135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1658029556274414, + "rewards/margins": 0.10653361678123474, + "rewards/rejected": -0.27233657240867615, + "step": 4277 + }, + { + "epoch": 0.25, + "learning_rate": 8.794859704764929e-08, + "logits/chosen": -2.070906162261963, + "logits/rejected": -2.0693142414093018, + "logps/chosen": -23.42906379699707, + "logps/rejected": -86.42261505126953, + "loss": 0.6015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1703464537858963, + "rewards/margins": 0.18178139626979828, + "rewards/rejected": -0.01143493689596653, + "step": 4278 + }, + { + "epoch": 0.25, + "learning_rate": 8.794246017169724e-08, + "logits/chosen": -2.216817617416382, + "logits/rejected": -2.2055928707122803, + "logps/chosen": -6.206165790557861, + "logps/rejected": -198.1431427001953, + "loss": 0.5378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05019502714276314, + "rewards/margins": 0.6875333189964294, + "rewards/rejected": -0.6373382806777954, + "step": 4279 + }, + { + "epoch": 0.25, + "learning_rate": 8.793632194784343e-08, + "logits/chosen": -2.0846779346466064, + "logits/rejected": -2.089045524597168, + "logps/chosen": -14.775657653808594, + "logps/rejected": -234.1753387451172, + "loss": 0.6082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11498651653528214, + "rewards/margins": 0.23023614287376404, + "rewards/rejected": -0.1152496337890625, + "step": 4280 + }, + { + "epoch": 0.25, + "learning_rate": 8.79301823763059e-08, + "logits/chosen": -2.011183023452759, + "logits/rejected": -2.058825731277466, + "logps/chosen": -173.4513702392578, + "logps/rejected": -229.4068145751953, + "loss": 0.3993, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3585189580917358, + "rewards/margins": 0.03913259506225586, + "rewards/rejected": 1.31938636302948, + "step": 4281 + }, + { + "epoch": 0.25, + "learning_rate": 8.792404145730273e-08, + "logits/chosen": -2.0457327365875244, + "logits/rejected": -2.0593154430389404, + "logps/chosen": -149.10986328125, + "logps/rejected": -232.35646057128906, + "loss": 0.4648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5074371695518494, + "rewards/margins": 0.6786361932754517, + "rewards/rejected": -0.1711990386247635, + "step": 4282 + }, + { + "epoch": 0.25, + "learning_rate": 8.791789919105212e-08, + "logits/chosen": -1.9334841966629028, + "logits/rejected": -1.9015388488769531, + "logps/chosen": -287.51251220703125, + "logps/rejected": -449.6929931640625, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.059930443763733, + "rewards/margins": 0.7829437255859375, + "rewards/rejected": 0.276986688375473, + "step": 4283 + }, + { + "epoch": 0.25, + "learning_rate": 8.791175557777224e-08, + "logits/chosen": -1.9515082836151123, + "logits/rejected": -1.9232107400894165, + "logps/chosen": -269.56573486328125, + "logps/rejected": -592.0831298828125, + "loss": 0.1649, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.918829321861267, + "rewards/margins": 1.2669615745544434, + "rewards/rejected": 0.651867687702179, + "step": 4284 + }, + { + "epoch": 0.25, + "learning_rate": 8.790561061768137e-08, + "logits/chosen": -2.2013039588928223, + "logits/rejected": -2.194958448410034, + "logps/chosen": -2.3058764934539795, + "logps/rejected": -164.59796142578125, + "loss": 0.3973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022432399913668633, + "rewards/margins": 2.2868099212646484, + "rewards/rejected": -2.3092422485351562, + "step": 4285 + }, + { + "epoch": 0.25, + "learning_rate": 8.78994643109978e-08, + "logits/chosen": -2.265265703201294, + "logits/rejected": -2.234714984893799, + "logps/chosen": -0.005301685072481632, + "logps/rejected": -405.9368591308594, + "loss": 0.3472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011843116953969002, + "rewards/margins": 5.766721248626709, + "rewards/rejected": -5.766839504241943, + "step": 4286 + }, + { + "epoch": 0.25, + "learning_rate": 8.789331665793984e-08, + "logits/chosen": -2.0164878368377686, + "logits/rejected": -1.9540573358535767, + "logps/chosen": -187.00747680664062, + "logps/rejected": -401.4420166015625, + "loss": 0.0757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.600408911705017, + "rewards/margins": 3.5864500999450684, + "rewards/rejected": -1.9860413074493408, + "step": 4287 + }, + { + "epoch": 0.25, + "learning_rate": 8.788716765872595e-08, + "logits/chosen": -1.9222500324249268, + "logits/rejected": -1.9751309156417847, + "logps/chosen": -168.81399536132812, + "logps/rejected": -300.6523742675781, + "loss": 0.2306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1963897943496704, + "rewards/margins": 1.038659691810608, + "rewards/rejected": 0.1577301025390625, + "step": 4288 + }, + { + "epoch": 0.25, + "learning_rate": 8.788101731357452e-08, + "logits/chosen": -1.834133267402649, + "logits/rejected": -1.8308041095733643, + "logps/chosen": -43.77501678466797, + "logps/rejected": -161.79751586914062, + "loss": 0.4881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2470756620168686, + "rewards/margins": 0.5371940732002258, + "rewards/rejected": -0.29011842608451843, + "step": 4289 + }, + { + "epoch": 0.25, + "learning_rate": 8.787486562270408e-08, + "logits/chosen": -1.9931522607803345, + "logits/rejected": -1.9924348592758179, + "logps/chosen": -61.702606201171875, + "logps/rejected": -235.10659790039062, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2426406890153885, + "rewards/margins": 2.984973192214966, + "rewards/rejected": -2.7423324584960938, + "step": 4290 + }, + { + "epoch": 0.25, + "learning_rate": 8.786871258633313e-08, + "logits/chosen": -1.8236351013183594, + "logits/rejected": -1.8265371322631836, + "logps/chosen": -257.04095458984375, + "logps/rejected": -304.62677001953125, + "loss": 0.1933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4590972661972046, + "rewards/margins": 1.1124053001403809, + "rewards/rejected": 0.34669190645217896, + "step": 4291 + }, + { + "epoch": 0.25, + "learning_rate": 8.786255820468025e-08, + "logits/chosen": -2.173382520675659, + "logits/rejected": -2.157672643661499, + "logps/chosen": -8.022649853955954e-05, + "logps/rejected": -262.949951171875, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1682132026180625e-06, + "rewards/margins": 3.446150541305542, + "rewards/rejected": -3.4461517333984375, + "step": 4292 + }, + { + "epoch": 0.25, + "learning_rate": 8.785640247796413e-08, + "logits/chosen": -2.058746099472046, + "logits/rejected": -2.0572681427001953, + "logps/chosen": -32.69625473022461, + "logps/rejected": -146.54571533203125, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4628414213657379, + "rewards/margins": 2.5031979084014893, + "rewards/rejected": -2.040356397628784, + "step": 4293 + }, + { + "epoch": 0.25, + "learning_rate": 8.785024540640341e-08, + "logits/chosen": -1.9602173566818237, + "logits/rejected": -1.9727321863174438, + "logps/chosen": -187.52816772460938, + "logps/rejected": -275.3929443359375, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.959716796875, + "rewards/margins": 0.5540832281112671, + "rewards/rejected": 0.4056335389614105, + "step": 4294 + }, + { + "epoch": 0.25, + "learning_rate": 8.784408699021682e-08, + "logits/chosen": -2.1442854404449463, + "logits/rejected": -2.137488603591919, + "logps/chosen": -0.11918854713439941, + "logps/rejected": -151.8917236328125, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000252486759563908, + "rewards/margins": 0.9712334275245667, + "rewards/rejected": -0.9714859127998352, + "step": 4295 + }, + { + "epoch": 0.25, + "learning_rate": 8.783792722962315e-08, + "logits/chosen": -2.0310308933258057, + "logits/rejected": -2.0106449127197266, + "logps/chosen": -212.82867431640625, + "logps/rejected": -499.7899169921875, + "loss": 0.0803, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7709747552871704, + "rewards/margins": 2.859271287918091, + "rewards/rejected": -1.0882965326309204, + "step": 4296 + }, + { + "epoch": 0.25, + "learning_rate": 8.783176612484123e-08, + "logits/chosen": -2.2286534309387207, + "logits/rejected": -2.226982831954956, + "logps/chosen": -0.005312125198543072, + "logps/rejected": -216.8818817138672, + "loss": 0.4553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002766283287201077, + "rewards/margins": 1.3607157468795776, + "rewards/rejected": -1.360992431640625, + "step": 4297 + }, + { + "epoch": 0.25, + "learning_rate": 8.78256036760899e-08, + "logits/chosen": -1.7895640134811401, + "logits/rejected": -1.7909096479415894, + "logps/chosen": -2.596456527709961, + "logps/rejected": -63.49026870727539, + "loss": 0.6483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06748590618371964, + "rewards/margins": 0.17747870087623596, + "rewards/rejected": -0.244964599609375, + "step": 4298 + }, + { + "epoch": 0.25, + "learning_rate": 8.781943988358813e-08, + "logits/chosen": -1.9769192934036255, + "logits/rejected": -1.9605132341384888, + "logps/chosen": -52.37970733642578, + "logps/rejected": -465.9668884277344, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05861205980181694, + "rewards/margins": 5.421551704406738, + "rewards/rejected": -5.48016357421875, + "step": 4299 + }, + { + "epoch": 0.25, + "learning_rate": 8.781327474755484e-08, + "logits/chosen": -2.165646553039551, + "logits/rejected": -2.163461685180664, + "logps/chosen": -0.0028278653044253588, + "logps/rejected": -78.09557342529297, + "loss": 0.6779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020992732606828213, + "rewards/margins": 0.0663222074508667, + "rewards/rejected": -0.06653213500976562, + "step": 4300 + }, + { + "epoch": 0.25, + "learning_rate": 8.780710826820907e-08, + "logits/chosen": -2.243901491165161, + "logits/rejected": -2.2389822006225586, + "logps/chosen": -13.099906921386719, + "logps/rejected": -143.25045776367188, + "loss": 0.4656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13975058495998383, + "rewards/margins": 1.0080138444900513, + "rewards/rejected": -0.8682632446289062, + "step": 4301 + }, + { + "epoch": 0.25, + "learning_rate": 8.780094044576988e-08, + "logits/chosen": -2.2089719772338867, + "logits/rejected": -2.2099769115448, + "logps/chosen": -0.5516006946563721, + "logps/rejected": -173.78402709960938, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0068624080158770084, + "rewards/margins": 1.5615900754928589, + "rewards/rejected": -1.5684524774551392, + "step": 4302 + }, + { + "epoch": 0.25, + "learning_rate": 8.779477128045638e-08, + "logits/chosen": -1.9914499521255493, + "logits/rejected": -1.9965217113494873, + "logps/chosen": -289.31494140625, + "logps/rejected": -419.92413330078125, + "loss": 0.2156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0630676746368408, + "rewards/margins": 1.6898438930511475, + "rewards/rejected": -0.6267761588096619, + "step": 4303 + }, + { + "epoch": 0.25, + "learning_rate": 8.778860077248775e-08, + "logits/chosen": -2.0371625423431396, + "logits/rejected": -2.058663845062256, + "logps/chosen": -218.2996368408203, + "logps/rejected": -473.3773498535156, + "loss": 0.2047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3836227655410767, + "rewards/margins": 1.0376724004745483, + "rewards/rejected": 0.34595033526420593, + "step": 4304 + }, + { + "epoch": 0.25, + "learning_rate": 8.778242892208315e-08, + "logits/chosen": -1.8973710536956787, + "logits/rejected": -1.9013137817382812, + "logps/chosen": -237.59788513183594, + "logps/rejected": -340.0915222167969, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7606796622276306, + "rewards/margins": 1.412458896636963, + "rewards/rejected": -0.6517791748046875, + "step": 4305 + }, + { + "epoch": 0.25, + "learning_rate": 8.777625572946188e-08, + "logits/chosen": -2.1927735805511475, + "logits/rejected": -2.162095546722412, + "logps/chosen": -56.47725296020508, + "logps/rejected": -369.8475341796875, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0036945343017578125, + "rewards/margins": 3.7076008319854736, + "rewards/rejected": -3.703906297683716, + "step": 4306 + }, + { + "epoch": 0.25, + "learning_rate": 8.77700811948432e-08, + "logits/chosen": -2.0120890140533447, + "logits/rejected": -2.015990972518921, + "logps/chosen": -12.84510326385498, + "logps/rejected": -116.76301574707031, + "loss": 0.5576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006825351621955633, + "rewards/margins": 0.6786040663719177, + "rewards/rejected": -0.6854293942451477, + "step": 4307 + }, + { + "epoch": 0.25, + "learning_rate": 8.776390531844651e-08, + "logits/chosen": -1.8782190084457397, + "logits/rejected": -1.9032341241836548, + "logps/chosen": -224.95892333984375, + "logps/rejected": -308.3650207519531, + "loss": 0.2536, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.554656982421875, + "rewards/margins": 0.7395538091659546, + "rewards/rejected": 0.8151031732559204, + "step": 4308 + }, + { + "epoch": 0.25, + "learning_rate": 8.775772810049116e-08, + "logits/chosen": -1.9695405960083008, + "logits/rejected": -1.9952738285064697, + "logps/chosen": -289.9783935546875, + "logps/rejected": -471.06707763671875, + "loss": 0.1687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.811877429485321, + "rewards/margins": 2.829241991043091, + "rewards/rejected": -2.017364501953125, + "step": 4309 + }, + { + "epoch": 0.25, + "learning_rate": 8.775154954119661e-08, + "logits/chosen": -1.9937750101089478, + "logits/rejected": -1.9822672605514526, + "logps/chosen": -32.26598358154297, + "logps/rejected": -100.62347412109375, + "loss": 0.7629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4069742262363434, + "rewards/margins": 0.2716298997402191, + "rewards/rejected": -0.6786041259765625, + "step": 4310 + }, + { + "epoch": 0.25, + "learning_rate": 8.774536964078238e-08, + "logits/chosen": -1.9688822031021118, + "logits/rejected": -1.9591760635375977, + "logps/chosen": -4.748785495758057, + "logps/rejected": -137.20834350585938, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1263161152601242, + "rewards/margins": 1.5166428089141846, + "rewards/rejected": -1.390326738357544, + "step": 4311 + }, + { + "epoch": 0.25, + "learning_rate": 8.773918839946797e-08, + "logits/chosen": -2.098958969116211, + "logits/rejected": -2.096752166748047, + "logps/chosen": -0.000754417444113642, + "logps/rejected": -247.23486328125, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.008728400454856e-06, + "rewards/margins": 3.4870645999908447, + "rewards/rejected": -3.487072706222534, + "step": 4312 + }, + { + "epoch": 0.25, + "learning_rate": 8.7733005817473e-08, + "logits/chosen": -1.8366931676864624, + "logits/rejected": -1.829762578010559, + "logps/chosen": -46.15772247314453, + "logps/rejected": -196.68301391601562, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36008378863334656, + "rewards/margins": 1.4009621143341064, + "rewards/rejected": -1.0408782958984375, + "step": 4313 + }, + { + "epoch": 0.25, + "learning_rate": 8.772682189501707e-08, + "logits/chosen": -1.9421206712722778, + "logits/rejected": -1.946637749671936, + "logps/chosen": -0.0019360671285539865, + "logps/rejected": -104.37496948242188, + "loss": 0.5129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001194230790133588, + "rewards/margins": 0.9069972634315491, + "rewards/rejected": -0.907116711139679, + "step": 4314 + }, + { + "epoch": 0.25, + "learning_rate": 8.77206366323199e-08, + "logits/chosen": -2.0370419025421143, + "logits/rejected": -2.0363144874572754, + "logps/chosen": -0.03240015357732773, + "logps/rejected": -109.69493865966797, + "loss": 0.4964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001454462530091405, + "rewards/margins": 0.8692424297332764, + "rewards/rejected": -0.8677879571914673, + "step": 4315 + }, + { + "epoch": 0.25, + "learning_rate": 8.77144500296012e-08, + "logits/chosen": -2.1003520488739014, + "logits/rejected": -2.06573224067688, + "logps/chosen": -57.69074249267578, + "logps/rejected": -234.35064697265625, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2722027003765106, + "rewards/margins": 3.0688915252685547, + "rewards/rejected": -2.7966887950897217, + "step": 4316 + }, + { + "epoch": 0.25, + "learning_rate": 8.770826208708075e-08, + "logits/chosen": -1.9895045757293701, + "logits/rejected": -2.0029516220092773, + "logps/chosen": -206.17323303222656, + "logps/rejected": -294.16619873046875, + "loss": 0.1933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7382538318634033, + "rewards/margins": 0.9812713861465454, + "rewards/rejected": 0.7569824457168579, + "step": 4317 + }, + { + "epoch": 0.25, + "learning_rate": 8.770207280497839e-08, + "logits/chosen": -2.123599052429199, + "logits/rejected": -2.124332904815674, + "logps/chosen": -30.25394058227539, + "logps/rejected": -101.698486328125, + "loss": 0.5644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30127641558647156, + "rewards/margins": 0.2616409361362457, + "rewards/rejected": 0.039635468274354935, + "step": 4318 + }, + { + "epoch": 0.25, + "learning_rate": 8.769588218351398e-08, + "logits/chosen": -1.8424708843231201, + "logits/rejected": -1.8204329013824463, + "logps/chosen": -97.9871826171875, + "logps/rejected": -390.43359375, + "loss": 0.1764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.764678955078125, + "rewards/margins": 5.722937107086182, + "rewards/rejected": -4.958258152008057, + "step": 4319 + }, + { + "epoch": 0.25, + "learning_rate": 8.768969022290744e-08, + "logits/chosen": -2.2018792629241943, + "logits/rejected": -2.200845956802368, + "logps/chosen": -69.28713989257812, + "logps/rejected": -173.7586669921875, + "loss": 0.3747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37827226519584656, + "rewards/margins": 1.0564125776290894, + "rewards/rejected": -0.6781402826309204, + "step": 4320 + }, + { + "epoch": 0.25, + "learning_rate": 8.768349692337876e-08, + "logits/chosen": -2.285726308822632, + "logits/rejected": -2.2861487865448, + "logps/chosen": -49.59550857543945, + "logps/rejected": -250.33103942871094, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06321793049573898, + "rewards/margins": 2.0678794384002686, + "rewards/rejected": -2.0046615600585938, + "step": 4321 + }, + { + "epoch": 0.25, + "learning_rate": 8.767730228514792e-08, + "logits/chosen": -1.8823540210723877, + "logits/rejected": -1.8865567445755005, + "logps/chosen": -43.632301330566406, + "logps/rejected": -328.3719177246094, + "loss": 0.2096, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.756203830242157, + "rewards/margins": 2.696178913116455, + "rewards/rejected": -1.9399750232696533, + "step": 4322 + }, + { + "epoch": 0.25, + "learning_rate": 8.767110630843501e-08, + "logits/chosen": -2.0857861042022705, + "logits/rejected": -2.0794262886047363, + "logps/chosen": -31.21067237854004, + "logps/rejected": -107.56574249267578, + "loss": 0.4765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3636804521083832, + "rewards/margins": 0.6790345907211304, + "rewards/rejected": -0.3153541684150696, + "step": 4323 + }, + { + "epoch": 0.25, + "learning_rate": 8.766490899346012e-08, + "logits/chosen": -1.945657730102539, + "logits/rejected": -1.9415777921676636, + "logps/chosen": -19.1616153717041, + "logps/rejected": -135.77224731445312, + "loss": 0.449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2737022340297699, + "rewards/margins": 0.9234168529510498, + "rewards/rejected": -0.6497146487236023, + "step": 4324 + }, + { + "epoch": 0.25, + "learning_rate": 8.765871034044345e-08, + "logits/chosen": -2.087155818939209, + "logits/rejected": -2.086467981338501, + "logps/chosen": -62.00600814819336, + "logps/rejected": -225.210205078125, + "loss": 0.4464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13010787963867188, + "rewards/margins": 2.0358636379241943, + "rewards/rejected": -2.165971517562866, + "step": 4325 + }, + { + "epoch": 0.25, + "learning_rate": 8.765251034960517e-08, + "logits/chosen": -1.9465752840042114, + "logits/rejected": -1.9401957988739014, + "logps/chosen": -82.13009643554688, + "logps/rejected": -225.1377716064453, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7359504699707031, + "rewards/margins": 1.2207419872283936, + "rewards/rejected": -0.4847915768623352, + "step": 4326 + }, + { + "epoch": 0.25, + "learning_rate": 8.764630902116553e-08, + "logits/chosen": -2.014662027359009, + "logits/rejected": -2.013976573944092, + "logps/chosen": -101.92710876464844, + "logps/rejected": -288.7059326171875, + "loss": 0.5605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07684784382581711, + "rewards/margins": 0.1959884762763977, + "rewards/rejected": -0.119140625, + "step": 4327 + }, + { + "epoch": 0.25, + "learning_rate": 8.764010635534486e-08, + "logits/chosen": -2.137030601501465, + "logits/rejected": -2.1325249671936035, + "logps/chosen": -13.187870025634766, + "logps/rejected": -99.10717010498047, + "loss": 0.4807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13953304290771484, + "rewards/margins": 0.8234876990318298, + "rewards/rejected": -0.683954656124115, + "step": 4328 + }, + { + "epoch": 0.25, + "learning_rate": 8.76339023523635e-08, + "logits/chosen": -2.1957950592041016, + "logits/rejected": -2.182652235031128, + "logps/chosen": -30.044937133789062, + "logps/rejected": -173.05020141601562, + "loss": 0.587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09246521443128586, + "rewards/margins": 0.31333160400390625, + "rewards/rejected": -0.220866397023201, + "step": 4329 + }, + { + "epoch": 0.25, + "learning_rate": 8.762769701244182e-08, + "logits/chosen": -2.1694211959838867, + "logits/rejected": -2.1473660469055176, + "logps/chosen": -27.629552841186523, + "logps/rejected": -347.69012451171875, + "loss": 0.3403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25767308473587036, + "rewards/margins": 4.416826248168945, + "rewards/rejected": -4.67449951171875, + "step": 4330 + }, + { + "epoch": 0.25, + "learning_rate": 8.762149033580031e-08, + "logits/chosen": -1.676188588142395, + "logits/rejected": -1.6890190839767456, + "logps/chosen": -227.24606323242188, + "logps/rejected": -367.68927001953125, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7125107049942017, + "rewards/margins": 1.0705184936523438, + "rewards/rejected": -0.3580078184604645, + "step": 4331 + }, + { + "epoch": 0.25, + "learning_rate": 8.761528232265942e-08, + "logits/chosen": -2.1652796268463135, + "logits/rejected": -2.157766819000244, + "logps/chosen": -0.0003766760346479714, + "logps/rejected": -210.04251098632812, + "loss": 0.4677, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.027505727004609e-06, + "rewards/margins": 1.2939728498458862, + "rewards/rejected": -1.293975830078125, + "step": 4332 + }, + { + "epoch": 0.25, + "learning_rate": 8.76090729732397e-08, + "logits/chosen": -2.151601791381836, + "logits/rejected": -2.1568708419799805, + "logps/chosen": -3.6716159229399636e-05, + "logps/rejected": -90.37725830078125, + "loss": 0.4589, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245146894594654e-07, + "rewards/margins": 1.340869426727295, + "rewards/rejected": -1.3408699035644531, + "step": 4333 + }, + { + "epoch": 0.25, + "learning_rate": 8.760286228776176e-08, + "logits/chosen": -2.0559041500091553, + "logits/rejected": -2.028322458267212, + "logps/chosen": -16.000329971313477, + "logps/rejected": -428.888671875, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12500667572021484, + "rewards/margins": 5.941153526306152, + "rewards/rejected": -5.8161468505859375, + "step": 4334 + }, + { + "epoch": 0.25, + "learning_rate": 8.75966502664462e-08, + "logits/chosen": -2.244410753250122, + "logits/rejected": -2.240048885345459, + "logps/chosen": -40.68570327758789, + "logps/rejected": -135.96054077148438, + "loss": 0.588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17958565056324005, + "rewards/margins": 0.20014838874340057, + "rewards/rejected": -0.02056274376809597, + "step": 4335 + }, + { + "epoch": 0.25, + "learning_rate": 8.759043690951374e-08, + "logits/chosen": -1.9248583316802979, + "logits/rejected": -1.890439748764038, + "logps/chosen": -327.64300537109375, + "logps/rejected": -456.10479736328125, + "loss": 0.0585, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.328155517578125, + "rewards/margins": 3.218432664871216, + "rewards/rejected": -0.890277087688446, + "step": 4336 + }, + { + "epoch": 0.25, + "learning_rate": 8.758422221718507e-08, + "logits/chosen": -2.0040130615234375, + "logits/rejected": -1.9885694980621338, + "logps/chosen": -245.52389526367188, + "logps/rejected": -483.45843505859375, + "loss": 0.07, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5648285150527954, + "rewards/margins": 4.048532009124756, + "rewards/rejected": -2.48370361328125, + "step": 4337 + }, + { + "epoch": 0.25, + "learning_rate": 8.757800618968099e-08, + "logits/chosen": -1.8061153888702393, + "logits/rejected": -1.7902953624725342, + "logps/chosen": -143.21249389648438, + "logps/rejected": -318.8363037109375, + "loss": 0.5453, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7112060785293579, + "rewards/margins": -0.10968017578125, + "rewards/rejected": 0.8208862543106079, + "step": 4338 + }, + { + "epoch": 0.25, + "learning_rate": 8.757178882722229e-08, + "logits/chosen": -2.133669853210449, + "logits/rejected": -2.1384778022766113, + "logps/chosen": -75.0537338256836, + "logps/rejected": -226.6506805419922, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2617851197719574, + "rewards/margins": 2.0530190467834473, + "rewards/rejected": -1.7912338972091675, + "step": 4339 + }, + { + "epoch": 0.25, + "learning_rate": 8.75655701300299e-08, + "logits/chosen": -2.1412036418914795, + "logits/rejected": -2.135561227798462, + "logps/chosen": -0.018476560711860657, + "logps/rejected": -127.23612976074219, + "loss": 0.5755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040419315337203443, + "rewards/margins": 0.5440141558647156, + "rewards/rejected": -0.5444183349609375, + "step": 4340 + }, + { + "epoch": 0.25, + "learning_rate": 8.755935009832469e-08, + "logits/chosen": -2.032627582550049, + "logits/rejected": -2.030702590942383, + "logps/chosen": -33.743656158447266, + "logps/rejected": -239.43136596679688, + "loss": 0.5491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30814647674560547, + "rewards/margins": 1.4340273141860962, + "rewards/rejected": -1.7421737909317017, + "step": 4341 + }, + { + "epoch": 0.25, + "learning_rate": 8.755312873232766e-08, + "logits/chosen": -2.191251754760742, + "logits/rejected": -2.173548460006714, + "logps/chosen": -61.142578125, + "logps/rejected": -166.10740661621094, + "loss": 0.4666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13599319756031036, + "rewards/margins": 1.2418984174728394, + "rewards/rejected": -1.1059051752090454, + "step": 4342 + }, + { + "epoch": 0.25, + "learning_rate": 8.754690603225977e-08, + "logits/chosen": -2.120065689086914, + "logits/rejected": -2.112308979034424, + "logps/chosen": -39.27275848388672, + "logps/rejected": -172.02890014648438, + "loss": 0.6562, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14488831162452698, + "rewards/margins": -0.016961663961410522, + "rewards/rejected": 0.1618499755859375, + "step": 4343 + }, + { + "epoch": 0.25, + "learning_rate": 8.754068199834215e-08, + "logits/chosen": -2.0313704013824463, + "logits/rejected": -2.0254979133605957, + "logps/chosen": -0.00024901554570533335, + "logps/rejected": -191.98724365234375, + "loss": 0.3751, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0848016245290637e-06, + "rewards/margins": 2.8337268829345703, + "rewards/rejected": -2.833728075027466, + "step": 4344 + }, + { + "epoch": 0.25, + "learning_rate": 8.753445663079585e-08, + "logits/chosen": -2.078172206878662, + "logits/rejected": -2.069823980331421, + "logps/chosen": -10.417105674743652, + "logps/rejected": -190.16856384277344, + "loss": 0.484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055231668055057526, + "rewards/margins": 1.075882911682129, + "rewards/rejected": -1.0206512212753296, + "step": 4345 + }, + { + "epoch": 0.25, + "learning_rate": 8.752822992984206e-08, + "logits/chosen": -2.1368470191955566, + "logits/rejected": -2.1357123851776123, + "logps/chosen": -65.9219970703125, + "logps/rejected": -231.9493408203125, + "loss": 0.5373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061002351343631744, + "rewards/margins": 0.7521324753761292, + "rewards/rejected": -0.6911301016807556, + "step": 4346 + }, + { + "epoch": 0.25, + "learning_rate": 8.752200189570198e-08, + "logits/chosen": -2.0389180183410645, + "logits/rejected": -2.039005994796753, + "logps/chosen": -52.92980194091797, + "logps/rejected": -214.51153564453125, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44266968965530396, + "rewards/margins": 1.558319091796875, + "rewards/rejected": -1.1156494617462158, + "step": 4347 + }, + { + "epoch": 0.25, + "learning_rate": 8.751577252859685e-08, + "logits/chosen": -2.112980842590332, + "logits/rejected": -2.0987653732299805, + "logps/chosen": -130.02658081054688, + "logps/rejected": -294.2808837890625, + "loss": 0.2314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7362945675849915, + "rewards/margins": 2.3602051734924316, + "rewards/rejected": -1.6239105463027954, + "step": 4348 + }, + { + "epoch": 0.25, + "learning_rate": 8.750954182874795e-08, + "logits/chosen": -2.1142494678497314, + "logits/rejected": -2.1120898723602295, + "logps/chosen": -38.938663482666016, + "logps/rejected": -231.98130798339844, + "loss": 0.4275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1267879456281662, + "rewards/margins": 2.371670961380005, + "rewards/rejected": -2.4984588623046875, + "step": 4349 + }, + { + "epoch": 0.25, + "learning_rate": 8.750330979637668e-08, + "logits/chosen": -1.6844083070755005, + "logits/rejected": -1.6451743841171265, + "logps/chosen": -109.61150360107422, + "logps/rejected": -352.22161865234375, + "loss": 0.1981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6744911074638367, + "rewards/margins": 3.2794227600097656, + "rewards/rejected": -2.604931592941284, + "step": 4350 + }, + { + "epoch": 0.25, + "learning_rate": 8.749707643170436e-08, + "logits/chosen": -2.0800397396087646, + "logits/rejected": -2.0826399326324463, + "logps/chosen": -0.1998368203639984, + "logps/rejected": -167.1138458251953, + "loss": 0.4344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012256392277777195, + "rewards/margins": 1.564525842666626, + "rewards/rejected": -1.5767822265625, + "step": 4351 + }, + { + "epoch": 0.25, + "learning_rate": 8.74908417349525e-08, + "logits/chosen": -1.983415961265564, + "logits/rejected": -1.9898998737335205, + "logps/chosen": -54.67241287231445, + "logps/rejected": -215.5611572265625, + "loss": 0.4557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25707969069480896, + "rewards/margins": 1.0077297687530518, + "rewards/rejected": -0.7506500482559204, + "step": 4352 + }, + { + "epoch": 0.25, + "learning_rate": 8.748460570634252e-08, + "logits/chosen": -1.8964372873306274, + "logits/rejected": -1.876128911972046, + "logps/chosen": -231.63534545898438, + "logps/rejected": -349.8128356933594, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.571624755859375, + "rewards/margins": 0.4201263189315796, + "rewards/rejected": 1.1514984369277954, + "step": 4353 + }, + { + "epoch": 0.25, + "learning_rate": 8.747836834609602e-08, + "logits/chosen": -1.8946382999420166, + "logits/rejected": -1.9380199909210205, + "logps/chosen": -345.09521484375, + "logps/rejected": -403.3249816894531, + "loss": 0.1757, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4802002906799316, + "rewards/margins": 1.0054047107696533, + "rewards/rejected": 1.4747955799102783, + "step": 4354 + }, + { + "epoch": 0.25, + "learning_rate": 8.747212965443454e-08, + "logits/chosen": -2.2358179092407227, + "logits/rejected": -2.2211318016052246, + "logps/chosen": -1.5707744359970093, + "logps/rejected": -240.52078247070312, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0055974009446799755, + "rewards/margins": 1.739383339881897, + "rewards/rejected": -1.7337859869003296, + "step": 4355 + }, + { + "epoch": 0.25, + "learning_rate": 8.746588963157972e-08, + "logits/chosen": -1.9493045806884766, + "logits/rejected": -1.9341884851455688, + "logps/chosen": -199.72903442382812, + "logps/rejected": -373.2344970703125, + "loss": 0.4822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3934173583984375, + "rewards/margins": 0.6565033197402954, + "rewards/rejected": -0.2630859315395355, + "step": 4356 + }, + { + "epoch": 0.25, + "learning_rate": 8.745964827775323e-08, + "logits/chosen": -1.799902319908142, + "logits/rejected": -1.8144341707229614, + "logps/chosen": -169.1505584716797, + "logps/rejected": -199.51422119140625, + "loss": 0.2828, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0008515119552612, + "rewards/margins": 1.1185059547424316, + "rewards/rejected": -0.11765442043542862, + "step": 4357 + }, + { + "epoch": 0.25, + "learning_rate": 8.74534055931768e-08, + "logits/chosen": -2.0914902687072754, + "logits/rejected": -2.0981357097625732, + "logps/chosen": -0.09815017879009247, + "logps/rejected": -218.19183349609375, + "loss": 0.496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006260567344725132, + "rewards/margins": 1.0728563070297241, + "rewards/rejected": -1.0791168212890625, + "step": 4358 + }, + { + "epoch": 0.25, + "learning_rate": 8.74471615780722e-08, + "logits/chosen": -2.0452728271484375, + "logits/rejected": -1.977402687072754, + "logps/chosen": -287.3544921875, + "logps/rejected": -436.8928527832031, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.477594017982483, + "rewards/margins": 0.1800476312637329, + "rewards/rejected": 1.29754638671875, + "step": 4359 + }, + { + "epoch": 0.25, + "learning_rate": 8.744091623266124e-08, + "logits/chosen": -1.9450939893722534, + "logits/rejected": -1.9213191270828247, + "logps/chosen": -39.76700210571289, + "logps/rejected": -187.52955627441406, + "loss": 0.5057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2847171723842621, + "rewards/margins": 1.229757308959961, + "rewards/rejected": -1.5144745111465454, + "step": 4360 + }, + { + "epoch": 0.25, + "learning_rate": 8.743466955716581e-08, + "logits/chosen": -1.977479338645935, + "logits/rejected": -1.9799842834472656, + "logps/chosen": -1.7158584594726562, + "logps/rejected": -63.053565979003906, + "loss": 0.6038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010760235600173473, + "rewards/margins": 0.2813096344470978, + "rewards/rejected": -0.27054938673973083, + "step": 4361 + }, + { + "epoch": 0.25, + "learning_rate": 8.742842155180778e-08, + "logits/chosen": -2.0548222064971924, + "logits/rejected": -2.0936999320983887, + "logps/chosen": -225.07472229003906, + "logps/rejected": -397.97998046875, + "loss": 0.1207, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.163288950920105, + "rewards/margins": 2.2863144874572754, + "rewards/rejected": -1.1230255365371704, + "step": 4362 + }, + { + "epoch": 0.25, + "learning_rate": 8.742217221680914e-08, + "logits/chosen": -2.086884021759033, + "logits/rejected": -2.0779120922088623, + "logps/chosen": -0.5986356735229492, + "logps/rejected": -311.692626953125, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009968477301299572, + "rewards/margins": 4.072127342224121, + "rewards/rejected": -4.082095623016357, + "step": 4363 + }, + { + "epoch": 0.25, + "learning_rate": 8.74159215523919e-08, + "logits/chosen": -1.8344327211380005, + "logits/rejected": -1.834388256072998, + "logps/chosen": -41.4554443359375, + "logps/rejected": -122.71773529052734, + "loss": 0.5311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2518249452114105, + "rewards/margins": 0.3330703675746918, + "rewards/rejected": -0.08124542236328125, + "step": 4364 + }, + { + "epoch": 0.25, + "learning_rate": 8.740966955877811e-08, + "logits/chosen": -2.0350911617279053, + "logits/rejected": -1.9925167560577393, + "logps/chosen": -285.3272705078125, + "logps/rejected": -437.1617431640625, + "loss": 0.4275, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.74896240234375, + "rewards/margins": -0.11927187442779541, + "rewards/rejected": 1.8682342767715454, + "step": 4365 + }, + { + "epoch": 0.25, + "learning_rate": 8.740341623618985e-08, + "logits/chosen": -1.9745502471923828, + "logits/rejected": -1.9722946882247925, + "logps/chosen": -0.31951290369033813, + "logps/rejected": -133.8507843017578, + "loss": 0.5399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003407555865123868, + "rewards/margins": 0.7278699278831482, + "rewards/rejected": -0.7312774658203125, + "step": 4366 + }, + { + "epoch": 0.25, + "learning_rate": 8.73971615848493e-08, + "logits/chosen": -2.2282814979553223, + "logits/rejected": -2.2150847911834717, + "logps/chosen": -46.543975830078125, + "logps/rejected": -192.99395751953125, + "loss": 0.4089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1272842437028885, + "rewards/margins": 1.3126145601272583, + "rewards/rejected": -1.1853302717208862, + "step": 4367 + }, + { + "epoch": 0.25, + "learning_rate": 8.739090560497864e-08, + "logits/chosen": -1.8543784618377686, + "logits/rejected": -1.8547747135162354, + "logps/chosen": -144.72088623046875, + "logps/rejected": -266.0430908203125, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35110780596733093, + "rewards/margins": 1.4894592761993408, + "rewards/rejected": -1.1383514404296875, + "step": 4368 + }, + { + "epoch": 0.25, + "learning_rate": 8.738464829680011e-08, + "logits/chosen": -2.0026135444641113, + "logits/rejected": -1.9938490390777588, + "logps/chosen": -6.394175052642822, + "logps/rejected": -156.32962036132812, + "loss": 0.3806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06496739387512207, + "rewards/margins": 2.3320634365081787, + "rewards/rejected": -2.2670960426330566, + "step": 4369 + }, + { + "epoch": 0.25, + "learning_rate": 8.737838966053601e-08, + "logits/chosen": -2.061852216720581, + "logits/rejected": -2.0725364685058594, + "logps/chosen": -7.152797698974609, + "logps/rejected": -70.264404296875, + "loss": 0.605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013892650604248047, + "rewards/margins": 0.3548069894313812, + "rewards/rejected": -0.3686996400356293, + "step": 4370 + }, + { + "epoch": 0.25, + "learning_rate": 8.737212969640865e-08, + "logits/chosen": -2.1041033267974854, + "logits/rejected": -2.1025333404541016, + "logps/chosen": -157.30160522460938, + "logps/rejected": -246.70639038085938, + "loss": 0.5153, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1272125244140625, + "rewards/margins": -0.27488410472869873, + "rewards/rejected": 1.4020966291427612, + "step": 4371 + }, + { + "epoch": 0.25, + "learning_rate": 8.736586840464046e-08, + "logits/chosen": -2.107595443725586, + "logits/rejected": -2.1246535778045654, + "logps/chosen": -140.534423828125, + "logps/rejected": -195.63461303710938, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7561157941818237, + "rewards/margins": 0.4702500104904175, + "rewards/rejected": 1.2858657836914062, + "step": 4372 + }, + { + "epoch": 0.25, + "learning_rate": 8.735960578545384e-08, + "logits/chosen": -1.8580387830734253, + "logits/rejected": -1.8281528949737549, + "logps/chosen": -255.1118927001953, + "logps/rejected": -397.7469787597656, + "loss": 0.2445, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0922043323516846, + "rewards/margins": 1.3745224475860596, + "rewards/rejected": -0.282318115234375, + "step": 4373 + }, + { + "epoch": 0.25, + "learning_rate": 8.735334183907128e-08, + "logits/chosen": -1.8048303127288818, + "logits/rejected": -1.749267816543579, + "logps/chosen": -193.9121551513672, + "logps/rejected": -413.8419189453125, + "loss": 0.1666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6605483889579773, + "rewards/margins": 3.421025276184082, + "rewards/rejected": -2.76047682762146, + "step": 4374 + }, + { + "epoch": 0.25, + "learning_rate": 8.734707656571531e-08, + "logits/chosen": -2.149512767791748, + "logits/rejected": -2.152719020843506, + "logps/chosen": -0.98441481590271, + "logps/rejected": -156.69525146484375, + "loss": 0.4439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04274815320968628, + "rewards/margins": 1.4706823825836182, + "rewards/rejected": -1.4279342889785767, + "step": 4375 + }, + { + "epoch": 0.25, + "learning_rate": 8.734080996560849e-08, + "logits/chosen": -1.979522466659546, + "logits/rejected": -1.9722490310668945, + "logps/chosen": -17.016305923461914, + "logps/rejected": -166.40841674804688, + "loss": 0.4237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012941169552505016, + "rewards/margins": 1.8199125528335571, + "rewards/rejected": -1.8328536748886108, + "step": 4376 + }, + { + "epoch": 0.25, + "learning_rate": 8.733454203897343e-08, + "logits/chosen": -1.9195449352264404, + "logits/rejected": -1.9163662195205688, + "logps/chosen": -34.13062286376953, + "logps/rejected": -169.21231079101562, + "loss": 0.4209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4937644898891449, + "rewards/margins": 0.8267707824707031, + "rewards/rejected": -0.3330062925815582, + "step": 4377 + }, + { + "epoch": 0.25, + "learning_rate": 8.732827278603284e-08, + "logits/chosen": -2.114161968231201, + "logits/rejected": -2.111032485961914, + "logps/chosen": -195.1546630859375, + "logps/rejected": -240.65538024902344, + "loss": 0.3784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7614105939865112, + "rewards/margins": 0.04231727123260498, + "rewards/rejected": 1.7190933227539062, + "step": 4378 + }, + { + "epoch": 0.25, + "learning_rate": 8.73220022070094e-08, + "logits/chosen": -2.087825298309326, + "logits/rejected": -2.057920455932617, + "logps/chosen": -215.29336547851562, + "logps/rejected": -350.54388427734375, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9351059198379517, + "rewards/margins": 1.3721909523010254, + "rewards/rejected": -0.43708497285842896, + "step": 4379 + }, + { + "epoch": 0.25, + "learning_rate": 8.731573030212586e-08, + "logits/chosen": -2.1142618656158447, + "logits/rejected": -2.0612130165100098, + "logps/chosen": -219.39205932617188, + "logps/rejected": -498.68133544921875, + "loss": 0.0772, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1863129138946533, + "rewards/margins": 2.3383729457855225, + "rewards/rejected": -0.15205994248390198, + "step": 4380 + }, + { + "epoch": 0.25, + "learning_rate": 8.730945707160508e-08, + "logits/chosen": -2.0847904682159424, + "logits/rejected": -2.080014228820801, + "logps/chosen": -4.527470111846924, + "logps/rejected": -82.22064971923828, + "loss": 0.5609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03373274952173233, + "rewards/margins": 0.4468061923980713, + "rewards/rejected": -0.480538934469223, + "step": 4381 + }, + { + "epoch": 0.26, + "learning_rate": 8.730318251566986e-08, + "logits/chosen": -2.18921160697937, + "logits/rejected": -2.1915557384490967, + "logps/chosen": -22.993568420410156, + "logps/rejected": -161.79833984375, + "loss": 0.385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.353672593832016, + "rewards/margins": 1.1472761631011963, + "rewards/rejected": -0.7936035394668579, + "step": 4382 + }, + { + "epoch": 0.26, + "learning_rate": 8.729690663454315e-08, + "logits/chosen": -2.0679283142089844, + "logits/rejected": -2.066089153289795, + "logps/chosen": -24.005577087402344, + "logps/rejected": -139.5053253173828, + "loss": 0.5568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24556885659694672, + "rewards/margins": 0.2757217586040497, + "rewards/rejected": -0.03015289269387722, + "step": 4383 + }, + { + "epoch": 0.26, + "learning_rate": 8.729062942844785e-08, + "logits/chosen": -1.9850327968597412, + "logits/rejected": -1.9802334308624268, + "logps/chosen": -18.587749481201172, + "logps/rejected": -288.53289794921875, + "loss": 0.3887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005593109410256147, + "rewards/margins": 1.9466514587402344, + "rewards/rejected": -1.9410583972930908, + "step": 4384 + }, + { + "epoch": 0.26, + "learning_rate": 8.7284350897607e-08, + "logits/chosen": -1.9186053276062012, + "logits/rejected": -1.9502736330032349, + "logps/chosen": -266.3626708984375, + "logps/rejected": -384.17913818359375, + "loss": 0.0935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5526306629180908, + "rewards/margins": 2.8054323196411133, + "rewards/rejected": -1.252801537513733, + "step": 4385 + }, + { + "epoch": 0.26, + "learning_rate": 8.727807104224363e-08, + "logits/chosen": -2.0530924797058105, + "logits/rejected": -2.050732374191284, + "logps/chosen": -14.222566604614258, + "logps/rejected": -109.07881927490234, + "loss": 0.5417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04838428646326065, + "rewards/margins": 0.6444956064224243, + "rewards/rejected": -0.5961112976074219, + "step": 4386 + }, + { + "epoch": 0.26, + "learning_rate": 8.727178986258082e-08, + "logits/chosen": -2.0759129524230957, + "logits/rejected": -2.0709686279296875, + "logps/chosen": -2.3628363609313965, + "logps/rejected": -112.7435302734375, + "loss": 0.4851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01075201015919447, + "rewards/margins": 1.0577292442321777, + "rewards/rejected": -1.0469772815704346, + "step": 4387 + }, + { + "epoch": 0.26, + "learning_rate": 8.726550735884173e-08, + "logits/chosen": -1.9705764055252075, + "logits/rejected": -1.9590948820114136, + "logps/chosen": -251.46881103515625, + "logps/rejected": -402.2425537109375, + "loss": 0.2533, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4237244129180908, + "rewards/margins": 0.7813782095909119, + "rewards/rejected": 0.642346203327179, + "step": 4388 + }, + { + "epoch": 0.26, + "learning_rate": 8.725922353124952e-08, + "logits/chosen": -2.2348599433898926, + "logits/rejected": -2.2414238452911377, + "logps/chosen": -0.000247107760515064, + "logps/rejected": -213.00811767578125, + "loss": 0.3667, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3234487141744466e-06, + "rewards/margins": 3.1202118396759033, + "rewards/rejected": -3.120213270187378, + "step": 4389 + }, + { + "epoch": 0.26, + "learning_rate": 8.725293838002745e-08, + "logits/chosen": -1.9562326669692993, + "logits/rejected": -1.9318270683288574, + "logps/chosen": -260.8195495605469, + "logps/rejected": -333.8931884765625, + "loss": 0.4592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7998199462890625, + "rewards/margins": 0.017071545124053955, + "rewards/rejected": 0.7827484011650085, + "step": 4390 + }, + { + "epoch": 0.26, + "learning_rate": 8.724665190539878e-08, + "logits/chosen": -1.8474329710006714, + "logits/rejected": -1.8445135354995728, + "logps/chosen": -190.4754638671875, + "logps/rejected": -267.2718505859375, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3698548078536987, + "rewards/margins": 0.24780893325805664, + "rewards/rejected": 1.122045874595642, + "step": 4391 + }, + { + "epoch": 0.26, + "learning_rate": 8.724036410758685e-08, + "logits/chosen": -2.124272584915161, + "logits/rejected": -2.117011070251465, + "logps/chosen": -36.08222579956055, + "logps/rejected": -146.2766571044922, + "loss": 0.4162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2143482267856598, + "rewards/margins": 1.159433364868164, + "rewards/rejected": -0.9450851678848267, + "step": 4392 + }, + { + "epoch": 0.26, + "learning_rate": 8.723407498681501e-08, + "logits/chosen": -1.9476168155670166, + "logits/rejected": -1.932973861694336, + "logps/chosen": -61.169761657714844, + "logps/rejected": -183.19210815429688, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7087516784667969, + "rewards/margins": 1.4474998712539673, + "rewards/rejected": -0.7387481927871704, + "step": 4393 + }, + { + "epoch": 0.26, + "learning_rate": 8.72277845433067e-08, + "logits/chosen": -1.9991892576217651, + "logits/rejected": -2.001878023147583, + "logps/chosen": -7.772276876494288e-05, + "logps/rejected": -246.2355194091797, + "loss": 0.3533, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.655589110479923e-07, + "rewards/margins": 4.323691368103027, + "rewards/rejected": -4.323692321777344, + "step": 4394 + }, + { + "epoch": 0.26, + "learning_rate": 8.722149277728538e-08, + "logits/chosen": -2.3064630031585693, + "logits/rejected": -2.3095924854278564, + "logps/chosen": -9.119209289550781, + "logps/rejected": -175.02340698242188, + "loss": 0.4047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14932118356227875, + "rewards/margins": 1.6563310623168945, + "rewards/rejected": -1.5070098638534546, + "step": 4395 + }, + { + "epoch": 0.26, + "learning_rate": 8.721519968897457e-08, + "logits/chosen": -2.131068468093872, + "logits/rejected": -2.126986503601074, + "logps/chosen": -13.500737190246582, + "logps/rejected": -195.48255920410156, + "loss": 0.4237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024991130456328392, + "rewards/margins": 1.6782764196395874, + "rewards/rejected": -1.653285264968872, + "step": 4396 + }, + { + "epoch": 0.26, + "learning_rate": 8.720890527859783e-08, + "logits/chosen": -1.9798117876052856, + "logits/rejected": -1.9824179410934448, + "logps/chosen": -0.05782540887594223, + "logps/rejected": -66.47189331054688, + "loss": 0.5774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007282420992851257, + "rewards/margins": 0.5305275321006775, + "rewards/rejected": -0.5297992825508118, + "step": 4397 + }, + { + "epoch": 0.26, + "learning_rate": 8.720260954637875e-08, + "logits/chosen": -2.306852102279663, + "logits/rejected": -2.2908051013946533, + "logps/chosen": -59.07946014404297, + "logps/rejected": -165.7937774658203, + "loss": 0.4423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07477646321058273, + "rewards/margins": 1.087934136390686, + "rewards/rejected": -1.0131577253341675, + "step": 4398 + }, + { + "epoch": 0.26, + "learning_rate": 8.719631249254102e-08, + "logits/chosen": -2.199756383895874, + "logits/rejected": -2.2042248249053955, + "logps/chosen": -0.00011753685976145789, + "logps/rejected": -212.6976318359375, + "loss": 0.3596, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.729731249928591e-06, + "rewards/margins": 3.6332128047943115, + "rewards/rejected": -3.6332154273986816, + "step": 4399 + }, + { + "epoch": 0.26, + "learning_rate": 8.71900141173083e-08, + "logits/chosen": -1.7545266151428223, + "logits/rejected": -1.7519099712371826, + "logps/chosen": -40.56831359863281, + "logps/rejected": -210.4626922607422, + "loss": 0.4666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.058293916285037994, + "rewards/margins": 1.4855934381484985, + "rewards/rejected": -1.5438873767852783, + "step": 4400 + }, + { + "epoch": 0.26, + "learning_rate": 8.71837144209044e-08, + "logits/chosen": -2.092496633529663, + "logits/rejected": -2.092288017272949, + "logps/chosen": -68.56974029541016, + "logps/rejected": -190.98822021484375, + "loss": 0.7228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7579734921455383, + "rewards/margins": 0.7884102463722229, + "rewards/rejected": -1.5463837385177612, + "step": 4401 + }, + { + "epoch": 0.26, + "learning_rate": 8.717741340355305e-08, + "logits/chosen": -1.8644490242004395, + "logits/rejected": -1.8569552898406982, + "logps/chosen": -0.0001746358466334641, + "logps/rejected": -182.05233764648438, + "loss": 0.4571, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.128356173780048e-06, + "rewards/margins": 1.3942769765853882, + "rewards/rejected": -1.3942841291427612, + "step": 4402 + }, + { + "epoch": 0.26, + "learning_rate": 8.717111106547813e-08, + "logits/chosen": -2.2166380882263184, + "logits/rejected": -2.210510015487671, + "logps/chosen": -3.714651584625244, + "logps/rejected": -143.04693603515625, + "loss": 0.4851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02704489231109619, + "rewards/margins": 0.7007555365562439, + "rewards/rejected": -0.6737106442451477, + "step": 4403 + }, + { + "epoch": 0.26, + "learning_rate": 8.716480740690353e-08, + "logits/chosen": -1.8088629245758057, + "logits/rejected": -1.8073159456253052, + "logps/chosen": -25.728525161743164, + "logps/rejected": -116.88236999511719, + "loss": 0.3785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3116453289985657, + "rewards/margins": 1.3356153964996338, + "rewards/rejected": -1.0239700078964233, + "step": 4404 + }, + { + "epoch": 0.26, + "learning_rate": 8.715850242805316e-08, + "logits/chosen": -2.035402536392212, + "logits/rejected": -2.0385777950286865, + "logps/chosen": -221.8690643310547, + "logps/rejected": -368.5995788574219, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8151733875274658, + "rewards/margins": 3.315054416656494, + "rewards/rejected": -1.4998810291290283, + "step": 4405 + }, + { + "epoch": 0.26, + "learning_rate": 8.715219612915104e-08, + "logits/chosen": -2.1537256240844727, + "logits/rejected": -2.152717351913452, + "logps/chosen": -0.000603787659201771, + "logps/rejected": -180.9631805419922, + "loss": 0.3842, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7348634830559604e-05, + "rewards/margins": 2.454759359359741, + "rewards/rejected": -2.454786777496338, + "step": 4406 + }, + { + "epoch": 0.26, + "learning_rate": 8.714588851042116e-08, + "logits/chosen": -2.0131866931915283, + "logits/rejected": -2.010659694671631, + "logps/chosen": -198.3681182861328, + "logps/rejected": -377.38372802734375, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7163361310958862, + "rewards/margins": 2.85611891746521, + "rewards/rejected": -1.1397827863693237, + "step": 4407 + }, + { + "epoch": 0.26, + "learning_rate": 8.713957957208763e-08, + "logits/chosen": -2.1447746753692627, + "logits/rejected": -2.1335020065307617, + "logps/chosen": -32.29100799560547, + "logps/rejected": -210.32504272460938, + "loss": 0.4163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.033017732203006744, + "rewards/margins": 1.6508095264434814, + "rewards/rejected": -1.617791771888733, + "step": 4408 + }, + { + "epoch": 0.26, + "learning_rate": 8.713326931437457e-08, + "logits/chosen": -1.8397172689437866, + "logits/rejected": -1.8372999429702759, + "logps/chosen": -69.62849426269531, + "logps/rejected": -227.6742401123047, + "loss": 0.4401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2873291075229645, + "rewards/margins": 0.9704102277755737, + "rewards/rejected": -0.6830810904502869, + "step": 4409 + }, + { + "epoch": 0.26, + "learning_rate": 8.712695773750616e-08, + "logits/chosen": -2.0002987384796143, + "logits/rejected": -1.9907431602478027, + "logps/chosen": -205.1468048095703, + "logps/rejected": -275.263427734375, + "loss": 0.391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4084213376045227, + "rewards/margins": 1.1256301403045654, + "rewards/rejected": -0.7172088623046875, + "step": 4410 + }, + { + "epoch": 0.26, + "learning_rate": 8.712064484170657e-08, + "logits/chosen": -2.0180823802948, + "logits/rejected": -1.9905036687850952, + "logps/chosen": -236.48269653320312, + "logps/rejected": -559.08154296875, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1782867908477783, + "rewards/margins": 2.00065016746521, + "rewards/rejected": 0.17763672769069672, + "step": 4411 + }, + { + "epoch": 0.26, + "learning_rate": 8.711433062720013e-08, + "logits/chosen": -2.033784866333008, + "logits/rejected": -2.0287833213806152, + "logps/chosen": -217.4815216064453, + "logps/rejected": -307.0809020996094, + "loss": 0.2613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8499862551689148, + "rewards/margins": 1.4402389526367188, + "rewards/rejected": -0.590252697467804, + "step": 4412 + }, + { + "epoch": 0.26, + "learning_rate": 8.710801509421111e-08, + "logits/chosen": -2.08339262008667, + "logits/rejected": -2.085705518722534, + "logps/chosen": -4.366680145263672, + "logps/rejected": -59.7245979309082, + "loss": 0.6398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010159730911254883, + "rewards/margins": 0.20761333405971527, + "rewards/rejected": -0.21777306497097015, + "step": 4413 + }, + { + "epoch": 0.26, + "learning_rate": 8.710169824296387e-08, + "logits/chosen": -2.044271469116211, + "logits/rejected": -2.040663957595825, + "logps/chosen": -27.13062286376953, + "logps/rejected": -162.86326599121094, + "loss": 0.5134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04692192003130913, + "rewards/margins": 0.7401302456855774, + "rewards/rejected": -0.6932083368301392, + "step": 4414 + }, + { + "epoch": 0.26, + "learning_rate": 8.709538007368283e-08, + "logits/chosen": -2.187323808670044, + "logits/rejected": -2.183138608932495, + "logps/chosen": -0.7473631501197815, + "logps/rejected": -112.93504333496094, + "loss": 0.4417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009492510929703712, + "rewards/margins": 1.4838294982910156, + "rewards/rejected": -1.4933220148086548, + "step": 4415 + }, + { + "epoch": 0.26, + "learning_rate": 8.708906058659242e-08, + "logits/chosen": -2.0491795539855957, + "logits/rejected": -2.045118808746338, + "logps/chosen": -155.22048950195312, + "logps/rejected": -322.6627502441406, + "loss": 0.1772, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0657501220703125, + "rewards/margins": 1.2049376964569092, + "rewards/rejected": 0.8608123660087585, + "step": 4416 + }, + { + "epoch": 0.26, + "learning_rate": 8.708273978191718e-08, + "logits/chosen": -2.0198490619659424, + "logits/rejected": -2.0177996158599854, + "logps/chosen": -205.32357788085938, + "logps/rejected": -419.2907409667969, + "loss": 0.1679, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4132202863693237, + "rewards/margins": 1.6451141834259033, + "rewards/rejected": -0.23189392685890198, + "step": 4417 + }, + { + "epoch": 0.26, + "learning_rate": 8.707641765988164e-08, + "logits/chosen": -1.926580786705017, + "logits/rejected": -1.9080973863601685, + "logps/chosen": -192.81370544433594, + "logps/rejected": -292.3896179199219, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.278045654296875, + "rewards/margins": 0.47107845544815063, + "rewards/rejected": 0.8069671988487244, + "step": 4418 + }, + { + "epoch": 0.26, + "learning_rate": 8.707009422071034e-08, + "logits/chosen": -1.919618844985962, + "logits/rejected": -1.8879408836364746, + "logps/chosen": -216.33840942382812, + "logps/rejected": -423.3924560546875, + "loss": 0.1164, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4654343128204346, + "rewards/margins": 2.2852706909179688, + "rewards/rejected": -0.819836437702179, + "step": 4419 + }, + { + "epoch": 0.26, + "learning_rate": 8.7063769464628e-08, + "logits/chosen": -1.882239818572998, + "logits/rejected": -1.8958287239074707, + "logps/chosen": -207.9881591796875, + "logps/rejected": -355.48577880859375, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.166223168373108, + "rewards/margins": 0.1886841058731079, + "rewards/rejected": 0.9775390625, + "step": 4420 + }, + { + "epoch": 0.26, + "learning_rate": 8.705744339185927e-08, + "logits/chosen": -2.1432042121887207, + "logits/rejected": -2.1411540508270264, + "logps/chosen": -16.877197265625, + "logps/rejected": -83.30548858642578, + "loss": 0.6715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013759422115981579, + "rewards/margins": 0.08390751481056213, + "rewards/rejected": -0.09766693413257599, + "step": 4421 + }, + { + "epoch": 0.26, + "learning_rate": 8.705111600262886e-08, + "logits/chosen": -1.971401572227478, + "logits/rejected": -1.9656031131744385, + "logps/chosen": -152.70509338378906, + "logps/rejected": -330.49951171875, + "loss": 0.5085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4000381529331207, + "rewards/margins": 0.3223403990268707, + "rewards/rejected": 0.07769775390625, + "step": 4422 + }, + { + "epoch": 0.26, + "learning_rate": 8.70447872971616e-08, + "logits/chosen": -2.110788106918335, + "logits/rejected": -2.062730312347412, + "logps/chosen": -186.72314453125, + "logps/rejected": -360.81231689453125, + "loss": 0.2391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0621063709259033, + "rewards/margins": 1.3789154291152954, + "rewards/rejected": -0.3168090879917145, + "step": 4423 + }, + { + "epoch": 0.26, + "learning_rate": 8.703845727568227e-08, + "logits/chosen": -1.9837104082107544, + "logits/rejected": -1.9657914638519287, + "logps/chosen": -69.81828308105469, + "logps/rejected": -335.05828857421875, + "loss": 0.5029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4863029420375824, + "rewards/margins": 2.5483529567718506, + "rewards/rejected": -3.034655809402466, + "step": 4424 + }, + { + "epoch": 0.26, + "learning_rate": 8.703212593841577e-08, + "logits/chosen": -2.13061785697937, + "logits/rejected": -2.132171630859375, + "logps/chosen": -69.77068328857422, + "logps/rejected": -139.4492645263672, + "loss": 0.8119, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7152603268623352, + "rewards/margins": 0.1749114990234375, + "rewards/rejected": -0.8901718258857727, + "step": 4425 + }, + { + "epoch": 0.26, + "learning_rate": 8.702579328558701e-08, + "logits/chosen": -2.2078452110290527, + "logits/rejected": -2.2052953243255615, + "logps/chosen": -51.27326583862305, + "logps/rejected": -176.98184204101562, + "loss": 0.563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33929750323295593, + "rewards/margins": 1.2069228887557983, + "rewards/rejected": -1.5462204217910767, + "step": 4426 + }, + { + "epoch": 0.26, + "learning_rate": 8.701945931742097e-08, + "logits/chosen": -2.003429412841797, + "logits/rejected": -2.008913278579712, + "logps/chosen": -1.2371619939804077, + "logps/rejected": -195.00009155273438, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021439706906676292, + "rewards/margins": 2.0657176971435547, + "rewards/rejected": -2.0442779064178467, + "step": 4427 + }, + { + "epoch": 0.26, + "learning_rate": 8.701312403414264e-08, + "logits/chosen": -2.086387872695923, + "logits/rejected": -2.0762500762939453, + "logps/chosen": -0.005472038872539997, + "logps/rejected": -168.2879638671875, + "loss": 0.382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000320610124617815, + "rewards/margins": 2.6038074493408203, + "rewards/rejected": -2.6034867763519287, + "step": 4428 + }, + { + "epoch": 0.26, + "learning_rate": 8.70067874359771e-08, + "logits/chosen": -1.9978355169296265, + "logits/rejected": -1.9812580347061157, + "logps/chosen": -0.003490545554086566, + "logps/rejected": -142.30345153808594, + "loss": 0.5756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013549158757086843, + "rewards/margins": 0.5430454015731812, + "rewards/rejected": -0.5431808829307556, + "step": 4429 + }, + { + "epoch": 0.26, + "learning_rate": 8.700044952314944e-08, + "logits/chosen": -2.093348503112793, + "logits/rejected": -2.080467939376831, + "logps/chosen": -37.95283889770508, + "logps/rejected": -139.19351196289062, + "loss": 0.4593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23424874246120453, + "rewards/margins": 1.5696437358856201, + "rewards/rejected": -1.8038924932479858, + "step": 4430 + }, + { + "epoch": 0.26, + "learning_rate": 8.699411029588484e-08, + "logits/chosen": -2.1590864658355713, + "logits/rejected": -2.156674861907959, + "logps/chosen": -9.334856033325195, + "logps/rejected": -103.72806549072266, + "loss": 0.2639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3626928329467773, + "rewards/margins": 0.9516050219535828, + "rewards/rejected": 0.4110878109931946, + "step": 4431 + }, + { + "epoch": 0.26, + "learning_rate": 8.698776975440849e-08, + "logits/chosen": -2.009103536605835, + "logits/rejected": -2.0124659538269043, + "logps/chosen": -0.0019181123934686184, + "logps/rejected": -232.0693359375, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.292255759239197e-06, + "rewards/margins": 3.6850600242614746, + "rewards/rejected": -3.6850526332855225, + "step": 4432 + }, + { + "epoch": 0.26, + "learning_rate": 8.698142789894561e-08, + "logits/chosen": -1.9959441423416138, + "logits/rejected": -1.9533395767211914, + "logps/chosen": -264.9355163574219, + "logps/rejected": -367.60247802734375, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0630218982696533, + "rewards/margins": 0.19113469123840332, + "rewards/rejected": 1.87188720703125, + "step": 4433 + }, + { + "epoch": 0.26, + "learning_rate": 8.697508472972153e-08, + "logits/chosen": -1.9190950393676758, + "logits/rejected": -1.9113966226577759, + "logps/chosen": -67.05371856689453, + "logps/rejected": -192.36000061035156, + "loss": 0.6566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41406556963920593, + "rewards/margins": 0.7821502685546875, + "rewards/rejected": -1.1962158679962158, + "step": 4434 + }, + { + "epoch": 0.26, + "learning_rate": 8.696874024696157e-08, + "logits/chosen": -2.0828208923339844, + "logits/rejected": -2.041290760040283, + "logps/chosen": -186.66677856445312, + "logps/rejected": -400.14483642578125, + "loss": 0.3143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9634491205215454, + "rewards/margins": 0.821380615234375, + "rewards/rejected": 0.14206849038600922, + "step": 4435 + }, + { + "epoch": 0.26, + "learning_rate": 8.696239445089113e-08, + "logits/chosen": -1.9902911186218262, + "logits/rejected": -1.9775457382202148, + "logps/chosen": -162.68707275390625, + "logps/rejected": -213.1519775390625, + "loss": 0.4588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5048874020576477, + "rewards/margins": 0.28345948457717896, + "rewards/rejected": 0.22142791748046875, + "step": 4436 + }, + { + "epoch": 0.26, + "learning_rate": 8.695604734173563e-08, + "logits/chosen": -1.8372204303741455, + "logits/rejected": -1.8143655061721802, + "logps/chosen": -263.47088623046875, + "logps/rejected": -424.28826904296875, + "loss": 0.3027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8249419927597046, + "rewards/margins": 0.4354674816131592, + "rewards/rejected": 1.3894745111465454, + "step": 4437 + }, + { + "epoch": 0.26, + "learning_rate": 8.694969891972055e-08, + "logits/chosen": -1.9902466535568237, + "logits/rejected": -1.9758268594741821, + "logps/chosen": -40.57730484008789, + "logps/rejected": -326.991943359375, + "loss": 0.2426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8114704489707947, + "rewards/margins": 1.785179615020752, + "rewards/rejected": -0.9737091064453125, + "step": 4438 + }, + { + "epoch": 0.26, + "learning_rate": 8.694334918507144e-08, + "logits/chosen": -2.0849123001098633, + "logits/rejected": -2.101151943206787, + "logps/chosen": -262.28753662109375, + "logps/rejected": -378.67315673828125, + "loss": 0.127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.290496826171875, + "rewards/margins": 1.4599487781524658, + "rewards/rejected": 0.830548107624054, + "step": 4439 + }, + { + "epoch": 0.26, + "learning_rate": 8.693699813801385e-08, + "logits/chosen": -2.049631118774414, + "logits/rejected": -2.0474655628204346, + "logps/chosen": -129.88526916503906, + "logps/rejected": -229.78733825683594, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.410467505455017, + "rewards/margins": 0.41407161951065063, + "rewards/rejected": 0.9963958859443665, + "step": 4440 + }, + { + "epoch": 0.26, + "learning_rate": 8.69306457787734e-08, + "logits/chosen": -2.09902024269104, + "logits/rejected": -1.9395246505737305, + "logps/chosen": -181.89981079101562, + "logps/rejected": -670.4276123046875, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.647753894329071, + "rewards/margins": 1.563269019126892, + "rewards/rejected": -0.915515124797821, + "step": 4441 + }, + { + "epoch": 0.26, + "learning_rate": 8.692429210757577e-08, + "logits/chosen": -2.215317964553833, + "logits/rejected": -2.202622652053833, + "logps/chosen": -32.592262268066406, + "logps/rejected": -242.20513916015625, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3820396363735199, + "rewards/margins": 2.830995798110962, + "rewards/rejected": -2.448956251144409, + "step": 4442 + }, + { + "epoch": 0.26, + "learning_rate": 8.691793712464666e-08, + "logits/chosen": -2.1693129539489746, + "logits/rejected": -2.159252405166626, + "logps/chosen": -0.015077752061188221, + "logps/rejected": -168.70558166503906, + "loss": 0.4645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001193191739730537, + "rewards/margins": 1.320988416671753, + "rewards/rejected": -1.3197952508926392, + "step": 4443 + }, + { + "epoch": 0.26, + "learning_rate": 8.691158083021186e-08, + "logits/chosen": -2.0064444541931152, + "logits/rejected": -1.9501228332519531, + "logps/chosen": -181.05718994140625, + "logps/rejected": -381.97650146484375, + "loss": 0.0934, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.953425645828247, + "rewards/margins": 2.0943710803985596, + "rewards/rejected": -0.1409454345703125, + "step": 4444 + }, + { + "epoch": 0.26, + "learning_rate": 8.690522322449713e-08, + "logits/chosen": -1.9077646732330322, + "logits/rejected": -1.913457989692688, + "logps/chosen": -199.01547241210938, + "logps/rejected": -384.4425964355469, + "loss": 0.1949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6568329334259033, + "rewards/margins": 1.250848412513733, + "rewards/rejected": 0.405984491109848, + "step": 4445 + }, + { + "epoch": 0.26, + "learning_rate": 8.689886430772837e-08, + "logits/chosen": -2.0004889965057373, + "logits/rejected": -2.0100879669189453, + "logps/chosen": -146.14547729492188, + "logps/rejected": -347.2763671875, + "loss": 0.2015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2143585681915283, + "rewards/margins": 1.3560456037521362, + "rewards/rejected": -0.14168702065944672, + "step": 4446 + }, + { + "epoch": 0.26, + "learning_rate": 8.689250408013144e-08, + "logits/chosen": -2.0212433338165283, + "logits/rejected": -2.0156800746917725, + "logps/chosen": -11.750229835510254, + "logps/rejected": -85.38308715820312, + "loss": 0.6833, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06250037997961044, + "rewards/margins": -0.021966934204101562, + "rewards/rejected": 0.084467314183712, + "step": 4447 + }, + { + "epoch": 0.26, + "learning_rate": 8.688614254193232e-08, + "logits/chosen": -2.0213475227355957, + "logits/rejected": -2.0011351108551025, + "logps/chosen": -220.7007293701172, + "logps/rejected": -341.59552001953125, + "loss": 0.3815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7175338864326477, + "rewards/margins": 0.4933151304721832, + "rewards/rejected": 0.22421875596046448, + "step": 4448 + }, + { + "epoch": 0.26, + "learning_rate": 8.687977969335698e-08, + "logits/chosen": -2.222247362136841, + "logits/rejected": -2.1746666431427, + "logps/chosen": -279.6768798828125, + "logps/rejected": -376.07330322265625, + "loss": 0.1228, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0209991931915283, + "rewards/margins": 1.9119598865509033, + "rewards/rejected": 0.109039306640625, + "step": 4449 + }, + { + "epoch": 0.26, + "learning_rate": 8.687341553463147e-08, + "logits/chosen": -2.1212925910949707, + "logits/rejected": -2.1154956817626953, + "logps/chosen": -0.14302101731300354, + "logps/rejected": -87.76920318603516, + "loss": 0.516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008973202668130398, + "rewards/margins": 0.8969281911849976, + "rewards/rejected": -0.9059013724327087, + "step": 4450 + }, + { + "epoch": 0.26, + "learning_rate": 8.686705006598187e-08, + "logits/chosen": -2.1521871089935303, + "logits/rejected": -2.144951581954956, + "logps/chosen": -226.4821319580078, + "logps/rejected": -327.20556640625, + "loss": 0.1639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9567917585372925, + "rewards/margins": 1.2276902198791504, + "rewards/rejected": 0.7291015982627869, + "step": 4451 + }, + { + "epoch": 0.26, + "learning_rate": 8.686068328763432e-08, + "logits/chosen": -2.1396825313568115, + "logits/rejected": -2.136199712753296, + "logps/chosen": -0.00038677570410072803, + "logps/rejected": -170.3930206298828, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5648498447262682e-05, + "rewards/margins": 2.647306442260742, + "rewards/rejected": -2.647322177886963, + "step": 4452 + }, + { + "epoch": 0.26, + "learning_rate": 8.685431519981499e-08, + "logits/chosen": -1.9024324417114258, + "logits/rejected": -1.883103370666504, + "logps/chosen": -239.9227294921875, + "logps/rejected": -359.39178466796875, + "loss": 0.3943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4460922479629517, + "rewards/margins": 0.12769627571105957, + "rewards/rejected": 1.318395972251892, + "step": 4453 + }, + { + "epoch": 0.26, + "learning_rate": 8.68479458027501e-08, + "logits/chosen": -1.9700464010238647, + "logits/rejected": -2.0213024616241455, + "logps/chosen": -381.65350341796875, + "logps/rejected": -336.6183166503906, + "loss": 0.067, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8610137701034546, + "rewards/margins": 2.8183441162109375, + "rewards/rejected": -0.9573303461074829, + "step": 4454 + }, + { + "epoch": 0.26, + "learning_rate": 8.684157509666594e-08, + "logits/chosen": -1.8035098314285278, + "logits/rejected": -1.7709418535232544, + "logps/chosen": -293.98846435546875, + "logps/rejected": -414.2165832519531, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0069153308868408, + "rewards/margins": 0.8917877674102783, + "rewards/rejected": 0.1151275634765625, + "step": 4455 + }, + { + "epoch": 0.26, + "learning_rate": 8.683520308178882e-08, + "logits/chosen": -2.0320894718170166, + "logits/rejected": -2.034125804901123, + "logps/chosen": -63.588836669921875, + "logps/rejected": -203.0726776123047, + "loss": 0.427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4335517883300781, + "rewards/margins": 0.5942268371582031, + "rewards/rejected": -0.160675048828125, + "step": 4456 + }, + { + "epoch": 0.26, + "learning_rate": 8.682882975834511e-08, + "logits/chosen": -1.9885300397872925, + "logits/rejected": -1.9845679998397827, + "logps/chosen": -22.050182342529297, + "logps/rejected": -117.24222564697266, + "loss": 0.406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20188866555690765, + "rewards/margins": 1.2424900531768799, + "rewards/rejected": -1.040601372718811, + "step": 4457 + }, + { + "epoch": 0.26, + "learning_rate": 8.682245512656122e-08, + "logits/chosen": -2.179677963256836, + "logits/rejected": -2.169983386993408, + "logps/chosen": -15.985485076904297, + "logps/rejected": -201.22555541992188, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14647769927978516, + "rewards/margins": 1.801861047744751, + "rewards/rejected": -1.6553833484649658, + "step": 4458 + }, + { + "epoch": 0.26, + "learning_rate": 8.68160791866636e-08, + "logits/chosen": -2.2085111141204834, + "logits/rejected": -2.202474594116211, + "logps/chosen": -32.274009704589844, + "logps/rejected": -257.9255065917969, + "loss": 0.3814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20701752603054047, + "rewards/margins": 1.6818557977676392, + "rewards/rejected": -1.4748382568359375, + "step": 4459 + }, + { + "epoch": 0.26, + "learning_rate": 8.680970193887878e-08, + "logits/chosen": -1.9653925895690918, + "logits/rejected": -1.8778340816497803, + "logps/chosen": -255.02468872070312, + "logps/rejected": -511.05609130859375, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.531622290611267, + "rewards/margins": 0.7818053960800171, + "rewards/rejected": 0.74981689453125, + "step": 4460 + }, + { + "epoch": 0.26, + "learning_rate": 8.680332338343325e-08, + "logits/chosen": -1.8962526321411133, + "logits/rejected": -1.8883447647094727, + "logps/chosen": -297.2693176269531, + "logps/rejected": -411.5522766113281, + "loss": 0.0653, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.724218726158142, + "rewards/margins": 4.0367279052734375, + "rewards/rejected": -2.312509298324585, + "step": 4461 + }, + { + "epoch": 0.26, + "learning_rate": 8.679694352055369e-08, + "logits/chosen": -2.1534957885742188, + "logits/rejected": -2.1561086177825928, + "logps/chosen": -0.0002356550539843738, + "logps/rejected": -176.97311401367188, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0631825716700405e-05, + "rewards/margins": 3.5229599475860596, + "rewards/rejected": -3.522970676422119, + "step": 4462 + }, + { + "epoch": 0.26, + "learning_rate": 8.679056235046668e-08, + "logits/chosen": -2.0469937324523926, + "logits/rejected": -2.024010181427002, + "logps/chosen": -116.6718521118164, + "logps/rejected": -364.6044006347656, + "loss": 0.2212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.68569415807724, + "rewards/margins": 4.022724151611328, + "rewards/rejected": -3.3370301723480225, + "step": 4463 + }, + { + "epoch": 0.26, + "learning_rate": 8.678417987339894e-08, + "logits/chosen": -2.034911870956421, + "logits/rejected": -2.022392511367798, + "logps/chosen": -73.69493865966797, + "logps/rejected": -163.33477783203125, + "loss": 0.4773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47974929213523865, + "rewards/margins": 0.513410210609436, + "rewards/rejected": -0.033660888671875, + "step": 4464 + }, + { + "epoch": 0.26, + "learning_rate": 8.67777960895772e-08, + "logits/chosen": -2.0274996757507324, + "logits/rejected": -2.014845848083496, + "logps/chosen": -39.130470275878906, + "logps/rejected": -234.26046752929688, + "loss": 0.6016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5493934750556946, + "rewards/margins": 1.4736549854278564, + "rewards/rejected": -2.0230484008789062, + "step": 4465 + }, + { + "epoch": 0.26, + "learning_rate": 8.677141099922825e-08, + "logits/chosen": -1.9728487730026245, + "logits/rejected": -1.9747166633605957, + "logps/chosen": -1.605371356010437, + "logps/rejected": -207.78350830078125, + "loss": 0.424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007303893566131592, + "rewards/margins": 1.6812336444854736, + "rewards/rejected": -1.68853759765625, + "step": 4466 + }, + { + "epoch": 0.26, + "learning_rate": 8.67650246025789e-08, + "logits/chosen": -2.02724027633667, + "logits/rejected": -2.0254855155944824, + "logps/chosen": -4.613347846316174e-05, + "logps/rejected": -169.67689514160156, + "loss": 0.4097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5854471939746873e-06, + "rewards/margins": 1.903334140777588, + "rewards/rejected": -1.9033325910568237, + "step": 4467 + }, + { + "epoch": 0.26, + "learning_rate": 8.675863689985605e-08, + "logits/chosen": -1.9613815546035767, + "logits/rejected": -1.9674042463302612, + "logps/chosen": -33.566856384277344, + "logps/rejected": -185.67694091796875, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40064355731010437, + "rewards/margins": 1.7513775825500488, + "rewards/rejected": -1.350733995437622, + "step": 4468 + }, + { + "epoch": 0.26, + "learning_rate": 8.675224789128658e-08, + "logits/chosen": -2.114337205886841, + "logits/rejected": -2.124478578567505, + "logps/chosen": -5.197463178774342e-05, + "logps/rejected": -149.69552612304688, + "loss": 0.4183, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.410583755998232e-07, + "rewards/margins": 1.8696538209915161, + "rewards/rejected": -1.869653344154358, + "step": 4469 + }, + { + "epoch": 0.26, + "learning_rate": 8.674585757709754e-08, + "logits/chosen": -2.05513334274292, + "logits/rejected": -2.027564764022827, + "logps/chosen": -218.47686767578125, + "logps/rejected": -593.0996704101562, + "loss": 0.1287, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9888214468955994, + "rewards/margins": 3.4983246326446533, + "rewards/rejected": -2.509503126144409, + "step": 4470 + }, + { + "epoch": 0.26, + "learning_rate": 8.673946595751586e-08, + "logits/chosen": -1.9747742414474487, + "logits/rejected": -1.9907077550888062, + "logps/chosen": -161.52066040039062, + "logps/rejected": -160.62283325195312, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1645020246505737, + "rewards/margins": 0.4746857285499573, + "rewards/rejected": 0.6898162961006165, + "step": 4471 + }, + { + "epoch": 0.26, + "learning_rate": 8.673307303276866e-08, + "logits/chosen": -1.9476099014282227, + "logits/rejected": -1.930416226387024, + "logps/chosen": -166.5934600830078, + "logps/rejected": -255.7588653564453, + "loss": 0.4717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6734970211982727, + "rewards/margins": 0.28135988116264343, + "rewards/rejected": 0.3921371400356293, + "step": 4472 + }, + { + "epoch": 0.26, + "learning_rate": 8.6726678803083e-08, + "logits/chosen": -2.132197618484497, + "logits/rejected": -2.0537707805633545, + "logps/chosen": -220.58328247070312, + "logps/rejected": -338.2627258300781, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7210357785224915, + "rewards/margins": 0.8374542593955994, + "rewards/rejected": -0.11641845852136612, + "step": 4473 + }, + { + "epoch": 0.26, + "learning_rate": 8.672028326868608e-08, + "logits/chosen": -2.0605995655059814, + "logits/rejected": -2.0646800994873047, + "logps/chosen": -2.1310181617736816, + "logps/rejected": -137.74026489257812, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0448639877140522, + "rewards/margins": 2.2772552967071533, + "rewards/rejected": -2.232391357421875, + "step": 4474 + }, + { + "epoch": 0.26, + "learning_rate": 8.671388642980507e-08, + "logits/chosen": -2.020056962966919, + "logits/rejected": -2.014509916305542, + "logps/chosen": -59.354522705078125, + "logps/rejected": -198.5517120361328, + "loss": 0.3902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2917037904262543, + "rewards/margins": 1.4314132928848267, + "rewards/rejected": -1.13970947265625, + "step": 4475 + }, + { + "epoch": 0.26, + "learning_rate": 8.670748828666723e-08, + "logits/chosen": -1.9226207733154297, + "logits/rejected": -1.9103106260299683, + "logps/chosen": -52.144325256347656, + "logps/rejected": -283.3834533691406, + "loss": 0.3722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1709640473127365, + "rewards/margins": 1.9559036493301392, + "rewards/rejected": -1.7849396467208862, + "step": 4476 + }, + { + "epoch": 0.26, + "learning_rate": 8.670108883949985e-08, + "logits/chosen": -1.9263230562210083, + "logits/rejected": -1.9101345539093018, + "logps/chosen": -43.154354095458984, + "logps/rejected": -234.98231506347656, + "loss": 0.4137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11589927971363068, + "rewards/margins": 2.0881290435791016, + "rewards/rejected": -2.204028367996216, + "step": 4477 + }, + { + "epoch": 0.26, + "learning_rate": 8.669468808853027e-08, + "logits/chosen": -1.9027725458145142, + "logits/rejected": -1.9209792613983154, + "logps/chosen": -26.900733947753906, + "logps/rejected": -188.54457092285156, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14998017251491547, + "rewards/margins": 2.2154083251953125, + "rewards/rejected": -2.0654282569885254, + "step": 4478 + }, + { + "epoch": 0.26, + "learning_rate": 8.668828603398588e-08, + "logits/chosen": -1.9326649904251099, + "logits/rejected": -1.926288366317749, + "logps/chosen": -168.86231994628906, + "logps/rejected": -449.7598876953125, + "loss": 0.0663, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7957168817520142, + "rewards/margins": 3.3058242797851562, + "rewards/rejected": -1.510107398033142, + "step": 4479 + }, + { + "epoch": 0.26, + "learning_rate": 8.66818826760941e-08, + "logits/chosen": -1.9860341548919678, + "logits/rejected": -1.9733729362487793, + "logps/chosen": -151.87490844726562, + "logps/rejected": -402.4973449707031, + "loss": 0.1364, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6073578596115112, + "rewards/margins": 2.0110292434692383, + "rewards/rejected": -0.4036712646484375, + "step": 4480 + }, + { + "epoch": 0.26, + "learning_rate": 8.667547801508243e-08, + "logits/chosen": -1.9921456575393677, + "logits/rejected": -2.0367050170898438, + "logps/chosen": -165.18795776367188, + "logps/rejected": -286.07427978515625, + "loss": 0.1742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3397048711776733, + "rewards/margins": 1.4523483514785767, + "rewards/rejected": -0.11264343559741974, + "step": 4481 + }, + { + "epoch": 0.26, + "learning_rate": 8.666907205117837e-08, + "logits/chosen": -2.0994956493377686, + "logits/rejected": -2.102288007736206, + "logps/chosen": -25.828115463256836, + "logps/rejected": -38.21202087402344, + "loss": 0.5725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0666189193725586, + "rewards/margins": 0.3079538345336914, + "rewards/rejected": -0.2413349151611328, + "step": 4482 + }, + { + "epoch": 0.26, + "learning_rate": 8.66626647846095e-08, + "logits/chosen": -1.9186773300170898, + "logits/rejected": -1.8873634338378906, + "logps/chosen": -239.89979553222656, + "logps/rejected": -387.2825927734375, + "loss": 0.4868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27899476885795593, + "rewards/margins": 0.33536988496780396, + "rewards/rejected": -0.05637512356042862, + "step": 4483 + }, + { + "epoch": 0.26, + "learning_rate": 8.665625621560346e-08, + "logits/chosen": -2.181271553039551, + "logits/rejected": -2.1847875118255615, + "logps/chosen": -23.97580909729004, + "logps/rejected": -101.550537109375, + "loss": 0.7312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28435346484184265, + "rewards/margins": 0.19980639219284058, + "rewards/rejected": -0.4841598570346832, + "step": 4484 + }, + { + "epoch": 0.26, + "learning_rate": 8.664984634438788e-08, + "logits/chosen": -2.069788932800293, + "logits/rejected": -2.0617918968200684, + "logps/chosen": -180.32229614257812, + "logps/rejected": -376.2783508300781, + "loss": 0.1531, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0025908946990967, + "rewards/margins": 1.3702757358551025, + "rewards/rejected": 0.6323150992393494, + "step": 4485 + }, + { + "epoch": 0.26, + "learning_rate": 8.664343517119049e-08, + "logits/chosen": -1.8379639387130737, + "logits/rejected": -1.8313508033752441, + "logps/chosen": -187.49038696289062, + "logps/rejected": -362.0277099609375, + "loss": 0.1882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1463623046875, + "rewards/margins": 1.36016845703125, + "rewards/rejected": -0.21380615234375, + "step": 4486 + }, + { + "epoch": 0.26, + "learning_rate": 8.663702269623903e-08, + "logits/chosen": -2.067234516143799, + "logits/rejected": -2.046292543411255, + "logps/chosen": -121.55401611328125, + "logps/rejected": -314.9567565917969, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5502510070800781, + "rewards/margins": 1.3133583068847656, + "rewards/rejected": -0.7631072998046875, + "step": 4487 + }, + { + "epoch": 0.26, + "learning_rate": 8.663060891976131e-08, + "logits/chosen": -2.0526578426361084, + "logits/rejected": -2.0447137355804443, + "logps/chosen": -137.4576416015625, + "logps/rejected": -190.44869995117188, + "loss": 0.4278, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.327844262123108, + "rewards/margins": -0.06231999397277832, + "rewards/rejected": 1.3901642560958862, + "step": 4488 + }, + { + "epoch": 0.26, + "learning_rate": 8.662419384198518e-08, + "logits/chosen": -1.9260767698287964, + "logits/rejected": -1.9279625415802002, + "logps/chosen": -6.027371406555176, + "logps/rejected": -122.93124389648438, + "loss": 0.3962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5250642895698547, + "rewards/margins": 0.7899332046508789, + "rewards/rejected": -0.26486894488334656, + "step": 4489 + }, + { + "epoch": 0.26, + "learning_rate": 8.661777746313856e-08, + "logits/chosen": -2.0266473293304443, + "logits/rejected": -2.027007579803467, + "logps/chosen": -203.60629272460938, + "logps/rejected": -266.0115661621094, + "loss": 0.3239, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.475225806236267, + "rewards/margins": 0.416473388671875, + "rewards/rejected": 1.058752417564392, + "step": 4490 + }, + { + "epoch": 0.26, + "learning_rate": 8.661135978344935e-08, + "logits/chosen": -1.9570332765579224, + "logits/rejected": -1.955718994140625, + "logps/chosen": -23.979780197143555, + "logps/rejected": -239.13278198242188, + "loss": 0.3909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2643308639526367, + "rewards/margins": 1.42207932472229, + "rewards/rejected": -1.1577484607696533, + "step": 4491 + }, + { + "epoch": 0.26, + "learning_rate": 8.660494080314556e-08, + "logits/chosen": -2.021319627761841, + "logits/rejected": -1.9978607892990112, + "logps/chosen": -217.7866668701172, + "logps/rejected": -283.6582336425781, + "loss": 0.4842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7641037106513977, + "rewards/margins": 0.23592376708984375, + "rewards/rejected": 0.528179943561554, + "step": 4492 + }, + { + "epoch": 0.26, + "learning_rate": 8.659852052245522e-08, + "logits/chosen": -2.0916359424591064, + "logits/rejected": -2.0694751739501953, + "logps/chosen": -278.9798278808594, + "logps/rejected": -296.9940185546875, + "loss": 0.0848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9469635486602783, + "rewards/margins": 2.3258209228515625, + "rewards/rejected": -0.37885743379592896, + "step": 4493 + }, + { + "epoch": 0.26, + "learning_rate": 8.65920989416064e-08, + "logits/chosen": -1.9366023540496826, + "logits/rejected": -1.9349918365478516, + "logps/chosen": -257.24884033203125, + "logps/rejected": -349.15802001953125, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5336579084396362, + "rewards/margins": 0.1425933837890625, + "rewards/rejected": 1.3910645246505737, + "step": 4494 + }, + { + "epoch": 0.26, + "learning_rate": 8.658567606082723e-08, + "logits/chosen": -2.111147403717041, + "logits/rejected": -2.1021087169647217, + "logps/chosen": -204.32363891601562, + "logps/rejected": -361.67724609375, + "loss": 0.0552, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7574478387832642, + "rewards/margins": 3.383192539215088, + "rewards/rejected": -1.6257447004318237, + "step": 4495 + }, + { + "epoch": 0.26, + "learning_rate": 8.657925188034589e-08, + "logits/chosen": -1.9815784692764282, + "logits/rejected": -1.9662244319915771, + "logps/chosen": -8.558591842651367, + "logps/rejected": -156.82028198242188, + "loss": 0.5682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12379799038171768, + "rewards/margins": 0.44065743684768677, + "rewards/rejected": -0.3168594539165497, + "step": 4496 + }, + { + "epoch": 0.26, + "learning_rate": 8.65728264003906e-08, + "logits/chosen": -1.909525752067566, + "logits/rejected": -1.9005030393600464, + "logps/chosen": -18.894819259643555, + "logps/rejected": -268.22906494140625, + "loss": 0.4622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3502374589443207, + "rewards/margins": 3.3727760314941406, + "rewards/rejected": -3.723013401031494, + "step": 4497 + }, + { + "epoch": 0.26, + "learning_rate": 8.656639962118964e-08, + "logits/chosen": -1.945068359375, + "logits/rejected": -1.9492733478546143, + "logps/chosen": -9.289118766784668, + "logps/rejected": -26.307527542114258, + "loss": 0.6564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0750315710902214, + "rewards/margins": 0.08654909580945969, + "rewards/rejected": -0.1615806668996811, + "step": 4498 + }, + { + "epoch": 0.26, + "learning_rate": 8.655997154297127e-08, + "logits/chosen": -1.98732590675354, + "logits/rejected": -1.9926179647445679, + "logps/chosen": -76.12013244628906, + "logps/rejected": -252.14781188964844, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5325302481651306, + "rewards/margins": 1.2369263172149658, + "rewards/rejected": -0.7043960690498352, + "step": 4499 + }, + { + "epoch": 0.26, + "learning_rate": 8.655354216596389e-08, + "logits/chosen": -2.1514177322387695, + "logits/rejected": -2.1225554943084717, + "logps/chosen": -272.9747314453125, + "logps/rejected": -331.25164794921875, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5771057605743408, + "rewards/margins": 1.1826171875, + "rewards/rejected": 0.39448854327201843, + "step": 4500 + }, + { + "epoch": 0.26, + "learning_rate": 8.654711149039589e-08, + "logits/chosen": -2.0779478549957275, + "logits/rejected": -2.0778074264526367, + "logps/chosen": -12.766799926757812, + "logps/rejected": -94.13865661621094, + "loss": 0.6063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29112157225608826, + "rewards/margins": 0.12358388304710388, + "rewards/rejected": 0.16753768920898438, + "step": 4501 + }, + { + "epoch": 0.26, + "learning_rate": 8.654067951649571e-08, + "logits/chosen": -1.9729026556015015, + "logits/rejected": -1.9197990894317627, + "logps/chosen": -226.31689453125, + "logps/rejected": -374.00299072265625, + "loss": 0.0599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.931243896484375, + "rewards/margins": 3.150408983230591, + "rewards/rejected": -1.2191650867462158, + "step": 4502 + }, + { + "epoch": 0.26, + "learning_rate": 8.653424624449186e-08, + "logits/chosen": -1.9365510940551758, + "logits/rejected": -1.8890777826309204, + "logps/chosen": -235.283935546875, + "logps/rejected": -465.33062744140625, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.91943359375, + "rewards/margins": 3.8702392578125, + "rewards/rejected": -1.9508056640625, + "step": 4503 + }, + { + "epoch": 0.26, + "learning_rate": 8.652781167461287e-08, + "logits/chosen": -2.0949642658233643, + "logits/rejected": -2.0989224910736084, + "logps/chosen": -2.6957883834838867, + "logps/rejected": -229.44195556640625, + "loss": 0.5046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08841579407453537, + "rewards/margins": 1.195669412612915, + "rewards/rejected": -1.2840851545333862, + "step": 4504 + }, + { + "epoch": 0.26, + "learning_rate": 8.652137580708733e-08, + "logits/chosen": -1.8449206352233887, + "logits/rejected": -1.8849520683288574, + "logps/chosen": -306.3243713378906, + "logps/rejected": -468.8377685546875, + "loss": 0.1559, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2735565900802612, + "rewards/margins": 1.855966329574585, + "rewards/rejected": -0.582409679889679, + "step": 4505 + }, + { + "epoch": 0.26, + "learning_rate": 8.651493864214387e-08, + "logits/chosen": -2.035684585571289, + "logits/rejected": -2.053619861602783, + "logps/chosen": -176.91371154785156, + "logps/rejected": -213.6951904296875, + "loss": 0.2379, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8147720098495483, + "rewards/margins": 0.7461074590682983, + "rewards/rejected": 1.06866455078125, + "step": 4506 + }, + { + "epoch": 0.26, + "learning_rate": 8.65085001800112e-08, + "logits/chosen": -1.9463835954666138, + "logits/rejected": -1.9295334815979004, + "logps/chosen": -305.373291015625, + "logps/rejected": -445.0079345703125, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.409655809402466, + "rewards/margins": 3.2147035598754883, + "rewards/rejected": -0.8050476312637329, + "step": 4507 + }, + { + "epoch": 0.26, + "learning_rate": 8.650206042091801e-08, + "logits/chosen": -1.995884656906128, + "logits/rejected": -1.977521300315857, + "logps/chosen": -53.504058837890625, + "logps/rejected": -343.2950744628906, + "loss": 0.2613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3599433898925781, + "rewards/margins": 4.492948055267334, + "rewards/rejected": -4.133004665374756, + "step": 4508 + }, + { + "epoch": 0.26, + "learning_rate": 8.649561936509308e-08, + "logits/chosen": -1.994608759880066, + "logits/rejected": -1.975527048110962, + "logps/chosen": -2.6906967163085938, + "logps/rejected": -118.55715942382812, + "loss": 0.4936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07681641727685928, + "rewards/margins": 0.7884657979011536, + "rewards/rejected": -0.7116493582725525, + "step": 4509 + }, + { + "epoch": 0.26, + "learning_rate": 8.648917701276523e-08, + "logits/chosen": -2.106459140777588, + "logits/rejected": -2.139009475708008, + "logps/chosen": -298.2066650390625, + "logps/rejected": -359.29437255859375, + "loss": 0.1589, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.246502637863159, + "rewards/margins": 1.1728148460388184, + "rewards/rejected": 1.0736877918243408, + "step": 4510 + }, + { + "epoch": 0.26, + "learning_rate": 8.648273336416332e-08, + "logits/chosen": -2.081683874130249, + "logits/rejected": -2.097611427307129, + "logps/chosen": -187.86502075195312, + "logps/rejected": -318.766845703125, + "loss": 0.5324, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.572863757610321, + "rewards/margins": -0.16764527559280396, + "rewards/rejected": 0.740509033203125, + "step": 4511 + }, + { + "epoch": 0.26, + "learning_rate": 8.647628841951628e-08, + "logits/chosen": -2.103961706161499, + "logits/rejected": -2.108663320541382, + "logps/chosen": -2.7776036262512207, + "logps/rejected": -52.541748046875, + "loss": 0.6579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06200089678168297, + "rewards/margins": 0.06696915626525879, + "rewards/rejected": -0.0049682618118822575, + "step": 4512 + }, + { + "epoch": 0.26, + "learning_rate": 8.646984217905304e-08, + "logits/chosen": -2.0283892154693604, + "logits/rejected": -2.020878553390503, + "logps/chosen": -3.37358214892447e-05, + "logps/rejected": -158.39666748046875, + "loss": 0.3798, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9801594791933894e-07, + "rewards/margins": 2.677341938018799, + "rewards/rejected": -2.677342176437378, + "step": 4513 + }, + { + "epoch": 0.26, + "learning_rate": 8.646339464300261e-08, + "logits/chosen": -2.0154547691345215, + "logits/rejected": -1.9139635562896729, + "logps/chosen": -190.4892120361328, + "logps/rejected": -376.99945068359375, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.77520751953125, + "rewards/margins": 0.948681652545929, + "rewards/rejected": -0.17347411811351776, + "step": 4514 + }, + { + "epoch": 0.26, + "learning_rate": 8.645694581159405e-08, + "logits/chosen": -2.1304819583892822, + "logits/rejected": -2.1263444423675537, + "logps/chosen": -33.321556091308594, + "logps/rejected": -196.76544189453125, + "loss": 0.3804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43566057085990906, + "rewards/margins": 1.1083106994628906, + "rewards/rejected": -0.672650158405304, + "step": 4515 + }, + { + "epoch": 0.26, + "learning_rate": 8.645049568505643e-08, + "logits/chosen": -2.0273659229278564, + "logits/rejected": -2.0413596630096436, + "logps/chosen": -108.5455093383789, + "logps/rejected": -275.28314208984375, + "loss": 0.3522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49852830171585083, + "rewards/margins": 1.089592695236206, + "rewards/rejected": -0.591064453125, + "step": 4516 + }, + { + "epoch": 0.26, + "learning_rate": 8.644404426361892e-08, + "logits/chosen": -2.083083391189575, + "logits/rejected": -2.088496685028076, + "logps/chosen": -39.72138214111328, + "logps/rejected": -160.45806884765625, + "loss": 0.4349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07539673149585724, + "rewards/margins": 1.2118744850158691, + "rewards/rejected": -1.1364777088165283, + "step": 4517 + }, + { + "epoch": 0.26, + "learning_rate": 8.64375915475107e-08, + "logits/chosen": -1.9445745944976807, + "logits/rejected": -1.9517122507095337, + "logps/chosen": -1.4615225791931152, + "logps/rejected": -169.48431396484375, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13303132355213165, + "rewards/margins": 2.7188987731933594, + "rewards/rejected": -2.585867404937744, + "step": 4518 + }, + { + "epoch": 0.26, + "learning_rate": 8.643113753696097e-08, + "logits/chosen": -1.9209152460098267, + "logits/rejected": -1.925340175628662, + "logps/chosen": -40.55234909057617, + "logps/rejected": -230.65664672851562, + "loss": 0.4464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04703560099005699, + "rewards/margins": 1.4703662395477295, + "rewards/rejected": -1.4233306646347046, + "step": 4519 + }, + { + "epoch": 0.26, + "learning_rate": 8.642468223219903e-08, + "logits/chosen": -2.0752854347229004, + "logits/rejected": -2.1031558513641357, + "logps/chosen": -219.08506774902344, + "logps/rejected": -329.5245056152344, + "loss": 0.1018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.629719614982605, + "rewards/margins": 2.396254062652588, + "rewards/rejected": -0.7665344476699829, + "step": 4520 + }, + { + "epoch": 0.26, + "learning_rate": 8.641822563345422e-08, + "logits/chosen": -1.9005504846572876, + "logits/rejected": -1.9252806901931763, + "logps/chosen": -268.100341796875, + "logps/rejected": -233.8260955810547, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5375946164131165, + "rewards/margins": 0.8597046136856079, + "rewards/rejected": -0.32210999727249146, + "step": 4521 + }, + { + "epoch": 0.26, + "learning_rate": 8.641176774095588e-08, + "logits/chosen": -2.1873836517333984, + "logits/rejected": -2.1910505294799805, + "logps/chosen": -95.134521484375, + "logps/rejected": -233.94667053222656, + "loss": 0.4318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4393173158168793, + "rewards/margins": 0.7314850091934204, + "rewards/rejected": -0.29216766357421875, + "step": 4522 + }, + { + "epoch": 0.26, + "learning_rate": 8.640530855493345e-08, + "logits/chosen": -1.991693139076233, + "logits/rejected": -1.9853376150131226, + "logps/chosen": -40.2745361328125, + "logps/rejected": -243.5348358154297, + "loss": 0.2426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.731249988079071, + "rewards/margins": 2.522444248199463, + "rewards/rejected": -1.791194200515747, + "step": 4523 + }, + { + "epoch": 0.26, + "learning_rate": 8.639884807561639e-08, + "logits/chosen": -1.9968723058700562, + "logits/rejected": -1.9896100759506226, + "logps/chosen": -7.472259521484375, + "logps/rejected": -143.20428466796875, + "loss": 0.6137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07345905154943466, + "rewards/margins": 0.2627229690551758, + "rewards/rejected": -0.18926392495632172, + "step": 4524 + }, + { + "epoch": 0.26, + "learning_rate": 8.639238630323419e-08, + "logits/chosen": -1.9908946752548218, + "logits/rejected": -1.933445692062378, + "logps/chosen": -240.1627197265625, + "logps/rejected": -325.99249267578125, + "loss": 0.2337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4468811750411987, + "rewards/margins": 1.2502319812774658, + "rewards/rejected": 0.19664917886257172, + "step": 4525 + }, + { + "epoch": 0.26, + "learning_rate": 8.63859232380164e-08, + "logits/chosen": -2.1570332050323486, + "logits/rejected": -2.1569061279296875, + "logps/chosen": -21.28731346130371, + "logps/rejected": -77.11819458007812, + "loss": 0.483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14686451852321625, + "rewards/margins": 0.8905855417251587, + "rewards/rejected": -0.7437210083007812, + "step": 4526 + }, + { + "epoch": 0.26, + "learning_rate": 8.637945888019266e-08, + "logits/chosen": -2.2583107948303223, + "logits/rejected": -2.241002321243286, + "logps/chosen": -14.358087539672852, + "logps/rejected": -245.9864501953125, + "loss": 0.3501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04172630235552788, + "rewards/margins": 3.4280271530151367, + "rewards/rejected": -3.386300802230835, + "step": 4527 + }, + { + "epoch": 0.26, + "learning_rate": 8.637299322999258e-08, + "logits/chosen": -2.0268445014953613, + "logits/rejected": -2.0147407054901123, + "logps/chosen": -24.214994430541992, + "logps/rejected": -191.39686584472656, + "loss": 0.4851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17859898507595062, + "rewards/margins": 1.7164068222045898, + "rewards/rejected": -1.8950058221817017, + "step": 4528 + }, + { + "epoch": 0.26, + "learning_rate": 8.636652628764585e-08, + "logits/chosen": -1.9282963275909424, + "logits/rejected": -1.9246301651000977, + "logps/chosen": -224.7430877685547, + "logps/rejected": -350.2872009277344, + "loss": 0.1053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.941798448562622, + "rewards/margins": 2.4638872146606445, + "rewards/rejected": -0.5220886468887329, + "step": 4529 + }, + { + "epoch": 0.26, + "learning_rate": 8.636005805338224e-08, + "logits/chosen": -1.9367482662200928, + "logits/rejected": -1.9343435764312744, + "logps/chosen": -71.67317199707031, + "logps/rejected": -273.1678466796875, + "loss": 0.25, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7023735046386719, + "rewards/margins": 2.3341972827911377, + "rewards/rejected": -1.6318237781524658, + "step": 4530 + }, + { + "epoch": 0.26, + "learning_rate": 8.635358852743149e-08, + "logits/chosen": -2.2040133476257324, + "logits/rejected": -2.1766152381896973, + "logps/chosen": -314.115234375, + "logps/rejected": -539.213134765625, + "loss": 0.0772, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1786162853240967, + "rewards/margins": 2.131814479827881, + "rewards/rejected": 0.04680175706744194, + "step": 4531 + }, + { + "epoch": 0.26, + "learning_rate": 8.634711771002346e-08, + "logits/chosen": -1.9794533252716064, + "logits/rejected": -1.979291558265686, + "logps/chosen": -10.056808471679688, + "logps/rejected": -12.344536781311035, + "loss": 0.641, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0840686783194542, + "rewards/margins": -0.005480289459228516, + "rewards/rejected": 0.08954896777868271, + "step": 4532 + }, + { + "epoch": 0.26, + "learning_rate": 8.634064560138802e-08, + "logits/chosen": -2.228468894958496, + "logits/rejected": -2.2276217937469482, + "logps/chosen": -0.0029945322312414646, + "logps/rejected": -151.3770751953125, + "loss": 0.5134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018019396520685405, + "rewards/margins": 0.9877928495407104, + "rewards/rejected": -0.9879730343818665, + "step": 4533 + }, + { + "epoch": 0.26, + "learning_rate": 8.633417220175508e-08, + "logits/chosen": -1.8625028133392334, + "logits/rejected": -1.94597589969635, + "logps/chosen": -282.6065673828125, + "logps/rejected": -454.7380065917969, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.670385718345642, + "rewards/margins": 3.3266053199768066, + "rewards/rejected": -1.656219482421875, + "step": 4534 + }, + { + "epoch": 0.26, + "learning_rate": 8.632769751135463e-08, + "logits/chosen": -1.9342138767242432, + "logits/rejected": -1.9238624572753906, + "logps/chosen": -232.74990844726562, + "logps/rejected": -368.1816101074219, + "loss": 0.1215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6884338855743408, + "rewards/margins": 2.0388824939727783, + "rewards/rejected": -0.3504486083984375, + "step": 4535 + }, + { + "epoch": 0.26, + "learning_rate": 8.632122153041664e-08, + "logits/chosen": -1.9597265720367432, + "logits/rejected": -1.9410314559936523, + "logps/chosen": -30.940881729125977, + "logps/rejected": -196.720703125, + "loss": 0.456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24005261063575745, + "rewards/margins": 0.7715590000152588, + "rewards/rejected": -0.531506359577179, + "step": 4536 + }, + { + "epoch": 0.26, + "learning_rate": 8.631474425917121e-08, + "logits/chosen": -1.9141985177993774, + "logits/rejected": -1.8721184730529785, + "logps/chosen": -192.87393188476562, + "logps/rejected": -331.896484375, + "loss": 0.442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46514129638671875, + "rewards/margins": 0.5891769528388977, + "rewards/rejected": -0.12403564900159836, + "step": 4537 + }, + { + "epoch": 0.26, + "learning_rate": 8.630826569784843e-08, + "logits/chosen": -1.9032906293869019, + "logits/rejected": -1.9180352687835693, + "logps/chosen": -228.72145080566406, + "logps/rejected": -265.0331115722656, + "loss": 0.5686, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.222564697265625, + "rewards/margins": -0.5716644525527954, + "rewards/rejected": 1.7942291498184204, + "step": 4538 + }, + { + "epoch": 0.26, + "learning_rate": 8.630178584667844e-08, + "logits/chosen": -1.9939802885055542, + "logits/rejected": -1.9730263948440552, + "logps/chosen": -200.9716339111328, + "logps/rejected": -348.91375732421875, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9495498538017273, + "rewards/margins": 0.5678482055664062, + "rewards/rejected": 0.38170167803764343, + "step": 4539 + }, + { + "epoch": 0.26, + "learning_rate": 8.629530470589146e-08, + "logits/chosen": -2.1623411178588867, + "logits/rejected": -2.1508777141571045, + "logps/chosen": -43.779640197753906, + "logps/rejected": -140.7273712158203, + "loss": 0.5536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4020988643169403, + "rewards/margins": 1.101091742515564, + "rewards/rejected": -1.5031906366348267, + "step": 4540 + }, + { + "epoch": 0.26, + "learning_rate": 8.628882227571771e-08, + "logits/chosen": -1.888588547706604, + "logits/rejected": -1.8850069046020508, + "logps/chosen": -0.006802473682910204, + "logps/rejected": -247.21844482421875, + "loss": 0.3658, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.695812877907883e-06, + "rewards/margins": 3.1465601921081543, + "rewards/rejected": -3.1465699672698975, + "step": 4541 + }, + { + "epoch": 0.26, + "learning_rate": 8.628233855638749e-08, + "logits/chosen": -2.029752731323242, + "logits/rejected": -2.0041086673736572, + "logps/chosen": -171.64935302734375, + "logps/rejected": -284.61712646484375, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.567683458328247, + "rewards/margins": 0.24069058895111084, + "rewards/rejected": 1.3269928693771362, + "step": 4542 + }, + { + "epoch": 0.26, + "learning_rate": 8.627585354813112e-08, + "logits/chosen": -2.1240103244781494, + "logits/rejected": -2.1188292503356934, + "logps/chosen": -15.365615844726562, + "logps/rejected": -233.044677734375, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06273279339075089, + "rewards/margins": 2.9251868724823, + "rewards/rejected": -2.8624541759490967, + "step": 4543 + }, + { + "epoch": 0.26, + "learning_rate": 8.626936725117899e-08, + "logits/chosen": -2.055790424346924, + "logits/rejected": -2.0672292709350586, + "logps/chosen": -69.89312744140625, + "logps/rejected": -174.2943878173828, + "loss": 0.4659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03980865702033043, + "rewards/margins": 1.4545562267303467, + "rewards/rejected": -1.4147475957870483, + "step": 4544 + }, + { + "epoch": 0.26, + "learning_rate": 8.626287966576152e-08, + "logits/chosen": -2.042193651199341, + "logits/rejected": -2.0145411491394043, + "logps/chosen": -187.47836303710938, + "logps/rejected": -282.79949951171875, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1615570783615112, + "rewards/margins": 0.6499969959259033, + "rewards/rejected": 0.5115600824356079, + "step": 4545 + }, + { + "epoch": 0.26, + "learning_rate": 8.625639079210919e-08, + "logits/chosen": -1.7982978820800781, + "logits/rejected": -1.8249659538269043, + "logps/chosen": -201.29896545410156, + "logps/rejected": -376.21343994140625, + "loss": 0.123, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.534602403640747, + "rewards/margins": 1.9354844093322754, + "rewards/rejected": -0.40088197588920593, + "step": 4546 + }, + { + "epoch": 0.26, + "learning_rate": 8.62499006304525e-08, + "logits/chosen": -1.874893069267273, + "logits/rejected": -1.8746871948242188, + "logps/chosen": -37.941585540771484, + "logps/rejected": -230.6627960205078, + "loss": 0.5489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3146507441997528, + "rewards/margins": 0.3367103636264801, + "rewards/rejected": -0.02205963246524334, + "step": 4547 + }, + { + "epoch": 0.26, + "learning_rate": 8.624340918102205e-08, + "logits/chosen": -2.161081552505493, + "logits/rejected": -2.1325206756591797, + "logps/chosen": -245.36212158203125, + "logps/rejected": -541.9196166992188, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9215240478515625, + "rewards/margins": 4.4419097900390625, + "rewards/rejected": -2.5203857421875, + "step": 4548 + }, + { + "epoch": 0.26, + "learning_rate": 8.623691644404839e-08, + "logits/chosen": -2.0678296089172363, + "logits/rejected": -2.0440096855163574, + "logps/chosen": -181.29428100585938, + "logps/rejected": -352.1800231933594, + "loss": 0.2791, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3609390258789062, + "rewards/margins": 0.6936874389648438, + "rewards/rejected": 0.6672515869140625, + "step": 4549 + }, + { + "epoch": 0.26, + "learning_rate": 8.62304224197622e-08, + "logits/chosen": -2.0858166217803955, + "logits/rejected": -2.076202154159546, + "logps/chosen": -0.0005880305543541908, + "logps/rejected": -105.96521759033203, + "loss": 0.5867, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.946891800092999e-05, + "rewards/margins": 0.45901745557785034, + "rewards/rejected": -0.4590469300746918, + "step": 4550 + }, + { + "epoch": 0.26, + "learning_rate": 8.62239271083942e-08, + "logits/chosen": -1.8888449668884277, + "logits/rejected": -1.8920832872390747, + "logps/chosen": -208.88478088378906, + "logps/rejected": -208.3074493408203, + "loss": 0.5121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4678665101528168, + "rewards/margins": 0.06587827205657959, + "rewards/rejected": 0.4019882380962372, + "step": 4551 + }, + { + "epoch": 0.26, + "learning_rate": 8.621743051017511e-08, + "logits/chosen": -2.2090890407562256, + "logits/rejected": -2.2054712772369385, + "logps/chosen": -18.70391273498535, + "logps/rejected": -288.9303894042969, + "loss": 0.5233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37910786271095276, + "rewards/margins": 3.038215398788452, + "rewards/rejected": -3.417323350906372, + "step": 4552 + }, + { + "epoch": 0.26, + "learning_rate": 8.621093262533574e-08, + "logits/chosen": -2.0116214752197266, + "logits/rejected": -1.9868494272232056, + "logps/chosen": -166.01852416992188, + "logps/rejected": -395.2872009277344, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.483062744140625, + "rewards/margins": 3.107937812805176, + "rewards/rejected": -1.6248749494552612, + "step": 4553 + }, + { + "epoch": 0.27, + "learning_rate": 8.62044334541069e-08, + "logits/chosen": -2.1256978511810303, + "logits/rejected": -2.1260416507720947, + "logps/chosen": -46.02490997314453, + "logps/rejected": -166.9822998046875, + "loss": 0.447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11956939846277237, + "rewards/margins": 1.136555552482605, + "rewards/rejected": -1.0169861316680908, + "step": 4554 + }, + { + "epoch": 0.27, + "learning_rate": 8.619793299671952e-08, + "logits/chosen": -1.841360330581665, + "logits/rejected": -1.7288486957550049, + "logps/chosen": -141.849365234375, + "logps/rejected": -338.4358215332031, + "loss": 0.4673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6549392938613892, + "rewards/margins": 0.3811935484409332, + "rewards/rejected": 0.27374574542045593, + "step": 4555 + }, + { + "epoch": 0.27, + "learning_rate": 8.619143125340447e-08, + "logits/chosen": -1.9956722259521484, + "logits/rejected": -1.9856315851211548, + "logps/chosen": -50.89186096191406, + "logps/rejected": -280.6398620605469, + "loss": 0.3843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2704818844795227, + "rewards/margins": 1.6237778663635254, + "rewards/rejected": -1.353295922279358, + "step": 4556 + }, + { + "epoch": 0.27, + "learning_rate": 8.618492822439276e-08, + "logits/chosen": -2.199554204940796, + "logits/rejected": -2.1972851753234863, + "logps/chosen": -0.0004838899476453662, + "logps/rejected": -115.5965576171875, + "loss": 0.6051, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.059412119910121e-05, + "rewards/margins": 0.37470102310180664, + "rewards/rejected": -0.3746704161167145, + "step": 4557 + }, + { + "epoch": 0.27, + "learning_rate": 8.617842390991538e-08, + "logits/chosen": -2.053184986114502, + "logits/rejected": -2.0378036499023438, + "logps/chosen": -270.3515625, + "logps/rejected": -345.5577392578125, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4889739751815796, + "rewards/margins": 0.35231924057006836, + "rewards/rejected": 1.1366547346115112, + "step": 4558 + }, + { + "epoch": 0.27, + "learning_rate": 8.617191831020346e-08, + "logits/chosen": -1.9962049722671509, + "logits/rejected": -2.0032074451446533, + "logps/chosen": -0.164911687374115, + "logps/rejected": -214.97640991210938, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020486339926719666, + "rewards/margins": 3.4001851081848145, + "rewards/rejected": -3.402233839035034, + "step": 4559 + }, + { + "epoch": 0.27, + "learning_rate": 8.616541142548802e-08, + "logits/chosen": -2.061645746231079, + "logits/rejected": -2.0651943683624268, + "logps/chosen": -18.244979858398438, + "logps/rejected": -66.2438735961914, + "loss": 0.7008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06565418094396591, + "rewards/margins": 0.05424175411462784, + "rewards/rejected": -0.11989593505859375, + "step": 4560 + }, + { + "epoch": 0.27, + "learning_rate": 8.615890325600028e-08, + "logits/chosen": -1.885240077972412, + "logits/rejected": -1.8825299739837646, + "logps/chosen": -20.102970123291016, + "logps/rejected": -109.99290466308594, + "loss": 0.6524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03311443328857422, + "rewards/margins": 0.14746952056884766, + "rewards/rejected": -0.18058395385742188, + "step": 4561 + }, + { + "epoch": 0.27, + "learning_rate": 8.615239380197143e-08, + "logits/chosen": -1.9285422563552856, + "logits/rejected": -1.9235280752182007, + "logps/chosen": -46.84066390991211, + "logps/rejected": -180.0621337890625, + "loss": 0.4126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0037555694580078125, + "rewards/margins": 1.2365788221359253, + "rewards/rejected": -1.2328232526779175, + "step": 4562 + }, + { + "epoch": 0.27, + "learning_rate": 8.614588306363271e-08, + "logits/chosen": -2.0030641555786133, + "logits/rejected": -2.1432180404663086, + "logps/chosen": -364.2039794921875, + "logps/rejected": -308.70062255859375, + "loss": 0.0644, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1505095958709717, + "rewards/margins": 2.777728319168091, + "rewards/rejected": -0.6272186636924744, + "step": 4563 + }, + { + "epoch": 0.27, + "learning_rate": 8.61393710412154e-08, + "logits/chosen": -2.0501620769500732, + "logits/rejected": -2.047895669937134, + "logps/chosen": -0.034975867718458176, + "logps/rejected": -104.0412826538086, + "loss": 0.5567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039718765765428543, + "rewards/margins": 0.6492205858230591, + "rewards/rejected": -0.6496177911758423, + "step": 4564 + }, + { + "epoch": 0.27, + "learning_rate": 8.613285773495087e-08, + "logits/chosen": -1.9906699657440186, + "logits/rejected": -1.9952189922332764, + "logps/chosen": -0.46006032824516296, + "logps/rejected": -107.70793914794922, + "loss": 0.5969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017216363921761513, + "rewards/margins": 0.2894197106361389, + "rewards/rejected": -0.3066360652446747, + "step": 4565 + }, + { + "epoch": 0.27, + "learning_rate": 8.612634314507049e-08, + "logits/chosen": -1.8653693199157715, + "logits/rejected": -1.8668791055679321, + "logps/chosen": -300.05084228515625, + "logps/rejected": -358.82989501953125, + "loss": 0.1295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.47918701171875, + "rewards/margins": 1.7815215587615967, + "rewards/rejected": -0.30233460664749146, + "step": 4566 + }, + { + "epoch": 0.27, + "learning_rate": 8.611982727180567e-08, + "logits/chosen": -1.660704493522644, + "logits/rejected": -1.600954294204712, + "logps/chosen": -221.2476806640625, + "logps/rejected": -352.9578552246094, + "loss": 0.4215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42489930987358093, + "rewards/margins": 0.7205169796943665, + "rewards/rejected": -0.2956176698207855, + "step": 4567 + }, + { + "epoch": 0.27, + "learning_rate": 8.611331011538792e-08, + "logits/chosen": -2.1161844730377197, + "logits/rejected": -2.1093661785125732, + "logps/chosen": -0.0032638683915138245, + "logps/rejected": -161.713134765625, + "loss": 0.366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025979429483413696, + "rewards/margins": 2.694117307662964, + "rewards/rejected": -2.6943771839141846, + "step": 4568 + }, + { + "epoch": 0.27, + "learning_rate": 8.610679167604873e-08, + "logits/chosen": -2.039468765258789, + "logits/rejected": -2.039212465286255, + "logps/chosen": -47.85255432128906, + "logps/rejected": -203.29354858398438, + "loss": 0.3846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7293128967285156, + "rewards/margins": 0.7616935968399048, + "rewards/rejected": -0.03238067775964737, + "step": 4569 + }, + { + "epoch": 0.27, + "learning_rate": 8.61002719540197e-08, + "logits/chosen": -1.926608920097351, + "logits/rejected": -1.8771260976791382, + "logps/chosen": -262.1121826171875, + "logps/rejected": -565.4671630859375, + "loss": 0.206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4398987293243408, + "rewards/margins": 1.0377991199493408, + "rewards/rejected": 0.402099609375, + "step": 4570 + }, + { + "epoch": 0.27, + "learning_rate": 8.609375094953241e-08, + "logits/chosen": -1.959883451461792, + "logits/rejected": -1.9596115350723267, + "logps/chosen": -0.00011300710320938379, + "logps/rejected": -138.9351043701172, + "loss": 0.4841, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6807761085146922e-06, + "rewards/margins": 1.1501165628433228, + "rewards/rejected": -1.1501182317733765, + "step": 4571 + }, + { + "epoch": 0.27, + "learning_rate": 8.608722866281854e-08, + "logits/chosen": -2.045691967010498, + "logits/rejected": -2.033022880554199, + "logps/chosen": -170.8185577392578, + "logps/rejected": -286.65887451171875, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0570602416992188, + "rewards/margins": 0.21549224853515625, + "rewards/rejected": 1.8415679931640625, + "step": 4572 + }, + { + "epoch": 0.27, + "learning_rate": 8.608070509410978e-08, + "logits/chosen": -1.9468812942504883, + "logits/rejected": -1.9520946741104126, + "logps/chosen": -46.40657043457031, + "logps/rejected": -131.5620574951172, + "loss": 0.5201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24837227165699005, + "rewards/margins": 0.309744656085968, + "rewards/rejected": -0.06137237697839737, + "step": 4573 + }, + { + "epoch": 0.27, + "learning_rate": 8.607418024363791e-08, + "logits/chosen": -1.9726637601852417, + "logits/rejected": -1.9645941257476807, + "logps/chosen": -53.285213470458984, + "logps/rejected": -244.38919067382812, + "loss": 0.7148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.66701740026474, + "rewards/margins": 0.7955406308174133, + "rewards/rejected": -1.4625580310821533, + "step": 4574 + }, + { + "epoch": 0.27, + "learning_rate": 8.606765411163468e-08, + "logits/chosen": -2.03771710395813, + "logits/rejected": -2.034834384918213, + "logps/chosen": -111.44730377197266, + "logps/rejected": -342.6693115234375, + "loss": 0.6189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7784752249717712, + "rewards/margins": 2.342252254486084, + "rewards/rejected": -3.1207275390625, + "step": 4575 + }, + { + "epoch": 0.27, + "learning_rate": 8.606112669833195e-08, + "logits/chosen": -1.9942271709442139, + "logits/rejected": -2.006194591522217, + "logps/chosen": -204.13572692871094, + "logps/rejected": -322.5790100097656, + "loss": 0.1408, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0786149501800537, + "rewards/margins": 1.414067268371582, + "rewards/rejected": 0.6645477414131165, + "step": 4576 + }, + { + "epoch": 0.27, + "learning_rate": 8.60545980039616e-08, + "logits/chosen": -1.9792495965957642, + "logits/rejected": -1.9829024076461792, + "logps/chosen": -0.007994826883077621, + "logps/rejected": -45.096649169921875, + "loss": 0.604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009405558812431991, + "rewards/margins": 0.39412257075309753, + "rewards/rejected": -0.39318200945854187, + "step": 4577 + }, + { + "epoch": 0.27, + "learning_rate": 8.604806802875558e-08, + "logits/chosen": -1.8571747541427612, + "logits/rejected": -1.8386259078979492, + "logps/chosen": -143.12200927734375, + "logps/rejected": -213.556884765625, + "loss": 0.4017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6265228390693665, + "rewards/margins": 0.5271224975585938, + "rewards/rejected": 0.09940033406019211, + "step": 4578 + }, + { + "epoch": 0.27, + "learning_rate": 8.604153677294583e-08, + "logits/chosen": -1.9805684089660645, + "logits/rejected": -2.0528297424316406, + "logps/chosen": -201.77354431152344, + "logps/rejected": -277.61383056640625, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6555267572402954, + "rewards/margins": 0.5343139171600342, + "rewards/rejected": 1.1212128400802612, + "step": 4579 + }, + { + "epoch": 0.27, + "learning_rate": 8.603500423676441e-08, + "logits/chosen": -1.9775429964065552, + "logits/rejected": -1.973421335220337, + "logps/chosen": -29.2615909576416, + "logps/rejected": -125.69108581542969, + "loss": 0.7761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5500028729438782, + "rewards/margins": 0.018187880516052246, + "rewards/rejected": -0.5681907534599304, + "step": 4580 + }, + { + "epoch": 0.27, + "learning_rate": 8.602847042044339e-08, + "logits/chosen": -2.1822876930236816, + "logits/rejected": -2.174572706222534, + "logps/chosen": -4.992666244506836, + "logps/rejected": -198.97955322265625, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07162785530090332, + "rewards/margins": 2.6833086013793945, + "rewards/rejected": -2.611680746078491, + "step": 4581 + }, + { + "epoch": 0.27, + "learning_rate": 8.602193532421486e-08, + "logits/chosen": -2.06465220451355, + "logits/rejected": -2.0450520515441895, + "logps/chosen": -54.816341400146484, + "logps/rejected": -232.84066772460938, + "loss": 0.3527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48226356506347656, + "rewards/margins": 1.3642032146453857, + "rewards/rejected": -0.881939709186554, + "step": 4582 + }, + { + "epoch": 0.27, + "learning_rate": 8.601539894831098e-08, + "logits/chosen": -1.9176512956619263, + "logits/rejected": -1.9195371866226196, + "logps/chosen": -57.611026763916016, + "logps/rejected": -153.77296447753906, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09848480671644211, + "rewards/margins": 1.2049224376678467, + "rewards/rejected": -1.1064376831054688, + "step": 4583 + }, + { + "epoch": 0.27, + "learning_rate": 8.600886129296395e-08, + "logits/chosen": -2.119591236114502, + "logits/rejected": -2.1054229736328125, + "logps/chosen": -0.00012755129137076437, + "logps/rejected": -176.21475219726562, + "loss": 0.4113, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5059691728965845e-06, + "rewards/margins": 1.9631272554397583, + "rewards/rejected": -1.9631317853927612, + "step": 4584 + }, + { + "epoch": 0.27, + "learning_rate": 8.600232235840604e-08, + "logits/chosen": -1.9077107906341553, + "logits/rejected": -1.9404889345169067, + "logps/chosen": -204.1089630126953, + "logps/rejected": -265.42669677734375, + "loss": 0.2018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3938201665878296, + "rewards/margins": 1.2304503917694092, + "rewards/rejected": 0.16336975991725922, + "step": 4585 + }, + { + "epoch": 0.27, + "learning_rate": 8.599578214486953e-08, + "logits/chosen": -2.137816905975342, + "logits/rejected": -2.1303131580352783, + "logps/chosen": -57.14752960205078, + "logps/rejected": -123.36146545410156, + "loss": 0.6361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0940479263663292, + "rewards/margins": 0.1528759002685547, + "rewards/rejected": -0.058827973902225494, + "step": 4586 + }, + { + "epoch": 0.27, + "learning_rate": 8.598924065258675e-08, + "logits/chosen": -2.060317277908325, + "logits/rejected": -2.0691189765930176, + "logps/chosen": -195.44558715820312, + "logps/rejected": -288.1512451171875, + "loss": 0.2452, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7102936506271362, + "rewards/margins": 0.7343537211418152, + "rewards/rejected": 0.975939929485321, + "step": 4587 + }, + { + "epoch": 0.27, + "learning_rate": 8.59826978817901e-08, + "logits/chosen": -2.097093105316162, + "logits/rejected": -2.1321139335632324, + "logps/chosen": -223.158203125, + "logps/rejected": -492.8191833496094, + "loss": 0.1254, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4120270013809204, + "rewards/margins": 2.104400634765625, + "rewards/rejected": -0.6923736929893494, + "step": 4588 + }, + { + "epoch": 0.27, + "learning_rate": 8.597615383271204e-08, + "logits/chosen": -2.0280637741088867, + "logits/rejected": -2.0290849208831787, + "logps/chosen": -0.0021100493613630533, + "logps/rejected": -124.35011291503906, + "loss": 0.4134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011692289263010025, + "rewards/margins": 1.913757085800171, + "rewards/rejected": -1.9138740301132202, + "step": 4589 + }, + { + "epoch": 0.27, + "learning_rate": 8.5969608505585e-08, + "logits/chosen": -2.1688570976257324, + "logits/rejected": -2.1564369201660156, + "logps/chosen": -3.9311563968658447, + "logps/rejected": -186.0915069580078, + "loss": 0.4835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03248026594519615, + "rewards/margins": 1.1344120502471924, + "rewards/rejected": -1.1019318103790283, + "step": 4590 + }, + { + "epoch": 0.27, + "learning_rate": 8.596306190064151e-08, + "logits/chosen": -2.1591336727142334, + "logits/rejected": -2.1554176807403564, + "logps/chosen": -4.758681297302246, + "logps/rejected": -105.44526672363281, + "loss": 0.5362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09945493191480637, + "rewards/margins": 0.6277554035186768, + "rewards/rejected": -0.5283004641532898, + "step": 4591 + }, + { + "epoch": 0.27, + "learning_rate": 8.595651401811417e-08, + "logits/chosen": -2.024674892425537, + "logits/rejected": -2.0290138721466064, + "logps/chosen": -14.476670265197754, + "logps/rejected": -214.73699951171875, + "loss": 0.4294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1833195686340332, + "rewards/margins": 0.9712529182434082, + "rewards/rejected": -0.787933349609375, + "step": 4592 + }, + { + "epoch": 0.27, + "learning_rate": 8.594996485823554e-08, + "logits/chosen": -2.040260076522827, + "logits/rejected": -2.0319650173187256, + "logps/chosen": -29.711402893066406, + "logps/rejected": -154.53570556640625, + "loss": 0.4226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16759586334228516, + "rewards/margins": 1.2447870969772339, + "rewards/rejected": -1.0771912336349487, + "step": 4593 + }, + { + "epoch": 0.27, + "learning_rate": 8.594341442123834e-08, + "logits/chosen": -1.9534590244293213, + "logits/rejected": -1.9362813234329224, + "logps/chosen": -257.57135009765625, + "logps/rejected": -495.0357666015625, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.106396436691284, + "rewards/margins": 3.6011626720428467, + "rewards/rejected": -1.4947662353515625, + "step": 4594 + }, + { + "epoch": 0.27, + "learning_rate": 8.593686270735522e-08, + "logits/chosen": -2.1283559799194336, + "logits/rejected": -2.132894992828369, + "logps/chosen": -45.75184631347656, + "logps/rejected": -174.47801208496094, + "loss": 0.4825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.868329644203186, + "rewards/margins": 0.18004989624023438, + "rewards/rejected": 0.6882797479629517, + "step": 4595 + }, + { + "epoch": 0.27, + "learning_rate": 8.593030971681895e-08, + "logits/chosen": -1.9616581201553345, + "logits/rejected": -1.990959644317627, + "logps/chosen": -122.3100357055664, + "logps/rejected": -333.554443359375, + "loss": 0.3374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6486518979072571, + "rewards/margins": 0.834272027015686, + "rewards/rejected": -0.18562011420726776, + "step": 4596 + }, + { + "epoch": 0.27, + "learning_rate": 8.592375544986233e-08, + "logits/chosen": -2.054518461227417, + "logits/rejected": -2.0345897674560547, + "logps/chosen": -204.30030822753906, + "logps/rejected": -563.1720581054688, + "loss": 0.0524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6083344221115112, + "rewards/margins": 3.7741241455078125, + "rewards/rejected": -2.165789842605591, + "step": 4597 + }, + { + "epoch": 0.27, + "learning_rate": 8.591719990671819e-08, + "logits/chosen": -1.6204317808151245, + "logits/rejected": -1.6181524991989136, + "logps/chosen": -128.6551513671875, + "logps/rejected": -284.10321044921875, + "loss": 0.2485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4592727720737457, + "rewards/margins": 3.804741144180298, + "rewards/rejected": -3.345468282699585, + "step": 4598 + }, + { + "epoch": 0.27, + "learning_rate": 8.591064308761944e-08, + "logits/chosen": -2.0999584197998047, + "logits/rejected": -2.0977237224578857, + "logps/chosen": -0.3498593270778656, + "logps/rejected": -166.58590698242188, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015480977483093739, + "rewards/margins": 2.9079596996307373, + "rewards/rejected": -2.92344069480896, + "step": 4599 + }, + { + "epoch": 0.27, + "learning_rate": 8.590408499279898e-08, + "logits/chosen": -1.979660987854004, + "logits/rejected": -1.9799288511276245, + "logps/chosen": -113.1783676147461, + "logps/rejected": -161.43817138671875, + "loss": 0.4458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8769440054893494, + "rewards/margins": 0.27035677433013916, + "rewards/rejected": 0.6065872311592102, + "step": 4600 + }, + { + "epoch": 0.27, + "learning_rate": 8.589752562248977e-08, + "logits/chosen": -1.9522979259490967, + "logits/rejected": -1.955087423324585, + "logps/chosen": -268.9159240722656, + "logps/rejected": -421.80792236328125, + "loss": 0.0869, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4969574213027954, + "rewards/margins": 2.553708076477051, + "rewards/rejected": -1.0567505359649658, + "step": 4601 + }, + { + "epoch": 0.27, + "learning_rate": 8.589096497692488e-08, + "logits/chosen": -1.9237148761749268, + "logits/rejected": -1.9238786697387695, + "logps/chosen": -110.66304779052734, + "logps/rejected": -434.38958740234375, + "loss": 0.2044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.725934624671936, + "rewards/margins": 2.4954538345336914, + "rewards/rejected": -1.7695190906524658, + "step": 4602 + }, + { + "epoch": 0.27, + "learning_rate": 8.588440305633735e-08, + "logits/chosen": -2.2188243865966797, + "logits/rejected": -2.224053382873535, + "logps/chosen": -100.3091049194336, + "logps/rejected": -250.02040100097656, + "loss": 0.4392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1847236603498459, + "rewards/margins": 0.716223955154419, + "rewards/rejected": -0.5315002799034119, + "step": 4603 + }, + { + "epoch": 0.27, + "learning_rate": 8.58778398609603e-08, + "logits/chosen": -2.1215083599090576, + "logits/rejected": -2.1056697368621826, + "logps/chosen": -167.57333374023438, + "logps/rejected": -257.86334228515625, + "loss": 0.4344, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2870346307754517, + "rewards/margins": -0.07074737548828125, + "rewards/rejected": 1.357782006263733, + "step": 4604 + }, + { + "epoch": 0.27, + "learning_rate": 8.587127539102686e-08, + "logits/chosen": -2.175400733947754, + "logits/rejected": -2.171840190887451, + "logps/chosen": -49.393218994140625, + "logps/rejected": -126.250732421875, + "loss": 0.5141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.534903347492218, + "rewards/margins": 0.2449619472026825, + "rewards/rejected": 0.2899414002895355, + "step": 4605 + }, + { + "epoch": 0.27, + "learning_rate": 8.586470964677027e-08, + "logits/chosen": -2.098710536956787, + "logits/rejected": -2.081073522567749, + "logps/chosen": -40.70039367675781, + "logps/rejected": -209.33941650390625, + "loss": 0.3862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1676788330078125, + "rewards/margins": 1.7856415510177612, + "rewards/rejected": -1.6179627180099487, + "step": 4606 + }, + { + "epoch": 0.27, + "learning_rate": 8.585814262842375e-08, + "logits/chosen": -2.1030361652374268, + "logits/rejected": -2.1072494983673096, + "logps/chosen": -0.0003367364115547389, + "logps/rejected": -32.1569938659668, + "loss": 0.7772, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.5196827007457614e-05, + "rewards/margins": -0.33829912543296814, + "rewards/rejected": 0.3382839262485504, + "step": 4607 + }, + { + "epoch": 0.27, + "learning_rate": 8.58515743362206e-08, + "logits/chosen": -2.177931785583496, + "logits/rejected": -2.18060564994812, + "logps/chosen": -20.41719627380371, + "logps/rejected": -99.33711242675781, + "loss": 0.603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10240326076745987, + "rewards/margins": 0.5335143804550171, + "rewards/rejected": -0.6359176635742188, + "step": 4608 + }, + { + "epoch": 0.27, + "learning_rate": 8.584500477039417e-08, + "logits/chosen": -1.865172266960144, + "logits/rejected": -1.8012696504592896, + "logps/chosen": -319.64825439453125, + "logps/rejected": -432.1925964355469, + "loss": 0.4811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8723083734512329, + "rewards/margins": 0.09627991914749146, + "rewards/rejected": 0.7760284543037415, + "step": 4609 + }, + { + "epoch": 0.27, + "learning_rate": 8.583843393117785e-08, + "logits/chosen": -2.051719903945923, + "logits/rejected": -2.0484488010406494, + "logps/chosen": -57.69905090332031, + "logps/rejected": -209.3272705078125, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07408600300550461, + "rewards/margins": 2.107081651687622, + "rewards/rejected": -2.0329957008361816, + "step": 4610 + }, + { + "epoch": 0.27, + "learning_rate": 8.583186181880501e-08, + "logits/chosen": -1.9226895570755005, + "logits/rejected": -1.9121003150939941, + "logps/chosen": -285.43634033203125, + "logps/rejected": -351.0798645019531, + "loss": 0.3931, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.6298158168792725, + "rewards/margins": -0.11058640480041504, + "rewards/rejected": 2.7404022216796875, + "step": 4611 + }, + { + "epoch": 0.27, + "learning_rate": 8.58252884335092e-08, + "logits/chosen": -2.0626819133758545, + "logits/rejected": -2.047711133956909, + "logps/chosen": -87.95275115966797, + "logps/rejected": -192.45681762695312, + "loss": 0.4735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07595138996839523, + "rewards/margins": 1.073493242263794, + "rewards/rejected": -0.9975418448448181, + "step": 4612 + }, + { + "epoch": 0.27, + "learning_rate": 8.58187137755239e-08, + "logits/chosen": -2.3175573348999023, + "logits/rejected": -2.312023639678955, + "logps/chosen": -23.774133682250977, + "logps/rejected": -195.98548889160156, + "loss": 0.5322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13812504708766937, + "rewards/margins": 1.1097692251205444, + "rewards/rejected": -1.247894287109375, + "step": 4613 + }, + { + "epoch": 0.27, + "learning_rate": 8.581213784508268e-08, + "logits/chosen": -1.9619277715682983, + "logits/rejected": -1.9499073028564453, + "logps/chosen": -224.84922790527344, + "logps/rejected": -287.59588623046875, + "loss": 0.2548, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.43682861328125, + "rewards/margins": 0.9605468511581421, + "rewards/rejected": 0.4762817323207855, + "step": 4614 + }, + { + "epoch": 0.27, + "learning_rate": 8.580556064241913e-08, + "logits/chosen": -2.023374080657959, + "logits/rejected": -2.0244855880737305, + "logps/chosen": -34.808685302734375, + "logps/rejected": -169.20751953125, + "loss": 0.3818, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2594139277935028, + "rewards/margins": 1.4205070734024048, + "rewards/rejected": -1.1610931158065796, + "step": 4615 + }, + { + "epoch": 0.27, + "learning_rate": 8.579898216776693e-08, + "logits/chosen": -2.0756516456604004, + "logits/rejected": -2.053269147872925, + "logps/chosen": -176.32321166992188, + "logps/rejected": -243.9981689453125, + "loss": 0.3959, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5483826398849487, + "rewards/margins": -0.007781982421875, + "rewards/rejected": 1.5561646223068237, + "step": 4616 + }, + { + "epoch": 0.27, + "learning_rate": 8.579240242135978e-08, + "logits/chosen": -2.050370931625366, + "logits/rejected": -2.065157651901245, + "logps/chosen": -222.3601837158203, + "logps/rejected": -508.5518798828125, + "loss": 0.1391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3718734979629517, + "rewards/margins": 2.811232089996338, + "rewards/rejected": -1.4393585920333862, + "step": 4617 + }, + { + "epoch": 0.27, + "learning_rate": 8.57858214034314e-08, + "logits/chosen": -1.8776237964630127, + "logits/rejected": -1.8643996715545654, + "logps/chosen": -95.29151153564453, + "logps/rejected": -394.2017517089844, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7458640933036804, + "rewards/margins": 0.8954246640205383, + "rewards/rejected": -0.14956055581569672, + "step": 4618 + }, + { + "epoch": 0.27, + "learning_rate": 8.577923911421561e-08, + "logits/chosen": -2.0891144275665283, + "logits/rejected": -2.0869016647338867, + "logps/chosen": -1.2833271026611328, + "logps/rejected": -59.89912796020508, + "loss": 0.6648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02615525759756565, + "rewards/margins": 0.07900216430425644, + "rewards/rejected": -0.05284690856933594, + "step": 4619 + }, + { + "epoch": 0.27, + "learning_rate": 8.577265555394624e-08, + "logits/chosen": -1.9123790264129639, + "logits/rejected": -1.9132686853408813, + "logps/chosen": -62.07341766357422, + "logps/rejected": -130.29672241210938, + "loss": 0.732, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3561553955078125, + "rewards/margins": -0.4236389398574829, + "rewards/rejected": 0.7797943353652954, + "step": 4620 + }, + { + "epoch": 0.27, + "learning_rate": 8.576607072285714e-08, + "logits/chosen": -1.915299415588379, + "logits/rejected": -1.9143022298812866, + "logps/chosen": -10.556382179260254, + "logps/rejected": -147.362548828125, + "loss": 0.7486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45377078652381897, + "rewards/margins": 0.2601483166217804, + "rewards/rejected": -0.7139191031455994, + "step": 4621 + }, + { + "epoch": 0.27, + "learning_rate": 8.575948462118228e-08, + "logits/chosen": -2.045306921005249, + "logits/rejected": -2.04590106010437, + "logps/chosen": -5.904887676239014, + "logps/rejected": -85.31702423095703, + "loss": 0.6706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05049920082092285, + "rewards/margins": 0.16151557862758636, + "rewards/rejected": -0.21201477944850922, + "step": 4622 + }, + { + "epoch": 0.27, + "learning_rate": 8.575289724915559e-08, + "logits/chosen": -2.0480027198791504, + "logits/rejected": -1.981330156326294, + "logps/chosen": -262.396484375, + "logps/rejected": -437.185791015625, + "loss": 0.1246, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8289978504180908, + "rewards/margins": 1.8040436506271362, + "rewards/rejected": 0.02495422400534153, + "step": 4623 + }, + { + "epoch": 0.27, + "learning_rate": 8.574630860701111e-08, + "logits/chosen": -2.066391944885254, + "logits/rejected": -2.082533836364746, + "logps/chosen": -76.39043426513672, + "logps/rejected": -229.25375366210938, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4374046325683594, + "rewards/margins": 0.808210015296936, + "rewards/rejected": -0.3708053529262543, + "step": 4624 + }, + { + "epoch": 0.27, + "learning_rate": 8.573971869498289e-08, + "logits/chosen": -1.7984060049057007, + "logits/rejected": -1.794702172279358, + "logps/chosen": -249.86521911621094, + "logps/rejected": -321.5278015136719, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7564315795898438, + "rewards/margins": 0.14850616455078125, + "rewards/rejected": 1.6079254150390625, + "step": 4625 + }, + { + "epoch": 0.27, + "learning_rate": 8.573312751330504e-08, + "logits/chosen": -1.9641667604446411, + "logits/rejected": -1.9447143077850342, + "logps/chosen": -212.7127685546875, + "logps/rejected": -377.9342041015625, + "loss": 0.1065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4345825910568237, + "rewards/margins": 2.65830397605896, + "rewards/rejected": -1.2237213850021362, + "step": 4626 + }, + { + "epoch": 0.27, + "learning_rate": 8.572653506221172e-08, + "logits/chosen": -1.916645884513855, + "logits/rejected": -1.9164113998413086, + "logps/chosen": -4.389834880828857, + "logps/rejected": -88.69038391113281, + "loss": 0.6557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012439346872270107, + "rewards/margins": 0.11218376457691193, + "rewards/rejected": -0.12462311238050461, + "step": 4627 + }, + { + "epoch": 0.27, + "learning_rate": 8.57199413419371e-08, + "logits/chosen": -2.06191349029541, + "logits/rejected": -2.0540738105773926, + "logps/chosen": -31.9383487701416, + "logps/rejected": -136.49057006835938, + "loss": 0.6408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04995784908533096, + "rewards/margins": 0.12366390228271484, + "rewards/rejected": -0.07370605319738388, + "step": 4628 + }, + { + "epoch": 0.27, + "learning_rate": 8.571334635271546e-08, + "logits/chosen": -2.1680805683135986, + "logits/rejected": -2.165696382522583, + "logps/chosen": -7.728511333465576, + "logps/rejected": -117.69490051269531, + "loss": 0.5344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03373995050787926, + "rewards/margins": 0.837123453617096, + "rewards/rejected": -0.8708633780479431, + "step": 4629 + }, + { + "epoch": 0.27, + "learning_rate": 8.570675009478105e-08, + "logits/chosen": -1.9914531707763672, + "logits/rejected": -1.9983028173446655, + "logps/chosen": -12.014208793640137, + "logps/rejected": -256.2640075683594, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1064690575003624, + "rewards/margins": 3.0322623252868652, + "rewards/rejected": -3.1387314796447754, + "step": 4630 + }, + { + "epoch": 0.27, + "learning_rate": 8.570015256836823e-08, + "logits/chosen": -2.0559449195861816, + "logits/rejected": -2.043614149093628, + "logps/chosen": -230.13430786132812, + "logps/rejected": -417.28814697265625, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3800506591796875, + "rewards/margins": 0.511364758014679, + "rewards/rejected": 0.8686859011650085, + "step": 4631 + }, + { + "epoch": 0.27, + "learning_rate": 8.569355377371133e-08, + "logits/chosen": -1.832379937171936, + "logits/rejected": -1.8002632856369019, + "logps/chosen": -219.2554931640625, + "logps/rejected": -390.3953552246094, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8550369143486023, + "rewards/margins": 0.6995711922645569, + "rewards/rejected": 0.15546570718288422, + "step": 4632 + }, + { + "epoch": 0.27, + "learning_rate": 8.568695371104483e-08, + "logits/chosen": -2.022723913192749, + "logits/rejected": -2.0127055644989014, + "logps/chosen": -168.92433166503906, + "logps/rejected": -301.39111328125, + "loss": 0.4234, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3165817260742188, + "rewards/margins": -0.01869046688079834, + "rewards/rejected": 1.335272192955017, + "step": 4633 + }, + { + "epoch": 0.27, + "learning_rate": 8.568035238060315e-08, + "logits/chosen": -2.0082104206085205, + "logits/rejected": -2.0214343070983887, + "logps/chosen": -253.11764526367188, + "logps/rejected": -348.0423278808594, + "loss": 0.139, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8109619617462158, + "rewards/margins": 1.6010559797286987, + "rewards/rejected": 0.20990601181983948, + "step": 4634 + }, + { + "epoch": 0.27, + "learning_rate": 8.567374978262085e-08, + "logits/chosen": -1.9202972650527954, + "logits/rejected": -1.906928539276123, + "logps/chosen": -27.319759368896484, + "logps/rejected": -305.92364501953125, + "loss": 0.2412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5595089197158813, + "rewards/margins": 3.6250972747802734, + "rewards/rejected": -3.0655884742736816, + "step": 4635 + }, + { + "epoch": 0.27, + "learning_rate": 8.566714591733247e-08, + "logits/chosen": -2.0744521617889404, + "logits/rejected": -2.0664191246032715, + "logps/chosen": -167.32574462890625, + "logps/rejected": -259.95452880859375, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3423645496368408, + "rewards/margins": 0.7231781482696533, + "rewards/rejected": 0.6191864013671875, + "step": 4636 + }, + { + "epoch": 0.27, + "learning_rate": 8.566054078497255e-08, + "logits/chosen": -2.059795618057251, + "logits/rejected": -2.044407844543457, + "logps/chosen": -153.28848266601562, + "logps/rejected": -417.7061767578125, + "loss": 0.1586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8347366452217102, + "rewards/margins": 2.3683488368988037, + "rewards/rejected": -1.5336121320724487, + "step": 4637 + }, + { + "epoch": 0.27, + "learning_rate": 8.565393438577581e-08, + "logits/chosen": -2.2717273235321045, + "logits/rejected": -2.266633987426758, + "logps/chosen": -0.18005453050136566, + "logps/rejected": -96.72078704833984, + "loss": 0.6533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005284469109028578, + "rewards/margins": 0.17193034291267395, + "rewards/rejected": -0.1772148162126541, + "step": 4638 + }, + { + "epoch": 0.27, + "learning_rate": 8.564732671997694e-08, + "logits/chosen": -1.8429142236709595, + "logits/rejected": -1.8441190719604492, + "logps/chosen": -189.84222412109375, + "logps/rejected": -560.2117309570312, + "loss": 0.0969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.40960693359375, + "rewards/margins": 2.7071290016174316, + "rewards/rejected": -1.297521948814392, + "step": 4639 + }, + { + "epoch": 0.27, + "learning_rate": 8.564071778781064e-08, + "logits/chosen": -2.112377643585205, + "logits/rejected": -2.0996365547180176, + "logps/chosen": -2.0184037685394287, + "logps/rejected": -205.48052978515625, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05609865114092827, + "rewards/margins": 4.723539352416992, + "rewards/rejected": -4.667440891265869, + "step": 4640 + }, + { + "epoch": 0.27, + "learning_rate": 8.56341075895117e-08, + "logits/chosen": -2.127634286880493, + "logits/rejected": -2.111524820327759, + "logps/chosen": -62.456329345703125, + "logps/rejected": -266.34759521484375, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30520325899124146, + "rewards/margins": 1.957550048828125, + "rewards/rejected": -1.6523468494415283, + "step": 4641 + }, + { + "epoch": 0.27, + "learning_rate": 8.562749612531495e-08, + "logits/chosen": -2.1445178985595703, + "logits/rejected": -2.1435494422912598, + "logps/chosen": -11.33415412902832, + "logps/rejected": -137.13345336914062, + "loss": 0.4825, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18712273240089417, + "rewards/margins": 1.7929890155792236, + "rewards/rejected": -1.9801117181777954, + "step": 4642 + }, + { + "epoch": 0.27, + "learning_rate": 8.562088339545527e-08, + "logits/chosen": -2.0131373405456543, + "logits/rejected": -2.002819299697876, + "logps/chosen": -0.0031084746588021517, + "logps/rejected": -150.4970245361328, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00021813793864566833, + "rewards/margins": 3.0158286094665527, + "rewards/rejected": -3.015610456466675, + "step": 4643 + }, + { + "epoch": 0.27, + "learning_rate": 8.561426940016758e-08, + "logits/chosen": -2.135793924331665, + "logits/rejected": -2.1232335567474365, + "logps/chosen": -48.70118713378906, + "logps/rejected": -194.79452514648438, + "loss": 0.3213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2648792266845703, + "rewards/margins": 2.3070530891418457, + "rewards/rejected": -2.0421738624572754, + "step": 4644 + }, + { + "epoch": 0.27, + "learning_rate": 8.560765413968682e-08, + "logits/chosen": -2.1878318786621094, + "logits/rejected": -2.1895675659179688, + "logps/chosen": -4.029243063996546e-05, + "logps/rejected": -154.43338012695312, + "loss": 0.3922, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.860953998151672e-07, + "rewards/margins": 2.279696464538574, + "rewards/rejected": -2.2796967029571533, + "step": 4645 + }, + { + "epoch": 0.27, + "learning_rate": 8.560103761424802e-08, + "logits/chosen": -2.048753261566162, + "logits/rejected": -2.042102336883545, + "logps/chosen": -140.52684020996094, + "logps/rejected": -294.50604248046875, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4372116327285767, + "rewards/margins": 1.2182998657226562, + "rewards/rejected": 0.21891175210475922, + "step": 4646 + }, + { + "epoch": 0.27, + "learning_rate": 8.559441982408622e-08, + "logits/chosen": -2.137986898422241, + "logits/rejected": -2.1369924545288086, + "logps/chosen": -128.64971923828125, + "logps/rejected": -351.6562805175781, + "loss": 0.3981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03644103929400444, + "rewards/margins": 2.3554811477661133, + "rewards/rejected": -2.319040060043335, + "step": 4647 + }, + { + "epoch": 0.27, + "learning_rate": 8.55878007694365e-08, + "logits/chosen": -2.0145208835601807, + "logits/rejected": -2.003080368041992, + "logps/chosen": -183.229248046875, + "logps/rejected": -329.1358337402344, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7603882551193237, + "rewards/margins": 0.9770721793174744, + "rewards/rejected": 0.7833160758018494, + "step": 4648 + }, + { + "epoch": 0.27, + "learning_rate": 8.558118045053404e-08, + "logits/chosen": -1.85805344581604, + "logits/rejected": -1.8012375831604004, + "logps/chosen": -301.42266845703125, + "logps/rejected": -480.6376953125, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0325684547424316, + "rewards/margins": 2.331045627593994, + "rewards/rejected": -0.2984771728515625, + "step": 4649 + }, + { + "epoch": 0.27, + "learning_rate": 8.5574558867614e-08, + "logits/chosen": -1.9686940908432007, + "logits/rejected": -1.9404710531234741, + "logps/chosen": -242.2047882080078, + "logps/rejected": -524.240478515625, + "loss": 0.1005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1598252058029175, + "rewards/margins": 2.5351762771606445, + "rewards/rejected": -1.3753509521484375, + "step": 4650 + }, + { + "epoch": 0.27, + "learning_rate": 8.55679360209116e-08, + "logits/chosen": -1.9508893489837646, + "logits/rejected": -1.9497706890106201, + "logps/chosen": -63.77114486694336, + "logps/rejected": -296.072265625, + "loss": 0.4585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1819995939731598, + "rewards/margins": 1.1768543720245361, + "rewards/rejected": -0.994854748249054, + "step": 4651 + }, + { + "epoch": 0.27, + "learning_rate": 8.556131191066214e-08, + "logits/chosen": -1.9878969192504883, + "logits/rejected": -1.9933040142059326, + "logps/chosen": -0.4894716143608093, + "logps/rejected": -89.67536926269531, + "loss": 0.4617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009576383046805859, + "rewards/margins": 1.2414947748184204, + "rewards/rejected": -1.2510712146759033, + "step": 4652 + }, + { + "epoch": 0.27, + "learning_rate": 8.555468653710094e-08, + "logits/chosen": -1.9983636140823364, + "logits/rejected": -1.9536460638046265, + "logps/chosen": -80.31137084960938, + "logps/rejected": -402.85601806640625, + "loss": 0.4944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4179138243198395, + "rewards/margins": 2.571575880050659, + "rewards/rejected": -2.989489793777466, + "step": 4653 + }, + { + "epoch": 0.27, + "learning_rate": 8.554805990046334e-08, + "logits/chosen": -1.9493352174758911, + "logits/rejected": -1.9465678930282593, + "logps/chosen": -44.56993103027344, + "logps/rejected": -170.12039184570312, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48959046602249146, + "rewards/margins": 1.4601714611053467, + "rewards/rejected": -0.9705810546875, + "step": 4654 + }, + { + "epoch": 0.27, + "learning_rate": 8.554143200098479e-08, + "logits/chosen": -1.797472357749939, + "logits/rejected": -1.788469672203064, + "logps/chosen": -293.6282958984375, + "logps/rejected": -336.939453125, + "loss": 0.6052, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7361847162246704, + "rewards/margins": -0.4825531244277954, + "rewards/rejected": 1.2187378406524658, + "step": 4655 + }, + { + "epoch": 0.27, + "learning_rate": 8.55348028389007e-08, + "logits/chosen": -1.9348833560943604, + "logits/rejected": -1.9363961219787598, + "logps/chosen": -0.670927107334137, + "logps/rejected": -156.60768127441406, + "loss": 0.4132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007342666387557983, + "rewards/margins": 1.9263075590133667, + "rewards/rejected": -1.933650255203247, + "step": 4656 + }, + { + "epoch": 0.27, + "learning_rate": 8.552817241444662e-08, + "logits/chosen": -2.1348371505737305, + "logits/rejected": -2.133941411972046, + "logps/chosen": -0.11345282196998596, + "logps/rejected": -180.2422637939453, + "loss": 0.4433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005735450424253941, + "rewards/margins": 1.554294228553772, + "rewards/rejected": -1.5600296258926392, + "step": 4657 + }, + { + "epoch": 0.27, + "learning_rate": 8.552154072785805e-08, + "logits/chosen": -2.0782687664031982, + "logits/rejected": -2.0706663131713867, + "logps/chosen": -153.25302124023438, + "logps/rejected": -334.1941833496094, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1030136346817017, + "rewards/margins": 0.6121109127998352, + "rewards/rejected": 0.49090272188186646, + "step": 4658 + }, + { + "epoch": 0.27, + "learning_rate": 8.551490777937058e-08, + "logits/chosen": -1.8603205680847168, + "logits/rejected": -1.8557169437408447, + "logps/chosen": -253.2744140625, + "logps/rejected": -513.9891357421875, + "loss": 0.0535, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.596258521080017, + "rewards/margins": 3.6876039505004883, + "rewards/rejected": -2.0913453102111816, + "step": 4659 + }, + { + "epoch": 0.27, + "learning_rate": 8.55082735692199e-08, + "logits/chosen": -2.1909985542297363, + "logits/rejected": -2.1841237545013428, + "logps/chosen": -54.061309814453125, + "logps/rejected": -187.10586547851562, + "loss": 0.8566, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0340782403945923, + "rewards/margins": 0.9617561101913452, + "rewards/rejected": -1.9958343505859375, + "step": 4660 + }, + { + "epoch": 0.27, + "learning_rate": 8.550163809764163e-08, + "logits/chosen": -1.9983978271484375, + "logits/rejected": -2.0414347648620605, + "logps/chosen": -178.68856811523438, + "logps/rejected": -327.2213134765625, + "loss": 0.1856, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7133606672286987, + "rewards/margins": 1.1323120594024658, + "rewards/rejected": 0.5810486078262329, + "step": 4661 + }, + { + "epoch": 0.27, + "learning_rate": 8.549500136487153e-08, + "logits/chosen": -1.9870535135269165, + "logits/rejected": -2.0096302032470703, + "logps/chosen": -232.46099853515625, + "logps/rejected": -244.01504516601562, + "loss": 0.2337, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1966781616210938, + "rewards/margins": 0.6511580944061279, + "rewards/rejected": 1.5455200672149658, + "step": 4662 + }, + { + "epoch": 0.27, + "learning_rate": 8.548836337114535e-08, + "logits/chosen": -2.1098415851593018, + "logits/rejected": -2.1051347255706787, + "logps/chosen": -46.09916687011719, + "logps/rejected": -139.41847229003906, + "loss": 0.4457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2754531800746918, + "rewards/margins": 0.8400818109512329, + "rewards/rejected": -0.5646286010742188, + "step": 4663 + }, + { + "epoch": 0.27, + "learning_rate": 8.548172411669892e-08, + "logits/chosen": -1.956438422203064, + "logits/rejected": -1.9267516136169434, + "logps/chosen": -31.005109786987305, + "logps/rejected": -475.0603332519531, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10817890614271164, + "rewards/margins": 5.294742107391357, + "rewards/rejected": -5.186563014984131, + "step": 4664 + }, + { + "epoch": 0.27, + "learning_rate": 8.547508360176807e-08, + "logits/chosen": -2.1192610263824463, + "logits/rejected": -2.1186554431915283, + "logps/chosen": -6.003844261169434, + "logps/rejected": -62.96404266357422, + "loss": 0.6889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07833290100097656, + "rewards/margins": 0.0809299498796463, + "rewards/rejected": -0.15926285088062286, + "step": 4665 + }, + { + "epoch": 0.27, + "learning_rate": 8.546844182658874e-08, + "logits/chosen": -2.004152774810791, + "logits/rejected": -1.987979769706726, + "logps/chosen": -196.4774932861328, + "logps/rejected": -398.690185546875, + "loss": 0.1614, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2821838855743408, + "rewards/margins": 1.6851319074630737, + "rewards/rejected": -0.4029479920864105, + "step": 4666 + }, + { + "epoch": 0.27, + "learning_rate": 8.546179879139686e-08, + "logits/chosen": -2.145796060562134, + "logits/rejected": -2.1535418033599854, + "logps/chosen": -2.996955394744873, + "logps/rejected": -295.65484619140625, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021825361996889114, + "rewards/margins": 2.9553701877593994, + "rewards/rejected": -2.933544874191284, + "step": 4667 + }, + { + "epoch": 0.27, + "learning_rate": 8.545515449642842e-08, + "logits/chosen": -2.2280285358428955, + "logits/rejected": -2.221402406692505, + "logps/chosen": -35.430145263671875, + "logps/rejected": -160.95480346679688, + "loss": 0.8267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7614849209785461, + "rewards/margins": 0.498953640460968, + "rewards/rejected": -1.2604385614395142, + "step": 4668 + }, + { + "epoch": 0.27, + "learning_rate": 8.544850894191946e-08, + "logits/chosen": -2.087984561920166, + "logits/rejected": -2.081686496734619, + "logps/chosen": -54.50452423095703, + "logps/rejected": -156.90786743164062, + "loss": 0.3835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3587512969970703, + "rewards/margins": 1.2945423126220703, + "rewards/rejected": -0.935791015625, + "step": 4669 + }, + { + "epoch": 0.27, + "learning_rate": 8.544186212810606e-08, + "logits/chosen": -1.946393370628357, + "logits/rejected": -1.9187898635864258, + "logps/chosen": -178.09857177734375, + "logps/rejected": -424.74420166015625, + "loss": 0.07, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.248486280441284, + "rewards/margins": 2.424774169921875, + "rewards/rejected": -0.17628784477710724, + "step": 4670 + }, + { + "epoch": 0.27, + "learning_rate": 8.543521405522438e-08, + "logits/chosen": -2.0168683528900146, + "logits/rejected": -2.008513927459717, + "logps/chosen": -157.30877685546875, + "logps/rejected": -227.4583740234375, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5247879028320312, + "rewards/margins": 0.22737884521484375, + "rewards/rejected": 1.2974090576171875, + "step": 4671 + }, + { + "epoch": 0.27, + "learning_rate": 8.542856472351054e-08, + "logits/chosen": -2.0004842281341553, + "logits/rejected": -2.0028295516967773, + "logps/chosen": -0.0018807859160006046, + "logps/rejected": -135.7222137451172, + "loss": 0.3897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00015405887097585946, + "rewards/margins": 2.4009947776794434, + "rewards/rejected": -2.4008407592773438, + "step": 4672 + }, + { + "epoch": 0.27, + "learning_rate": 8.542191413320079e-08, + "logits/chosen": -2.055534601211548, + "logits/rejected": -2.0726375579833984, + "logps/chosen": -230.0650634765625, + "logps/rejected": -223.14154052734375, + "loss": 0.1487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7809845209121704, + "rewards/margins": 1.4432251453399658, + "rewards/rejected": 0.337759405374527, + "step": 4673 + }, + { + "epoch": 0.27, + "learning_rate": 8.541526228453138e-08, + "logits/chosen": -1.9317229986190796, + "logits/rejected": -1.9116239547729492, + "logps/chosen": -106.91230010986328, + "logps/rejected": -402.849853515625, + "loss": 0.2001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7097083926200867, + "rewards/margins": 3.4859352111816406, + "rewards/rejected": -2.776226758956909, + "step": 4674 + }, + { + "epoch": 0.27, + "learning_rate": 8.540860917773862e-08, + "logits/chosen": -1.9174425601959229, + "logits/rejected": -1.9274518489837646, + "logps/chosen": -11.124920845031738, + "logps/rejected": -209.692138671875, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2701359689235687, + "rewards/margins": 2.042023181915283, + "rewards/rejected": -1.771887183189392, + "step": 4675 + }, + { + "epoch": 0.27, + "learning_rate": 8.540195481305886e-08, + "logits/chosen": -2.014051914215088, + "logits/rejected": -2.0182785987854004, + "logps/chosen": -44.487823486328125, + "logps/rejected": -106.63121032714844, + "loss": 0.4765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31011316180229187, + "rewards/margins": 0.3999157249927521, + "rewards/rejected": -0.08980255573987961, + "step": 4676 + }, + { + "epoch": 0.27, + "learning_rate": 8.539529919072849e-08, + "logits/chosen": -2.146059036254883, + "logits/rejected": -2.121586561203003, + "logps/chosen": -76.36541748046875, + "logps/rejected": -274.71197509765625, + "loss": 0.4051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19933167099952698, + "rewards/margins": 1.0488067865371704, + "rewards/rejected": -0.849475085735321, + "step": 4677 + }, + { + "epoch": 0.27, + "learning_rate": 8.538864231098395e-08, + "logits/chosen": -2.0485033988952637, + "logits/rejected": -2.0456104278564453, + "logps/chosen": -34.45624542236328, + "logps/rejected": -168.63294982910156, + "loss": 0.514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004971695132553577, + "rewards/margins": 0.9735614657402039, + "rewards/rejected": -0.9685897827148438, + "step": 4678 + }, + { + "epoch": 0.27, + "learning_rate": 8.538198417406175e-08, + "logits/chosen": -2.0649516582489014, + "logits/rejected": -2.0388166904449463, + "logps/chosen": -133.56787109375, + "logps/rejected": -135.54684448242188, + "loss": 0.5883, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4591522216796875, + "rewards/margins": 0.12760162353515625, + "rewards/rejected": 0.33155059814453125, + "step": 4679 + }, + { + "epoch": 0.27, + "learning_rate": 8.537532478019839e-08, + "logits/chosen": -2.091553211212158, + "logits/rejected": -2.0668375492095947, + "logps/chosen": -73.17869567871094, + "logps/rejected": -228.04620361328125, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42108917236328125, + "rewards/margins": 1.2161026000976562, + "rewards/rejected": -0.795013427734375, + "step": 4680 + }, + { + "epoch": 0.27, + "learning_rate": 8.536866412963045e-08, + "logits/chosen": -2.097642660140991, + "logits/rejected": -2.087617874145508, + "logps/chosen": -0.0004746023041661829, + "logps/rejected": -216.53042602539062, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.663586408540141e-05, + "rewards/margins": 3.9135239124298096, + "rewards/rejected": -3.9135406017303467, + "step": 4681 + }, + { + "epoch": 0.27, + "learning_rate": 8.536200222259456e-08, + "logits/chosen": -2.0459325313568115, + "logits/rejected": -2.032588005065918, + "logps/chosen": -68.59595489501953, + "logps/rejected": -287.3308410644531, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5809715390205383, + "rewards/margins": 3.2701468467712402, + "rewards/rejected": -2.6891753673553467, + "step": 4682 + }, + { + "epoch": 0.27, + "learning_rate": 8.535533905932736e-08, + "logits/chosen": -1.8728225231170654, + "logits/rejected": -1.8759807348251343, + "logps/chosen": -20.358503341674805, + "logps/rejected": -142.01333618164062, + "loss": 0.6204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06100425869226456, + "rewards/margins": 0.35430335998535156, + "rewards/rejected": -0.4153076112270355, + "step": 4683 + }, + { + "epoch": 0.27, + "learning_rate": 8.534867464006561e-08, + "logits/chosen": -1.970109224319458, + "logits/rejected": -1.965971827507019, + "logps/chosen": -0.03363071009516716, + "logps/rejected": -209.77395629882812, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003001779259648174, + "rewards/margins": 2.868257522583008, + "rewards/rejected": -2.8685576915740967, + "step": 4684 + }, + { + "epoch": 0.27, + "learning_rate": 8.5342008965046e-08, + "logits/chosen": -1.877347469329834, + "logits/rejected": -1.8777341842651367, + "logps/chosen": -42.877532958984375, + "logps/rejected": -253.78790283203125, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07778777927160263, + "rewards/margins": 1.7064483165740967, + "rewards/rejected": -1.6286605596542358, + "step": 4685 + }, + { + "epoch": 0.27, + "learning_rate": 8.533534203450537e-08, + "logits/chosen": -1.9969934225082397, + "logits/rejected": -1.9783028364181519, + "logps/chosen": -49.71176528930664, + "logps/rejected": -288.923583984375, + "loss": 0.3504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05306282266974449, + "rewards/margins": 3.5479512214660645, + "rewards/rejected": -3.4948883056640625, + "step": 4686 + }, + { + "epoch": 0.27, + "learning_rate": 8.532867384868053e-08, + "logits/chosen": -2.056239128112793, + "logits/rejected": -2.0461056232452393, + "logps/chosen": -0.004377012141048908, + "logps/rejected": -172.8037567138672, + "loss": 0.3952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000274407968390733, + "rewards/margins": 2.282466411590576, + "rewards/rejected": -2.282740831375122, + "step": 4687 + }, + { + "epoch": 0.27, + "learning_rate": 8.53220044078084e-08, + "logits/chosen": -2.070378065109253, + "logits/rejected": -2.076202630996704, + "logps/chosen": -1.4116425514221191, + "logps/rejected": -71.50917053222656, + "loss": 0.5088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03974255546927452, + "rewards/margins": 0.873769998550415, + "rewards/rejected": -0.8340274691581726, + "step": 4688 + }, + { + "epoch": 0.27, + "learning_rate": 8.531533371212589e-08, + "logits/chosen": -2.1433863639831543, + "logits/rejected": -2.153146266937256, + "logps/chosen": -0.00013112538727000356, + "logps/rejected": -128.35951232910156, + "loss": 0.503, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.132303168560611e-05, + "rewards/margins": 0.9991362690925598, + "rewards/rejected": -0.9991249442100525, + "step": 4689 + }, + { + "epoch": 0.27, + "learning_rate": 8.530866176186997e-08, + "logits/chosen": -2.0891988277435303, + "logits/rejected": -2.071732997894287, + "logps/chosen": -175.76028442382812, + "logps/rejected": -307.420166015625, + "loss": 0.4951, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.349507212638855, + "rewards/margins": -0.30602264404296875, + "rewards/rejected": 1.6555298566818237, + "step": 4690 + }, + { + "epoch": 0.27, + "learning_rate": 8.530198855727768e-08, + "logits/chosen": -2.061089038848877, + "logits/rejected": -2.0467946529388428, + "logps/chosen": -135.88723754882812, + "logps/rejected": -270.69854736328125, + "loss": 0.1897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0453170537948608, + "rewards/margins": 1.8494186401367188, + "rewards/rejected": -0.8041015863418579, + "step": 4691 + }, + { + "epoch": 0.27, + "learning_rate": 8.529531409858608e-08, + "logits/chosen": -2.0384833812713623, + "logits/rejected": -2.039579391479492, + "logps/chosen": -4.6208367347717285, + "logps/rejected": -121.64872741699219, + "loss": 0.53, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15593580901622772, + "rewards/margins": 1.0234389305114746, + "rewards/rejected": -1.1793746948242188, + "step": 4692 + }, + { + "epoch": 0.27, + "learning_rate": 8.528863838603226e-08, + "logits/chosen": -2.0506539344787598, + "logits/rejected": -2.035290479660034, + "logps/chosen": -126.59292602539062, + "logps/rejected": -272.7540588378906, + "loss": 0.3803, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0216797590255737, + "rewards/margins": 0.2887360453605652, + "rewards/rejected": 0.7329437136650085, + "step": 4693 + }, + { + "epoch": 0.27, + "learning_rate": 8.52819614198534e-08, + "logits/chosen": -1.9282193183898926, + "logits/rejected": -1.903991460800171, + "logps/chosen": -157.59584045410156, + "logps/rejected": -365.8341979980469, + "loss": 0.114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4839859008789062, + "rewards/margins": 2.0850815773010254, + "rewards/rejected": -0.6010956168174744, + "step": 4694 + }, + { + "epoch": 0.27, + "learning_rate": 8.527528320028669e-08, + "logits/chosen": -1.9494616985321045, + "logits/rejected": -1.9529887437820435, + "logps/chosen": -23.41905403137207, + "logps/rejected": -123.60165405273438, + "loss": 0.5277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10072117298841476, + "rewards/margins": 0.6540094614028931, + "rewards/rejected": -0.5532882809638977, + "step": 4695 + }, + { + "epoch": 0.27, + "learning_rate": 8.526860372756936e-08, + "logits/chosen": -2.2035672664642334, + "logits/rejected": -2.177582025527954, + "logps/chosen": -19.347023010253906, + "logps/rejected": -327.7164001464844, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1058071181178093, + "rewards/margins": 2.753274917602539, + "rewards/rejected": -2.859081983566284, + "step": 4696 + }, + { + "epoch": 0.27, + "learning_rate": 8.526192300193872e-08, + "logits/chosen": -1.9115022420883179, + "logits/rejected": -1.912203073501587, + "logps/chosen": -27.491121292114258, + "logps/rejected": -138.33682250976562, + "loss": 0.5903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10718803852796555, + "rewards/margins": 0.11590538173913956, + "rewards/rejected": -0.008717346005141735, + "step": 4697 + }, + { + "epoch": 0.27, + "learning_rate": 8.525524102363208e-08, + "logits/chosen": -2.081820011138916, + "logits/rejected": -2.0656521320343018, + "logps/chosen": -0.00023566378513351083, + "logps/rejected": -119.76655578613281, + "loss": 0.3917, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.803322553925682e-06, + "rewards/margins": 2.3475711345672607, + "rewards/rejected": -2.3475663661956787, + "step": 4698 + }, + { + "epoch": 0.27, + "learning_rate": 8.524855779288685e-08, + "logits/chosen": -2.0834858417510986, + "logits/rejected": -2.0717785358428955, + "logps/chosen": -36.66718292236328, + "logps/rejected": -240.13218688964844, + "loss": 0.5358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42588385939598083, + "rewards/margins": 1.234548568725586, + "rewards/rejected": -1.6604324579238892, + "step": 4699 + }, + { + "epoch": 0.27, + "learning_rate": 8.52418733099404e-08, + "logits/chosen": -1.915459156036377, + "logits/rejected": -1.8910226821899414, + "logps/chosen": -208.77430725097656, + "logps/rejected": -418.0827941894531, + "loss": 0.1695, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1751266717910767, + "rewards/margins": 1.8957443237304688, + "rewards/rejected": -0.7206177115440369, + "step": 4700 + }, + { + "epoch": 0.27, + "learning_rate": 8.523518757503023e-08, + "logits/chosen": -1.9541022777557373, + "logits/rejected": -1.957311749458313, + "logps/chosen": -4.276556968688965, + "logps/rejected": -114.99622344970703, + "loss": 0.6562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09464430809020996, + "rewards/margins": 0.015400074422359467, + "rewards/rejected": 0.0792442336678505, + "step": 4701 + }, + { + "epoch": 0.27, + "learning_rate": 8.522850058839386e-08, + "logits/chosen": -2.104015588760376, + "logits/rejected": -2.1642491817474365, + "logps/chosen": -220.8387451171875, + "logps/rejected": -367.7876892089844, + "loss": 0.0608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8993881940841675, + "rewards/margins": 3.2902238368988037, + "rewards/rejected": -1.3908356428146362, + "step": 4702 + }, + { + "epoch": 0.27, + "learning_rate": 8.522181235026882e-08, + "logits/chosen": -2.0453474521636963, + "logits/rejected": -2.040756940841675, + "logps/chosen": -2.2858059406280518, + "logps/rejected": -261.13604736328125, + "loss": 0.4112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01321644801646471, + "rewards/margins": 2.109877824783325, + "rewards/rejected": -2.123094320297241, + "step": 4703 + }, + { + "epoch": 0.27, + "learning_rate": 8.52151228608927e-08, + "logits/chosen": -2.1209969520568848, + "logits/rejected": -2.114102840423584, + "logps/chosen": -0.0019523381488397717, + "logps/rejected": -154.96066284179688, + "loss": 0.4494, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.855910032754764e-05, + "rewards/margins": 1.4856034517288208, + "rewards/rejected": -1.4856719970703125, + "step": 4704 + }, + { + "epoch": 0.27, + "learning_rate": 8.520843212050317e-08, + "logits/chosen": -2.152303695678711, + "logits/rejected": -2.1456658840179443, + "logps/chosen": -10.945741653442383, + "logps/rejected": -246.70529174804688, + "loss": 0.3673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15996551513671875, + "rewards/margins": 2.2454757690429688, + "rewards/rejected": -2.08551025390625, + "step": 4705 + }, + { + "epoch": 0.27, + "learning_rate": 8.520174012933791e-08, + "logits/chosen": -2.0479447841644287, + "logits/rejected": -2.0417490005493164, + "logps/chosen": -65.39197540283203, + "logps/rejected": -212.12583923339844, + "loss": 0.6745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43896713852882385, + "rewards/margins": 0.36753466725349426, + "rewards/rejected": -0.8065018057823181, + "step": 4706 + }, + { + "epoch": 0.27, + "learning_rate": 8.519504688763466e-08, + "logits/chosen": -1.881881833076477, + "logits/rejected": -1.881178379058838, + "logps/chosen": -10.47690486907959, + "logps/rejected": -300.17669677734375, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012556934729218483, + "rewards/margins": 4.450235843658447, + "rewards/rejected": -4.462792873382568, + "step": 4707 + }, + { + "epoch": 0.27, + "learning_rate": 8.518835239563118e-08, + "logits/chosen": -1.9298619031906128, + "logits/rejected": -1.9251176118850708, + "logps/chosen": -202.22103881835938, + "logps/rejected": -249.57992553710938, + "loss": 0.1972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5716079473495483, + "rewards/margins": 1.4319076538085938, + "rewards/rejected": 0.13970032334327698, + "step": 4708 + }, + { + "epoch": 0.27, + "learning_rate": 8.518165665356529e-08, + "logits/chosen": -2.020581007003784, + "logits/rejected": -2.0250871181488037, + "logps/chosen": -16.614696502685547, + "logps/rejected": -362.234130859375, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3400648236274719, + "rewards/margins": 5.122254848480225, + "rewards/rejected": -4.782189846038818, + "step": 4709 + }, + { + "epoch": 0.27, + "learning_rate": 8.517495966167488e-08, + "logits/chosen": -2.0164501667022705, + "logits/rejected": -2.0259368419647217, + "logps/chosen": -196.17835998535156, + "logps/rejected": -272.71905517578125, + "loss": 0.1375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.602070689201355, + "rewards/margins": 1.642964243888855, + "rewards/rejected": -0.0408935546875, + "step": 4710 + }, + { + "epoch": 0.27, + "learning_rate": 8.516826142019782e-08, + "logits/chosen": -2.056915760040283, + "logits/rejected": -1.9861491918563843, + "logps/chosen": -129.33074951171875, + "logps/rejected": -253.43565368652344, + "loss": 0.2227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7262634634971619, + "rewards/margins": 2.2251205444335938, + "rewards/rejected": -1.4988571405410767, + "step": 4711 + }, + { + "epoch": 0.27, + "learning_rate": 8.516156192937211e-08, + "logits/chosen": -1.9148465394973755, + "logits/rejected": -1.8943884372711182, + "logps/chosen": -205.80810546875, + "logps/rejected": -279.6539001464844, + "loss": 0.3924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3509552478790283, + "rewards/margins": 0.2489715814590454, + "rewards/rejected": 1.101983666419983, + "step": 4712 + }, + { + "epoch": 0.27, + "learning_rate": 8.515486118943571e-08, + "logits/chosen": -1.8138082027435303, + "logits/rejected": -1.8404442071914673, + "logps/chosen": -241.4468536376953, + "logps/rejected": -386.56927490234375, + "loss": 0.098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6385300159454346, + "rewards/margins": 2.212419271469116, + "rewards/rejected": -0.5738891959190369, + "step": 4713 + }, + { + "epoch": 0.27, + "learning_rate": 8.514815920062669e-08, + "logits/chosen": -2.1514201164245605, + "logits/rejected": -2.1423399448394775, + "logps/chosen": -23.99339485168457, + "logps/rejected": -228.48486328125, + "loss": 0.5274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28201714158058167, + "rewards/margins": 0.3666500151157379, + "rewards/rejected": -0.08463287353515625, + "step": 4714 + }, + { + "epoch": 0.27, + "learning_rate": 8.514145596318311e-08, + "logits/chosen": -2.2165606021881104, + "logits/rejected": -2.1965882778167725, + "logps/chosen": -37.209075927734375, + "logps/rejected": -203.8690185546875, + "loss": 0.3703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20843124389648438, + "rewards/margins": 1.583936333656311, + "rewards/rejected": -1.3755050897598267, + "step": 4715 + }, + { + "epoch": 0.27, + "learning_rate": 8.513475147734315e-08, + "logits/chosen": -2.081137180328369, + "logits/rejected": -2.0912396907806396, + "logps/chosen": -0.0002154004614567384, + "logps/rejected": -103.65592956542969, + "loss": 0.5365, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.191887690765725e-06, + "rewards/margins": 0.7692585587501526, + "rewards/rejected": -0.7692573666572571, + "step": 4716 + }, + { + "epoch": 0.27, + "learning_rate": 8.512804574334493e-08, + "logits/chosen": -2.021110773086548, + "logits/rejected": -2.013521432876587, + "logps/chosen": -49.2403564453125, + "logps/rejected": -188.44483947753906, + "loss": 0.6514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6196266412734985, + "rewards/margins": 0.987866997718811, + "rewards/rejected": -1.6074936389923096, + "step": 4717 + }, + { + "epoch": 0.27, + "learning_rate": 8.51213387614267e-08, + "logits/chosen": -2.013465404510498, + "logits/rejected": -1.9706733226776123, + "logps/chosen": -265.8395080566406, + "logps/rejected": -423.6253356933594, + "loss": 0.1906, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6042450666427612, + "rewards/margins": 1.6274476051330566, + "rewards/rejected": -0.02320251427590847, + "step": 4718 + }, + { + "epoch": 0.27, + "learning_rate": 8.511463053182672e-08, + "logits/chosen": -1.9423370361328125, + "logits/rejected": -1.9354639053344727, + "logps/chosen": -23.243717193603516, + "logps/rejected": -234.44326782226562, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1553489714860916, + "rewards/margins": 2.806468963623047, + "rewards/rejected": -2.6511199474334717, + "step": 4719 + }, + { + "epoch": 0.27, + "learning_rate": 8.51079210547833e-08, + "logits/chosen": -2.022660732269287, + "logits/rejected": -1.9867937564849854, + "logps/chosen": -239.0994415283203, + "logps/rejected": -474.5393371582031, + "loss": 0.2585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7135330438613892, + "rewards/margins": 0.5994735956192017, + "rewards/rejected": 1.1140594482421875, + "step": 4720 + }, + { + "epoch": 0.27, + "learning_rate": 8.51012103305348e-08, + "logits/chosen": -2.0731732845306396, + "logits/rejected": -2.088961124420166, + "logps/chosen": -238.75924682617188, + "logps/rejected": -539.6702880859375, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.86850905418396, + "rewards/margins": 5.500674724578857, + "rewards/rejected": -2.6321656703948975, + "step": 4721 + }, + { + "epoch": 0.27, + "learning_rate": 8.509449835931961e-08, + "logits/chosen": -2.1002070903778076, + "logits/rejected": -2.096944570541382, + "logps/chosen": -47.81217956542969, + "logps/rejected": -140.47918701171875, + "loss": 0.5786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08965454250574112, + "rewards/margins": 0.802258312702179, + "rewards/rejected": -0.712603747844696, + "step": 4722 + }, + { + "epoch": 0.27, + "learning_rate": 8.508778514137617e-08, + "logits/chosen": -2.0544023513793945, + "logits/rejected": -2.056339740753174, + "logps/chosen": -0.2601231336593628, + "logps/rejected": -247.17413330078125, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002582699235063046, + "rewards/margins": 5.666236400604248, + "rewards/rejected": -5.666494846343994, + "step": 4723 + }, + { + "epoch": 0.27, + "learning_rate": 8.508107067694297e-08, + "logits/chosen": -2.0764031410217285, + "logits/rejected": -2.0788209438323975, + "logps/chosen": -30.558269500732422, + "logps/rejected": -131.7609100341797, + "loss": 0.5919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03554534912109375, + "rewards/margins": 0.40967026352882385, + "rewards/rejected": -0.4452156126499176, + "step": 4724 + }, + { + "epoch": 0.27, + "learning_rate": 8.507435496625852e-08, + "logits/chosen": -1.9147030115127563, + "logits/rejected": -1.9123698472976685, + "logps/chosen": -0.17309054732322693, + "logps/rejected": -215.04730224609375, + "loss": 0.402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0027822391130030155, + "rewards/margins": 2.1307761669158936, + "rewards/rejected": -2.1279938220977783, + "step": 4725 + }, + { + "epoch": 0.28, + "learning_rate": 8.506763800956144e-08, + "logits/chosen": -1.884140133857727, + "logits/rejected": -1.8807954788208008, + "logps/chosen": -41.561012268066406, + "logps/rejected": -168.09841918945312, + "loss": 0.8578, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4650619626045227, + "rewards/margins": -0.1590728759765625, + "rewards/rejected": -0.3059890866279602, + "step": 4726 + }, + { + "epoch": 0.28, + "learning_rate": 8.506091980709031e-08, + "logits/chosen": -1.8451764583587646, + "logits/rejected": -1.771620273590088, + "logps/chosen": -235.3650665283203, + "logps/rejected": -470.73101806640625, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48640748858451843, + "rewards/margins": 1.5063873529434204, + "rewards/rejected": -1.0199798345565796, + "step": 4727 + }, + { + "epoch": 0.28, + "learning_rate": 8.505420035908381e-08, + "logits/chosen": -2.042201042175293, + "logits/rejected": -2.0387938022613525, + "logps/chosen": -125.64945983886719, + "logps/rejected": -356.09637451171875, + "loss": 0.3927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19830703735351562, + "rewards/margins": 2.4098961353302, + "rewards/rejected": -2.608203172683716, + "step": 4728 + }, + { + "epoch": 0.28, + "learning_rate": 8.504747966578065e-08, + "logits/chosen": -2.0086891651153564, + "logits/rejected": -1.9915223121643066, + "logps/chosen": -45.30534362792969, + "logps/rejected": -149.42477416992188, + "loss": 0.3949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5552131533622742, + "rewards/margins": 0.8565818667411804, + "rewards/rejected": -0.30136871337890625, + "step": 4729 + }, + { + "epoch": 0.28, + "learning_rate": 8.504075772741958e-08, + "logits/chosen": -2.0178706645965576, + "logits/rejected": -1.9968565702438354, + "logps/chosen": -267.5011901855469, + "logps/rejected": -420.607666015625, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0318238735198975, + "rewards/margins": 0.25862133502960205, + "rewards/rejected": 1.7732025384902954, + "step": 4730 + }, + { + "epoch": 0.28, + "learning_rate": 8.503403454423939e-08, + "logits/chosen": -2.0177645683288574, + "logits/rejected": -2.0060312747955322, + "logps/chosen": -0.32373565435409546, + "logps/rejected": -126.87089538574219, + "loss": 0.3832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018541719764471054, + "rewards/margins": 2.728679656982422, + "rewards/rejected": -2.7472214698791504, + "step": 4731 + }, + { + "epoch": 0.28, + "learning_rate": 8.50273101164789e-08, + "logits/chosen": -1.8496919870376587, + "logits/rejected": -1.8519768714904785, + "logps/chosen": -0.0004064548993483186, + "logps/rejected": -157.29000854492188, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2746080528813764e-06, + "rewards/margins": 2.5821316242218018, + "rewards/rejected": -2.5821304321289062, + "step": 4732 + }, + { + "epoch": 0.28, + "learning_rate": 8.502058444437705e-08, + "logits/chosen": -2.0473341941833496, + "logits/rejected": -1.9913140535354614, + "logps/chosen": -173.152587890625, + "logps/rejected": -415.9920654296875, + "loss": 0.4627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2482757568359375, + "rewards/margins": 0.9846343994140625, + "rewards/rejected": -0.736358642578125, + "step": 4733 + }, + { + "epoch": 0.28, + "learning_rate": 8.501385752817272e-08, + "logits/chosen": -1.8560665845870972, + "logits/rejected": -1.854156255722046, + "logps/chosen": -249.22406005859375, + "logps/rejected": -240.75738525390625, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.496179223060608, + "rewards/margins": 0.18404841423034668, + "rewards/rejected": 1.3121308088302612, + "step": 4734 + }, + { + "epoch": 0.28, + "learning_rate": 8.50071293681049e-08, + "logits/chosen": -1.8344775438308716, + "logits/rejected": -1.8354272842407227, + "logps/chosen": -233.27734375, + "logps/rejected": -416.5118408203125, + "loss": 0.3966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.521697998046875, + "rewards/margins": 0.7924163937568665, + "rewards/rejected": -0.27071839570999146, + "step": 4735 + }, + { + "epoch": 0.28, + "learning_rate": 8.50003999644126e-08, + "logits/chosen": -2.103280544281006, + "logits/rejected": -2.0818188190460205, + "logps/chosen": -193.41798400878906, + "logps/rejected": -388.03466796875, + "loss": 0.1391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4356063604354858, + "rewards/margins": 1.6149978637695312, + "rewards/rejected": -0.17939148843288422, + "step": 4736 + }, + { + "epoch": 0.28, + "learning_rate": 8.499366931733489e-08, + "logits/chosen": -1.7911468744277954, + "logits/rejected": -1.8218863010406494, + "logps/chosen": -163.13461303710938, + "logps/rejected": -220.6876220703125, + "loss": 0.1341, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7231429815292358, + "rewards/margins": 1.6012694835662842, + "rewards/rejected": 0.12187347561120987, + "step": 4737 + }, + { + "epoch": 0.28, + "learning_rate": 8.498693742711087e-08, + "logits/chosen": -1.9586760997772217, + "logits/rejected": -1.9455149173736572, + "logps/chosen": -181.04562377929688, + "logps/rejected": -273.7943115234375, + "loss": 0.4405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5955871939659119, + "rewards/margins": 0.5392090082168579, + "rewards/rejected": 0.05637817457318306, + "step": 4738 + }, + { + "epoch": 0.28, + "learning_rate": 8.498020429397972e-08, + "logits/chosen": -1.9395185708999634, + "logits/rejected": -1.9302269220352173, + "logps/chosen": -47.66912078857422, + "logps/rejected": -209.78248596191406, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6900092959403992, + "rewards/margins": 1.3749167919158936, + "rewards/rejected": -0.6849075555801392, + "step": 4739 + }, + { + "epoch": 0.28, + "learning_rate": 8.497346991818057e-08, + "logits/chosen": -2.150423288345337, + "logits/rejected": -2.1508007049560547, + "logps/chosen": -0.2024478316307068, + "logps/rejected": -183.1140899658203, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021641866769641638, + "rewards/margins": 2.8933238983154297, + "rewards/rejected": -2.8954880237579346, + "step": 4740 + }, + { + "epoch": 0.28, + "learning_rate": 8.496673429995272e-08, + "logits/chosen": -1.8610942363739014, + "logits/rejected": -1.836138367652893, + "logps/chosen": -325.07135009765625, + "logps/rejected": -420.9961242675781, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.497167944908142, + "rewards/margins": 2.4248931407928467, + "rewards/rejected": -0.9277252554893494, + "step": 4741 + }, + { + "epoch": 0.28, + "learning_rate": 8.495999743953542e-08, + "logits/chosen": -2.009442090988159, + "logits/rejected": -2.0074515342712402, + "logps/chosen": -14.466313362121582, + "logps/rejected": -230.39340209960938, + "loss": 0.4294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10899543762207031, + "rewards/margins": 2.318906784057617, + "rewards/rejected": -2.4279022216796875, + "step": 4742 + }, + { + "epoch": 0.28, + "learning_rate": 8.495325933716801e-08, + "logits/chosen": -2.1040287017822266, + "logits/rejected": -2.043933629989624, + "logps/chosen": -414.70941162109375, + "logps/rejected": -727.9808349609375, + "loss": 2.864, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.738195896148682, + "rewards/margins": 3.467212200164795, + "rewards/rejected": -9.205408096313477, + "step": 4743 + }, + { + "epoch": 0.28, + "learning_rate": 8.494651999308984e-08, + "logits/chosen": -2.0576939582824707, + "logits/rejected": -2.045941114425659, + "logps/chosen": -34.3662223815918, + "logps/rejected": -144.7189178466797, + "loss": 0.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6671028137207031, + "rewards/margins": 1.8364495038986206, + "rewards/rejected": -1.1693466901779175, + "step": 4744 + }, + { + "epoch": 0.28, + "learning_rate": 8.493977940754036e-08, + "logits/chosen": -2.0617308616638184, + "logits/rejected": -2.0558435916900635, + "logps/chosen": -23.517107009887695, + "logps/rejected": -175.30015563964844, + "loss": 0.5103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09939785301685333, + "rewards/margins": 0.9519861340522766, + "rewards/rejected": -1.0513839721679688, + "step": 4745 + }, + { + "epoch": 0.28, + "learning_rate": 8.493303758075898e-08, + "logits/chosen": -2.029127597808838, + "logits/rejected": -2.02123761177063, + "logps/chosen": -139.53826904296875, + "logps/rejected": -298.725830078125, + "loss": 0.8928, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07826843112707138, + "rewards/margins": -0.6904968619346619, + "rewards/rejected": 0.7687652707099915, + "step": 4746 + }, + { + "epoch": 0.28, + "learning_rate": 8.492629451298524e-08, + "logits/chosen": -1.7824054956436157, + "logits/rejected": -1.783512830734253, + "logps/chosen": -0.2038005143404007, + "logps/rejected": -126.29845428466797, + "loss": 0.473, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.921291787875816e-05, + "rewards/margins": 1.2329126596450806, + "rewards/rejected": -1.2329819202423096, + "step": 4747 + }, + { + "epoch": 0.28, + "learning_rate": 8.491955020445868e-08, + "logits/chosen": -2.1990177631378174, + "logits/rejected": -2.2010653018951416, + "logps/chosen": -0.053647641092538834, + "logps/rejected": -192.51425170898438, + "loss": 0.379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003978615161031485, + "rewards/margins": 2.581571340560913, + "rewards/rejected": -2.58555006980896, + "step": 4748 + }, + { + "epoch": 0.28, + "learning_rate": 8.491280465541888e-08, + "logits/chosen": -2.1615118980407715, + "logits/rejected": -2.1568098068237305, + "logps/chosen": -14.107505798339844, + "logps/rejected": -196.8915557861328, + "loss": 0.4342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04892626032233238, + "rewards/margins": 1.2682331800460815, + "rewards/rejected": -1.2193069458007812, + "step": 4749 + }, + { + "epoch": 0.28, + "learning_rate": 8.49060578661055e-08, + "logits/chosen": -2.069521188735962, + "logits/rejected": -2.0054426193237305, + "logps/chosen": -291.6739501953125, + "logps/rejected": -617.1380004882812, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.427746534347534, + "rewards/margins": 3.857586622238159, + "rewards/rejected": -1.429840087890625, + "step": 4750 + }, + { + "epoch": 0.28, + "learning_rate": 8.489930983675817e-08, + "logits/chosen": -2.1534535884857178, + "logits/rejected": -2.1425020694732666, + "logps/chosen": -68.16804504394531, + "logps/rejected": -158.81329345703125, + "loss": 0.4332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4900955259799957, + "rewards/margins": 0.6020782589912415, + "rewards/rejected": -0.11198272556066513, + "step": 4751 + }, + { + "epoch": 0.28, + "learning_rate": 8.489256056761667e-08, + "logits/chosen": -2.058830976486206, + "logits/rejected": -2.0335464477539062, + "logps/chosen": -209.14230346679688, + "logps/rejected": -414.5992431640625, + "loss": 0.4164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.365121454000473, + "rewards/margins": 0.5011383295059204, + "rewards/rejected": -0.136016845703125, + "step": 4752 + }, + { + "epoch": 0.28, + "learning_rate": 8.488581005892073e-08, + "logits/chosen": -2.106398820877075, + "logits/rejected": -2.102360486984253, + "logps/chosen": -4.269059181213379, + "logps/rejected": -90.11573791503906, + "loss": 0.5655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1361105889081955, + "rewards/margins": 0.40792685747146606, + "rewards/rejected": -0.2718162536621094, + "step": 4753 + }, + { + "epoch": 0.28, + "learning_rate": 8.487905831091015e-08, + "logits/chosen": -1.8764067888259888, + "logits/rejected": -1.8539254665374756, + "logps/chosen": -36.80944061279297, + "logps/rejected": -334.6430969238281, + "loss": 0.3423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.046234894543886185, + "rewards/margins": 5.452877044677734, + "rewards/rejected": -5.499112129211426, + "step": 4754 + }, + { + "epoch": 0.28, + "learning_rate": 8.487230532382483e-08, + "logits/chosen": -2.149448871612549, + "logits/rejected": -2.1436753273010254, + "logps/chosen": -174.76202392578125, + "logps/rejected": -343.7584533691406, + "loss": 0.1895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8675415515899658, + "rewards/margins": 1.0173554420471191, + "rewards/rejected": 0.8501861691474915, + "step": 4755 + }, + { + "epoch": 0.28, + "learning_rate": 8.486555109790465e-08, + "logits/chosen": -2.093362331390381, + "logits/rejected": -2.0841474533081055, + "logps/chosen": -23.298809051513672, + "logps/rejected": -188.98178100585938, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43192368745803833, + "rewards/margins": 1.8395226001739502, + "rewards/rejected": -1.407598853111267, + "step": 4756 + }, + { + "epoch": 0.28, + "learning_rate": 8.485879563338955e-08, + "logits/chosen": -1.9919757843017578, + "logits/rejected": -1.9918882846832275, + "logps/chosen": -9.384227752685547, + "logps/rejected": -28.832582473754883, + "loss": 0.6997, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00012483597674872726, + "rewards/margins": -0.004444408696144819, + "rewards/rejected": 0.00456924457103014, + "step": 4757 + }, + { + "epoch": 0.28, + "learning_rate": 8.485203893051951e-08, + "logits/chosen": -2.21612548828125, + "logits/rejected": -2.2180840969085693, + "logps/chosen": -0.0004295713733881712, + "logps/rejected": -178.99044799804688, + "loss": 0.5167, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.793845826294273e-05, + "rewards/margins": 0.8983125686645508, + "rewards/rejected": -0.8983505368232727, + "step": 4758 + }, + { + "epoch": 0.28, + "learning_rate": 8.484528098953457e-08, + "logits/chosen": -1.8487070798873901, + "logits/rejected": -1.9063518047332764, + "logps/chosen": -327.3447265625, + "logps/rejected": -428.2793884277344, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1600098609924316, + "rewards/margins": 2.92197585105896, + "rewards/rejected": -0.7619659304618835, + "step": 4759 + }, + { + "epoch": 0.28, + "learning_rate": 8.483852181067479e-08, + "logits/chosen": -2.0397865772247314, + "logits/rejected": -2.023021697998047, + "logps/chosen": -209.96481323242188, + "logps/rejected": -370.3271179199219, + "loss": 0.4194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.579754650592804, + "rewards/margins": 0.36089783906936646, + "rewards/rejected": 0.2188568115234375, + "step": 4760 + }, + { + "epoch": 0.28, + "learning_rate": 8.483176139418032e-08, + "logits/chosen": -2.025238275527954, + "logits/rejected": -1.9962979555130005, + "logps/chosen": -181.20257568359375, + "logps/rejected": -361.3394470214844, + "loss": 0.1729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8146652579307556, + "rewards/margins": 2.2680070400238037, + "rewards/rejected": -1.4533417224884033, + "step": 4761 + }, + { + "epoch": 0.28, + "learning_rate": 8.482499974029129e-08, + "logits/chosen": -2.104346513748169, + "logits/rejected": -2.116715431213379, + "logps/chosen": -255.262939453125, + "logps/rejected": -515.4867553710938, + "loss": 0.0649, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8973877429962158, + "rewards/margins": 5.083996772766113, + "rewards/rejected": -3.1866090297698975, + "step": 4762 + }, + { + "epoch": 0.28, + "learning_rate": 8.481823684924792e-08, + "logits/chosen": -2.090613842010498, + "logits/rejected": -2.0845086574554443, + "logps/chosen": -0.05752139165997505, + "logps/rejected": -204.8732452392578, + "loss": 0.4812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012940306914970279, + "rewards/margins": 1.1486282348632812, + "rewards/rejected": -1.149922251701355, + "step": 4763 + }, + { + "epoch": 0.28, + "learning_rate": 8.481147272129048e-08, + "logits/chosen": -1.7319953441619873, + "logits/rejected": -1.7432321310043335, + "logps/chosen": -153.393798828125, + "logps/rejected": -529.1398315429688, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9374603629112244, + "rewards/margins": 4.045702934265137, + "rewards/rejected": -3.1082427501678467, + "step": 4764 + }, + { + "epoch": 0.28, + "learning_rate": 8.480470735665923e-08, + "logits/chosen": -2.117493152618408, + "logits/rejected": -2.123276472091675, + "logps/chosen": -26.787273406982422, + "logps/rejected": -91.80066680908203, + "loss": 0.4814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4807266294956207, + "rewards/margins": 0.4457351863384247, + "rewards/rejected": 0.03499145433306694, + "step": 4765 + }, + { + "epoch": 0.28, + "learning_rate": 8.479794075559455e-08, + "logits/chosen": -2.014631986618042, + "logits/rejected": -1.9987002611160278, + "logps/chosen": -50.00852966308594, + "logps/rejected": -132.5724334716797, + "loss": 0.4365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09167861938476562, + "rewards/margins": 0.7252052426338196, + "rewards/rejected": -0.633526623249054, + "step": 4766 + }, + { + "epoch": 0.28, + "learning_rate": 8.479117291833677e-08, + "logits/chosen": -1.9702179431915283, + "logits/rejected": -1.9498167037963867, + "logps/chosen": -219.2061309814453, + "logps/rejected": -380.990478515625, + "loss": 0.3668, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.246678113937378, + "rewards/margins": 0.03821849822998047, + "rewards/rejected": 2.2084596157073975, + "step": 4767 + }, + { + "epoch": 0.28, + "learning_rate": 8.478440384512637e-08, + "logits/chosen": -2.171821117401123, + "logits/rejected": -2.1713454723358154, + "logps/chosen": -5.75339937210083, + "logps/rejected": -152.4720458984375, + "loss": 0.4453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12476596981287003, + "rewards/margins": 1.0795068740844727, + "rewards/rejected": -0.9547409415245056, + "step": 4768 + }, + { + "epoch": 0.28, + "learning_rate": 8.477763353620378e-08, + "logits/chosen": -2.0723564624786377, + "logits/rejected": -2.080625534057617, + "logps/chosen": -0.0012025616597384214, + "logps/rejected": -264.71502685546875, + "loss": 0.3963, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.564281258964911e-05, + "rewards/margins": 2.201218366622925, + "rewards/rejected": -2.2012939453125, + "step": 4769 + }, + { + "epoch": 0.28, + "learning_rate": 8.477086199180954e-08, + "logits/chosen": -2.1689889430999756, + "logits/rejected": -2.1674699783325195, + "logps/chosen": -300.2308349609375, + "logps/rejected": -384.2662353515625, + "loss": 0.461, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3956482410430908, + "rewards/margins": -0.20221245288848877, + "rewards/rejected": 1.5978606939315796, + "step": 4770 + }, + { + "epoch": 0.28, + "learning_rate": 8.47640892121842e-08, + "logits/chosen": -1.9608840942382812, + "logits/rejected": -1.9427717924118042, + "logps/chosen": -210.59292602539062, + "logps/rejected": -374.879638671875, + "loss": 0.2482, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3491973876953125, + "rewards/margins": 0.9670135378837585, + "rewards/rejected": 0.38218384981155396, + "step": 4771 + }, + { + "epoch": 0.28, + "learning_rate": 8.475731519756836e-08, + "logits/chosen": -2.026249647140503, + "logits/rejected": -2.04565691947937, + "logps/chosen": -273.5877685546875, + "logps/rejected": -434.16015625, + "loss": 0.1134, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4239838123321533, + "rewards/margins": 1.5689911842346191, + "rewards/rejected": 0.854992687702179, + "step": 4772 + }, + { + "epoch": 0.28, + "learning_rate": 8.475053994820267e-08, + "logits/chosen": -1.9674149751663208, + "logits/rejected": -1.9722391366958618, + "logps/chosen": -25.55515480041504, + "logps/rejected": -161.15589904785156, + "loss": 0.4406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0640401840209961, + "rewards/margins": 1.2343953847885132, + "rewards/rejected": -1.170355200767517, + "step": 4773 + }, + { + "epoch": 0.28, + "learning_rate": 8.47437634643278e-08, + "logits/chosen": -2.151186227798462, + "logits/rejected": -2.143016815185547, + "logps/chosen": -28.577308654785156, + "logps/rejected": -233.10606384277344, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13573037087917328, + "rewards/margins": 2.5253283977508545, + "rewards/rejected": -2.3895981311798096, + "step": 4774 + }, + { + "epoch": 0.28, + "learning_rate": 8.473698574618451e-08, + "logits/chosen": -2.081620216369629, + "logits/rejected": -2.075490713119507, + "logps/chosen": -6.818713882239535e-05, + "logps/rejected": -119.01805114746094, + "loss": 0.5272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1920638343099199e-07, + "rewards/margins": 0.8322877287864685, + "rewards/rejected": -0.832287609577179, + "step": 4775 + }, + { + "epoch": 0.28, + "learning_rate": 8.473020679401358e-08, + "logits/chosen": -1.9546570777893066, + "logits/rejected": -1.9491615295410156, + "logps/chosen": -3.063650365220383e-05, + "logps/rejected": -99.89105224609375, + "loss": 0.5982, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.649096752018522e-07, + "rewards/margins": 0.4244731068611145, + "rewards/rejected": -0.4244735836982727, + "step": 4776 + }, + { + "epoch": 0.28, + "learning_rate": 8.472342660805582e-08, + "logits/chosen": -1.8754830360412598, + "logits/rejected": -1.8948681354522705, + "logps/chosen": -243.9259490966797, + "logps/rejected": -318.63909912109375, + "loss": 0.1564, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6072372198104858, + "rewards/margins": 1.865596055984497, + "rewards/rejected": -0.25835877656936646, + "step": 4777 + }, + { + "epoch": 0.28, + "learning_rate": 8.471664518855209e-08, + "logits/chosen": -2.0798206329345703, + "logits/rejected": -2.073096990585327, + "logps/chosen": -20.382965087890625, + "logps/rejected": -213.53533935546875, + "loss": 0.3127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3497594892978668, + "rewards/margins": 2.3381872177124023, + "rewards/rejected": -1.988427758216858, + "step": 4778 + }, + { + "epoch": 0.28, + "learning_rate": 8.470986253574331e-08, + "logits/chosen": -1.7989040613174438, + "logits/rejected": -1.7947745323181152, + "logps/chosen": -24.9254207611084, + "logps/rejected": -184.9677276611328, + "loss": 0.6544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5959436297416687, + "rewards/margins": 1.0805792808532715, + "rewards/rejected": -1.6765228509902954, + "step": 4779 + }, + { + "epoch": 0.28, + "learning_rate": 8.470307864987042e-08, + "logits/chosen": -1.847861409187317, + "logits/rejected": -1.8499622344970703, + "logps/chosen": -20.429014205932617, + "logps/rejected": -46.73484420776367, + "loss": 0.6816, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019518280401825905, + "rewards/margins": 0.013855362311005592, + "rewards/rejected": 0.0056629180908203125, + "step": 4780 + }, + { + "epoch": 0.28, + "learning_rate": 8.469629353117442e-08, + "logits/chosen": -2.028400421142578, + "logits/rejected": -2.024789333343506, + "logps/chosen": -7.438527245540172e-05, + "logps/rejected": -164.03623962402344, + "loss": 0.3758, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.748407711005711e-07, + "rewards/margins": 2.6629936695098877, + "rewards/rejected": -2.662994384765625, + "step": 4781 + }, + { + "epoch": 0.28, + "learning_rate": 8.468950717989638e-08, + "logits/chosen": -2.0056941509246826, + "logits/rejected": -2.001300811767578, + "logps/chosen": -47.820701599121094, + "logps/rejected": -167.49249267578125, + "loss": 0.4797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.204447939991951, + "rewards/margins": 0.7474807500839233, + "rewards/rejected": -0.5430328249931335, + "step": 4782 + }, + { + "epoch": 0.28, + "learning_rate": 8.468271959627736e-08, + "logits/chosen": -1.88473379611969, + "logits/rejected": -1.8796254396438599, + "logps/chosen": -133.09718322753906, + "logps/rejected": -238.80364990234375, + "loss": 0.4049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2385085821151733, + "rewards/margins": 0.10336148738861084, + "rewards/rejected": 1.1351470947265625, + "step": 4783 + }, + { + "epoch": 0.28, + "learning_rate": 8.467593078055846e-08, + "logits/chosen": -2.033233880996704, + "logits/rejected": -2.036287307739258, + "logps/chosen": -214.83706665039062, + "logps/rejected": -331.0062255859375, + "loss": 0.1598, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.309906005859375, + "rewards/margins": 1.232147216796875, + "rewards/rejected": 1.0777587890625, + "step": 4784 + }, + { + "epoch": 0.28, + "learning_rate": 8.466914073298091e-08, + "logits/chosen": -2.0377728939056396, + "logits/rejected": -2.0369715690612793, + "logps/chosen": -50.66792297363281, + "logps/rejected": -263.23077392578125, + "loss": 0.4563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1627521514892578, + "rewards/margins": 1.0553791522979736, + "rewards/rejected": -0.892626941204071, + "step": 4785 + }, + { + "epoch": 0.28, + "learning_rate": 8.466234945378587e-08, + "logits/chosen": -2.1172449588775635, + "logits/rejected": -2.1191420555114746, + "logps/chosen": -37.03556823730469, + "logps/rejected": -189.266357421875, + "loss": 0.2417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7006843686103821, + "rewards/margins": 2.120900869369507, + "rewards/rejected": -1.42021644115448, + "step": 4786 + }, + { + "epoch": 0.28, + "learning_rate": 8.465555694321464e-08, + "logits/chosen": -2.038999080657959, + "logits/rejected": -2.021655797958374, + "logps/chosen": -213.87083435058594, + "logps/rejected": -504.796142578125, + "loss": 0.2012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0026352405548096, + "rewards/margins": 0.9087021350860596, + "rewards/rejected": 1.09393310546875, + "step": 4787 + }, + { + "epoch": 0.28, + "learning_rate": 8.464876320150851e-08, + "logits/chosen": -1.8921436071395874, + "logits/rejected": -1.8874461650848389, + "logps/chosen": -118.8223648071289, + "logps/rejected": -295.5558776855469, + "loss": 0.1295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3095954656600952, + "rewards/margins": 2.125080108642578, + "rewards/rejected": -0.8154846429824829, + "step": 4788 + }, + { + "epoch": 0.28, + "learning_rate": 8.464196822890882e-08, + "logits/chosen": -2.008209705352783, + "logits/rejected": -2.015617847442627, + "logps/chosen": -37.09295654296875, + "logps/rejected": -135.29745483398438, + "loss": 0.3748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29551011323928833, + "rewards/margins": 1.4830436706542969, + "rewards/rejected": -1.1875336170196533, + "step": 4789 + }, + { + "epoch": 0.28, + "learning_rate": 8.463517202565696e-08, + "logits/chosen": -1.844444990158081, + "logits/rejected": -1.8405297994613647, + "logps/chosen": -191.301025390625, + "logps/rejected": -399.9410095214844, + "loss": 0.1747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.961352527141571, + "rewards/margins": 1.8586273193359375, + "rewards/rejected": -0.8972747921943665, + "step": 4790 + }, + { + "epoch": 0.28, + "learning_rate": 8.462837459199439e-08, + "logits/chosen": -2.032426118850708, + "logits/rejected": -2.0292065143585205, + "logps/chosen": -13.461666107177734, + "logps/rejected": -172.58656311035156, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041076090186834335, + "rewards/margins": 2.9085841178894043, + "rewards/rejected": -2.8675079345703125, + "step": 4791 + }, + { + "epoch": 0.28, + "learning_rate": 8.462157592816255e-08, + "logits/chosen": -2.1928653717041016, + "logits/rejected": -2.1756680011749268, + "logps/chosen": -46.677337646484375, + "logps/rejected": -154.26974487304688, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010843658819794655, + "rewards/margins": 2.1975197792053223, + "rewards/rejected": -2.186676025390625, + "step": 4792 + }, + { + "epoch": 0.28, + "learning_rate": 8.461477603440299e-08, + "logits/chosen": -2.033195972442627, + "logits/rejected": -2.002631425857544, + "logps/chosen": -204.65232849121094, + "logps/rejected": -386.32025146484375, + "loss": 0.1963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3988617658615112, + "rewards/margins": 1.544143795967102, + "rewards/rejected": -0.14528198540210724, + "step": 4793 + }, + { + "epoch": 0.28, + "learning_rate": 8.460797491095726e-08, + "logits/chosen": -2.05017352104187, + "logits/rejected": -2.0411365032196045, + "logps/chosen": -86.96825408935547, + "logps/rejected": -291.5460205078125, + "loss": 0.4424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16861343383789062, + "rewards/margins": 2.1672019958496094, + "rewards/rejected": -2.3358154296875, + "step": 4794 + }, + { + "epoch": 0.28, + "learning_rate": 8.460117255806698e-08, + "logits/chosen": -1.829001545906067, + "logits/rejected": -1.8706990480422974, + "logps/chosen": -210.3395233154297, + "logps/rejected": -264.30584716796875, + "loss": 0.4185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8528701663017273, + "rewards/margins": 0.38834378123283386, + "rewards/rejected": 0.46452638506889343, + "step": 4795 + }, + { + "epoch": 0.28, + "learning_rate": 8.459436897597379e-08, + "logits/chosen": -2.207313060760498, + "logits/rejected": -2.1843082904815674, + "logps/chosen": -37.1485595703125, + "logps/rejected": -221.7125244140625, + "loss": 0.5123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019305039197206497, + "rewards/margins": 0.8478931188583374, + "rewards/rejected": -0.8671981692314148, + "step": 4796 + }, + { + "epoch": 0.28, + "learning_rate": 8.458756416491942e-08, + "logits/chosen": -2.2298834323883057, + "logits/rejected": -2.2220194339752197, + "logps/chosen": -0.0013884969521313906, + "logps/rejected": -176.73483276367188, + "loss": 0.3805, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.563281491864473e-05, + "rewards/margins": 2.649491310119629, + "rewards/rejected": -2.649395704269409, + "step": 4797 + }, + { + "epoch": 0.28, + "learning_rate": 8.458075812514556e-08, + "logits/chosen": -2.054823875427246, + "logits/rejected": -2.0539610385894775, + "logps/chosen": -169.57789611816406, + "logps/rejected": -279.2877502441406, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2773773670196533, + "rewards/margins": 0.47523194551467896, + "rewards/rejected": 0.8021454215049744, + "step": 4798 + }, + { + "epoch": 0.28, + "learning_rate": 8.457395085689403e-08, + "logits/chosen": -1.9996273517608643, + "logits/rejected": -1.981764316558838, + "logps/chosen": -180.78744506835938, + "logps/rejected": -275.140380859375, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4468491077423096, + "rewards/margins": 1.7763595581054688, + "rewards/rejected": -0.32951050996780396, + "step": 4799 + }, + { + "epoch": 0.28, + "learning_rate": 8.456714236040663e-08, + "logits/chosen": -1.9414082765579224, + "logits/rejected": -1.9495360851287842, + "logps/chosen": -1.1783456802368164, + "logps/rejected": -130.17559814453125, + "loss": 0.5008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009301781537942588, + "rewards/margins": 1.019576907157898, + "rewards/rejected": -1.0205070972442627, + "step": 4800 + }, + { + "epoch": 0.28, + "learning_rate": 8.456033263592526e-08, + "logits/chosen": -2.186708688735962, + "logits/rejected": -2.1879758834838867, + "logps/chosen": -0.0022880886681377888, + "logps/rejected": -50.7240104675293, + "loss": 0.6244, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.994670497486368e-05, + "rewards/margins": 0.29685983061790466, + "rewards/rejected": -0.2969497740268707, + "step": 4801 + }, + { + "epoch": 0.28, + "learning_rate": 8.455352168369183e-08, + "logits/chosen": -1.9996637105941772, + "logits/rejected": -1.9634902477264404, + "logps/chosen": -109.09217834472656, + "logps/rejected": -325.4261169433594, + "loss": 0.2163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9047843813896179, + "rewards/margins": 1.3687279224395752, + "rewards/rejected": -0.4639434814453125, + "step": 4802 + }, + { + "epoch": 0.28, + "learning_rate": 8.454670950394827e-08, + "logits/chosen": -1.943048119544983, + "logits/rejected": -1.9462521076202393, + "logps/chosen": -41.856773376464844, + "logps/rejected": -67.09012603759766, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37436142563819885, + "rewards/margins": 0.5568572878837585, + "rewards/rejected": -0.1824958771467209, + "step": 4803 + }, + { + "epoch": 0.28, + "learning_rate": 8.453989609693662e-08, + "logits/chosen": -2.0376079082489014, + "logits/rejected": -1.989040732383728, + "logps/chosen": -190.51744079589844, + "logps/rejected": -437.38702392578125, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2746200561523438, + "rewards/margins": 3.1459031105041504, + "rewards/rejected": -0.8712829947471619, + "step": 4804 + }, + { + "epoch": 0.28, + "learning_rate": 8.453308146289891e-08, + "logits/chosen": -1.8878839015960693, + "logits/rejected": -1.885874629020691, + "logps/chosen": -269.6997375488281, + "logps/rejected": -387.50860595703125, + "loss": 0.098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6469604969024658, + "rewards/margins": 2.1576356887817383, + "rewards/rejected": -0.5106750726699829, + "step": 4805 + }, + { + "epoch": 0.28, + "learning_rate": 8.452626560207721e-08, + "logits/chosen": -2.0201611518859863, + "logits/rejected": -2.0153844356536865, + "logps/chosen": -7.986868149600923e-05, + "logps/rejected": -148.29685974121094, + "loss": 0.432, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2516837841758388e-06, + "rewards/margins": 1.5634021759033203, + "rewards/rejected": -1.5634033679962158, + "step": 4806 + }, + { + "epoch": 0.28, + "learning_rate": 8.451944851471368e-08, + "logits/chosen": -2.093881368637085, + "logits/rejected": -2.0455076694488525, + "logps/chosen": -198.6577911376953, + "logps/rejected": -278.3984375, + "loss": 0.2472, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4981002807617188, + "rewards/margins": 1.004460096359253, + "rewards/rejected": 0.49364015460014343, + "step": 4807 + }, + { + "epoch": 0.28, + "learning_rate": 8.451263020105049e-08, + "logits/chosen": -2.055863857269287, + "logits/rejected": -2.0472233295440674, + "logps/chosen": -15.249687194824219, + "logps/rejected": -125.95149230957031, + "loss": 0.4053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12446250766515732, + "rewards/margins": 1.1618359088897705, + "rewards/rejected": -1.037373423576355, + "step": 4808 + }, + { + "epoch": 0.28, + "learning_rate": 8.450581066132984e-08, + "logits/chosen": -2.011659860610962, + "logits/rejected": -2.0044138431549072, + "logps/chosen": -14.855755805969238, + "logps/rejected": -191.34939575195312, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3642565906047821, + "rewards/margins": 1.1729434728622437, + "rewards/rejected": -0.8086868524551392, + "step": 4809 + }, + { + "epoch": 0.28, + "learning_rate": 8.449898989579402e-08, + "logits/chosen": -1.9993544816970825, + "logits/rejected": -1.9993292093276978, + "logps/chosen": -0.1007644385099411, + "logps/rejected": -235.77835083007812, + "loss": 0.3779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007745377952232957, + "rewards/margins": 2.5037970542907715, + "rewards/rejected": -2.5045716762542725, + "step": 4810 + }, + { + "epoch": 0.28, + "learning_rate": 8.449216790468532e-08, + "logits/chosen": -2.094444990158081, + "logits/rejected": -2.137831926345825, + "logps/chosen": -186.33334350585938, + "logps/rejected": -376.69915771484375, + "loss": 0.1889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1949585676193237, + "rewards/margins": 1.5455445051193237, + "rewards/rejected": -0.3505859375, + "step": 4811 + }, + { + "epoch": 0.28, + "learning_rate": 8.448534468824611e-08, + "logits/chosen": -2.300959825515747, + "logits/rejected": -2.2932851314544678, + "logps/chosen": -31.454364776611328, + "logps/rejected": -157.85633850097656, + "loss": 0.4461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1180814728140831, + "rewards/margins": 1.6648882627487183, + "rewards/rejected": -1.7829697132110596, + "step": 4812 + }, + { + "epoch": 0.28, + "learning_rate": 8.447852024671876e-08, + "logits/chosen": -2.0032968521118164, + "logits/rejected": -1.9929895401000977, + "logps/chosen": -101.21156311035156, + "logps/rejected": -157.76126098632812, + "loss": 0.6419, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3703628480434418, + "rewards/margins": -0.18183746933937073, + "rewards/rejected": 0.5522003173828125, + "step": 4813 + }, + { + "epoch": 0.28, + "learning_rate": 8.447169458034571e-08, + "logits/chosen": -2.0918967723846436, + "logits/rejected": -2.0930910110473633, + "logps/chosen": -47.40770721435547, + "logps/rejected": -92.75318908691406, + "loss": 0.6705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06414337456226349, + "rewards/margins": 0.14040526747703552, + "rewards/rejected": -0.204548642039299, + "step": 4814 + }, + { + "epoch": 0.28, + "learning_rate": 8.446486768936947e-08, + "logits/chosen": -2.0886547565460205, + "logits/rejected": -2.0772156715393066, + "logps/chosen": -48.09021759033203, + "logps/rejected": -183.6978302001953, + "loss": 0.6274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35239753127098083, + "rewards/margins": 0.4501217305660248, + "rewards/rejected": -0.8025192618370056, + "step": 4815 + }, + { + "epoch": 0.28, + "learning_rate": 8.445803957403253e-08, + "logits/chosen": -2.0298871994018555, + "logits/rejected": -2.00516939163208, + "logps/chosen": -176.85272216796875, + "logps/rejected": -384.98333740234375, + "loss": 0.2088, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.228840708732605, + "rewards/margins": 1.281855821609497, + "rewards/rejected": -0.05301513895392418, + "step": 4816 + }, + { + "epoch": 0.28, + "learning_rate": 8.445121023457746e-08, + "logits/chosen": -2.0102670192718506, + "logits/rejected": -1.9846725463867188, + "logps/chosen": -238.23373413085938, + "logps/rejected": -359.7807922363281, + "loss": 0.1278, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8959197998046875, + "rewards/margins": 1.8201751708984375, + "rewards/rejected": 0.07574462890625, + "step": 4817 + }, + { + "epoch": 0.28, + "learning_rate": 8.444437967124691e-08, + "logits/chosen": -2.2602176666259766, + "logits/rejected": -2.2547292709350586, + "logps/chosen": -37.31256103515625, + "logps/rejected": -106.30774688720703, + "loss": 0.4218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3724479675292969, + "rewards/margins": 0.9198288321495056, + "rewards/rejected": -0.5473808646202087, + "step": 4818 + }, + { + "epoch": 0.28, + "learning_rate": 8.44375478842835e-08, + "logits/chosen": -1.946521520614624, + "logits/rejected": -1.8762290477752686, + "logps/chosen": -191.07432556152344, + "logps/rejected": -328.3037109375, + "loss": 0.288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0351380109786987, + "rewards/margins": 0.9492188096046448, + "rewards/rejected": 0.08591919392347336, + "step": 4819 + }, + { + "epoch": 0.28, + "learning_rate": 8.443071487392993e-08, + "logits/chosen": -2.14015793800354, + "logits/rejected": -2.1383020877838135, + "logps/chosen": -0.025151260197162628, + "logps/rejected": -99.08539581298828, + "loss": 0.5261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015744628617540002, + "rewards/margins": 0.8164347410202026, + "rewards/rejected": -0.8180091977119446, + "step": 4820 + }, + { + "epoch": 0.28, + "learning_rate": 8.442388064042895e-08, + "logits/chosen": -1.9436440467834473, + "logits/rejected": -2.012411594390869, + "logps/chosen": -258.1307373046875, + "logps/rejected": -456.87017822265625, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.017120361328125, + "rewards/margins": 3.653420925140381, + "rewards/rejected": -1.6363006830215454, + "step": 4821 + }, + { + "epoch": 0.28, + "learning_rate": 8.441704518402335e-08, + "logits/chosen": -2.148261070251465, + "logits/rejected": -2.131802558898926, + "logps/chosen": -83.94680786132812, + "logps/rejected": -132.13125610351562, + "loss": 0.3715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6135498285293579, + "rewards/margins": 1.4054794311523438, + "rewards/rejected": -0.7919296622276306, + "step": 4822 + }, + { + "epoch": 0.28, + "learning_rate": 8.441020850495595e-08, + "logits/chosen": -1.8262993097305298, + "logits/rejected": -1.8079047203063965, + "logps/chosen": -255.6990509033203, + "logps/rejected": -308.0675354003906, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0374008417129517, + "rewards/margins": 0.7243057489395142, + "rewards/rejected": 0.3130950927734375, + "step": 4823 + }, + { + "epoch": 0.28, + "learning_rate": 8.440337060346963e-08, + "logits/chosen": -1.7268210649490356, + "logits/rejected": -1.672606110572815, + "logps/chosen": -178.7818603515625, + "logps/rejected": -585.7222900390625, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3256438970565796, + "rewards/margins": 3.695590019226074, + "rewards/rejected": -2.369946241378784, + "step": 4824 + }, + { + "epoch": 0.28, + "learning_rate": 8.439653147980728e-08, + "logits/chosen": -1.9773260354995728, + "logits/rejected": -1.98060142993927, + "logps/chosen": -22.305145263671875, + "logps/rejected": -215.9195098876953, + "loss": 0.5393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034067343920469284, + "rewards/margins": 0.5960623025894165, + "rewards/rejected": -0.5619949698448181, + "step": 4825 + }, + { + "epoch": 0.28, + "learning_rate": 8.43896911342119e-08, + "logits/chosen": -2.1983141899108887, + "logits/rejected": -2.1922061443328857, + "logps/chosen": -22.432422637939453, + "logps/rejected": -140.20941162109375, + "loss": 0.3677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3276123106479645, + "rewards/margins": 0.9951232671737671, + "rewards/rejected": -0.667510986328125, + "step": 4826 + }, + { + "epoch": 0.28, + "learning_rate": 8.438284956692647e-08, + "logits/chosen": -2.2007133960723877, + "logits/rejected": -2.1650030612945557, + "logps/chosen": -206.82766723632812, + "logps/rejected": -499.3714599609375, + "loss": 0.1883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3937408924102783, + "rewards/margins": 1.0109986066818237, + "rewards/rejected": 0.382742315530777, + "step": 4827 + }, + { + "epoch": 0.28, + "learning_rate": 8.437600677819405e-08, + "logits/chosen": -2.078813076019287, + "logits/rejected": -2.0765793323516846, + "logps/chosen": -17.404224395751953, + "logps/rejected": -137.60707092285156, + "loss": 0.5447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061660002917051315, + "rewards/margins": 0.5972099304199219, + "rewards/rejected": -0.5355499386787415, + "step": 4828 + }, + { + "epoch": 0.28, + "learning_rate": 8.436916276825769e-08, + "logits/chosen": -2.140841007232666, + "logits/rejected": -2.128647804260254, + "logps/chosen": -157.8561248779297, + "logps/rejected": -304.9768371582031, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6136001944541931, + "rewards/margins": 1.0532333850860596, + "rewards/rejected": -0.43963319063186646, + "step": 4829 + }, + { + "epoch": 0.28, + "learning_rate": 8.436231753736058e-08, + "logits/chosen": -2.007624387741089, + "logits/rejected": -1.996608853340149, + "logps/chosen": -32.40278244018555, + "logps/rejected": -126.60003662109375, + "loss": 0.5193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5839012265205383, + "rewards/margins": 0.2280830442905426, + "rewards/rejected": 0.3558181822299957, + "step": 4830 + }, + { + "epoch": 0.28, + "learning_rate": 8.435547108574585e-08, + "logits/chosen": -1.8755536079406738, + "logits/rejected": -1.8748213052749634, + "logps/chosen": -11.457155227661133, + "logps/rejected": -100.40373992919922, + "loss": 0.6024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0915464386343956, + "rewards/margins": 0.26540127396583557, + "rewards/rejected": -0.17385482788085938, + "step": 4831 + }, + { + "epoch": 0.28, + "learning_rate": 8.434862341365674e-08, + "logits/chosen": -2.0595428943634033, + "logits/rejected": -2.0635793209075928, + "logps/chosen": -6.716981887817383, + "logps/rejected": -53.947593688964844, + "loss": 0.6534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021236801519989967, + "rewards/margins": 0.2211071103811264, + "rewards/rejected": -0.199870303273201, + "step": 4832 + }, + { + "epoch": 0.28, + "learning_rate": 8.43417745213365e-08, + "logits/chosen": -1.8919461965560913, + "logits/rejected": -1.8797065019607544, + "logps/chosen": -0.7803849577903748, + "logps/rejected": -104.56546783447266, + "loss": 0.5331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00027604104252532125, + "rewards/margins": 0.8883901834487915, + "rewards/rejected": -0.8881141543388367, + "step": 4833 + }, + { + "epoch": 0.28, + "learning_rate": 8.433492440902846e-08, + "logits/chosen": -1.9457448720932007, + "logits/rejected": -1.8479008674621582, + "logps/chosen": -294.206787109375, + "logps/rejected": -523.867919921875, + "loss": 0.1543, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.986120581626892, + "rewards/margins": 1.6920043230056763, + "rewards/rejected": 0.29411622881889343, + "step": 4834 + }, + { + "epoch": 0.28, + "learning_rate": 8.432807307697594e-08, + "logits/chosen": -2.010507345199585, + "logits/rejected": -2.0068039894104004, + "logps/chosen": -22.088821411132812, + "logps/rejected": -103.54475402832031, + "loss": 0.5357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009150314144790173, + "rewards/margins": 0.7955280542373657, + "rewards/rejected": -0.7863777279853821, + "step": 4835 + }, + { + "epoch": 0.28, + "learning_rate": 8.432122052542237e-08, + "logits/chosen": -2.2171125411987305, + "logits/rejected": -2.210681438446045, + "logps/chosen": -35.17686462402344, + "logps/rejected": -101.02828979492188, + "loss": 0.5226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7411495447158813, + "rewards/margins": 0.06657755374908447, + "rewards/rejected": 0.6745719909667969, + "step": 4836 + }, + { + "epoch": 0.28, + "learning_rate": 8.431436675461114e-08, + "logits/chosen": -1.8778133392333984, + "logits/rejected": -1.8744850158691406, + "logps/chosen": -268.8471374511719, + "logps/rejected": -401.8788757324219, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.77679443359375, + "rewards/margins": 0.4463653564453125, + "rewards/rejected": 1.3304290771484375, + "step": 4837 + }, + { + "epoch": 0.28, + "learning_rate": 8.430751176478576e-08, + "logits/chosen": -1.8874562978744507, + "logits/rejected": -1.8892598152160645, + "logps/chosen": -1.2267098426818848, + "logps/rejected": -171.55569458007812, + "loss": 0.4548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018071843311190605, + "rewards/margins": 1.2351205348968506, + "rewards/rejected": -1.2170486450195312, + "step": 4838 + }, + { + "epoch": 0.28, + "learning_rate": 8.430065555618975e-08, + "logits/chosen": -2.0393457412719727, + "logits/rejected": -2.0421457290649414, + "logps/chosen": -14.362422943115234, + "logps/rejected": -88.04704284667969, + "loss": 0.515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03657589107751846, + "rewards/margins": 0.8212530612945557, + "rewards/rejected": -0.7846771478652954, + "step": 4839 + }, + { + "epoch": 0.28, + "learning_rate": 8.429379812906667e-08, + "logits/chosen": -1.8982534408569336, + "logits/rejected": -1.8968878984451294, + "logps/chosen": -0.002241280162706971, + "logps/rejected": -161.7633056640625, + "loss": 0.3771, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.014021295821294e-05, + "rewards/margins": 2.6776177883148193, + "rewards/rejected": -2.677537679672241, + "step": 4840 + }, + { + "epoch": 0.28, + "learning_rate": 8.428693948366014e-08, + "logits/chosen": -1.9197824001312256, + "logits/rejected": -1.9349712133407593, + "logps/chosen": -34.22978973388672, + "logps/rejected": -219.12940979003906, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1455024778842926, + "rewards/margins": 1.9048608541488647, + "rewards/rejected": -2.050363302230835, + "step": 4841 + }, + { + "epoch": 0.28, + "learning_rate": 8.42800796202138e-08, + "logits/chosen": -2.0037307739257812, + "logits/rejected": -2.0424888134002686, + "logps/chosen": -208.326416015625, + "logps/rejected": -161.3771514892578, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5607315301895142, + "rewards/margins": 0.3922516107559204, + "rewards/rejected": 1.1684799194335938, + "step": 4842 + }, + { + "epoch": 0.28, + "learning_rate": 8.427321853897134e-08, + "logits/chosen": -1.9379780292510986, + "logits/rejected": -1.9359880685806274, + "logps/chosen": -296.40240478515625, + "logps/rejected": -422.4172058105469, + "loss": 0.0501, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.700305223464966, + "rewards/margins": 2.649914503097534, + "rewards/rejected": 0.05039062723517418, + "step": 4843 + }, + { + "epoch": 0.28, + "learning_rate": 8.426635624017651e-08, + "logits/chosen": -2.1823582649230957, + "logits/rejected": -2.1748383045196533, + "logps/chosen": -7.819959137123078e-05, + "logps/rejected": -106.73371887207031, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3111566943280195e-07, + "rewards/margins": 1.8194457292556763, + "rewards/rejected": -1.8194458484649658, + "step": 4844 + }, + { + "epoch": 0.28, + "learning_rate": 8.42594927240731e-08, + "logits/chosen": -2.0839357376098633, + "logits/rejected": -2.081099510192871, + "logps/chosen": -0.0011005988344550133, + "logps/rejected": -250.7462158203125, + "loss": 0.3668, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3367669200524688e-05, + "rewards/margins": 3.1594645977020264, + "rewards/rejected": -3.1594879627227783, + "step": 4845 + }, + { + "epoch": 0.28, + "learning_rate": 8.425262799090492e-08, + "logits/chosen": -2.034255027770996, + "logits/rejected": -2.026850938796997, + "logps/chosen": -37.767417907714844, + "logps/rejected": -259.25885009765625, + "loss": 0.3651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1165393814444542, + "rewards/margins": 1.5137302875518799, + "rewards/rejected": -1.3971909284591675, + "step": 4846 + }, + { + "epoch": 0.28, + "learning_rate": 8.424576204091586e-08, + "logits/chosen": -2.0952811241149902, + "logits/rejected": -2.0168728828430176, + "logps/chosen": -175.6607666015625, + "logps/rejected": -424.58319091796875, + "loss": 0.2678, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.532934546470642, + "rewards/margins": 0.6720214486122131, + "rewards/rejected": 0.860913097858429, + "step": 4847 + }, + { + "epoch": 0.28, + "learning_rate": 8.42388948743498e-08, + "logits/chosen": -2.1365442276000977, + "logits/rejected": -2.132112979888916, + "logps/chosen": -0.18501576781272888, + "logps/rejected": -162.33961486816406, + "loss": 0.4706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013447254605125636, + "rewards/margins": 1.1048659086227417, + "rewards/rejected": -1.105000376701355, + "step": 4848 + }, + { + "epoch": 0.28, + "learning_rate": 8.423202649145072e-08, + "logits/chosen": -2.1189327239990234, + "logits/rejected": -2.120748519897461, + "logps/chosen": -0.0003337619127705693, + "logps/rejected": -101.05494689941406, + "loss": 0.4363, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5197428183455486e-05, + "rewards/margins": 1.6315584182739258, + "rewards/rejected": -1.6315735578536987, + "step": 4849 + }, + { + "epoch": 0.28, + "learning_rate": 8.422515689246262e-08, + "logits/chosen": -2.144026041030884, + "logits/rejected": -2.1348018646240234, + "logps/chosen": -0.00011551205534487963, + "logps/rejected": -137.7703399658203, + "loss": 0.6655, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1695370833185734e-06, + "rewards/margins": 0.11434872448444366, + "rewards/rejected": -0.11435089260339737, + "step": 4850 + }, + { + "epoch": 0.28, + "learning_rate": 8.421828607762954e-08, + "logits/chosen": -2.0557706356048584, + "logits/rejected": -2.05389666557312, + "logps/chosen": -119.22664642333984, + "logps/rejected": -245.77682495117188, + "loss": 0.2507, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0283836126327515, + "rewards/margins": 1.5020911693572998, + "rewards/rejected": -0.4737075865268707, + "step": 4851 + }, + { + "epoch": 0.28, + "learning_rate": 8.421141404719554e-08, + "logits/chosen": -2.0167505741119385, + "logits/rejected": -2.0178754329681396, + "logps/chosen": -175.8844451904297, + "logps/rejected": -223.787841796875, + "loss": 0.4748, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6842514276504517, + "rewards/margins": -0.3016449213027954, + "rewards/rejected": 1.985896348953247, + "step": 4852 + }, + { + "epoch": 0.28, + "learning_rate": 8.420454080140478e-08, + "logits/chosen": -2.2090022563934326, + "logits/rejected": -2.208153247833252, + "logps/chosen": -13.120340347290039, + "logps/rejected": -104.3180160522461, + "loss": 0.4478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15740089118480682, + "rewards/margins": 1.2775455713272095, + "rewards/rejected": -1.1201447248458862, + "step": 4853 + }, + { + "epoch": 0.28, + "learning_rate": 8.419766634050141e-08, + "logits/chosen": -1.9077235460281372, + "logits/rejected": -1.9072984457015991, + "logps/chosen": -197.5029296875, + "logps/rejected": -228.96200561523438, + "loss": 0.4654, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3344391584396362, + "rewards/margins": -0.1930999755859375, + "rewards/rejected": 1.5275391340255737, + "step": 4854 + }, + { + "epoch": 0.28, + "learning_rate": 8.419079066472965e-08, + "logits/chosen": -2.0713069438934326, + "logits/rejected": -2.0592801570892334, + "logps/chosen": -32.56230163574219, + "logps/rejected": -214.96444702148438, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4529705047607422, + "rewards/margins": 1.6853939294815063, + "rewards/rejected": -1.2324234247207642, + "step": 4855 + }, + { + "epoch": 0.28, + "learning_rate": 8.418391377433376e-08, + "logits/chosen": -2.046870708465576, + "logits/rejected": -2.0521717071533203, + "logps/chosen": -49.99283218383789, + "logps/rejected": -138.43740844726562, + "loss": 0.5153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04773750528693199, + "rewards/margins": 0.7158573269844055, + "rewards/rejected": -0.6681198477745056, + "step": 4856 + }, + { + "epoch": 0.28, + "learning_rate": 8.417703566955803e-08, + "logits/chosen": -2.040593385696411, + "logits/rejected": -1.9932314157485962, + "logps/chosen": -228.8509979248047, + "logps/rejected": -472.39617919921875, + "loss": 0.1905, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.729570150375366, + "rewards/margins": 0.8721177577972412, + "rewards/rejected": 1.857452392578125, + "step": 4857 + }, + { + "epoch": 0.28, + "learning_rate": 8.417015635064684e-08, + "logits/chosen": -1.8847907781600952, + "logits/rejected": -1.8795486688613892, + "logps/chosen": -209.57415771484375, + "logps/rejected": -306.4046936035156, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6547393798828125, + "rewards/margins": 2.47772216796875, + "rewards/rejected": 0.1770172119140625, + "step": 4858 + }, + { + "epoch": 0.28, + "learning_rate": 8.416327581784455e-08, + "logits/chosen": -1.8831018209457397, + "logits/rejected": -1.8875032663345337, + "logps/chosen": -186.59475708007812, + "logps/rejected": -286.5813293457031, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7493256330490112, + "rewards/margins": 0.037872314453125, + "rewards/rejected": 1.7114533185958862, + "step": 4859 + }, + { + "epoch": 0.28, + "learning_rate": 8.415639407139559e-08, + "logits/chosen": -2.0186846256256104, + "logits/rejected": -2.000035047531128, + "logps/chosen": -57.86670684814453, + "logps/rejected": -250.7313995361328, + "loss": 0.1682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.496786892414093, + "rewards/margins": 3.0718770027160645, + "rewards/rejected": -2.575090169906616, + "step": 4860 + }, + { + "epoch": 0.28, + "learning_rate": 8.414951111154443e-08, + "logits/chosen": -2.0046446323394775, + "logits/rejected": -2.0037500858306885, + "logps/chosen": -7.042447090148926, + "logps/rejected": -88.00082397460938, + "loss": 0.4602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16828690469264984, + "rewards/margins": 1.8267982006072998, + "rewards/rejected": -1.9950851202011108, + "step": 4861 + }, + { + "epoch": 0.28, + "learning_rate": 8.414262693853558e-08, + "logits/chosen": -1.8943548202514648, + "logits/rejected": -1.939983606338501, + "logps/chosen": -221.0130615234375, + "logps/rejected": -192.27552795410156, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6000595092773438, + "rewards/margins": 0.6510955691337585, + "rewards/rejected": 0.9489639401435852, + "step": 4862 + }, + { + "epoch": 0.28, + "learning_rate": 8.413574155261363e-08, + "logits/chosen": -2.0315160751342773, + "logits/rejected": -2.0224180221557617, + "logps/chosen": -9.159276962280273, + "logps/rejected": -313.146728515625, + "loss": 0.4367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17706279456615448, + "rewards/margins": 2.2119174003601074, + "rewards/rejected": -2.3889801502227783, + "step": 4863 + }, + { + "epoch": 0.28, + "learning_rate": 8.412885495402316e-08, + "logits/chosen": -2.079221725463867, + "logits/rejected": -2.079662561416626, + "logps/chosen": -54.80706787109375, + "logps/rejected": -101.00755310058594, + "loss": 0.4998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6362064480781555, + "rewards/margins": 0.309775173664093, + "rewards/rejected": 0.3264312744140625, + "step": 4864 + }, + { + "epoch": 0.28, + "learning_rate": 8.412196714300881e-08, + "logits/chosen": -2.1952431201934814, + "logits/rejected": -2.168644428253174, + "logps/chosen": -79.51561737060547, + "logps/rejected": -237.5177459716797, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6999519467353821, + "rewards/margins": 1.3522850275039673, + "rewards/rejected": -0.6523330807685852, + "step": 4865 + }, + { + "epoch": 0.28, + "learning_rate": 8.411507811981529e-08, + "logits/chosen": -2.1752803325653076, + "logits/rejected": -2.172595262527466, + "logps/chosen": -13.09083366394043, + "logps/rejected": -120.78366088867188, + "loss": 0.6083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04484281688928604, + "rewards/margins": 0.2731989920139313, + "rewards/rejected": -0.22835616767406464, + "step": 4866 + }, + { + "epoch": 0.28, + "learning_rate": 8.410818788468732e-08, + "logits/chosen": -1.993546485900879, + "logits/rejected": -1.9875798225402832, + "logps/chosen": -75.65731811523438, + "logps/rejected": -297.4244689941406, + "loss": 0.4435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13283538818359375, + "rewards/margins": 1.1715774536132812, + "rewards/rejected": -1.0387420654296875, + "step": 4867 + }, + { + "epoch": 0.28, + "learning_rate": 8.410129643786967e-08, + "logits/chosen": -2.087979316711426, + "logits/rejected": -2.0825650691986084, + "logps/chosen": -192.66693115234375, + "logps/rejected": -298.2633361816406, + "loss": 0.2407, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5230683088302612, + "rewards/margins": 0.8004700541496277, + "rewards/rejected": 0.7225982546806335, + "step": 4868 + }, + { + "epoch": 0.28, + "learning_rate": 8.409440377960717e-08, + "logits/chosen": -2.1147518157958984, + "logits/rejected": -2.113081693649292, + "logps/chosen": -21.79641342163086, + "logps/rejected": -189.66983032226562, + "loss": 0.4198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5893514752388, + "rewards/margins": 0.6384298205375671, + "rewards/rejected": -0.04907837137579918, + "step": 4869 + }, + { + "epoch": 0.28, + "learning_rate": 8.408750991014465e-08, + "logits/chosen": -1.9953473806381226, + "logits/rejected": -1.9161657094955444, + "logps/chosen": -207.51707458496094, + "logps/rejected": -563.3238525390625, + "loss": 0.1219, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.059260606765747, + "rewards/margins": 1.6240860223770142, + "rewards/rejected": 0.4351745545864105, + "step": 4870 + }, + { + "epoch": 0.28, + "learning_rate": 8.408061482972708e-08, + "logits/chosen": -1.995259404182434, + "logits/rejected": -1.988198161125183, + "logps/chosen": -49.41648864746094, + "logps/rejected": -159.82066345214844, + "loss": 0.2353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.820071816444397, + "rewards/margins": 1.9411197900772095, + "rewards/rejected": -1.1210479736328125, + "step": 4871 + }, + { + "epoch": 0.28, + "learning_rate": 8.407371853859933e-08, + "logits/chosen": -1.9880614280700684, + "logits/rejected": -1.9720913171768188, + "logps/chosen": -146.54150390625, + "logps/rejected": -390.9437561035156, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7932464480400085, + "rewards/margins": 1.032281517982483, + "rewards/rejected": -0.23903504014015198, + "step": 4872 + }, + { + "epoch": 0.28, + "learning_rate": 8.406682103700644e-08, + "logits/chosen": -2.047454833984375, + "logits/rejected": -1.9754408597946167, + "logps/chosen": -271.17633056640625, + "logps/rejected": -395.3681945800781, + "loss": 0.184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.199273705482483, + "rewards/margins": 1.6402161121368408, + "rewards/rejected": -0.4409423768520355, + "step": 4873 + }, + { + "epoch": 0.28, + "learning_rate": 8.405992232519344e-08, + "logits/chosen": -2.0877082347869873, + "logits/rejected": -2.089841365814209, + "logps/chosen": -4.398789315018803e-05, + "logps/rejected": -202.63015747070312, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.887522777607956e-07, + "rewards/margins": 4.114289283752441, + "rewards/rejected": -4.1142897605896, + "step": 4874 + }, + { + "epoch": 0.28, + "learning_rate": 8.405302240340538e-08, + "logits/chosen": -2.099674701690674, + "logits/rejected": -2.096919059753418, + "logps/chosen": -47.433982849121094, + "logps/rejected": -145.78363037109375, + "loss": 0.4588, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0850604996085167, + "rewards/margins": 0.9597805738449097, + "rewards/rejected": -1.0448410511016846, + "step": 4875 + }, + { + "epoch": 0.28, + "learning_rate": 8.404612127188737e-08, + "logits/chosen": -2.111952781677246, + "logits/rejected": -2.1185219287872314, + "logps/chosen": -7.005289554595947, + "logps/rejected": -147.19573974609375, + "loss": 0.4714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18708615005016327, + "rewards/margins": 0.919431746006012, + "rewards/rejected": -0.7323455810546875, + "step": 4876 + }, + { + "epoch": 0.28, + "learning_rate": 8.403921893088462e-08, + "logits/chosen": -2.0354087352752686, + "logits/rejected": -2.0119788646698, + "logps/chosen": -63.86956024169922, + "logps/rejected": -257.3296813964844, + "loss": 0.1599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.367578148841858, + "rewards/margins": 1.7822906970977783, + "rewards/rejected": -0.414712518453598, + "step": 4877 + }, + { + "epoch": 0.28, + "learning_rate": 8.40323153806423e-08, + "logits/chosen": -1.9268524646759033, + "logits/rejected": -1.9288897514343262, + "logps/chosen": -12.426054000854492, + "logps/rejected": -125.84075927734375, + "loss": 0.4105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07890138775110245, + "rewards/margins": 1.706494688987732, + "rewards/rejected": -1.6275932788848877, + "step": 4878 + }, + { + "epoch": 0.28, + "learning_rate": 8.402541062140568e-08, + "logits/chosen": -1.963299036026001, + "logits/rejected": -1.9599609375, + "logps/chosen": -49.489898681640625, + "logps/rejected": -133.171875, + "loss": 0.5159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014378356747329235, + "rewards/margins": 0.8444069027900696, + "rewards/rejected": -0.8300285339355469, + "step": 4879 + }, + { + "epoch": 0.28, + "learning_rate": 8.401850465342004e-08, + "logits/chosen": -2.1203038692474365, + "logits/rejected": -2.0970990657806396, + "logps/chosen": -58.364768981933594, + "logps/rejected": -260.1767578125, + "loss": 0.4968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15082626044750214, + "rewards/margins": 0.6187461614608765, + "rewards/rejected": -0.4679199159145355, + "step": 4880 + }, + { + "epoch": 0.28, + "learning_rate": 8.40115974769307e-08, + "logits/chosen": -2.1371500492095947, + "logits/rejected": -2.1652424335479736, + "logps/chosen": -192.18228149414062, + "logps/rejected": -461.1028137207031, + "loss": 0.1264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7099732160568237, + "rewards/margins": 1.7101471424102783, + "rewards/rejected": -0.00017395020404364914, + "step": 4881 + }, + { + "epoch": 0.28, + "learning_rate": 8.400468909218306e-08, + "logits/chosen": -1.9946868419647217, + "logits/rejected": -2.051694393157959, + "logps/chosen": -180.99453735351562, + "logps/rejected": -345.8977355957031, + "loss": 0.1266, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2502533197402954, + "rewards/margins": 1.8989624977111816, + "rewards/rejected": -0.6487091183662415, + "step": 4882 + }, + { + "epoch": 0.28, + "learning_rate": 8.399777949942251e-08, + "logits/chosen": -1.9906607866287231, + "logits/rejected": -1.9677064418792725, + "logps/chosen": -244.83555603027344, + "logps/rejected": -372.05255126953125, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2181991338729858, + "rewards/margins": 1.0574814081192017, + "rewards/rejected": 0.16071777045726776, + "step": 4883 + }, + { + "epoch": 0.28, + "learning_rate": 8.399086869889455e-08, + "logits/chosen": -2.0132720470428467, + "logits/rejected": -2.0329480171203613, + "logps/chosen": -0.00010061005013994873, + "logps/rejected": -135.48599243164062, + "loss": 0.5113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7642108787185862e-06, + "rewards/margins": 0.9358182549476624, + "rewards/rejected": -0.9358200430870056, + "step": 4884 + }, + { + "epoch": 0.28, + "learning_rate": 8.398395669084466e-08, + "logits/chosen": -1.9358224868774414, + "logits/rejected": -1.9320560693740845, + "logps/chosen": -26.777830123901367, + "logps/rejected": -136.74954223632812, + "loss": 0.6308, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12381172180175781, + "rewards/margins": -0.14697685837745667, + "rewards/rejected": 0.2707885801792145, + "step": 4885 + }, + { + "epoch": 0.28, + "learning_rate": 8.397704347551839e-08, + "logits/chosen": -1.9574657678604126, + "logits/rejected": -1.9527212381362915, + "logps/chosen": -47.831172943115234, + "logps/rejected": -194.36749267578125, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6923274993896484, + "rewards/margins": 2.334989547729492, + "rewards/rejected": -1.6426620483398438, + "step": 4886 + }, + { + "epoch": 0.28, + "learning_rate": 8.397012905316134e-08, + "logits/chosen": -2.100963592529297, + "logits/rejected": -2.1000773906707764, + "logps/chosen": -0.016194693744182587, + "logps/rejected": -89.87383270263672, + "loss": 0.5633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010451894486323, + "rewards/margins": 0.6279427409172058, + "rewards/rejected": -0.6289879083633423, + "step": 4887 + }, + { + "epoch": 0.28, + "learning_rate": 8.396321342401914e-08, + "logits/chosen": -1.83378267288208, + "logits/rejected": -1.822874665260315, + "logps/chosen": -192.48483276367188, + "logps/rejected": -310.3656921386719, + "loss": 0.1044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.740881323814392, + "rewards/margins": 2.083090305328369, + "rewards/rejected": -0.3422088623046875, + "step": 4888 + }, + { + "epoch": 0.28, + "learning_rate": 8.395629658833747e-08, + "logits/chosen": -2.0310275554656982, + "logits/rejected": -2.0264222621917725, + "logps/chosen": -238.4571990966797, + "logps/rejected": -311.8401184082031, + "loss": 0.2608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.18011474609375, + "rewards/margins": 0.9199187755584717, + "rewards/rejected": 0.26019594073295593, + "step": 4889 + }, + { + "epoch": 0.28, + "learning_rate": 8.394937854636205e-08, + "logits/chosen": -1.8416837453842163, + "logits/rejected": -1.8488191366195679, + "logps/chosen": -49.84758377075195, + "logps/rejected": -233.26828002929688, + "loss": 0.4155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03107604943215847, + "rewards/margins": 1.690972924232483, + "rewards/rejected": -1.7220489978790283, + "step": 4890 + }, + { + "epoch": 0.28, + "learning_rate": 8.394245929833863e-08, + "logits/chosen": -1.9306354522705078, + "logits/rejected": -1.9188827276229858, + "logps/chosen": -245.3095703125, + "logps/rejected": -358.5692138671875, + "loss": 0.5191, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.8191711902618408, + "rewards/margins": -0.4739868640899658, + "rewards/rejected": 2.2931580543518066, + "step": 4891 + }, + { + "epoch": 0.28, + "learning_rate": 8.393553884451303e-08, + "logits/chosen": -2.0474843978881836, + "logits/rejected": -2.0435969829559326, + "logps/chosen": -2.812204360961914, + "logps/rejected": -150.3677520751953, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09611719101667404, + "rewards/margins": 2.907907724380493, + "rewards/rejected": -2.8117904663085938, + "step": 4892 + }, + { + "epoch": 0.28, + "learning_rate": 8.392861718513109e-08, + "logits/chosen": -2.072256565093994, + "logits/rejected": -2.077165365219116, + "logps/chosen": -0.38872236013412476, + "logps/rejected": -162.62428283691406, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05867474898695946, + "rewards/margins": 2.5938098430633545, + "rewards/rejected": -2.53513503074646, + "step": 4893 + }, + { + "epoch": 0.28, + "learning_rate": 8.392169432043872e-08, + "logits/chosen": -2.025864362716675, + "logits/rejected": -2.0212583541870117, + "logps/chosen": -8.829797744750977, + "logps/rejected": -124.4508285522461, + "loss": 0.5671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0735052153468132, + "rewards/margins": 0.6577783823013306, + "rewards/rejected": -0.7312836050987244, + "step": 4894 + }, + { + "epoch": 0.28, + "learning_rate": 8.391477025068182e-08, + "logits/chosen": -2.0587692260742188, + "logits/rejected": -2.050809144973755, + "logps/chosen": -16.36028289794922, + "logps/rejected": -208.61856079101562, + "loss": 0.3829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06384487450122833, + "rewards/margins": 2.437469005584717, + "rewards/rejected": -2.5013139247894287, + "step": 4895 + }, + { + "epoch": 0.28, + "learning_rate": 8.390784497610638e-08, + "logits/chosen": -1.941392421722412, + "logits/rejected": -1.9159249067306519, + "logps/chosen": -210.66192626953125, + "logps/rejected": -350.88909912109375, + "loss": 0.3479, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.321295142173767, + "rewards/margins": 0.40902096033096313, + "rewards/rejected": 0.912274181842804, + "step": 4896 + }, + { + "epoch": 0.28, + "learning_rate": 8.390091849695844e-08, + "logits/chosen": -1.8812426328659058, + "logits/rejected": -1.8570399284362793, + "logps/chosen": -223.54788208007812, + "logps/rejected": -383.86920166015625, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7743911743164062, + "rewards/margins": 0.08551180362701416, + "rewards/rejected": 1.688879370689392, + "step": 4897 + }, + { + "epoch": 0.29, + "learning_rate": 8.389399081348405e-08, + "logits/chosen": -1.9787499904632568, + "logits/rejected": -1.9810166358947754, + "logps/chosen": -136.37692260742188, + "logps/rejected": -300.8695373535156, + "loss": 0.6392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4799636900424957, + "rewards/margins": 0.7969741821289062, + "rewards/rejected": -1.2769378423690796, + "step": 4898 + }, + { + "epoch": 0.29, + "learning_rate": 8.388706192592931e-08, + "logits/chosen": -1.8395161628723145, + "logits/rejected": -1.7964963912963867, + "logps/chosen": -161.15252685546875, + "logps/rejected": -331.62213134765625, + "loss": 0.1743, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.766302466392517, + "rewards/margins": 1.3468047380447388, + "rewards/rejected": 0.41949769854545593, + "step": 4899 + }, + { + "epoch": 0.29, + "learning_rate": 8.388013183454037e-08, + "logits/chosen": -2.1957075595855713, + "logits/rejected": -2.188555955886841, + "logps/chosen": -18.865047454833984, + "logps/rejected": -138.10409545898438, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33353883028030396, + "rewards/margins": 1.2909866571426392, + "rewards/rejected": -0.9574478268623352, + "step": 4900 + }, + { + "epoch": 0.29, + "learning_rate": 8.387320053956341e-08, + "logits/chosen": -2.073437213897705, + "logits/rejected": -2.0802998542785645, + "logps/chosen": -0.0005456397193484008, + "logps/rejected": -81.11693572998047, + "loss": 0.5752, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.847083487722557e-05, + "rewards/margins": 0.5348897576332092, + "rewards/rejected": -0.5349182486534119, + "step": 4901 + }, + { + "epoch": 0.29, + "learning_rate": 8.38662680412447e-08, + "logits/chosen": -2.1457295417785645, + "logits/rejected": -2.1083829402923584, + "logps/chosen": -162.6153106689453, + "logps/rejected": -306.2645263671875, + "loss": 0.3138, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0991989374160767, + "rewards/margins": 1.0134811401367188, + "rewards/rejected": 0.08571777492761612, + "step": 4902 + }, + { + "epoch": 0.29, + "learning_rate": 8.385933433983048e-08, + "logits/chosen": -2.0545268058776855, + "logits/rejected": -2.0406076908111572, + "logps/chosen": -57.20813751220703, + "logps/rejected": -370.2762451171875, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2731742858886719, + "rewards/margins": 2.2131552696228027, + "rewards/rejected": -1.9399811029434204, + "step": 4903 + }, + { + "epoch": 0.29, + "learning_rate": 8.385239943556707e-08, + "logits/chosen": -2.0891318321228027, + "logits/rejected": -2.0868518352508545, + "logps/chosen": -52.44659423828125, + "logps/rejected": -128.31808471679688, + "loss": 0.8642, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5199363827705383, + "rewards/margins": -0.11004945635795593, + "rewards/rejected": -0.4098869264125824, + "step": 4904 + }, + { + "epoch": 0.29, + "learning_rate": 8.384546332870086e-08, + "logits/chosen": -2.1433844566345215, + "logits/rejected": -2.129855155944824, + "logps/chosen": -61.665802001953125, + "logps/rejected": -288.3958740234375, + "loss": 0.2126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7248573303222656, + "rewards/margins": 2.2589850425720215, + "rewards/rejected": -1.5341278314590454, + "step": 4905 + }, + { + "epoch": 0.29, + "learning_rate": 8.383852601947821e-08, + "logits/chosen": -1.925015926361084, + "logits/rejected": -1.9262577295303345, + "logps/chosen": -2.5408785343170166, + "logps/rejected": -107.18196105957031, + "loss": 0.6501, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004812383558601141, + "rewards/margins": 0.325948566198349, + "rewards/rejected": -0.3307609558105469, + "step": 4906 + }, + { + "epoch": 0.29, + "learning_rate": 8.383158750814562e-08, + "logits/chosen": -2.045149326324463, + "logits/rejected": -2.039302110671997, + "logps/chosen": -0.00599405774846673, + "logps/rejected": -155.92437744140625, + "loss": 0.4582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00014372728765010834, + "rewards/margins": 1.3483645915985107, + "rewards/rejected": -1.3482208251953125, + "step": 4907 + }, + { + "epoch": 0.29, + "learning_rate": 8.382464779494953e-08, + "logits/chosen": -2.028372287750244, + "logits/rejected": -2.0301294326782227, + "logps/chosen": -182.22412109375, + "logps/rejected": -380.1678466796875, + "loss": 0.1378, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4432541131973267, + "rewards/margins": 1.7062424421310425, + "rewards/rejected": -0.26298829913139343, + "step": 4908 + }, + { + "epoch": 0.29, + "learning_rate": 8.381770688013651e-08, + "logits/chosen": -2.2202279567718506, + "logits/rejected": -2.209991455078125, + "logps/chosen": -17.422256469726562, + "logps/rejected": -395.0758972167969, + "loss": 0.4154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17012129724025726, + "rewards/margins": 1.7835408449172974, + "rewards/rejected": -1.9536621570587158, + "step": 4909 + }, + { + "epoch": 0.29, + "learning_rate": 8.381076476395312e-08, + "logits/chosen": -2.039433479309082, + "logits/rejected": -2.0403618812561035, + "logps/chosen": -88.0614242553711, + "logps/rejected": -293.80511474609375, + "loss": 0.2321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5956840515136719, + "rewards/margins": 2.864403486251831, + "rewards/rejected": -2.268719434738159, + "step": 4910 + }, + { + "epoch": 0.29, + "learning_rate": 8.380382144664599e-08, + "logits/chosen": -1.9743114709854126, + "logits/rejected": -1.9696348905563354, + "logps/chosen": -62.70787811279297, + "logps/rejected": -188.59970092773438, + "loss": 0.4694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3490920960903168, + "rewards/margins": 0.6795684695243835, + "rewards/rejected": -0.3304763734340668, + "step": 4911 + }, + { + "epoch": 0.29, + "learning_rate": 8.379687692846175e-08, + "logits/chosen": -2.0674526691436768, + "logits/rejected": -2.056708574295044, + "logps/chosen": -202.7694091796875, + "logps/rejected": -372.00555419921875, + "loss": 0.3861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8494629263877869, + "rewards/margins": 0.22495120763778687, + "rewards/rejected": 0.62451171875, + "step": 4912 + }, + { + "epoch": 0.29, + "learning_rate": 8.378993120964715e-08, + "logits/chosen": -2.1055872440338135, + "logits/rejected": -2.0874297618865967, + "logps/chosen": -51.411441802978516, + "logps/rejected": -338.3786315917969, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3089984953403473, + "rewards/margins": 5.382561206817627, + "rewards/rejected": -5.0735626220703125, + "step": 4913 + }, + { + "epoch": 0.29, + "learning_rate": 8.378298429044889e-08, + "logits/chosen": -2.153414726257324, + "logits/rejected": -2.1470654010772705, + "logps/chosen": -32.80436325073242, + "logps/rejected": -230.34445190429688, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17599983513355255, + "rewards/margins": 2.528569459915161, + "rewards/rejected": -2.352569580078125, + "step": 4914 + }, + { + "epoch": 0.29, + "learning_rate": 8.377603617111378e-08, + "logits/chosen": -1.9655168056488037, + "logits/rejected": -1.9615468978881836, + "logps/chosen": -175.58782958984375, + "logps/rejected": -285.43084716796875, + "loss": 0.4533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9823883175849915, + "rewards/margins": 0.1389373540878296, + "rewards/rejected": 0.8434509634971619, + "step": 4915 + }, + { + "epoch": 0.29, + "learning_rate": 8.376908685188865e-08, + "logits/chosen": -2.1283864974975586, + "logits/rejected": -2.088273286819458, + "logps/chosen": -207.72410583496094, + "logps/rejected": -413.9816589355469, + "loss": 0.5708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.554150402545929, + "rewards/margins": 0.0145416259765625, + "rewards/rejected": 0.5396087765693665, + "step": 4916 + }, + { + "epoch": 0.29, + "learning_rate": 8.376213633302036e-08, + "logits/chosen": -1.9457744359970093, + "logits/rejected": -1.9417332410812378, + "logps/chosen": -0.12699274718761444, + "logps/rejected": -298.1441955566406, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004806943237781525, + "rewards/margins": 3.3885586261749268, + "rewards/rejected": -3.3933656215667725, + "step": 4917 + }, + { + "epoch": 0.29, + "learning_rate": 8.375518461475586e-08, + "logits/chosen": -1.9950790405273438, + "logits/rejected": -1.9747871160507202, + "logps/chosen": -187.57797241210938, + "logps/rejected": -263.1069641113281, + "loss": 0.1785, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8085845708847046, + "rewards/margins": 1.3085815906524658, + "rewards/rejected": 0.5000030398368835, + "step": 4918 + }, + { + "epoch": 0.29, + "learning_rate": 8.374823169734206e-08, + "logits/chosen": -2.1531994342803955, + "logits/rejected": -2.1365861892700195, + "logps/chosen": -70.70069885253906, + "logps/rejected": -324.66644287109375, + "loss": 0.4933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21761322021484375, + "rewards/margins": 1.4159287214279175, + "rewards/rejected": -1.6335419416427612, + "step": 4919 + }, + { + "epoch": 0.29, + "learning_rate": 8.374127758102602e-08, + "logits/chosen": -1.9985880851745605, + "logits/rejected": -1.9997750520706177, + "logps/chosen": -0.937157928943634, + "logps/rejected": -32.50849533081055, + "loss": 0.6736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02000982128083706, + "rewards/margins": 0.08646677434444427, + "rewards/rejected": -0.10647659748792648, + "step": 4920 + }, + { + "epoch": 0.29, + "learning_rate": 8.373432226605475e-08, + "logits/chosen": -2.0027477741241455, + "logits/rejected": -2.003749132156372, + "logps/chosen": -19.38127326965332, + "logps/rejected": -199.99227905273438, + "loss": 0.3571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012458610348403454, + "rewards/margins": 3.587049722671509, + "rewards/rejected": -3.5745911598205566, + "step": 4921 + }, + { + "epoch": 0.29, + "learning_rate": 8.372736575267531e-08, + "logits/chosen": -2.0392661094665527, + "logits/rejected": -2.021972179412842, + "logps/chosen": -36.58790588378906, + "logps/rejected": -309.943603515625, + "loss": 0.2516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5775562524795532, + "rewards/margins": 2.1779041290283203, + "rewards/rejected": -1.600347876548767, + "step": 4922 + }, + { + "epoch": 0.29, + "learning_rate": 8.37204080411349e-08, + "logits/chosen": -2.285645008087158, + "logits/rejected": -2.282043218612671, + "logps/chosen": -10.351492881774902, + "logps/rejected": -304.3330993652344, + "loss": 0.2803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.353814035654068, + "rewards/margins": 3.4002528190612793, + "rewards/rejected": -3.046438694000244, + "step": 4923 + }, + { + "epoch": 0.29, + "learning_rate": 8.371344913168062e-08, + "logits/chosen": -1.9519784450531006, + "logits/rejected": -1.947292685508728, + "logps/chosen": -47.85112380981445, + "logps/rejected": -83.32240295410156, + "loss": 0.5323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022043991833925247, + "rewards/margins": 0.6480907201766968, + "rewards/rejected": -0.6701347231864929, + "step": 4924 + }, + { + "epoch": 0.29, + "learning_rate": 8.370648902455973e-08, + "logits/chosen": -1.9982258081436157, + "logits/rejected": -2.0002243518829346, + "logps/chosen": -240.09559631347656, + "logps/rejected": -357.198974609375, + "loss": 0.1434, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7618576288223267, + "rewards/margins": 1.4992660284042358, + "rewards/rejected": 0.26259157061576843, + "step": 4925 + }, + { + "epoch": 0.29, + "learning_rate": 8.369952772001948e-08, + "logits/chosen": -2.0787460803985596, + "logits/rejected": -2.071589231491089, + "logps/chosen": -32.13438034057617, + "logps/rejected": -166.22415161132812, + "loss": 0.4965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40236932039260864, + "rewards/margins": 0.5618343353271484, + "rewards/rejected": -0.159465029835701, + "step": 4926 + }, + { + "epoch": 0.29, + "learning_rate": 8.369256521830717e-08, + "logits/chosen": -2.0421128273010254, + "logits/rejected": -2.0391881465911865, + "logps/chosen": -2.7294914722442627, + "logps/rejected": -117.70925903320312, + "loss": 0.5275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022621989250183105, + "rewards/margins": 0.7801735997200012, + "rewards/rejected": -0.7575516104698181, + "step": 4927 + }, + { + "epoch": 0.29, + "learning_rate": 8.368560151967011e-08, + "logits/chosen": -1.8809887170791626, + "logits/rejected": -1.8818564414978027, + "logps/chosen": -262.7529296875, + "logps/rejected": -270.80828857421875, + "loss": 0.1027, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4061431884765625, + "rewards/margins": 1.7796111106872559, + "rewards/rejected": 0.6265320181846619, + "step": 4928 + }, + { + "epoch": 0.29, + "learning_rate": 8.367863662435572e-08, + "logits/chosen": -2.168107032775879, + "logits/rejected": -2.154561758041382, + "logps/chosen": -9.524531196802855e-05, + "logps/rejected": -280.82666015625, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3377801855749567e-07, + "rewards/margins": 5.3079447746276855, + "rewards/rejected": -5.307945251464844, + "step": 4929 + }, + { + "epoch": 0.29, + "learning_rate": 8.367167053261142e-08, + "logits/chosen": -2.053189277648926, + "logits/rejected": -2.0423970222473145, + "logps/chosen": -233.71969604492188, + "logps/rejected": -340.73699951171875, + "loss": 0.6063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11529236286878586, + "rewards/margins": 0.4232391119003296, + "rewards/rejected": -0.538531482219696, + "step": 4930 + }, + { + "epoch": 0.29, + "learning_rate": 8.366470324468468e-08, + "logits/chosen": -1.8625214099884033, + "logits/rejected": -1.8905599117279053, + "logps/chosen": -238.23171997070312, + "logps/rejected": -293.4132995605469, + "loss": 0.4042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9389587640762329, + "rewards/margins": 0.43388670682907104, + "rewards/rejected": 0.5050720572471619, + "step": 4931 + }, + { + "epoch": 0.29, + "learning_rate": 8.3657734760823e-08, + "logits/chosen": -2.016620635986328, + "logits/rejected": -1.9587949514389038, + "logps/chosen": -299.9339904785156, + "logps/rejected": -431.04278564453125, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8287445306777954, + "rewards/margins": 0.8415130972862244, + "rewards/rejected": -0.012768554501235485, + "step": 4932 + }, + { + "epoch": 0.29, + "learning_rate": 8.365076508127396e-08, + "logits/chosen": -2.1343586444854736, + "logits/rejected": -2.122849941253662, + "logps/chosen": -66.97770690917969, + "logps/rejected": -248.18539428710938, + "loss": 0.611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5818508267402649, + "rewards/margins": 1.8254826068878174, + "rewards/rejected": -2.4073333740234375, + "step": 4933 + }, + { + "epoch": 0.29, + "learning_rate": 8.364379420628513e-08, + "logits/chosen": -2.127051591873169, + "logits/rejected": -2.113711357116699, + "logps/chosen": -201.77330017089844, + "logps/rejected": -371.2923583984375, + "loss": 0.0997, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2096924781799316, + "rewards/margins": 1.8327820301055908, + "rewards/rejected": 0.37691041827201843, + "step": 4934 + }, + { + "epoch": 0.29, + "learning_rate": 8.363682213610415e-08, + "logits/chosen": -2.0424511432647705, + "logits/rejected": -2.038693904876709, + "logps/chosen": -23.208202362060547, + "logps/rejected": -161.90951538085938, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3081333339214325, + "rewards/margins": 1.6944035291671753, + "rewards/rejected": -1.3862701654434204, + "step": 4935 + }, + { + "epoch": 0.29, + "learning_rate": 8.362984887097872e-08, + "logits/chosen": -1.9032607078552246, + "logits/rejected": -1.8898423910140991, + "logps/chosen": -119.08967590332031, + "logps/rejected": -294.8580627441406, + "loss": 0.5717, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4948562681674957, + "rewards/margins": -0.12518158555030823, + "rewards/rejected": 0.620037853717804, + "step": 4936 + }, + { + "epoch": 0.29, + "learning_rate": 8.362287441115657e-08, + "logits/chosen": -1.9613983631134033, + "logits/rejected": -1.9630577564239502, + "logps/chosen": -9.500258445739746, + "logps/rejected": -136.7932891845703, + "loss": 0.4341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09999704360961914, + "rewards/margins": 1.3626412153244019, + "rewards/rejected": -1.2626441717147827, + "step": 4937 + }, + { + "epoch": 0.29, + "learning_rate": 8.361589875688544e-08, + "logits/chosen": -2.032355546951294, + "logits/rejected": -2.030681610107422, + "logps/chosen": -4.6610060962848365e-05, + "logps/rejected": -84.4376220703125, + "loss": 0.5923, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960250177849957e-07, + "rewards/margins": 0.4547837972640991, + "rewards/rejected": -0.4547843933105469, + "step": 4938 + }, + { + "epoch": 0.29, + "learning_rate": 8.360892190841316e-08, + "logits/chosen": -1.8958213329315186, + "logits/rejected": -1.8963062763214111, + "logps/chosen": -75.11370086669922, + "logps/rejected": -113.13650512695312, + "loss": 0.4858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5561004877090454, + "rewards/margins": 0.41327211260795593, + "rewards/rejected": 0.14282837510108948, + "step": 4939 + }, + { + "epoch": 0.29, + "learning_rate": 8.360194386598756e-08, + "logits/chosen": -1.9850752353668213, + "logits/rejected": -1.998019814491272, + "logps/chosen": -147.76846313476562, + "logps/rejected": -217.07977294921875, + "loss": 0.5134, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7180816531181335, + "rewards/margins": -0.015217602252960205, + "rewards/rejected": 0.7332992553710938, + "step": 4940 + }, + { + "epoch": 0.29, + "learning_rate": 8.359496462985656e-08, + "logits/chosen": -2.0028605461120605, + "logits/rejected": -2.029107093811035, + "logps/chosen": -192.27731323242188, + "logps/rejected": -308.72552490234375, + "loss": 0.0999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.93206787109375, + "rewards/margins": 2.021649122238159, + "rewards/rejected": -0.08958130329847336, + "step": 4941 + }, + { + "epoch": 0.29, + "learning_rate": 8.358798420026809e-08, + "logits/chosen": -2.010408878326416, + "logits/rejected": -2.0032808780670166, + "logps/chosen": -3.6629583835601807, + "logps/rejected": -86.38471984863281, + "loss": 0.5529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04640982300043106, + "rewards/margins": 0.5915057063102722, + "rewards/rejected": -0.5450958609580994, + "step": 4942 + }, + { + "epoch": 0.29, + "learning_rate": 8.358100257747012e-08, + "logits/chosen": -1.8676600456237793, + "logits/rejected": -1.8192845582962036, + "logps/chosen": -246.95596313476562, + "logps/rejected": -392.49920654296875, + "loss": 0.1575, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.630194067955017, + "rewards/margins": 1.7655761241912842, + "rewards/rejected": -0.13538208603858948, + "step": 4943 + }, + { + "epoch": 0.29, + "learning_rate": 8.357401976171066e-08, + "logits/chosen": -2.032278060913086, + "logits/rejected": -2.0217058658599854, + "logps/chosen": -133.71182250976562, + "logps/rejected": -514.7216796875, + "loss": 0.1036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.507421851158142, + "rewards/margins": 3.059884548187256, + "rewards/rejected": -1.5524628162384033, + "step": 4944 + }, + { + "epoch": 0.29, + "learning_rate": 8.356703575323783e-08, + "logits/chosen": -2.079265832901001, + "logits/rejected": -2.1018311977386475, + "logps/chosen": -259.45794677734375, + "logps/rejected": -392.59698486328125, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49392396211624146, + "rewards/margins": 0.893566906452179, + "rewards/rejected": -0.3996429443359375, + "step": 4945 + }, + { + "epoch": 0.29, + "learning_rate": 8.356005055229967e-08, + "logits/chosen": -2.033094644546509, + "logits/rejected": -2.0392651557922363, + "logps/chosen": -303.43829345703125, + "logps/rejected": -445.1395263671875, + "loss": 0.1004, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3805725574493408, + "rewards/margins": 2.402657985687256, + "rewards/rejected": -1.0220855474472046, + "step": 4946 + }, + { + "epoch": 0.29, + "learning_rate": 8.355306415914434e-08, + "logits/chosen": -2.0547640323638916, + "logits/rejected": -2.063028573989868, + "logps/chosen": -83.88140869140625, + "logps/rejected": -263.90216064453125, + "loss": 0.457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1325889676809311, + "rewards/margins": 1.8578956127166748, + "rewards/rejected": -1.990484595298767, + "step": 4947 + }, + { + "epoch": 0.29, + "learning_rate": 8.354607657402006e-08, + "logits/chosen": -2.0442230701446533, + "logits/rejected": -2.066575765609741, + "logps/chosen": -212.26333618164062, + "logps/rejected": -308.2607116699219, + "loss": 0.2286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7179504632949829, + "rewards/margins": 1.5947234630584717, + "rewards/rejected": -0.8767730593681335, + "step": 4948 + }, + { + "epoch": 0.29, + "learning_rate": 8.353908779717506e-08, + "logits/chosen": -2.0831000804901123, + "logits/rejected": -2.084749460220337, + "logps/chosen": -51.8474006652832, + "logps/rejected": -208.0635528564453, + "loss": 0.6518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13950768113136292, + "rewards/margins": 0.5618579387664795, + "rewards/rejected": -0.7013656497001648, + "step": 4949 + }, + { + "epoch": 0.29, + "learning_rate": 8.35320978288576e-08, + "logits/chosen": -1.9035892486572266, + "logits/rejected": -1.9093375205993652, + "logps/chosen": -192.4226837158203, + "logps/rejected": -399.0511779785156, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.502923607826233, + "rewards/margins": 2.578549385070801, + "rewards/rejected": -1.0756256580352783, + "step": 4950 + }, + { + "epoch": 0.29, + "learning_rate": 8.352510666931599e-08, + "logits/chosen": -1.7952923774719238, + "logits/rejected": -1.7976343631744385, + "logps/chosen": -0.0508398599922657, + "logps/rejected": -42.81704330444336, + "loss": 0.6224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036520850844681263, + "rewards/margins": 0.28669673204421997, + "rewards/rejected": -0.2903488278388977, + "step": 4951 + }, + { + "epoch": 0.29, + "learning_rate": 8.351811431879861e-08, + "logits/chosen": -1.9513999223709106, + "logits/rejected": -1.8935811519622803, + "logps/chosen": -392.35772705078125, + "logps/rejected": -624.0843505859375, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0770509243011475, + "rewards/margins": 3.5260987281799316, + "rewards/rejected": -0.44904786348342896, + "step": 4952 + }, + { + "epoch": 0.29, + "learning_rate": 8.351112077755385e-08, + "logits/chosen": -2.1468312740325928, + "logits/rejected": -2.1475610733032227, + "logps/chosen": -3.655327081680298, + "logps/rejected": -62.93190383911133, + "loss": 0.6419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009304833598434925, + "rewards/margins": 0.20938646793365479, + "rewards/rejected": -0.20008163154125214, + "step": 4953 + }, + { + "epoch": 0.29, + "learning_rate": 8.350412604583016e-08, + "logits/chosen": -1.7592922449111938, + "logits/rejected": -1.7505316734313965, + "logps/chosen": -170.9214324951172, + "logps/rejected": -198.80612182617188, + "loss": 0.3518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.890881359577179, + "rewards/margins": 0.6516281366348267, + "rewards/rejected": 0.2392532378435135, + "step": 4954 + }, + { + "epoch": 0.29, + "learning_rate": 8.349713012387602e-08, + "logits/chosen": -2.1287050247192383, + "logits/rejected": -2.109489917755127, + "logps/chosen": -132.18154907226562, + "logps/rejected": -302.34698486328125, + "loss": 0.2056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2535218000411987, + "rewards/margins": 1.835052490234375, + "rewards/rejected": -0.581530749797821, + "step": 4955 + }, + { + "epoch": 0.29, + "learning_rate": 8.349013301193998e-08, + "logits/chosen": -1.9614816904067993, + "logits/rejected": -1.9543652534484863, + "logps/chosen": -44.36146926879883, + "logps/rejected": -208.14813232421875, + "loss": 0.5343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22277222573757172, + "rewards/margins": 1.301812767982483, + "rewards/rejected": -1.5245850086212158, + "step": 4956 + }, + { + "epoch": 0.29, + "learning_rate": 8.34831347102706e-08, + "logits/chosen": -1.9984995126724243, + "logits/rejected": -1.936959147453308, + "logps/chosen": -285.7316589355469, + "logps/rejected": -530.0238037109375, + "loss": 0.1928, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2123931646347046, + "rewards/margins": 1.9425323009490967, + "rewards/rejected": -0.7301391959190369, + "step": 4957 + }, + { + "epoch": 0.29, + "learning_rate": 8.347613521911648e-08, + "logits/chosen": -2.0595521926879883, + "logits/rejected": -2.051591396331787, + "logps/chosen": -65.77105712890625, + "logps/rejected": -262.9570617675781, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13937607407569885, + "rewards/margins": 2.165398597717285, + "rewards/rejected": -2.026022434234619, + "step": 4958 + }, + { + "epoch": 0.29, + "learning_rate": 8.346913453872628e-08, + "logits/chosen": -2.0506973266601562, + "logits/rejected": -2.050917148590088, + "logps/chosen": -107.34485626220703, + "logps/rejected": -241.77780151367188, + "loss": 0.6614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12672196328639984, + "rewards/margins": 0.07052534818649292, + "rewards/rejected": -0.19724731147289276, + "step": 4959 + }, + { + "epoch": 0.29, + "learning_rate": 8.346213266934871e-08, + "logits/chosen": -2.0284957885742188, + "logits/rejected": -1.9914333820343018, + "logps/chosen": -182.56912231445312, + "logps/rejected": -364.4696044921875, + "loss": 0.1671, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5909805297851562, + "rewards/margins": 1.6308609247207642, + "rewards/rejected": -0.03988037258386612, + "step": 4960 + }, + { + "epoch": 0.29, + "learning_rate": 8.345512961123251e-08, + "logits/chosen": -2.0411081314086914, + "logits/rejected": -2.028322458267212, + "logps/chosen": -139.96658325195312, + "logps/rejected": -240.4818115234375, + "loss": 0.2494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8807617425918579, + "rewards/margins": 1.4373397827148438, + "rewards/rejected": -0.5565780997276306, + "step": 4961 + }, + { + "epoch": 0.29, + "learning_rate": 8.344812536462647e-08, + "logits/chosen": -1.821564793586731, + "logits/rejected": -1.830748438835144, + "logps/chosen": -171.97235107421875, + "logps/rejected": -308.1267395019531, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.803308129310608, + "rewards/margins": 0.3990752696990967, + "rewards/rejected": 1.4042328596115112, + "step": 4962 + }, + { + "epoch": 0.29, + "learning_rate": 8.34411199297794e-08, + "logits/chosen": -1.9351266622543335, + "logits/rejected": -1.943992018699646, + "logps/chosen": -3.895037889480591, + "logps/rejected": -145.72052001953125, + "loss": 0.4925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09266336262226105, + "rewards/margins": 1.2716310024261475, + "rewards/rejected": -1.364294409751892, + "step": 4963 + }, + { + "epoch": 0.29, + "learning_rate": 8.343411330694017e-08, + "logits/chosen": -1.7973573207855225, + "logits/rejected": -1.801919937133789, + "logps/chosen": -6.522623062133789, + "logps/rejected": -148.948974609375, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32366687059402466, + "rewards/margins": 3.5158963203430176, + "rewards/rejected": -3.1922295093536377, + "step": 4964 + }, + { + "epoch": 0.29, + "learning_rate": 8.342710549635769e-08, + "logits/chosen": -2.0032241344451904, + "logits/rejected": -1.9899929761886597, + "logps/chosen": -281.1383361816406, + "logps/rejected": -564.9500122070312, + "loss": 0.0568, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7112396955490112, + "rewards/margins": 5.753824234008789, + "rewards/rejected": -4.042584419250488, + "step": 4965 + }, + { + "epoch": 0.29, + "learning_rate": 8.34200964982809e-08, + "logits/chosen": -2.0848584175109863, + "logits/rejected": -2.069242000579834, + "logps/chosen": -125.95867156982422, + "logps/rejected": -226.29823303222656, + "loss": 0.5232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13738785684108734, + "rewards/margins": 0.9885292053222656, + "rewards/rejected": -1.1259170770645142, + "step": 4966 + }, + { + "epoch": 0.29, + "learning_rate": 8.341308631295882e-08, + "logits/chosen": -1.9571642875671387, + "logits/rejected": -1.9529553651809692, + "logps/chosen": -0.00018559794989414513, + "logps/rejected": -274.63653564453125, + "loss": 0.3539, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.187583833001554e-06, + "rewards/margins": 3.573643922805786, + "rewards/rejected": -3.573651075363159, + "step": 4967 + }, + { + "epoch": 0.29, + "learning_rate": 8.340607494064048e-08, + "logits/chosen": -2.1544177532196045, + "logits/rejected": -2.139946937561035, + "logps/chosen": -162.9530792236328, + "logps/rejected": -335.6767578125, + "loss": 0.3951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0790694952011108, + "rewards/margins": 0.3064956068992615, + "rewards/rejected": 0.7725738883018494, + "step": 4968 + }, + { + "epoch": 0.29, + "learning_rate": 8.339906238157495e-08, + "logits/chosen": -2.059929609298706, + "logits/rejected": -1.9510886669158936, + "logps/chosen": -240.6998748779297, + "logps/rejected": -417.7645263671875, + "loss": 0.2815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6267868280410767, + "rewards/margins": 1.5140609741210938, + "rewards/rejected": -0.8872742056846619, + "step": 4969 + }, + { + "epoch": 0.29, + "learning_rate": 8.339204863601135e-08, + "logits/chosen": -2.1055195331573486, + "logits/rejected": -2.08120059967041, + "logps/chosen": -205.4698486328125, + "logps/rejected": -311.43365478515625, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3140504360198975, + "rewards/margins": 0.5528504848480225, + "rewards/rejected": 1.761199951171875, + "step": 4970 + }, + { + "epoch": 0.29, + "learning_rate": 8.338503370419884e-08, + "logits/chosen": -2.100754737854004, + "logits/rejected": -2.0990676879882812, + "logps/chosen": -21.690855026245117, + "logps/rejected": -94.102783203125, + "loss": 0.6811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0654691681265831, + "rewards/margins": 0.048952676355838776, + "rewards/rejected": -0.11442184448242188, + "step": 4971 + }, + { + "epoch": 0.29, + "learning_rate": 8.337801758638663e-08, + "logits/chosen": -2.001067638397217, + "logits/rejected": -1.9999254941940308, + "logps/chosen": -58.240501403808594, + "logps/rejected": -159.7429962158203, + "loss": 0.6338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030672455206513405, + "rewards/margins": 0.4679527282714844, + "rewards/rejected": -0.4986251890659332, + "step": 4972 + }, + { + "epoch": 0.29, + "learning_rate": 8.337100028282396e-08, + "logits/chosen": -1.975075125694275, + "logits/rejected": -1.993788719177246, + "logps/chosen": -185.70431518554688, + "logps/rejected": -261.4033203125, + "loss": 0.2716, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7131088972091675, + "rewards/margins": 0.6478012800216675, + "rewards/rejected": 1.0653076171875, + "step": 4973 + }, + { + "epoch": 0.29, + "learning_rate": 8.336398179376014e-08, + "logits/chosen": -2.1253573894500732, + "logits/rejected": -2.1290314197540283, + "logps/chosen": -10.364933013916016, + "logps/rejected": -146.36349487304688, + "loss": 0.5715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16277705132961273, + "rewards/margins": 0.3797829747200012, + "rewards/rejected": -0.2170059233903885, + "step": 4974 + }, + { + "epoch": 0.29, + "learning_rate": 8.335696211944447e-08, + "logits/chosen": -1.9723447561264038, + "logits/rejected": -1.9666095972061157, + "logps/chosen": -93.11843872070312, + "logps/rejected": -223.21104431152344, + "loss": 0.485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001003265380859375, + "rewards/margins": 0.8331230282783508, + "rewards/rejected": -0.8341262936592102, + "step": 4975 + }, + { + "epoch": 0.29, + "learning_rate": 8.334994126012635e-08, + "logits/chosen": -2.186328172683716, + "logits/rejected": -2.187063694000244, + "logps/chosen": -0.14047960937023163, + "logps/rejected": -48.90531921386719, + "loss": 0.5249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008543292991816998, + "rewards/margins": 0.7968539595603943, + "rewards/rejected": -0.7883106470108032, + "step": 4976 + }, + { + "epoch": 0.29, + "learning_rate": 8.334291921605517e-08, + "logits/chosen": -1.9856140613555908, + "logits/rejected": -1.9596786499023438, + "logps/chosen": -166.4401092529297, + "logps/rejected": -245.16790771484375, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.015570044517517, + "rewards/margins": 0.8496596813201904, + "rewards/rejected": 0.16591034829616547, + "step": 4977 + }, + { + "epoch": 0.29, + "learning_rate": 8.333589598748041e-08, + "logits/chosen": -1.9418368339538574, + "logits/rejected": -1.9266645908355713, + "logps/chosen": -227.796630859375, + "logps/rejected": -426.0810546875, + "loss": 0.4879, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.050541639328003, + "rewards/margins": -0.3713669776916504, + "rewards/rejected": 2.4219086170196533, + "step": 4978 + }, + { + "epoch": 0.29, + "learning_rate": 8.332887157465156e-08, + "logits/chosen": -2.2469890117645264, + "logits/rejected": -2.240846633911133, + "logps/chosen": -1.2227815389633179, + "logps/rejected": -116.4039306640625, + "loss": 0.6465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003455305239185691, + "rewards/margins": 0.2434043437242508, + "rewards/rejected": -0.239949032664299, + "step": 4979 + }, + { + "epoch": 0.29, + "learning_rate": 8.332184597781816e-08, + "logits/chosen": -2.0807230472564697, + "logits/rejected": -2.080627202987671, + "logps/chosen": -0.00011360416101524606, + "logps/rejected": -146.10586547851562, + "loss": 0.4524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1228079529246315e-05, + "rewards/margins": 1.3443334102630615, + "rewards/rejected": -1.3443222045898438, + "step": 4980 + }, + { + "epoch": 0.29, + "learning_rate": 8.33148191972298e-08, + "logits/chosen": -2.1175882816314697, + "logits/rejected": -2.1114447116851807, + "logps/chosen": -0.01755659282207489, + "logps/rejected": -289.3720703125, + "loss": 0.3649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038395822048187256, + "rewards/margins": 3.138385772705078, + "rewards/rejected": -3.1387696266174316, + "step": 4981 + }, + { + "epoch": 0.29, + "learning_rate": 8.330779123313611e-08, + "logits/chosen": -2.0957934856414795, + "logits/rejected": -2.0871646404266357, + "logps/chosen": -239.87698364257812, + "logps/rejected": -330.2811279296875, + "loss": 0.2914, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3697388172149658, + "rewards/margins": 0.822216808795929, + "rewards/rejected": 0.5475220084190369, + "step": 4982 + }, + { + "epoch": 0.29, + "learning_rate": 8.330076208578674e-08, + "logits/chosen": -2.0024499893188477, + "logits/rejected": -2.009315013885498, + "logps/chosen": -173.95143127441406, + "logps/rejected": -439.044189453125, + "loss": 0.1591, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1585311889648438, + "rewards/margins": 2.4738540649414062, + "rewards/rejected": -1.3153228759765625, + "step": 4983 + }, + { + "epoch": 0.29, + "learning_rate": 8.329373175543142e-08, + "logits/chosen": -1.942480444908142, + "logits/rejected": -1.9348407983779907, + "logps/chosen": -67.2044677734375, + "logps/rejected": -229.5847930908203, + "loss": 0.3503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022649383172392845, + "rewards/margins": 1.950025200843811, + "rewards/rejected": -1.9273757934570312, + "step": 4984 + }, + { + "epoch": 0.29, + "learning_rate": 8.328670024231987e-08, + "logits/chosen": -1.8801584243774414, + "logits/rejected": -1.8707904815673828, + "logps/chosen": -38.28162384033203, + "logps/rejected": -225.63380432128906, + "loss": 0.4914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037245940417051315, + "rewards/margins": 1.0288276672363281, + "rewards/rejected": -1.0660736560821533, + "step": 4985 + }, + { + "epoch": 0.29, + "learning_rate": 8.327966754670192e-08, + "logits/chosen": -2.018655776977539, + "logits/rejected": -2.0202057361602783, + "logps/chosen": -22.786766052246094, + "logps/rejected": -181.0526580810547, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31747913360595703, + "rewards/margins": 1.0855424404144287, + "rewards/rejected": -0.7680633664131165, + "step": 4986 + }, + { + "epoch": 0.29, + "learning_rate": 8.327263366882738e-08, + "logits/chosen": -2.12194561958313, + "logits/rejected": -2.121752977371216, + "logps/chosen": -24.018423080444336, + "logps/rejected": -234.68063354492188, + "loss": 0.5632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0439847968518734, + "rewards/margins": 0.4014192521572113, + "rewards/rejected": -0.445404052734375, + "step": 4987 + }, + { + "epoch": 0.29, + "learning_rate": 8.326559860894616e-08, + "logits/chosen": -1.848858118057251, + "logits/rejected": -1.8285964727401733, + "logps/chosen": -292.89288330078125, + "logps/rejected": -366.0209045410156, + "loss": 0.2392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9974914789199829, + "rewards/margins": 1.4540588855743408, + "rewards/rejected": -0.4565673768520355, + "step": 4988 + }, + { + "epoch": 0.29, + "learning_rate": 8.325856236730813e-08, + "logits/chosen": -1.9451576471328735, + "logits/rejected": -1.9407802820205688, + "logps/chosen": -19.344547271728516, + "logps/rejected": -99.77125549316406, + "loss": 0.5075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2746906280517578, + "rewards/margins": 0.4983593225479126, + "rewards/rejected": -0.2236686795949936, + "step": 4989 + }, + { + "epoch": 0.29, + "learning_rate": 8.32515249441633e-08, + "logits/chosen": -2.06899094581604, + "logits/rejected": -2.064502239227295, + "logps/chosen": -8.626581192016602, + "logps/rejected": -38.148380279541016, + "loss": 0.5923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13740091025829315, + "rewards/margins": 0.291018009185791, + "rewards/rejected": -0.15361709892749786, + "step": 4990 + }, + { + "epoch": 0.29, + "learning_rate": 8.324448633976165e-08, + "logits/chosen": -2.022334098815918, + "logits/rejected": -2.0334625244140625, + "logps/chosen": -3.3855038054753095e-05, + "logps/rejected": -172.97703552246094, + "loss": 0.3668, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.960282578598708e-08, + "rewards/margins": 3.045640707015991, + "rewards/rejected": -3.045640707015991, + "step": 4991 + }, + { + "epoch": 0.29, + "learning_rate": 8.32374465543532e-08, + "logits/chosen": -2.0734617710113525, + "logits/rejected": -2.0703296661376953, + "logps/chosen": -0.01647714525461197, + "logps/rejected": -136.40496826171875, + "loss": 0.4495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0022103197406977415, + "rewards/margins": 1.460820198059082, + "rewards/rejected": -1.4586098194122314, + "step": 4992 + }, + { + "epoch": 0.29, + "learning_rate": 8.323040558818811e-08, + "logits/chosen": -2.0315163135528564, + "logits/rejected": -2.023420572280884, + "logps/chosen": -91.94659423828125, + "logps/rejected": -406.4648132324219, + "loss": 0.2671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6049278378486633, + "rewards/margins": 2.196343183517456, + "rewards/rejected": -1.5914154052734375, + "step": 4993 + }, + { + "epoch": 0.29, + "learning_rate": 8.322336344151644e-08, + "logits/chosen": -2.0416553020477295, + "logits/rejected": -2.03304123878479, + "logps/chosen": -24.001388549804688, + "logps/rejected": -205.64370727539062, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38844966888427734, + "rewards/margins": 1.9195730686187744, + "rewards/rejected": -1.531123399734497, + "step": 4994 + }, + { + "epoch": 0.29, + "learning_rate": 8.32163201145884e-08, + "logits/chosen": -2.0170605182647705, + "logits/rejected": -2.009345293045044, + "logps/chosen": -0.0008516121306456625, + "logps/rejected": -106.56078338623047, + "loss": 0.4, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.121275535202585e-05, + "rewards/margins": 2.0258703231811523, + "rewards/rejected": -2.025921583175659, + "step": 4995 + }, + { + "epoch": 0.29, + "learning_rate": 8.320927560765419e-08, + "logits/chosen": -2.1179330348968506, + "logits/rejected": -2.119995594024658, + "logps/chosen": -13.849594116210938, + "logps/rejected": -205.78253173828125, + "loss": 0.5014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07897863537073135, + "rewards/margins": 1.2429451942443848, + "rewards/rejected": -1.321923851966858, + "step": 4996 + }, + { + "epoch": 0.29, + "learning_rate": 8.320222992096404e-08, + "logits/chosen": -1.8170454502105713, + "logits/rejected": -1.751291036605835, + "logps/chosen": -287.64361572265625, + "logps/rejected": -483.358642578125, + "loss": 0.245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2063111066818237, + "rewards/margins": 1.0561401844024658, + "rewards/rejected": 0.15017090737819672, + "step": 4997 + }, + { + "epoch": 0.29, + "learning_rate": 8.31951830547683e-08, + "logits/chosen": -1.74440336227417, + "logits/rejected": -1.7408334016799927, + "logps/chosen": -39.59886932373047, + "logps/rejected": -308.56292724609375, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4795181453227997, + "rewards/margins": 5.168663024902344, + "rewards/rejected": -4.689145088195801, + "step": 4998 + }, + { + "epoch": 0.29, + "learning_rate": 8.318813500931727e-08, + "logits/chosen": -1.9228965044021606, + "logits/rejected": -1.926496982574463, + "logps/chosen": -37.10601806640625, + "logps/rejected": -163.57803344726562, + "loss": 0.3935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2550926208496094, + "rewards/margins": 1.3006294965744019, + "rewards/rejected": -1.0455368757247925, + "step": 4999 + }, + { + "epoch": 0.29, + "learning_rate": 8.318108578486136e-08, + "logits/chosen": -2.195556879043579, + "logits/rejected": -2.1887118816375732, + "logps/chosen": -55.257911682128906, + "logps/rejected": -128.34320068359375, + "loss": 0.8975, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6876888275146484, + "rewards/margins": -0.11057621240615845, + "rewards/rejected": -0.57711261510849, + "step": 5000 + }, + { + "epoch": 0.29, + "learning_rate": 8.317403538165097e-08, + "logits/chosen": -1.9593956470489502, + "logits/rejected": -1.9630171060562134, + "logps/chosen": -5.149762364453636e-05, + "logps/rejected": -199.41326904296875, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.867878935030603e-07, + "rewards/margins": 2.960186004638672, + "rewards/rejected": -2.9601852893829346, + "step": 5001 + }, + { + "epoch": 0.29, + "learning_rate": 8.316698379993656e-08, + "logits/chosen": -2.0453391075134277, + "logits/rejected": -2.0523314476013184, + "logps/chosen": -7.426293849945068, + "logps/rejected": -65.68700408935547, + "loss": 0.657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04774441942572594, + "rewards/margins": 0.33086544275283813, + "rewards/rejected": -0.37860986590385437, + "step": 5002 + }, + { + "epoch": 0.29, + "learning_rate": 8.315993103996865e-08, + "logits/chosen": -1.9746171236038208, + "logits/rejected": -1.972153663635254, + "logps/chosen": -0.002644601510837674, + "logps/rejected": -247.2613067626953, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2051267731294502e-05, + "rewards/margins": 3.276127815246582, + "rewards/rejected": -3.276139974594116, + "step": 5003 + }, + { + "epoch": 0.29, + "learning_rate": 8.315287710199781e-08, + "logits/chosen": -2.0550224781036377, + "logits/rejected": -1.9829930067062378, + "logps/chosen": -250.45802307128906, + "logps/rejected": -463.1021423339844, + "loss": 0.1349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.051530599594116, + "rewards/margins": 1.6994645595550537, + "rewards/rejected": 0.3520660400390625, + "step": 5004 + }, + { + "epoch": 0.29, + "learning_rate": 8.314582198627458e-08, + "logits/chosen": -1.9548814296722412, + "logits/rejected": -1.9808940887451172, + "logps/chosen": -200.96359252929688, + "logps/rejected": -417.64068603515625, + "loss": 0.2011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.206475853919983, + "rewards/margins": 1.251470923423767, + "rewards/rejected": -0.04499511793255806, + "step": 5005 + }, + { + "epoch": 0.29, + "learning_rate": 8.313876569304963e-08, + "logits/chosen": -2.1043636798858643, + "logits/rejected": -2.1046199798583984, + "logps/chosen": -3.235898494720459, + "logps/rejected": -53.36773681640625, + "loss": 0.6646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1700911819934845, + "rewards/margins": 0.3632607161998749, + "rewards/rejected": -0.5333518981933594, + "step": 5006 + }, + { + "epoch": 0.29, + "learning_rate": 8.313170822257362e-08, + "logits/chosen": -2.1035547256469727, + "logits/rejected": -2.107034683227539, + "logps/chosen": -189.49632263183594, + "logps/rejected": -198.74105834960938, + "loss": 0.5152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6349884271621704, + "rewards/margins": 0.12383121252059937, + "rewards/rejected": 0.511157214641571, + "step": 5007 + }, + { + "epoch": 0.29, + "learning_rate": 8.312464957509728e-08, + "logits/chosen": -2.029644727706909, + "logits/rejected": -2.028496503829956, + "logps/chosen": -12.234175682067871, + "logps/rejected": -199.110595703125, + "loss": 0.4828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012804794125258923, + "rewards/margins": 1.1343235969543457, + "rewards/rejected": -1.1471283435821533, + "step": 5008 + }, + { + "epoch": 0.29, + "learning_rate": 8.311758975087136e-08, + "logits/chosen": -1.8976521492004395, + "logits/rejected": -1.8300105333328247, + "logps/chosen": -264.3954162597656, + "logps/rejected": -370.7347412109375, + "loss": 0.0501, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9699127674102783, + "rewards/margins": 2.8013885021209717, + "rewards/rejected": 0.16852417588233948, + "step": 5009 + }, + { + "epoch": 0.29, + "learning_rate": 8.311052875014664e-08, + "logits/chosen": -2.107937812805176, + "logits/rejected": -2.1138131618499756, + "logps/chosen": -54.4961051940918, + "logps/rejected": -408.19500732421875, + "loss": 0.2857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25162696838378906, + "rewards/margins": 2.187601089477539, + "rewards/rejected": -1.93597412109375, + "step": 5010 + }, + { + "epoch": 0.29, + "learning_rate": 8.310346657317397e-08, + "logits/chosen": -2.022885322570801, + "logits/rejected": -2.0312044620513916, + "logps/chosen": -46.3818244934082, + "logps/rejected": -174.1975860595703, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3708019256591797, + "rewards/margins": 1.8616176843643188, + "rewards/rejected": -1.4908157587051392, + "step": 5011 + }, + { + "epoch": 0.29, + "learning_rate": 8.309640322020426e-08, + "logits/chosen": -1.9964804649353027, + "logits/rejected": -1.9660855531692505, + "logps/chosen": -201.94583129882812, + "logps/rejected": -478.991943359375, + "loss": 0.0601, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.614837646484375, + "rewards/margins": 4.078955173492432, + "rewards/rejected": -2.4641175270080566, + "step": 5012 + }, + { + "epoch": 0.29, + "learning_rate": 8.308933869148841e-08, + "logits/chosen": -2.187898874282837, + "logits/rejected": -2.231848955154419, + "logps/chosen": -128.27853393554688, + "logps/rejected": -419.06243896484375, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6820526123046875, + "rewards/margins": 2.9065825939178467, + "rewards/rejected": -2.224529981613159, + "step": 5013 + }, + { + "epoch": 0.29, + "learning_rate": 8.30822729872774e-08, + "logits/chosen": -2.205580472946167, + "logits/rejected": -2.197096109390259, + "logps/chosen": -76.74341583251953, + "logps/rejected": -239.11753845214844, + "loss": 0.4501, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18196716904640198, + "rewards/margins": 1.8806473016738892, + "rewards/rejected": -2.0626144409179688, + "step": 5014 + }, + { + "epoch": 0.29, + "learning_rate": 8.307520610782221e-08, + "logits/chosen": -1.8121005296707153, + "logits/rejected": -1.8182328939437866, + "logps/chosen": -53.29901885986328, + "logps/rejected": -160.48500061035156, + "loss": 0.6504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10518112033605576, + "rewards/margins": 0.18914717435836792, + "rewards/rejected": -0.2943283021450043, + "step": 5015 + }, + { + "epoch": 0.29, + "learning_rate": 8.306813805337394e-08, + "logits/chosen": -1.7104060649871826, + "logits/rejected": -1.7105075120925903, + "logps/chosen": -0.0005073593347333372, + "logps/rejected": -37.663394927978516, + "loss": 0.6207, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2409949451684952e-05, + "rewards/margins": 0.31131696701049805, + "rewards/rejected": -0.3112945556640625, + "step": 5016 + }, + { + "epoch": 0.29, + "learning_rate": 8.306106882418364e-08, + "logits/chosen": -2.0409653186798096, + "logits/rejected": -2.0535717010498047, + "logps/chosen": -215.16355895996094, + "logps/rejected": -262.19488525390625, + "loss": 0.1883, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1695878505706787, + "rewards/margins": 0.9960618019104004, + "rewards/rejected": 1.1735260486602783, + "step": 5017 + }, + { + "epoch": 0.29, + "learning_rate": 8.305399842050244e-08, + "logits/chosen": -2.3013486862182617, + "logits/rejected": -2.2886335849761963, + "logps/chosen": -11.523127555847168, + "logps/rejected": -193.24691772460938, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0499231331050396, + "rewards/margins": 2.9777095317840576, + "rewards/rejected": -2.927786350250244, + "step": 5018 + }, + { + "epoch": 0.29, + "learning_rate": 8.304692684258155e-08, + "logits/chosen": -1.9192945957183838, + "logits/rejected": -1.8885955810546875, + "logps/chosen": -187.92352294921875, + "logps/rejected": -463.34906005859375, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3760132789611816, + "rewards/margins": 4.991995334625244, + "rewards/rejected": -2.6159820556640625, + "step": 5019 + }, + { + "epoch": 0.29, + "learning_rate": 8.303985409067216e-08, + "logits/chosen": -1.8223580121994019, + "logits/rejected": -1.8170088529586792, + "logps/chosen": -184.0313720703125, + "logps/rejected": -279.7987365722656, + "loss": 0.1182, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0150604248046875, + "rewards/margins": 1.7752959728240967, + "rewards/rejected": 0.23976440727710724, + "step": 5020 + }, + { + "epoch": 0.29, + "learning_rate": 8.303278016502555e-08, + "logits/chosen": -2.119185447692871, + "logits/rejected": -2.112785577774048, + "logps/chosen": -0.004589219577610493, + "logps/rejected": -100.41783142089844, + "loss": 0.5236, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4273030501499306e-05, + "rewards/margins": 0.8426930904388428, + "rewards/rejected": -0.8426788449287415, + "step": 5021 + }, + { + "epoch": 0.29, + "learning_rate": 8.302570506589299e-08, + "logits/chosen": -1.8816627264022827, + "logits/rejected": -1.8548108339309692, + "logps/chosen": -334.0182189941406, + "logps/rejected": -415.1540832519531, + "loss": 0.2384, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5553741455078125, + "rewards/margins": 1.0172607898712158, + "rewards/rejected": 0.5381134152412415, + "step": 5022 + }, + { + "epoch": 0.29, + "learning_rate": 8.301862879352585e-08, + "logits/chosen": -1.9917584657669067, + "logits/rejected": -1.95724618434906, + "logps/chosen": -291.20501708984375, + "logps/rejected": -425.05853271484375, + "loss": 0.4051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.508306860923767, + "rewards/margins": 0.0044127702713012695, + "rewards/rejected": 1.5038940906524658, + "step": 5023 + }, + { + "epoch": 0.29, + "learning_rate": 8.30115513481755e-08, + "logits/chosen": -1.8705774545669556, + "logits/rejected": -1.8734344244003296, + "logps/chosen": -13.193157196044922, + "logps/rejected": -226.97549438476562, + "loss": 0.3762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37609806656837463, + "rewards/margins": 1.5438318252563477, + "rewards/rejected": -1.1677337884902954, + "step": 5024 + }, + { + "epoch": 0.29, + "learning_rate": 8.300447273009336e-08, + "logits/chosen": -1.9968655109405518, + "logits/rejected": -1.9932750463485718, + "logps/chosen": -22.807809829711914, + "logps/rejected": -122.872314453125, + "loss": 0.4103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09705295413732529, + "rewards/margins": 1.8277490139007568, + "rewards/rejected": -1.7306960821151733, + "step": 5025 + }, + { + "epoch": 0.29, + "learning_rate": 8.299739293953092e-08, + "logits/chosen": -2.1289496421813965, + "logits/rejected": -2.133603811264038, + "logps/chosen": -81.33036804199219, + "logps/rejected": -215.25967407226562, + "loss": 0.3348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3225242793560028, + "rewards/margins": 1.831586480140686, + "rewards/rejected": -1.5090621709823608, + "step": 5026 + }, + { + "epoch": 0.29, + "learning_rate": 8.299031197673967e-08, + "logits/chosen": -2.193373918533325, + "logits/rejected": -2.1945388317108154, + "logps/chosen": -4.820630073547363, + "logps/rejected": -116.63697052001953, + "loss": 0.5837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1331978291273117, + "rewards/margins": 0.6351836919784546, + "rewards/rejected": -0.7683815360069275, + "step": 5027 + }, + { + "epoch": 0.29, + "learning_rate": 8.298322984197116e-08, + "logits/chosen": -1.9321123361587524, + "logits/rejected": -1.9446961879730225, + "logps/chosen": -6.271677017211914, + "logps/rejected": -152.22579956054688, + "loss": 0.3928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1127815768122673, + "rewards/margins": 1.4555991888046265, + "rewards/rejected": -1.3428176641464233, + "step": 5028 + }, + { + "epoch": 0.29, + "learning_rate": 8.2976146535477e-08, + "logits/chosen": -2.1368420124053955, + "logits/rejected": -2.1276092529296875, + "logps/chosen": -119.69650268554688, + "logps/rejected": -197.446044921875, + "loss": 0.5459, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4891105592250824, + "rewards/margins": -0.18312153220176697, + "rewards/rejected": 0.6722320914268494, + "step": 5029 + }, + { + "epoch": 0.29, + "learning_rate": 8.296906205750882e-08, + "logits/chosen": -2.142524242401123, + "logits/rejected": -2.143028497695923, + "logps/chosen": -95.58851623535156, + "logps/rejected": -203.61947631835938, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27719879150390625, + "rewards/margins": 1.4363311529159546, + "rewards/rejected": -1.1591323614120483, + "step": 5030 + }, + { + "epoch": 0.29, + "learning_rate": 8.296197640831827e-08, + "logits/chosen": -1.955336332321167, + "logits/rejected": -1.9493377208709717, + "logps/chosen": -0.0015696943737566471, + "logps/rejected": -154.205078125, + "loss": 0.4365, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.770645192475058e-06, + "rewards/margins": 1.6074167490005493, + "rewards/rejected": -1.6074265241622925, + "step": 5031 + }, + { + "epoch": 0.29, + "learning_rate": 8.295488958815708e-08, + "logits/chosen": -2.0290915966033936, + "logits/rejected": -2.010007858276367, + "logps/chosen": -158.02023315429688, + "logps/rejected": -314.18060302734375, + "loss": 0.4559, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.345831274986267, + "rewards/margins": -0.07633066177368164, + "rewards/rejected": 1.4221619367599487, + "step": 5032 + }, + { + "epoch": 0.29, + "learning_rate": 8.294780159727703e-08, + "logits/chosen": -2.0906991958618164, + "logits/rejected": -2.082184076309204, + "logps/chosen": -32.409881591796875, + "logps/rejected": -141.69509887695312, + "loss": 0.3369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.517333984375, + "rewards/margins": 1.3384888172149658, + "rewards/rejected": -0.821154773235321, + "step": 5033 + }, + { + "epoch": 0.29, + "learning_rate": 8.294071243592989e-08, + "logits/chosen": -2.06400728225708, + "logits/rejected": -2.0348801612854004, + "logps/chosen": -198.12789916992188, + "logps/rejected": -343.8865051269531, + "loss": 0.1936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2761154174804688, + "rewards/margins": 1.3264907598495483, + "rewards/rejected": -0.05037536844611168, + "step": 5034 + }, + { + "epoch": 0.29, + "learning_rate": 8.293362210436751e-08, + "logits/chosen": -2.007807493209839, + "logits/rejected": -1.9997859001159668, + "logps/chosen": -33.08441162109375, + "logps/rejected": -115.47573852539062, + "loss": 0.6694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02225494384765625, + "rewards/margins": 0.01817779615521431, + "rewards/rejected": 0.004077148623764515, + "step": 5035 + }, + { + "epoch": 0.29, + "learning_rate": 8.292653060284178e-08, + "logits/chosen": -1.8666688203811646, + "logits/rejected": -1.8064137697219849, + "logps/chosen": -247.35910034179688, + "logps/rejected": -521.8320922851562, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.322540283203125, + "rewards/margins": 2.375598192214966, + "rewards/rejected": -1.0530579090118408, + "step": 5036 + }, + { + "epoch": 0.29, + "learning_rate": 8.291943793160463e-08, + "logits/chosen": -1.9117772579193115, + "logits/rejected": -1.900961995124817, + "logps/chosen": -15.586324691772461, + "logps/rejected": -164.97100830078125, + "loss": 0.5098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.230830579996109, + "rewards/margins": 1.6461256742477417, + "rewards/rejected": -1.8769562244415283, + "step": 5037 + }, + { + "epoch": 0.29, + "learning_rate": 8.291234409090802e-08, + "logits/chosen": -2.096646547317505, + "logits/rejected": -2.0794999599456787, + "logps/chosen": -28.82581329345703, + "logps/rejected": -314.82403564453125, + "loss": 0.2111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6476432681083679, + "rewards/margins": 3.667513370513916, + "rewards/rejected": -3.0198700428009033, + "step": 5038 + }, + { + "epoch": 0.29, + "learning_rate": 8.290524908100392e-08, + "logits/chosen": -2.157456398010254, + "logits/rejected": -2.160543918609619, + "logps/chosen": -51.2044792175293, + "logps/rejected": -200.6053924560547, + "loss": 0.4455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6891590356826782, + "rewards/margins": 0.3863025903701782, + "rewards/rejected": 0.3028564453125, + "step": 5039 + }, + { + "epoch": 0.29, + "learning_rate": 8.289815290214446e-08, + "logits/chosen": -1.9916718006134033, + "logits/rejected": -1.992644190788269, + "logps/chosen": -95.20277404785156, + "logps/rejected": -329.0355224609375, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2319847196340561, + "rewards/margins": 0.6902229189872742, + "rewards/rejected": -0.9222076535224915, + "step": 5040 + }, + { + "epoch": 0.29, + "learning_rate": 8.289105555458167e-08, + "logits/chosen": -2.1234512329101562, + "logits/rejected": -2.085533857345581, + "logps/chosen": -244.427734375, + "logps/rejected": -476.73583984375, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.073382616043091, + "rewards/margins": 2.476959228515625, + "rewards/rejected": -0.40357667207717896, + "step": 5041 + }, + { + "epoch": 0.29, + "learning_rate": 8.288395703856769e-08, + "logits/chosen": -2.099290609359741, + "logits/rejected": -2.084094285964966, + "logps/chosen": -72.46272277832031, + "logps/rejected": -248.7429656982422, + "loss": 0.2046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0503311157226562, + "rewards/margins": 1.876214623451233, + "rewards/rejected": -0.8258835077285767, + "step": 5042 + }, + { + "epoch": 0.29, + "learning_rate": 8.287685735435472e-08, + "logits/chosen": -2.085498094558716, + "logits/rejected": -2.0730273723602295, + "logps/chosen": -20.59421730041504, + "logps/rejected": -188.071044921875, + "loss": 0.3864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40691015124320984, + "rewards/margins": 0.9494638442993164, + "rewards/rejected": -0.542553722858429, + "step": 5043 + }, + { + "epoch": 0.29, + "learning_rate": 8.286975650219494e-08, + "logits/chosen": -2.2799975872039795, + "logits/rejected": -2.276857376098633, + "logps/chosen": -16.766826629638672, + "logps/rejected": -268.7681884765625, + "loss": 0.3812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2561666667461395, + "rewards/margins": 1.3687644004821777, + "rewards/rejected": -1.1125977039337158, + "step": 5044 + }, + { + "epoch": 0.29, + "learning_rate": 8.286265448234064e-08, + "logits/chosen": -2.197737216949463, + "logits/rejected": -2.1939938068389893, + "logps/chosen": -1.3133230209350586, + "logps/rejected": -110.77993774414062, + "loss": 0.4438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018866265192627907, + "rewards/margins": 1.3748493194580078, + "rewards/rejected": -1.355983018875122, + "step": 5045 + }, + { + "epoch": 0.29, + "learning_rate": 8.285555129504411e-08, + "logits/chosen": -1.9770323038101196, + "logits/rejected": -1.9818600416183472, + "logps/chosen": -352.0455322265625, + "logps/rejected": -505.72686767578125, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4380371570587158, + "rewards/margins": 2.4792909622192383, + "rewards/rejected": -1.041253685951233, + "step": 5046 + }, + { + "epoch": 0.29, + "learning_rate": 8.284844694055767e-08, + "logits/chosen": -1.9690773487091064, + "logits/rejected": -1.9902373552322388, + "logps/chosen": -134.6666259765625, + "logps/rejected": -177.97396850585938, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3855438232421875, + "rewards/margins": 0.4801986813545227, + "rewards/rejected": 0.9053451418876648, + "step": 5047 + }, + { + "epoch": 0.29, + "learning_rate": 8.284134141913373e-08, + "logits/chosen": -1.9979835748672485, + "logits/rejected": -2.0018908977508545, + "logps/chosen": -189.8584442138672, + "logps/rejected": -354.19061279296875, + "loss": 0.2534, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.598078966140747, + "rewards/margins": 0.6048813462257385, + "rewards/rejected": 0.9931976199150085, + "step": 5048 + }, + { + "epoch": 0.29, + "learning_rate": 8.283423473102468e-08, + "logits/chosen": -1.8346952199935913, + "logits/rejected": -1.7882031202316284, + "logps/chosen": -185.93838500976562, + "logps/rejected": -330.52362060546875, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1706955432891846, + "rewards/margins": 1.1302536725997925, + "rewards/rejected": 0.04044189676642418, + "step": 5049 + }, + { + "epoch": 0.29, + "learning_rate": 8.282712687648301e-08, + "logits/chosen": -2.0935006141662598, + "logits/rejected": -2.082850217819214, + "logps/chosen": -229.18467712402344, + "logps/rejected": -306.238037109375, + "loss": 0.3518, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.174325704574585, + "rewards/margins": 0.08487868309020996, + "rewards/rejected": 2.089447021484375, + "step": 5050 + }, + { + "epoch": 0.29, + "learning_rate": 8.282001785576123e-08, + "logits/chosen": -2.1125574111938477, + "logits/rejected": -2.106490135192871, + "logps/chosen": -23.60476303100586, + "logps/rejected": -219.2158203125, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28609371185302734, + "rewards/margins": 1.9269903898239136, + "rewards/rejected": -1.6408966779708862, + "step": 5051 + }, + { + "epoch": 0.29, + "learning_rate": 8.281290766911186e-08, + "logits/chosen": -2.03865647315979, + "logits/rejected": -1.9938353300094604, + "logps/chosen": -376.0487976074219, + "logps/rejected": -663.1674194335938, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0412323474884033, + "rewards/margins": 2.453579902648926, + "rewards/rejected": -1.412347435951233, + "step": 5052 + }, + { + "epoch": 0.29, + "learning_rate": 8.280579631678751e-08, + "logits/chosen": -2.1695446968078613, + "logits/rejected": -2.1690468788146973, + "logps/chosen": -149.5341796875, + "logps/rejected": -397.34619140625, + "loss": 0.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0881943702697754, + "rewards/margins": 1.6392136812210083, + "rewards/rejected": 0.4489807188510895, + "step": 5053 + }, + { + "epoch": 0.29, + "learning_rate": 8.27986837990408e-08, + "logits/chosen": -2.0676190853118896, + "logits/rejected": -2.059617280960083, + "logps/chosen": -83.06147766113281, + "logps/rejected": -198.63092041015625, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5141968131065369, + "rewards/margins": 1.2347931861877441, + "rewards/rejected": -0.7205963134765625, + "step": 5054 + }, + { + "epoch": 0.29, + "learning_rate": 8.279157011612441e-08, + "logits/chosen": -2.041945457458496, + "logits/rejected": -2.0405313968658447, + "logps/chosen": -29.68890380859375, + "logps/rejected": -140.82823181152344, + "loss": 0.5187, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.926391456043348e-05, + "rewards/margins": 0.8975990414619446, + "rewards/rejected": -0.8976883292198181, + "step": 5055 + }, + { + "epoch": 0.29, + "learning_rate": 8.278445526829106e-08, + "logits/chosen": -2.068913698196411, + "logits/rejected": -2.0649094581604004, + "logps/chosen": -232.21865844726562, + "logps/rejected": -253.320556640625, + "loss": 0.4883, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0781983137130737, + "rewards/margins": -0.1563568115234375, + "rewards/rejected": 1.2345551252365112, + "step": 5056 + }, + { + "epoch": 0.29, + "learning_rate": 8.27773392557935e-08, + "logits/chosen": -1.953473687171936, + "logits/rejected": -1.9450761079788208, + "logps/chosen": -9.227755546569824, + "logps/rejected": -60.60805130004883, + "loss": 0.6005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08601436764001846, + "rewards/margins": 0.2781229019165039, + "rewards/rejected": -0.19210854172706604, + "step": 5057 + }, + { + "epoch": 0.29, + "learning_rate": 8.277022207888449e-08, + "logits/chosen": -1.9598373174667358, + "logits/rejected": -1.9636280536651611, + "logps/chosen": -71.49431610107422, + "logps/rejected": -211.84072875976562, + "loss": 0.2267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5436409115791321, + "rewards/margins": 2.660999298095703, + "rewards/rejected": -2.117358446121216, + "step": 5058 + }, + { + "epoch": 0.29, + "learning_rate": 8.276310373781689e-08, + "logits/chosen": -1.7998093366622925, + "logits/rejected": -1.8009787797927856, + "logps/chosen": -204.37139892578125, + "logps/rejected": -410.9630126953125, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3775055408477783, + "rewards/margins": 0.37550055980682373, + "rewards/rejected": 1.0020049810409546, + "step": 5059 + }, + { + "epoch": 0.29, + "learning_rate": 8.27559842328436e-08, + "logits/chosen": -2.2378406524658203, + "logits/rejected": -2.239104986190796, + "logps/chosen": -69.27810668945312, + "logps/rejected": -404.914794921875, + "loss": 0.4207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21103821694850922, + "rewards/margins": 4.304760932922363, + "rewards/rejected": -4.515799045562744, + "step": 5060 + }, + { + "epoch": 0.29, + "learning_rate": 8.274886356421751e-08, + "logits/chosen": -2.0737760066986084, + "logits/rejected": -2.0694565773010254, + "logps/chosen": -43.85550308227539, + "logps/rejected": -289.8017578125, + "loss": 0.6633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8330391049385071, + "rewards/margins": 2.008760929107666, + "rewards/rejected": -2.8417999744415283, + "step": 5061 + }, + { + "epoch": 0.29, + "learning_rate": 8.274174173219161e-08, + "logits/chosen": -2.059081554412842, + "logits/rejected": -2.0350050926208496, + "logps/chosen": -147.13812255859375, + "logps/rejected": -278.8432312011719, + "loss": 0.4215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7989608645439148, + "rewards/margins": 0.3121078312397003, + "rewards/rejected": 0.4868530333042145, + "step": 5062 + }, + { + "epoch": 0.29, + "learning_rate": 8.273461873701889e-08, + "logits/chosen": -2.131605863571167, + "logits/rejected": -2.127406597137451, + "logps/chosen": -83.75047302246094, + "logps/rejected": -331.28515625, + "loss": 0.4148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3040573298931122, + "rewards/margins": 1.4267319440841675, + "rewards/rejected": -1.122674584388733, + "step": 5063 + }, + { + "epoch": 0.29, + "learning_rate": 8.272749457895238e-08, + "logits/chosen": -2.0624985694885254, + "logits/rejected": -2.055511713027954, + "logps/chosen": -13.108142852783203, + "logps/rejected": -101.31224060058594, + "loss": 0.6935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07368021458387375, + "rewards/margins": 0.005590721964836121, + "rewards/rejected": -0.07927093654870987, + "step": 5064 + }, + { + "epoch": 0.29, + "learning_rate": 8.272036925824518e-08, + "logits/chosen": -2.1126515865325928, + "logits/rejected": -2.064944267272949, + "logps/chosen": -220.7814178466797, + "logps/rejected": -298.39190673828125, + "loss": 0.2633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8280960321426392, + "rewards/margins": 1.5561721324920654, + "rewards/rejected": -0.728076159954071, + "step": 5065 + }, + { + "epoch": 0.29, + "learning_rate": 8.271324277515042e-08, + "logits/chosen": -2.213644504547119, + "logits/rejected": -2.2163915634155273, + "logps/chosen": -132.11465454101562, + "logps/rejected": -474.312744140625, + "loss": 0.0722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.318945288658142, + "rewards/margins": 4.423169136047363, + "rewards/rejected": -3.1042237281799316, + "step": 5066 + }, + { + "epoch": 0.29, + "learning_rate": 8.270611512992126e-08, + "logits/chosen": -1.8029698133468628, + "logits/rejected": -1.8044989109039307, + "logps/chosen": -75.31835174560547, + "logps/rejected": -221.558837890625, + "loss": 0.1774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7171943783760071, + "rewards/margins": 2.7186989784240723, + "rewards/rejected": -2.00150465965271, + "step": 5067 + }, + { + "epoch": 0.29, + "learning_rate": 8.269898632281087e-08, + "logits/chosen": -1.9023065567016602, + "logits/rejected": -1.89669930934906, + "logps/chosen": -47.877540588378906, + "logps/rejected": -150.55429077148438, + "loss": 0.4695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3571060299873352, + "rewards/margins": 0.7171066403388977, + "rewards/rejected": -0.3600006103515625, + "step": 5068 + }, + { + "epoch": 0.29, + "learning_rate": 8.269185635407256e-08, + "logits/chosen": -1.955034613609314, + "logits/rejected": -1.8966132402420044, + "logps/chosen": -199.4203643798828, + "logps/rejected": -602.520263671875, + "loss": 0.0868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4179047346115112, + "rewards/margins": 4.756747722625732, + "rewards/rejected": -3.3388428688049316, + "step": 5069 + }, + { + "epoch": 0.3, + "learning_rate": 8.268472522395961e-08, + "logits/chosen": -2.121539354324341, + "logits/rejected": -2.0856173038482666, + "logps/chosen": -164.9559326171875, + "logps/rejected": -342.2147521972656, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2049882411956787, + "rewards/margins": 3.5112109184265137, + "rewards/rejected": -1.3062225580215454, + "step": 5070 + }, + { + "epoch": 0.3, + "learning_rate": 8.267759293272533e-08, + "logits/chosen": -2.0548338890075684, + "logits/rejected": -2.049372911453247, + "logps/chosen": -56.673439025878906, + "logps/rejected": -426.15386962890625, + "loss": 0.1726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7402427792549133, + "rewards/margins": 6.54343843460083, + "rewards/rejected": -5.803195476531982, + "step": 5071 + }, + { + "epoch": 0.3, + "learning_rate": 8.267045948062313e-08, + "logits/chosen": -2.1059443950653076, + "logits/rejected": -2.109546422958374, + "logps/chosen": -172.9302978515625, + "logps/rejected": -329.3272705078125, + "loss": 0.2105, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4478775262832642, + "rewards/margins": 1.1479263305664062, + "rewards/rejected": 0.2999511659145355, + "step": 5072 + }, + { + "epoch": 0.3, + "learning_rate": 8.266332486790637e-08, + "logits/chosen": -2.0401551723480225, + "logits/rejected": -2.034193754196167, + "logps/chosen": -103.4422836303711, + "logps/rejected": -250.81600952148438, + "loss": 0.2632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7569748163223267, + "rewards/margins": 0.9947464466094971, + "rewards/rejected": -0.23777161538600922, + "step": 5073 + }, + { + "epoch": 0.3, + "learning_rate": 8.265618909482855e-08, + "logits/chosen": -1.9708999395370483, + "logits/rejected": -1.9663985967636108, + "logps/chosen": -12.819664001464844, + "logps/rejected": -170.7169952392578, + "loss": 0.5682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09514694660902023, + "rewards/margins": 0.30409926176071167, + "rewards/rejected": -0.3992462158203125, + "step": 5074 + }, + { + "epoch": 0.3, + "learning_rate": 8.264905216164315e-08, + "logits/chosen": -2.098381280899048, + "logits/rejected": -2.095414161682129, + "logps/chosen": -115.0927505493164, + "logps/rejected": -176.7171630859375, + "loss": 0.3971, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3600578308105469, + "rewards/margins": 0.08923256397247314, + "rewards/rejected": 1.2708252668380737, + "step": 5075 + }, + { + "epoch": 0.3, + "learning_rate": 8.264191406860373e-08, + "logits/chosen": -2.1211206912994385, + "logits/rejected": -2.1150119304656982, + "logps/chosen": -28.76142120361328, + "logps/rejected": -242.30401611328125, + "loss": 0.3846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15122948586940765, + "rewards/margins": 1.6657711267471313, + "rewards/rejected": -1.5145416259765625, + "step": 5076 + }, + { + "epoch": 0.3, + "learning_rate": 8.263477481596383e-08, + "logits/chosen": -2.0683107376098633, + "logits/rejected": -2.048121929168701, + "logps/chosen": -308.9278564453125, + "logps/rejected": -538.0955200195312, + "loss": 0.0427, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.169079542160034, + "rewards/margins": 4.22576904296875, + "rewards/rejected": -2.056689500808716, + "step": 5077 + }, + { + "epoch": 0.3, + "learning_rate": 8.26276344039771e-08, + "logits/chosen": -1.7445979118347168, + "logits/rejected": -1.733338475227356, + "logps/chosen": -185.8271942138672, + "logps/rejected": -306.09039306640625, + "loss": 0.2465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8339905142784119, + "rewards/margins": 1.1026458740234375, + "rewards/rejected": -0.268655389547348, + "step": 5078 + }, + { + "epoch": 0.3, + "learning_rate": 8.26204928328972e-08, + "logits/chosen": -2.0229737758636475, + "logits/rejected": -1.9991382360458374, + "logps/chosen": -264.0308837890625, + "logps/rejected": -418.6468505859375, + "loss": 0.1536, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7692047357559204, + "rewards/margins": 1.2392364740371704, + "rewards/rejected": 0.52996826171875, + "step": 5079 + }, + { + "epoch": 0.3, + "learning_rate": 8.261335010297783e-08, + "logits/chosen": -2.1087985038757324, + "logits/rejected": -2.1042890548706055, + "logps/chosen": -27.556947708129883, + "logps/rejected": -128.49551391601562, + "loss": 0.5373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15429936349391937, + "rewards/margins": 0.5384897589683533, + "rewards/rejected": -0.3841903805732727, + "step": 5080 + }, + { + "epoch": 0.3, + "learning_rate": 8.260620621447274e-08, + "logits/chosen": -1.9912296533584595, + "logits/rejected": -1.9842740297317505, + "logps/chosen": -165.5745391845703, + "logps/rejected": -231.00213623046875, + "loss": 0.4079, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1460068225860596, + "rewards/margins": -0.00410616397857666, + "rewards/rejected": 1.1501129865646362, + "step": 5081 + }, + { + "epoch": 0.3, + "learning_rate": 8.259906116763572e-08, + "logits/chosen": -2.0146920680999756, + "logits/rejected": -1.9986603260040283, + "logps/chosen": -150.60452270507812, + "logps/rejected": -227.8897247314453, + "loss": 0.4221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0927581787109375, + "rewards/margins": 0.15140533447265625, + "rewards/rejected": 0.9413528442382812, + "step": 5082 + }, + { + "epoch": 0.3, + "learning_rate": 8.259191496272057e-08, + "logits/chosen": -1.9757009744644165, + "logits/rejected": -1.968639612197876, + "logps/chosen": -147.1043243408203, + "logps/rejected": -221.3624267578125, + "loss": 0.2355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7537583112716675, + "rewards/margins": 0.7773041129112244, + "rewards/rejected": 0.9764541983604431, + "step": 5083 + }, + { + "epoch": 0.3, + "learning_rate": 8.258476759998119e-08, + "logits/chosen": -1.9659197330474854, + "logits/rejected": -1.965378761291504, + "logps/chosen": -114.92786407470703, + "logps/rejected": -344.5273132324219, + "loss": 0.4818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4403366148471832, + "rewards/margins": 3.176621913909912, + "rewards/rejected": -3.6169586181640625, + "step": 5084 + }, + { + "epoch": 0.3, + "learning_rate": 8.257761907967145e-08, + "logits/chosen": -2.136096954345703, + "logits/rejected": -2.1220438480377197, + "logps/chosen": -3.504717460600659e-05, + "logps/rejected": -130.2544708251953, + "loss": 0.3751, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0530395040150324e-07, + "rewards/margins": 2.8348963260650635, + "rewards/rejected": -2.8348968029022217, + "step": 5085 + }, + { + "epoch": 0.3, + "learning_rate": 8.257046940204535e-08, + "logits/chosen": -2.0541722774505615, + "logits/rejected": -2.0480029582977295, + "logps/chosen": -7.655001640319824, + "logps/rejected": -182.26991271972656, + "loss": 0.4806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12432088702917099, + "rewards/margins": 0.848818838596344, + "rewards/rejected": -0.7244979739189148, + "step": 5086 + }, + { + "epoch": 0.3, + "learning_rate": 8.256331856735685e-08, + "logits/chosen": -1.8943637609481812, + "logits/rejected": -1.9130542278289795, + "logps/chosen": -172.37216186523438, + "logps/rejected": -228.806396484375, + "loss": 0.4783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5770050287246704, + "rewards/margins": 0.3779083490371704, + "rewards/rejected": 0.1990966796875, + "step": 5087 + }, + { + "epoch": 0.3, + "learning_rate": 8.255616657586e-08, + "logits/chosen": -2.0853750705718994, + "logits/rejected": -2.0416953563690186, + "logps/chosen": -196.10037231445312, + "logps/rejected": -400.2284240722656, + "loss": 0.0496, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0265777111053467, + "rewards/margins": 3.138195753097534, + "rewards/rejected": -1.1116180419921875, + "step": 5088 + }, + { + "epoch": 0.3, + "learning_rate": 8.254901342780884e-08, + "logits/chosen": -1.957102656364441, + "logits/rejected": -1.9451050758361816, + "logps/chosen": -238.51406860351562, + "logps/rejected": -300.91754150390625, + "loss": 0.1393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0604705810546875, + "rewards/margins": 1.4843230247497559, + "rewards/rejected": 0.5761474967002869, + "step": 5089 + }, + { + "epoch": 0.3, + "learning_rate": 8.254185912345753e-08, + "logits/chosen": -2.027700185775757, + "logits/rejected": -2.008727788925171, + "logps/chosen": -4.525669097900391, + "logps/rejected": -336.2313232421875, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10808103531599045, + "rewards/margins": 4.559253215789795, + "rewards/rejected": -4.667334079742432, + "step": 5090 + }, + { + "epoch": 0.3, + "learning_rate": 8.25347036630602e-08, + "logits/chosen": -1.9085538387298584, + "logits/rejected": -1.9067068099975586, + "logps/chosen": -0.008287165313959122, + "logps/rejected": -79.29969024658203, + "loss": 0.4122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003757989907171577, + "rewards/margins": 1.8383220434188843, + "rewards/rejected": -1.8386977910995483, + "step": 5091 + }, + { + "epoch": 0.3, + "learning_rate": 8.252754704687105e-08, + "logits/chosen": -2.16658616065979, + "logits/rejected": -2.1665449142456055, + "logps/chosen": -6.210726132849231e-05, + "logps/rejected": -106.86184692382812, + "loss": 0.7103, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.5403479614615208e-06, + "rewards/margins": -0.0674738809466362, + "rewards/rejected": 0.06747741997241974, + "step": 5092 + }, + { + "epoch": 0.3, + "learning_rate": 8.252038927514431e-08, + "logits/chosen": -2.1641550064086914, + "logits/rejected": -2.1641416549682617, + "logps/chosen": -31.25823974609375, + "logps/rejected": -127.17558288574219, + "loss": 0.5411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024107743054628372, + "rewards/margins": 0.7169361114501953, + "rewards/rejected": -0.7410438656806946, + "step": 5093 + }, + { + "epoch": 0.3, + "learning_rate": 8.251323034813429e-08, + "logits/chosen": -1.6638402938842773, + "logits/rejected": -1.6618030071258545, + "logps/chosen": -28.44168472290039, + "logps/rejected": -196.39947509765625, + "loss": 0.5314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22861213982105255, + "rewards/margins": 1.471467137336731, + "rewards/rejected": -1.700079321861267, + "step": 5094 + }, + { + "epoch": 0.3, + "learning_rate": 8.250607026609527e-08, + "logits/chosen": -2.0505924224853516, + "logits/rejected": -2.0118913650512695, + "logps/chosen": -275.3527526855469, + "logps/rejected": -451.6270751953125, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.09421706199646, + "rewards/margins": 4.631991863250732, + "rewards/rejected": -1.537774682044983, + "step": 5095 + }, + { + "epoch": 0.3, + "learning_rate": 8.249890902928165e-08, + "logits/chosen": -2.048818588256836, + "logits/rejected": -2.049755573272705, + "logps/chosen": -20.905380249023438, + "logps/rejected": -113.7464599609375, + "loss": 0.4619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2981613278388977, + "rewards/margins": 0.5956131219863892, + "rewards/rejected": -0.29745179414749146, + "step": 5096 + }, + { + "epoch": 0.3, + "learning_rate": 8.249174663794779e-08, + "logits/chosen": -1.8578650951385498, + "logits/rejected": -1.735996961593628, + "logps/chosen": -215.3555450439453, + "logps/rejected": -493.45892333984375, + "loss": 0.3951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8026565909385681, + "rewards/margins": 0.5371231436729431, + "rewards/rejected": 0.265533447265625, + "step": 5097 + }, + { + "epoch": 0.3, + "learning_rate": 8.248458309234816e-08, + "logits/chosen": -2.1525626182556152, + "logits/rejected": -2.1490073204040527, + "logps/chosen": -24.557926177978516, + "logps/rejected": -187.2808837890625, + "loss": 0.5337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14999103546142578, + "rewards/margins": 0.6078840494155884, + "rewards/rejected": -0.7578750848770142, + "step": 5098 + }, + { + "epoch": 0.3, + "learning_rate": 8.247741839273725e-08, + "logits/chosen": -2.1401588916778564, + "logits/rejected": -2.0954203605651855, + "logps/chosen": -167.82972717285156, + "logps/rejected": -424.1910400390625, + "loss": 0.1176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0490295886993408, + "rewards/margins": 2.3592042922973633, + "rewards/rejected": -1.310174584388733, + "step": 5099 + }, + { + "epoch": 0.3, + "learning_rate": 8.247025253936956e-08, + "logits/chosen": -1.865012526512146, + "logits/rejected": -1.8718335628509521, + "logps/chosen": -69.74638366699219, + "logps/rejected": -248.35629272460938, + "loss": 1.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2182148694992065, + "rewards/margins": 0.29103803634643555, + "rewards/rejected": -1.509252905845642, + "step": 5100 + }, + { + "epoch": 0.3, + "learning_rate": 8.246308553249968e-08, + "logits/chosen": -2.1439504623413086, + "logits/rejected": -2.1157588958740234, + "logps/chosen": -44.62260818481445, + "logps/rejected": -284.2558288574219, + "loss": 0.2224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4261985719203949, + "rewards/margins": 3.3951895236968994, + "rewards/rejected": -2.9689910411834717, + "step": 5101 + }, + { + "epoch": 0.3, + "learning_rate": 8.24559173723822e-08, + "logits/chosen": -1.7651512622833252, + "logits/rejected": -1.7644699811935425, + "logps/chosen": -31.251089096069336, + "logps/rejected": -214.83363342285156, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31276342272758484, + "rewards/margins": 1.3126871585845947, + "rewards/rejected": -0.9999237060546875, + "step": 5102 + }, + { + "epoch": 0.3, + "learning_rate": 8.244874805927178e-08, + "logits/chosen": -1.8326843976974487, + "logits/rejected": -1.8352267742156982, + "logps/chosen": -50.97726058959961, + "logps/rejected": -132.14190673828125, + "loss": 0.6306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42738229036331177, + "rewards/margins": 0.28207361698150635, + "rewards/rejected": -0.7094559073448181, + "step": 5103 + }, + { + "epoch": 0.3, + "learning_rate": 8.244157759342312e-08, + "logits/chosen": -1.890738844871521, + "logits/rejected": -1.8834078311920166, + "logps/chosen": -92.65091705322266, + "logps/rejected": -247.93215942382812, + "loss": 0.5416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11723404377698898, + "rewards/margins": 0.4954261779785156, + "rewards/rejected": -0.6126602292060852, + "step": 5104 + }, + { + "epoch": 0.3, + "learning_rate": 8.24344059750909e-08, + "logits/chosen": -1.9749901294708252, + "logits/rejected": -1.9739229679107666, + "logps/chosen": -251.1073760986328, + "logps/rejected": -522.465087890625, + "loss": 0.0399, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1677138805389404, + "rewards/margins": 3.602952480316162, + "rewards/rejected": -1.4352387189865112, + "step": 5105 + }, + { + "epoch": 0.3, + "learning_rate": 8.242723320452996e-08, + "logits/chosen": -2.0177500247955322, + "logits/rejected": -2.016322612762451, + "logps/chosen": -163.84136962890625, + "logps/rejected": -449.92333984375, + "loss": 0.0952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7648102045059204, + "rewards/margins": 2.2452971935272217, + "rewards/rejected": -0.48048707842826843, + "step": 5106 + }, + { + "epoch": 0.3, + "learning_rate": 8.242005928199506e-08, + "logits/chosen": -1.9397022724151611, + "logits/rejected": -1.8645528554916382, + "logps/chosen": -171.12379455566406, + "logps/rejected": -334.11383056640625, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1954482793807983, + "rewards/margins": 1.16883385181427, + "rewards/rejected": 0.02661438100039959, + "step": 5107 + }, + { + "epoch": 0.3, + "learning_rate": 8.241288420774108e-08, + "logits/chosen": -1.8603408336639404, + "logits/rejected": -1.8353780508041382, + "logps/chosen": -147.52825927734375, + "logps/rejected": -495.9339294433594, + "loss": 0.1148, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3291046619415283, + "rewards/margins": 3.033477783203125, + "rewards/rejected": -1.7043732404708862, + "step": 5108 + }, + { + "epoch": 0.3, + "learning_rate": 8.24057079820229e-08, + "logits/chosen": -1.96665620803833, + "logits/rejected": -2.0371124744415283, + "logps/chosen": -161.18817138671875, + "logps/rejected": -180.37496948242188, + "loss": 0.1684, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4151443243026733, + "rewards/margins": 1.4301177263259888, + "rewards/rejected": -0.014973449520766735, + "step": 5109 + }, + { + "epoch": 0.3, + "learning_rate": 8.239853060509545e-08, + "logits/chosen": -2.0985872745513916, + "logits/rejected": -2.09728741645813, + "logps/chosen": -46.35993576049805, + "logps/rejected": -145.96734619140625, + "loss": 0.4236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17642860114574432, + "rewards/margins": 1.3378665447235107, + "rewards/rejected": -1.16143798828125, + "step": 5110 + }, + { + "epoch": 0.3, + "learning_rate": 8.239135207721373e-08, + "logits/chosen": -2.0052011013031006, + "logits/rejected": -1.994611382484436, + "logps/chosen": -6.735172064509243e-05, + "logps/rejected": -220.41551208496094, + "loss": 0.3754, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410416352129687e-07, + "rewards/margins": 2.799384593963623, + "rewards/rejected": -2.7993850708007812, + "step": 5111 + }, + { + "epoch": 0.3, + "learning_rate": 8.238417239863273e-08, + "logits/chosen": -2.0313639640808105, + "logits/rejected": -2.0385708808898926, + "logps/chosen": -53.40983200073242, + "logps/rejected": -148.18075561523438, + "loss": 0.7237, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2195533812046051, + "rewards/margins": -0.14009934663772583, + "rewards/rejected": -0.07945404201745987, + "step": 5112 + }, + { + "epoch": 0.3, + "learning_rate": 8.237699156960753e-08, + "logits/chosen": -2.029838800430298, + "logits/rejected": -2.029911756515503, + "logps/chosen": -35.14899444580078, + "logps/rejected": -174.09454345703125, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13614463806152344, + "rewards/margins": 2.0848188400268555, + "rewards/rejected": -1.9486740827560425, + "step": 5113 + }, + { + "epoch": 0.3, + "learning_rate": 8.236980959039321e-08, + "logits/chosen": -1.9406187534332275, + "logits/rejected": -1.9419101476669312, + "logps/chosen": -0.04516249895095825, + "logps/rejected": -172.85301208496094, + "loss": 0.3809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003374669700860977, + "rewards/margins": 2.93424916267395, + "rewards/rejected": -2.9376237392425537, + "step": 5114 + }, + { + "epoch": 0.3, + "learning_rate": 8.236262646124492e-08, + "logits/chosen": -1.9759514331817627, + "logits/rejected": -1.9559835195541382, + "logps/chosen": -281.28997802734375, + "logps/rejected": -508.16754150390625, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8596314191818237, + "rewards/margins": 3.9092469215393066, + "rewards/rejected": -2.0496156215667725, + "step": 5115 + }, + { + "epoch": 0.3, + "learning_rate": 8.235544218241784e-08, + "logits/chosen": -1.9879660606384277, + "logits/rejected": -1.9836914539337158, + "logps/chosen": -39.21797561645508, + "logps/rejected": -101.32118225097656, + "loss": 0.5042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1774005889892578, + "rewards/margins": 0.7268726229667664, + "rewards/rejected": -0.5494720339775085, + "step": 5116 + }, + { + "epoch": 0.3, + "learning_rate": 8.234825675416717e-08, + "logits/chosen": -1.860880732536316, + "logits/rejected": -1.8538048267364502, + "logps/chosen": -246.61541748046875, + "logps/rejected": -344.3572998046875, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9329131841659546, + "rewards/margins": 0.09332573413848877, + "rewards/rejected": 1.8395874500274658, + "step": 5117 + }, + { + "epoch": 0.3, + "learning_rate": 8.234107017674818e-08, + "logits/chosen": -1.9894193410873413, + "logits/rejected": -1.990890622138977, + "logps/chosen": -278.75799560546875, + "logps/rejected": -448.1689453125, + "loss": 0.082, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1879608631134033, + "rewards/margins": 2.03769850730896, + "rewards/rejected": 0.15026246011257172, + "step": 5118 + }, + { + "epoch": 0.3, + "learning_rate": 8.23338824504162e-08, + "logits/chosen": -1.9796631336212158, + "logits/rejected": -1.980984091758728, + "logps/chosen": -9.967914581298828, + "logps/rejected": -79.93052673339844, + "loss": 0.6314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05823402479290962, + "rewards/margins": 0.10750770568847656, + "rewards/rejected": -0.04927368089556694, + "step": 5119 + }, + { + "epoch": 0.3, + "learning_rate": 8.232669357542654e-08, + "logits/chosen": -1.9008606672286987, + "logits/rejected": -1.8609851598739624, + "logps/chosen": -229.66078186035156, + "logps/rejected": -364.1332702636719, + "loss": 0.1387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8917160034179688, + "rewards/margins": 1.7806411981582642, + "rewards/rejected": 0.11107482761144638, + "step": 5120 + }, + { + "epoch": 0.3, + "learning_rate": 8.231950355203461e-08, + "logits/chosen": -1.9614243507385254, + "logits/rejected": -1.9705324172973633, + "logps/chosen": -191.91287231445312, + "logps/rejected": -433.8999328613281, + "loss": 0.1126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4182846546173096, + "rewards/margins": 2.1846694946289062, + "rewards/rejected": -0.7663848996162415, + "step": 5121 + }, + { + "epoch": 0.3, + "learning_rate": 8.231231238049581e-08, + "logits/chosen": -1.9633980989456177, + "logits/rejected": -1.965438723564148, + "logps/chosen": -8.803030014038086, + "logps/rejected": -120.99710083007812, + "loss": 0.4005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03002042882144451, + "rewards/margins": 2.016136407852173, + "rewards/rejected": -1.9861160516738892, + "step": 5122 + }, + { + "epoch": 0.3, + "learning_rate": 8.230512006106563e-08, + "logits/chosen": -2.1431498527526855, + "logits/rejected": -2.1499133110046387, + "logps/chosen": -145.8135223388672, + "logps/rejected": -241.25933837890625, + "loss": 0.1897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5659927129745483, + "rewards/margins": 1.0710113048553467, + "rewards/rejected": 0.4949813783168793, + "step": 5123 + }, + { + "epoch": 0.3, + "learning_rate": 8.229792659399956e-08, + "logits/chosen": -1.9429922103881836, + "logits/rejected": -1.889972448348999, + "logps/chosen": -220.68038940429688, + "logps/rejected": -527.0233764648438, + "loss": 0.132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8876084089279175, + "rewards/margins": 1.4788193702697754, + "rewards/rejected": 0.4087890684604645, + "step": 5124 + }, + { + "epoch": 0.3, + "learning_rate": 8.229073197955314e-08, + "logits/chosen": -2.077881336212158, + "logits/rejected": -2.074876308441162, + "logps/chosen": -77.33626556396484, + "logps/rejected": -261.3127136230469, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06722640991210938, + "rewards/margins": 2.147054433822632, + "rewards/rejected": -2.0798280239105225, + "step": 5125 + }, + { + "epoch": 0.3, + "learning_rate": 8.228353621798199e-08, + "logits/chosen": -2.1332740783691406, + "logits/rejected": -2.112416982650757, + "logps/chosen": -28.766437530517578, + "logps/rejected": -288.5047607421875, + "loss": 0.4559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.198262020945549, + "rewards/margins": 2.670445442199707, + "rewards/rejected": -2.8687074184417725, + "step": 5126 + }, + { + "epoch": 0.3, + "learning_rate": 8.227633930954169e-08, + "logits/chosen": -1.9782392978668213, + "logits/rejected": -1.974887490272522, + "logps/chosen": -191.92160034179688, + "logps/rejected": -339.6936950683594, + "loss": 0.1834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5731842517852783, + "rewards/margins": 1.1260437965393066, + "rewards/rejected": 0.44714051485061646, + "step": 5127 + }, + { + "epoch": 0.3, + "learning_rate": 8.226914125448797e-08, + "logits/chosen": -1.9773534536361694, + "logits/rejected": -1.9517847299575806, + "logps/chosen": -187.21778869628906, + "logps/rejected": -425.59063720703125, + "loss": 0.1523, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9162124395370483, + "rewards/margins": 1.3910536766052246, + "rewards/rejected": 0.525158703327179, + "step": 5128 + }, + { + "epoch": 0.3, + "learning_rate": 8.226194205307649e-08, + "logits/chosen": -2.047145366668701, + "logits/rejected": -2.040572166442871, + "logps/chosen": -16.32889747619629, + "logps/rejected": -173.5059051513672, + "loss": 0.5311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19281673431396484, + "rewards/margins": 0.4613927900791168, + "rewards/rejected": -0.268576055765152, + "step": 5129 + }, + { + "epoch": 0.3, + "learning_rate": 8.225474170556302e-08, + "logits/chosen": -2.027212142944336, + "logits/rejected": -2.0330941677093506, + "logps/chosen": -0.0001547260908409953, + "logps/rejected": -263.7638854980469, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3117387709371542e-07, + "rewards/margins": 3.0292813777923584, + "rewards/rejected": -3.0292816162109375, + "step": 5130 + }, + { + "epoch": 0.3, + "learning_rate": 8.224754021220335e-08, + "logits/chosen": -1.9323253631591797, + "logits/rejected": -1.9031422138214111, + "logps/chosen": -226.39837646484375, + "logps/rejected": -379.5097351074219, + "loss": 0.1913, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.972259521484375, + "rewards/margins": 1.1789886951446533, + "rewards/rejected": 0.7932708859443665, + "step": 5131 + }, + { + "epoch": 0.3, + "learning_rate": 8.224033757325331e-08, + "logits/chosen": -2.0975661277770996, + "logits/rejected": -2.091163158416748, + "logps/chosen": -0.005743768066167831, + "logps/rejected": -80.63495635986328, + "loss": 0.5151, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.867201980436221e-05, + "rewards/margins": 0.8553422093391418, + "rewards/rejected": -0.8554008603096008, + "step": 5132 + }, + { + "epoch": 0.3, + "learning_rate": 8.223313378896877e-08, + "logits/chosen": -2.1373038291931152, + "logits/rejected": -2.141408920288086, + "logps/chosen": -13.91541862487793, + "logps/rejected": -270.48162841796875, + "loss": 0.5517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16855202615261078, + "rewards/margins": 0.961391806602478, + "rewards/rejected": -1.12994384765625, + "step": 5133 + }, + { + "epoch": 0.3, + "learning_rate": 8.222592885960566e-08, + "logits/chosen": -2.115338087081909, + "logits/rejected": -2.1068902015686035, + "logps/chosen": -83.00643157958984, + "logps/rejected": -352.55145263671875, + "loss": 0.2367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6493026614189148, + "rewards/margins": 2.406785488128662, + "rewards/rejected": -1.757482886314392, + "step": 5134 + }, + { + "epoch": 0.3, + "learning_rate": 8.22187227854199e-08, + "logits/chosen": -1.9074814319610596, + "logits/rejected": -1.9400426149368286, + "logps/chosen": -193.073486328125, + "logps/rejected": -296.96929931640625, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7763641476631165, + "rewards/margins": 0.398193359375, + "rewards/rejected": 0.37817078828811646, + "step": 5135 + }, + { + "epoch": 0.3, + "learning_rate": 8.221151556666754e-08, + "logits/chosen": -1.884895920753479, + "logits/rejected": -1.8811832666397095, + "logps/chosen": -35.90837478637695, + "logps/rejected": -197.11041259765625, + "loss": 0.5477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5961796045303345, + "rewards/margins": 2.874021530151367, + "rewards/rejected": -3.470201253890991, + "step": 5136 + }, + { + "epoch": 0.3, + "learning_rate": 8.220430720360457e-08, + "logits/chosen": -1.8946453332901, + "logits/rejected": -1.887251615524292, + "logps/chosen": -173.4774169921875, + "logps/rejected": -245.0112762451172, + "loss": 0.4441, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9736480712890625, + "rewards/margins": -0.03832709789276123, + "rewards/rejected": 1.0119751691818237, + "step": 5137 + }, + { + "epoch": 0.3, + "learning_rate": 8.219709769648707e-08, + "logits/chosen": -1.6437461376190186, + "logits/rejected": -1.6367595195770264, + "logps/chosen": -222.80276489257812, + "logps/rejected": -465.82843017578125, + "loss": 0.119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9040374755859375, + "rewards/margins": 2.687936305999756, + "rewards/rejected": -1.783898949623108, + "step": 5138 + }, + { + "epoch": 0.3, + "learning_rate": 8.218988704557118e-08, + "logits/chosen": -1.7425986528396606, + "logits/rejected": -1.7262589931488037, + "logps/chosen": -363.42706298828125, + "logps/rejected": -519.762939453125, + "loss": 0.1144, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5804932117462158, + "rewards/margins": 1.7590699195861816, + "rewards/rejected": -0.17857666313648224, + "step": 5139 + }, + { + "epoch": 0.3, + "learning_rate": 8.218267525111303e-08, + "logits/chosen": -1.8215268850326538, + "logits/rejected": -1.8165711164474487, + "logps/chosen": -75.57538604736328, + "logps/rejected": -369.85858154296875, + "loss": 0.3444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.269949346780777, + "rewards/margins": 1.7672455310821533, + "rewards/rejected": -1.4972962141036987, + "step": 5140 + }, + { + "epoch": 0.3, + "learning_rate": 8.217546231336884e-08, + "logits/chosen": -2.0159950256347656, + "logits/rejected": -1.9849132299423218, + "logps/chosen": -279.796630859375, + "logps/rejected": -272.37811279296875, + "loss": 0.1369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.930364966392517, + "rewards/margins": 1.738122582435608, + "rewards/rejected": 0.19224242866039276, + "step": 5141 + }, + { + "epoch": 0.3, + "learning_rate": 8.216824823259484e-08, + "logits/chosen": -1.9034894704818726, + "logits/rejected": -1.8874880075454712, + "logps/chosen": -211.87881469726562, + "logps/rejected": -357.3154602050781, + "loss": 0.1108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.867767333984375, + "rewards/margins": 1.9221649169921875, + "rewards/rejected": -0.0543975830078125, + "step": 5142 + }, + { + "epoch": 0.3, + "learning_rate": 8.216103300904731e-08, + "logits/chosen": -2.0261449813842773, + "logits/rejected": -2.092318058013916, + "logps/chosen": -224.290771484375, + "logps/rejected": -325.7184143066406, + "loss": 0.0408, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7986373901367188, + "rewards/margins": 3.355107307434082, + "rewards/rejected": -1.5564697980880737, + "step": 5143 + }, + { + "epoch": 0.3, + "learning_rate": 8.215381664298258e-08, + "logits/chosen": -2.0937294960021973, + "logits/rejected": -2.080942153930664, + "logps/chosen": -5.447091579437256, + "logps/rejected": -242.4841766357422, + "loss": 0.4076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06024813652038574, + "rewards/margins": 1.7646945714950562, + "rewards/rejected": -1.7044464349746704, + "step": 5144 + }, + { + "epoch": 0.3, + "learning_rate": 8.2146599134657e-08, + "logits/chosen": -2.0375027656555176, + "logits/rejected": -1.9938123226165771, + "logps/chosen": -152.9459991455078, + "logps/rejected": -335.9376220703125, + "loss": 0.7132, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8938843011856079, + "rewards/margins": -0.7284790277481079, + "rewards/rejected": 1.6223633289337158, + "step": 5145 + }, + { + "epoch": 0.3, + "learning_rate": 8.213938048432696e-08, + "logits/chosen": -1.8804264068603516, + "logits/rejected": -1.8835315704345703, + "logps/chosen": -51.90037536621094, + "logps/rejected": -183.57080078125, + "loss": 0.6455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.142231747508049, + "rewards/margins": 0.05709075927734375, + "rewards/rejected": -0.19932250678539276, + "step": 5146 + }, + { + "epoch": 0.3, + "learning_rate": 8.213216069224893e-08, + "logits/chosen": -2.0939340591430664, + "logits/rejected": -2.076923131942749, + "logps/chosen": -202.41311645507812, + "logps/rejected": -289.04559326171875, + "loss": 0.2732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0995651483535767, + "rewards/margins": 0.8667221069335938, + "rewards/rejected": 0.23284302651882172, + "step": 5147 + }, + { + "epoch": 0.3, + "learning_rate": 8.212493975867937e-08, + "logits/chosen": -2.001981735229492, + "logits/rejected": -2.010328769683838, + "logps/chosen": -185.51077270507812, + "logps/rejected": -267.1427307128906, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7291534543037415, + "rewards/margins": 1.6404082775115967, + "rewards/rejected": -0.9112548828125, + "step": 5148 + }, + { + "epoch": 0.3, + "learning_rate": 8.211771768387481e-08, + "logits/chosen": -1.9894640445709229, + "logits/rejected": -1.9836560487747192, + "logps/chosen": -14.535921096801758, + "logps/rejected": -89.72486114501953, + "loss": 0.5219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06255187839269638, + "rewards/margins": 0.7837364077568054, + "rewards/rejected": -0.7211845517158508, + "step": 5149 + }, + { + "epoch": 0.3, + "learning_rate": 8.211049446809181e-08, + "logits/chosen": -1.874228596687317, + "logits/rejected": -1.8369346857070923, + "logps/chosen": -410.7939147949219, + "logps/rejected": -619.7169189453125, + "loss": 0.1532, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6427520513534546, + "rewards/margins": 1.7736175060272217, + "rewards/rejected": -0.13086548447608948, + "step": 5150 + }, + { + "epoch": 0.3, + "learning_rate": 8.210327011158697e-08, + "logits/chosen": -2.0072851181030273, + "logits/rejected": -2.0027759075164795, + "logps/chosen": -80.28946685791016, + "logps/rejected": -187.74472045898438, + "loss": 0.1589, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1440391540527344, + "rewards/margins": 2.4398491382598877, + "rewards/rejected": -1.2958099842071533, + "step": 5151 + }, + { + "epoch": 0.3, + "learning_rate": 8.209604461461696e-08, + "logits/chosen": -1.9612780809402466, + "logits/rejected": -1.956512212753296, + "logps/chosen": -30.513648986816406, + "logps/rejected": -133.16969299316406, + "loss": 0.4484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5992057919502258, + "rewards/margins": 0.5869728326797485, + "rewards/rejected": 0.012232971377670765, + "step": 5152 + }, + { + "epoch": 0.3, + "learning_rate": 8.208881797743845e-08, + "logits/chosen": -2.0883030891418457, + "logits/rejected": -2.0792505741119385, + "logps/chosen": -26.480785369873047, + "logps/rejected": -202.65081787109375, + "loss": 0.3143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.295445054769516, + "rewards/margins": 2.081301689147949, + "rewards/rejected": -1.7858566045761108, + "step": 5153 + }, + { + "epoch": 0.3, + "learning_rate": 8.208159020030814e-08, + "logits/chosen": -2.1509993076324463, + "logits/rejected": -2.1405484676361084, + "logps/chosen": -0.026706557720899582, + "logps/rejected": -122.8038330078125, + "loss": 0.4886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007363811018876731, + "rewards/margins": 1.1111177206039429, + "rewards/rejected": -1.1103813648223877, + "step": 5154 + }, + { + "epoch": 0.3, + "learning_rate": 8.207436128348283e-08, + "logits/chosen": -2.111725091934204, + "logits/rejected": -2.098024368286133, + "logps/chosen": -69.49421691894531, + "logps/rejected": -189.4495391845703, + "loss": 0.5171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6608932614326477, + "rewards/margins": 0.09429627656936646, + "rewards/rejected": 0.5665969848632812, + "step": 5155 + }, + { + "epoch": 0.3, + "learning_rate": 8.206713122721932e-08, + "logits/chosen": -2.015324592590332, + "logits/rejected": -2.0281877517700195, + "logps/chosen": -201.41717529296875, + "logps/rejected": -328.2994384765625, + "loss": 0.0974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5517791509628296, + "rewards/margins": 3.1108734607696533, + "rewards/rejected": -1.5590943098068237, + "step": 5156 + }, + { + "epoch": 0.3, + "learning_rate": 8.205990003177443e-08, + "logits/chosen": -1.8909831047058105, + "logits/rejected": -1.8946033716201782, + "logps/chosen": -13.041025161743164, + "logps/rejected": -178.95819091796875, + "loss": 0.3193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28320273756980896, + "rewards/margins": 2.57055926322937, + "rewards/rejected": -2.2873566150665283, + "step": 5157 + }, + { + "epoch": 0.3, + "learning_rate": 8.205266769740509e-08, + "logits/chosen": -1.865623116493225, + "logits/rejected": -1.8707396984100342, + "logps/chosen": -199.29165649414062, + "logps/rejected": -287.5281066894531, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.205657958984375, + "rewards/margins": 0.16650688648223877, + "rewards/rejected": 1.0391510725021362, + "step": 5158 + }, + { + "epoch": 0.3, + "learning_rate": 8.20454342243682e-08, + "logits/chosen": -1.9322309494018555, + "logits/rejected": -1.8788986206054688, + "logps/chosen": -129.79705810546875, + "logps/rejected": -306.4484558105469, + "loss": 0.4143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7355408072471619, + "rewards/margins": 0.3891144096851349, + "rewards/rejected": 0.346426397562027, + "step": 5159 + }, + { + "epoch": 0.3, + "learning_rate": 8.203819961292073e-08, + "logits/chosen": -1.6645240783691406, + "logits/rejected": -1.6719218492507935, + "logps/chosen": -1.7169064283370972, + "logps/rejected": -141.84222412109375, + "loss": 0.4358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02980402670800686, + "rewards/margins": 1.7284945249557495, + "rewards/rejected": -1.7582985162734985, + "step": 5160 + }, + { + "epoch": 0.3, + "learning_rate": 8.20309638633197e-08, + "logits/chosen": -2.114311456680298, + "logits/rejected": -2.106523036956787, + "logps/chosen": -3.6182382106781006, + "logps/rejected": -274.9776611328125, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3564905524253845, + "rewards/margins": 1.8775599002838135, + "rewards/rejected": -1.5210694074630737, + "step": 5161 + }, + { + "epoch": 0.3, + "learning_rate": 8.202372697582216e-08, + "logits/chosen": -2.0230557918548584, + "logits/rejected": -2.0156352519989014, + "logps/chosen": -0.03677713871002197, + "logps/rejected": -72.45891571044922, + "loss": 0.7408, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0021048851776868105, + "rewards/margins": -0.18054039776325226, + "rewards/rejected": 0.1784355193376541, + "step": 5162 + }, + { + "epoch": 0.3, + "learning_rate": 8.201648895068518e-08, + "logits/chosen": -1.8954402208328247, + "logits/rejected": -1.8916592597961426, + "logps/chosen": -38.72858428955078, + "logps/rejected": -204.11117553710938, + "loss": 0.157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9133278131484985, + "rewards/margins": 3.769089698791504, + "rewards/rejected": -2.855761766433716, + "step": 5163 + }, + { + "epoch": 0.3, + "learning_rate": 8.20092497881659e-08, + "logits/chosen": -1.908470630645752, + "logits/rejected": -1.910132884979248, + "logps/chosen": -3.36166558554396e-05, + "logps/rejected": -133.95697021484375, + "loss": 0.5303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3708646520171897e-06, + "rewards/margins": 0.7933790683746338, + "rewards/rejected": -0.793377697467804, + "step": 5164 + }, + { + "epoch": 0.3, + "learning_rate": 8.20020094885215e-08, + "logits/chosen": -1.9841582775115967, + "logits/rejected": -1.9126157760620117, + "logps/chosen": -318.5470886230469, + "logps/rejected": -457.565185546875, + "loss": 0.2067, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0314117670059204, + "rewards/margins": 1.9502594470977783, + "rewards/rejected": -0.9188476800918579, + "step": 5165 + }, + { + "epoch": 0.3, + "learning_rate": 8.19947680520092e-08, + "logits/chosen": -2.1681928634643555, + "logits/rejected": -2.1682984828948975, + "logps/chosen": -6.061035633087158, + "logps/rejected": -108.78050994873047, + "loss": 0.5197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03298754617571831, + "rewards/margins": 0.9532666206359863, + "rewards/rejected": -0.9862541556358337, + "step": 5166 + }, + { + "epoch": 0.3, + "learning_rate": 8.198752547888621e-08, + "logits/chosen": -1.94416081905365, + "logits/rejected": -1.96474289894104, + "logps/chosen": -176.59078979492188, + "logps/rejected": -423.84283447265625, + "loss": 0.0923, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6179962158203125, + "rewards/margins": 2.705685615539551, + "rewards/rejected": -1.0876892805099487, + "step": 5167 + }, + { + "epoch": 0.3, + "learning_rate": 8.198028176940987e-08, + "logits/chosen": -2.0002193450927734, + "logits/rejected": -1.9943902492523193, + "logps/chosen": -255.92416381835938, + "logps/rejected": -353.76141357421875, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8730194568634033, + "rewards/margins": 0.14990544319152832, + "rewards/rejected": 1.723114013671875, + "step": 5168 + }, + { + "epoch": 0.3, + "learning_rate": 8.197303692383747e-08, + "logits/chosen": -2.1949853897094727, + "logits/rejected": -2.1897644996643066, + "logps/chosen": -18.958858489990234, + "logps/rejected": -147.1227569580078, + "loss": 0.5533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011563110165297985, + "rewards/margins": 0.6620193719863892, + "rewards/rejected": -0.6735824942588806, + "step": 5169 + }, + { + "epoch": 0.3, + "learning_rate": 8.196579094242642e-08, + "logits/chosen": -1.8973650932312012, + "logits/rejected": -1.8930373191833496, + "logps/chosen": -191.79202270507812, + "logps/rejected": -384.2375183105469, + "loss": 0.1936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9972991943359375, + "rewards/margins": 1.44952392578125, + "rewards/rejected": -0.4522247314453125, + "step": 5170 + }, + { + "epoch": 0.3, + "learning_rate": 8.195854382543409e-08, + "logits/chosen": -1.8241384029388428, + "logits/rejected": -1.8221803903579712, + "logps/chosen": -203.57521057128906, + "logps/rejected": -318.1199951171875, + "loss": 0.4209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28311464190483093, + "rewards/margins": 0.48263856768608093, + "rewards/rejected": -0.19952392578125, + "step": 5171 + }, + { + "epoch": 0.3, + "learning_rate": 8.195129557311798e-08, + "logits/chosen": -2.1652896404266357, + "logits/rejected": -2.142643451690674, + "logps/chosen": -139.69305419921875, + "logps/rejected": -284.42926025390625, + "loss": 0.3664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8837219476699829, + "rewards/margins": 0.6096527576446533, + "rewards/rejected": 0.274069219827652, + "step": 5172 + }, + { + "epoch": 0.3, + "learning_rate": 8.194404618573554e-08, + "logits/chosen": -2.3103115558624268, + "logits/rejected": -2.2991836071014404, + "logps/chosen": -0.001394736929796636, + "logps/rejected": -167.2039794921875, + "loss": 0.672, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.587756822933443e-05, + "rewards/margins": 0.05438290163874626, + "rewards/rejected": -0.05443878099322319, + "step": 5173 + }, + { + "epoch": 0.3, + "learning_rate": 8.193679566354433e-08, + "logits/chosen": -1.8732702732086182, + "logits/rejected": -1.8781185150146484, + "logps/chosen": -290.84039306640625, + "logps/rejected": -335.500732421875, + "loss": 0.1846, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0041565895080566, + "rewards/margins": 1.1162354946136475, + "rewards/rejected": 0.887921154499054, + "step": 5174 + }, + { + "epoch": 0.3, + "learning_rate": 8.192954400680193e-08, + "logits/chosen": -1.9729411602020264, + "logits/rejected": -1.966959834098816, + "logps/chosen": -0.000572804594412446, + "logps/rejected": -277.5382385253906, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.202818910125643e-05, + "rewards/margins": 4.356390953063965, + "rewards/rejected": -4.356298923492432, + "step": 5175 + }, + { + "epoch": 0.3, + "learning_rate": 8.192229121576594e-08, + "logits/chosen": -1.9892078638076782, + "logits/rejected": -2.007866382598877, + "logps/chosen": -141.04978942871094, + "logps/rejected": -445.9647521972656, + "loss": 0.0878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6074355840682983, + "rewards/margins": 3.6738510131835938, + "rewards/rejected": -2.066415548324585, + "step": 5176 + }, + { + "epoch": 0.3, + "learning_rate": 8.191503729069401e-08, + "logits/chosen": -1.8626148700714111, + "logits/rejected": -1.8485996723175049, + "logps/chosen": -136.89859008789062, + "logps/rejected": -232.41873168945312, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1794967651367188, + "rewards/margins": 0.8904830813407898, + "rewards/rejected": 0.28901368379592896, + "step": 5177 + }, + { + "epoch": 0.3, + "learning_rate": 8.190778223184385e-08, + "logits/chosen": -1.979608178138733, + "logits/rejected": -1.9626917839050293, + "logps/chosen": -115.89689636230469, + "logps/rejected": -382.71807861328125, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10890960693359375, + "rewards/margins": 2.5188400745391846, + "rewards/rejected": -2.409930467605591, + "step": 5178 + }, + { + "epoch": 0.3, + "learning_rate": 8.190052603947317e-08, + "logits/chosen": -1.9623888731002808, + "logits/rejected": -1.9273558855056763, + "logps/chosen": -182.4303741455078, + "logps/rejected": -352.1852722167969, + "loss": 0.1903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9987869262695312, + "rewards/margins": 1.471614122390747, + "rewards/rejected": -0.47282716631889343, + "step": 5179 + }, + { + "epoch": 0.3, + "learning_rate": 8.189326871383979e-08, + "logits/chosen": -2.026993751525879, + "logits/rejected": -2.019196033477783, + "logps/chosen": -10.849421501159668, + "logps/rejected": -106.18519592285156, + "loss": 0.7308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5647143125534058, + "rewards/margins": 0.5600552558898926, + "rewards/rejected": -1.1247695684432983, + "step": 5180 + }, + { + "epoch": 0.3, + "learning_rate": 8.188601025520148e-08, + "logits/chosen": -1.8877958059310913, + "logits/rejected": -1.8787914514541626, + "logps/chosen": -159.9698486328125, + "logps/rejected": -295.10723876953125, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3848724365234375, + "rewards/margins": 1.0981353521347046, + "rewards/rejected": 0.2867370545864105, + "step": 5181 + }, + { + "epoch": 0.3, + "learning_rate": 8.187875066381612e-08, + "logits/chosen": -1.9367152452468872, + "logits/rejected": -1.960420846939087, + "logps/chosen": -197.12774658203125, + "logps/rejected": -200.00865173339844, + "loss": 0.2098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.667700171470642, + "rewards/margins": 0.9436187148094177, + "rewards/rejected": 0.7240814566612244, + "step": 5182 + }, + { + "epoch": 0.3, + "learning_rate": 8.187148993994159e-08, + "logits/chosen": -2.0612847805023193, + "logits/rejected": -2.060372829437256, + "logps/chosen": -177.82177734375, + "logps/rejected": -400.8888244628906, + "loss": 0.0716, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4999436140060425, + "rewards/margins": 3.5472521781921387, + "rewards/rejected": -2.0473084449768066, + "step": 5183 + }, + { + "epoch": 0.3, + "learning_rate": 8.186422808383584e-08, + "logits/chosen": -2.2398781776428223, + "logits/rejected": -2.2337756156921387, + "logps/chosen": -23.63628387451172, + "logps/rejected": -294.66748046875, + "loss": 0.3058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2675352096557617, + "rewards/margins": 2.835458278656006, + "rewards/rejected": -2.567923069000244, + "step": 5184 + }, + { + "epoch": 0.3, + "learning_rate": 8.185696509575686e-08, + "logits/chosen": -2.001676082611084, + "logits/rejected": -2.0060253143310547, + "logps/chosen": -9.101494789123535, + "logps/rejected": -60.574188232421875, + "loss": 0.5807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03094501607120037, + "rewards/margins": 0.468671590089798, + "rewards/rejected": -0.43772658705711365, + "step": 5185 + }, + { + "epoch": 0.3, + "learning_rate": 8.184970097596263e-08, + "logits/chosen": -1.8591941595077515, + "logits/rejected": -1.8526437282562256, + "logps/chosen": -72.74995422363281, + "logps/rejected": -193.1397705078125, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7726227045059204, + "rewards/margins": 0.7239090204238892, + "rewards/rejected": 0.04871368408203125, + "step": 5186 + }, + { + "epoch": 0.3, + "learning_rate": 8.184243572471123e-08, + "logits/chosen": -1.9586538076400757, + "logits/rejected": -1.9566304683685303, + "logps/chosen": -0.0007706680335104465, + "logps/rejected": -68.48351287841797, + "loss": 0.6839, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.688118552730884e-06, + "rewards/margins": 0.11059211939573288, + "rewards/rejected": -0.11060180515050888, + "step": 5187 + }, + { + "epoch": 0.3, + "learning_rate": 8.183516934226078e-08, + "logits/chosen": -1.764310598373413, + "logits/rejected": -1.7334603071212769, + "logps/chosen": -289.28302001953125, + "logps/rejected": -432.6763610839844, + "loss": 0.3197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.809728980064392, + "rewards/margins": 0.3061034679412842, + "rewards/rejected": 1.503625512123108, + "step": 5188 + }, + { + "epoch": 0.3, + "learning_rate": 8.182790182886937e-08, + "logits/chosen": -1.9920974969863892, + "logits/rejected": -1.9792367219924927, + "logps/chosen": -112.23301696777344, + "logps/rejected": -303.2939453125, + "loss": 0.9326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5991119742393494, + "rewards/margins": 0.025683581829071045, + "rewards/rejected": -0.6247955560684204, + "step": 5189 + }, + { + "epoch": 0.3, + "learning_rate": 8.182063318479521e-08, + "logits/chosen": -1.9810049533843994, + "logits/rejected": -1.966881275177002, + "logps/chosen": -18.127429962158203, + "logps/rejected": -182.33255004882812, + "loss": 0.2662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4045677185058594, + "rewards/margins": 2.2789969444274902, + "rewards/rejected": -1.8744293451309204, + "step": 5190 + }, + { + "epoch": 0.3, + "learning_rate": 8.18133634102965e-08, + "logits/chosen": -2.1194801330566406, + "logits/rejected": -2.1035544872283936, + "logps/chosen": -292.89862060546875, + "logps/rejected": -362.97991943359375, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0734649896621704, + "rewards/margins": 0.509844958782196, + "rewards/rejected": 0.5636200308799744, + "step": 5191 + }, + { + "epoch": 0.3, + "learning_rate": 8.18060925056315e-08, + "logits/chosen": -2.025961399078369, + "logits/rejected": -1.9803229570388794, + "logps/chosen": -284.5885925292969, + "logps/rejected": -477.3914794921875, + "loss": 0.1292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.41619873046875, + "rewards/margins": 2.5616912841796875, + "rewards/rejected": -1.1454925537109375, + "step": 5192 + }, + { + "epoch": 0.3, + "learning_rate": 8.179882047105853e-08, + "logits/chosen": -2.073122501373291, + "logits/rejected": -2.0832910537719727, + "logps/chosen": -38.01557159423828, + "logps/rejected": -171.78829956054688, + "loss": 0.3799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010267257690429688, + "rewards/margins": 2.2632510662078857, + "rewards/rejected": -2.2735183238983154, + "step": 5193 + }, + { + "epoch": 0.3, + "learning_rate": 8.17915473068359e-08, + "logits/chosen": -1.9952143430709839, + "logits/rejected": -2.0158262252807617, + "logps/chosen": -266.5985412597656, + "logps/rejected": -421.89813232421875, + "loss": 0.0695, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7101624011993408, + "rewards/margins": 2.4901857376098633, + "rewards/rejected": -0.7800232172012329, + "step": 5194 + }, + { + "epoch": 0.3, + "learning_rate": 8.178427301322201e-08, + "logits/chosen": -1.9841961860656738, + "logits/rejected": -1.8953298330307007, + "logps/chosen": -241.09283447265625, + "logps/rejected": -838.72021484375, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.676513671875, + "rewards/margins": 5.612414360046387, + "rewards/rejected": -3.935900926589966, + "step": 5195 + }, + { + "epoch": 0.3, + "learning_rate": 8.177699759047525e-08, + "logits/chosen": -1.9713729619979858, + "logits/rejected": -1.947892189025879, + "logps/chosen": -229.26828002929688, + "logps/rejected": -286.01324462890625, + "loss": 0.0989, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0818634033203125, + "rewards/margins": 2.1164581775665283, + "rewards/rejected": -0.03459472581744194, + "step": 5196 + }, + { + "epoch": 0.3, + "learning_rate": 8.176972103885411e-08, + "logits/chosen": -1.8952808380126953, + "logits/rejected": -1.896500825881958, + "logps/chosen": -0.7224804162979126, + "logps/rejected": -160.2047119140625, + "loss": 0.3569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07901022583246231, + "rewards/margins": 2.7926363945007324, + "rewards/rejected": -2.7136261463165283, + "step": 5197 + }, + { + "epoch": 0.3, + "learning_rate": 8.176244335861708e-08, + "logits/chosen": -2.0758540630340576, + "logits/rejected": -2.0551671981811523, + "logps/chosen": -141.03192138671875, + "logps/rejected": -245.22500610351562, + "loss": 0.565, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.46937257051467896, + "rewards/margins": -0.2962127923965454, + "rewards/rejected": 0.7655853629112244, + "step": 5198 + }, + { + "epoch": 0.3, + "learning_rate": 8.175516455002269e-08, + "logits/chosen": -2.113936424255371, + "logits/rejected": -2.1153976917266846, + "logps/chosen": -48.09168243408203, + "logps/rejected": -216.7458953857422, + "loss": 0.6129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4054206907749176, + "rewards/margins": 0.6901146173477173, + "rewards/rejected": -1.0955352783203125, + "step": 5199 + }, + { + "epoch": 0.3, + "learning_rate": 8.174788461332953e-08, + "logits/chosen": -2.1889989376068115, + "logits/rejected": -2.1870903968811035, + "logps/chosen": -14.629444122314453, + "logps/rejected": -94.8056640625, + "loss": 0.3965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1251668930053711, + "rewards/margins": 1.740748405456543, + "rewards/rejected": -1.6155815124511719, + "step": 5200 + }, + { + "epoch": 0.3, + "learning_rate": 8.174060354879621e-08, + "logits/chosen": -1.9814575910568237, + "logits/rejected": -1.9788888692855835, + "logps/chosen": -14.509505271911621, + "logps/rejected": -50.912315368652344, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09033222496509552, + "rewards/margins": 1.0938241481781006, + "rewards/rejected": -1.1841564178466797, + "step": 5201 + }, + { + "epoch": 0.3, + "learning_rate": 8.173332135668139e-08, + "logits/chosen": -2.124138355255127, + "logits/rejected": -2.1127727031707764, + "logps/chosen": -141.8208465576172, + "logps/rejected": -251.26638793945312, + "loss": 0.2441, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5057891607284546, + "rewards/margins": 0.7455718517303467, + "rewards/rejected": 0.7602173089981079, + "step": 5202 + }, + { + "epoch": 0.3, + "learning_rate": 8.172603803724377e-08, + "logits/chosen": -1.9781904220581055, + "logits/rejected": -1.9800291061401367, + "logps/chosen": -17.812885284423828, + "logps/rejected": -122.33802795410156, + "loss": 0.597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16248074173927307, + "rewards/margins": 0.22324275970458984, + "rewards/rejected": -0.06076202541589737, + "step": 5203 + }, + { + "epoch": 0.3, + "learning_rate": 8.17187535907421e-08, + "logits/chosen": -2.069118022918701, + "logits/rejected": -2.066249132156372, + "logps/chosen": -0.00027522919117473066, + "logps/rejected": -27.258026123046875, + "loss": 0.6084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3111241969454568e-05, + "rewards/margins": 0.3764284551143646, + "rewards/rejected": -0.3764415681362152, + "step": 5204 + }, + { + "epoch": 0.3, + "learning_rate": 8.171146801743514e-08, + "logits/chosen": -1.912729263305664, + "logits/rejected": -1.8951420783996582, + "logps/chosen": -80.52772521972656, + "logps/rejected": -279.30438232421875, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5757980346679688, + "rewards/margins": 1.248936414718628, + "rewards/rejected": -0.673138439655304, + "step": 5205 + }, + { + "epoch": 0.3, + "learning_rate": 8.170418131758171e-08, + "logits/chosen": -2.0529379844665527, + "logits/rejected": -2.0468802452087402, + "logps/chosen": -19.43639373779297, + "logps/rejected": -220.992919921875, + "loss": 0.1789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8261465430259705, + "rewards/margins": 4.020970821380615, + "rewards/rejected": -3.19482421875, + "step": 5206 + }, + { + "epoch": 0.3, + "learning_rate": 8.169689349144068e-08, + "logits/chosen": -2.034132480621338, + "logits/rejected": -2.0311620235443115, + "logps/chosen": -27.845787048339844, + "logps/rejected": -118.7445297241211, + "loss": 0.5131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15175782144069672, + "rewards/margins": 0.8345786929130554, + "rewards/rejected": -0.9863365292549133, + "step": 5207 + }, + { + "epoch": 0.3, + "learning_rate": 8.168960453927096e-08, + "logits/chosen": -2.072747230529785, + "logits/rejected": -2.0683696269989014, + "logps/chosen": -69.14644622802734, + "logps/rejected": -207.26719665527344, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7186203002929688, + "rewards/margins": 2.759779453277588, + "rewards/rejected": -2.041159152984619, + "step": 5208 + }, + { + "epoch": 0.3, + "learning_rate": 8.168231446133147e-08, + "logits/chosen": -1.9741442203521729, + "logits/rejected": -1.8644604682922363, + "logps/chosen": -203.65782165527344, + "logps/rejected": -505.5389709472656, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3924026489257812, + "rewards/margins": 2.7516891956329346, + "rewards/rejected": -0.35928651690483093, + "step": 5209 + }, + { + "epoch": 0.3, + "learning_rate": 8.16750232578812e-08, + "logits/chosen": -2.0290210247039795, + "logits/rejected": -2.008632183074951, + "logps/chosen": -167.82748413085938, + "logps/rejected": -251.44656372070312, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8932754397392273, + "rewards/margins": 0.526123046875, + "rewards/rejected": 0.3671524226665497, + "step": 5210 + }, + { + "epoch": 0.3, + "learning_rate": 8.166773092917916e-08, + "logits/chosen": -2.136561155319214, + "logits/rejected": -2.1248769760131836, + "logps/chosen": -12.084941864013672, + "logps/rejected": -184.0079345703125, + "loss": 0.3783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06052036210894585, + "rewards/margins": 2.2241053581237793, + "rewards/rejected": -2.1635849475860596, + "step": 5211 + }, + { + "epoch": 0.3, + "learning_rate": 8.166043747548441e-08, + "logits/chosen": -1.9626405239105225, + "logits/rejected": -1.928141713142395, + "logps/chosen": -202.69529724121094, + "logps/rejected": -314.11334228515625, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9098190069198608, + "rewards/margins": 0.37035369873046875, + "rewards/rejected": 1.539465308189392, + "step": 5212 + }, + { + "epoch": 0.3, + "learning_rate": 8.165314289705605e-08, + "logits/chosen": -2.0181450843811035, + "logits/rejected": -2.0230722427368164, + "logps/chosen": -13.056350708007812, + "logps/rejected": -194.5210723876953, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06773920357227325, + "rewards/margins": 2.367553234100342, + "rewards/rejected": -2.299813985824585, + "step": 5213 + }, + { + "epoch": 0.3, + "learning_rate": 8.164584719415324e-08, + "logits/chosen": -1.8895390033721924, + "logits/rejected": -1.893495798110962, + "logps/chosen": -5.255800724029541, + "logps/rejected": -98.82998657226562, + "loss": 0.468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04458951950073242, + "rewards/margins": 1.1423839330673218, + "rewards/rejected": -1.0977944135665894, + "step": 5214 + }, + { + "epoch": 0.3, + "learning_rate": 8.163855036703513e-08, + "logits/chosen": -1.9795751571655273, + "logits/rejected": -1.9573017358779907, + "logps/chosen": -231.95242309570312, + "logps/rejected": -380.60626220703125, + "loss": 0.3891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8162399530410767, + "rewards/margins": 0.47206270694732666, + "rewards/rejected": 0.34417724609375, + "step": 5215 + }, + { + "epoch": 0.3, + "learning_rate": 8.163125241596094e-08, + "logits/chosen": -2.168564558029175, + "logits/rejected": -2.1589725017547607, + "logps/chosen": -77.23054504394531, + "logps/rejected": -264.79559326171875, + "loss": 0.2033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8018646240234375, + "rewards/margins": 3.0059173107147217, + "rewards/rejected": -2.204052686691284, + "step": 5216 + }, + { + "epoch": 0.3, + "learning_rate": 8.162395334118996e-08, + "logits/chosen": -2.087796688079834, + "logits/rejected": -2.081566333770752, + "logps/chosen": -180.25628662109375, + "logps/rejected": -293.29168701171875, + "loss": 0.4509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8289108276367188, + "rewards/margins": 0.024769604206085205, + "rewards/rejected": 0.8041412234306335, + "step": 5217 + }, + { + "epoch": 0.3, + "learning_rate": 8.161665314298145e-08, + "logits/chosen": -2.0797629356384277, + "logits/rejected": -2.0666890144348145, + "logps/chosen": -154.27194213867188, + "logps/rejected": -230.28549194335938, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3187439441680908, + "rewards/margins": 1.0208892822265625, + "rewards/rejected": 0.29785463213920593, + "step": 5218 + }, + { + "epoch": 0.3, + "learning_rate": 8.160935182159478e-08, + "logits/chosen": -2.084221839904785, + "logits/rejected": -2.0922396183013916, + "logps/chosen": -0.0004171781474724412, + "logps/rejected": -174.9460906982422, + "loss": 0.401, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1512922355905175e-05, + "rewards/margins": 2.151775360107422, + "rewards/rejected": -2.1517868041992188, + "step": 5219 + }, + { + "epoch": 0.3, + "learning_rate": 8.16020493772893e-08, + "logits/chosen": -2.0287346839904785, + "logits/rejected": -2.0253443717956543, + "logps/chosen": -47.02386474609375, + "logps/rejected": -316.715576171875, + "loss": 0.4872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46777763962745667, + "rewards/margins": 0.30047112703323364, + "rewards/rejected": 0.16730652749538422, + "step": 5220 + }, + { + "epoch": 0.3, + "learning_rate": 8.159474581032446e-08, + "logits/chosen": -2.116971254348755, + "logits/rejected": -2.1004462242126465, + "logps/chosen": -161.9566192626953, + "logps/rejected": -292.52142333984375, + "loss": 0.4974, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19540558755397797, + "rewards/margins": 0.7858994007110596, + "rewards/rejected": -0.5904937982559204, + "step": 5221 + }, + { + "epoch": 0.3, + "learning_rate": 8.158744112095968e-08, + "logits/chosen": -1.9502503871917725, + "logits/rejected": -1.9455331563949585, + "logps/chosen": -233.17959594726562, + "logps/rejected": -369.3151550292969, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4393844604492188, + "rewards/margins": 0.4255324602127075, + "rewards/rejected": 1.0138520002365112, + "step": 5222 + }, + { + "epoch": 0.3, + "learning_rate": 8.158013530945451e-08, + "logits/chosen": -2.0528061389923096, + "logits/rejected": -2.0598666667938232, + "logps/chosen": -0.001140893204137683, + "logps/rejected": -90.87367248535156, + "loss": 0.5047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.277603496419033e-05, + "rewards/margins": 0.9784902334213257, + "rewards/rejected": -0.9784774780273438, + "step": 5223 + }, + { + "epoch": 0.3, + "learning_rate": 8.157282837606842e-08, + "logits/chosen": -1.9403729438781738, + "logits/rejected": -1.9947811365127563, + "logps/chosen": -294.0499267578125, + "logps/rejected": -252.47116088867188, + "loss": 0.1084, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2876007556915283, + "rewards/margins": 1.7282776832580566, + "rewards/rejected": 0.5593231320381165, + "step": 5224 + }, + { + "epoch": 0.3, + "learning_rate": 8.156552032106106e-08, + "logits/chosen": -1.7988312244415283, + "logits/rejected": -1.798957347869873, + "logps/chosen": -95.67930603027344, + "logps/rejected": -295.9830017089844, + "loss": 0.5767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44735413789749146, + "rewards/margins": 1.2687439918518066, + "rewards/rejected": -1.7160980701446533, + "step": 5225 + }, + { + "epoch": 0.3, + "learning_rate": 8.155821114469201e-08, + "logits/chosen": -2.023505449295044, + "logits/rejected": -1.9840896129608154, + "logps/chosen": -13.631207466125488, + "logps/rejected": -545.7529296875, + "loss": 0.2332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4426240026950836, + "rewards/margins": 4.846866130828857, + "rewards/rejected": -4.404242038726807, + "step": 5226 + }, + { + "epoch": 0.3, + "learning_rate": 8.155090084722093e-08, + "logits/chosen": -2.088244676589966, + "logits/rejected": -2.088308334350586, + "logps/chosen": -40.286190032958984, + "logps/rejected": -234.45889282226562, + "loss": 0.4445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14714165031909943, + "rewards/margins": 0.9397014379501343, + "rewards/rejected": -0.792559802532196, + "step": 5227 + }, + { + "epoch": 0.3, + "learning_rate": 8.154358942890752e-08, + "logits/chosen": -1.7621667385101318, + "logits/rejected": -1.770864725112915, + "logps/chosen": -74.84593200683594, + "logps/rejected": -112.09759521484375, + "loss": 0.5667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.135223388671875, + "rewards/margins": 0.7935844659805298, + "rewards/rejected": -0.9288078546524048, + "step": 5228 + }, + { + "epoch": 0.3, + "learning_rate": 8.15362768900115e-08, + "logits/chosen": -1.9059706926345825, + "logits/rejected": -1.8875809907913208, + "logps/chosen": -186.87171936035156, + "logps/rejected": -249.67657470703125, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9554489850997925, + "rewards/margins": 0.42578279972076416, + "rewards/rejected": 1.5296661853790283, + "step": 5229 + }, + { + "epoch": 0.3, + "learning_rate": 8.152896323079268e-08, + "logits/chosen": -2.08392333984375, + "logits/rejected": -2.058694839477539, + "logps/chosen": -193.44119262695312, + "logps/rejected": -464.18682861328125, + "loss": 0.1363, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4254440069198608, + "rewards/margins": 2.0100998878479004, + "rewards/rejected": -0.58465576171875, + "step": 5230 + }, + { + "epoch": 0.3, + "learning_rate": 8.152164845151086e-08, + "logits/chosen": -1.9588475227355957, + "logits/rejected": -1.9682390689849854, + "logps/chosen": -236.24200439453125, + "logps/rejected": -343.6438903808594, + "loss": 0.1357, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2422547340393066, + "rewards/margins": 1.5260255336761475, + "rewards/rejected": 0.716229259967804, + "step": 5231 + }, + { + "epoch": 0.3, + "learning_rate": 8.15143325524259e-08, + "logits/chosen": -1.9291059970855713, + "logits/rejected": -1.9235050678253174, + "logps/chosen": -19.415802001953125, + "logps/rejected": -166.3728485107422, + "loss": 0.4084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20726127922534943, + "rewards/margins": 1.366532564163208, + "rewards/rejected": -1.159271240234375, + "step": 5232 + }, + { + "epoch": 0.3, + "learning_rate": 8.150701553379769e-08, + "logits/chosen": -2.2064452171325684, + "logits/rejected": -2.2051727771759033, + "logps/chosen": -58.858184814453125, + "logps/rejected": -227.50704956054688, + "loss": 0.2438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.660443127155304, + "rewards/margins": 2.037606954574585, + "rewards/rejected": -1.3771637678146362, + "step": 5233 + }, + { + "epoch": 0.3, + "learning_rate": 8.149969739588617e-08, + "logits/chosen": -2.0807814598083496, + "logits/rejected": -1.9029557704925537, + "logps/chosen": -197.92837524414062, + "logps/rejected": -596.7950439453125, + "loss": 0.2462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8058364987373352, + "rewards/margins": 1.770826816558838, + "rewards/rejected": -0.9649902582168579, + "step": 5234 + }, + { + "epoch": 0.3, + "learning_rate": 8.149237813895133e-08, + "logits/chosen": -1.8865529298782349, + "logits/rejected": -1.8841077089309692, + "logps/chosen": -49.69129180908203, + "logps/rejected": -122.44364166259766, + "loss": 0.7825, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7417274713516235, + "rewards/margins": 0.27454984188079834, + "rewards/rejected": -1.0162773132324219, + "step": 5235 + }, + { + "epoch": 0.3, + "learning_rate": 8.148505776325316e-08, + "logits/chosen": -1.9464360475540161, + "logits/rejected": -1.9350390434265137, + "logps/chosen": -235.02725219726562, + "logps/rejected": -445.2309875488281, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.633004903793335, + "rewards/margins": 1.7727785110473633, + "rewards/rejected": 0.8602264523506165, + "step": 5236 + }, + { + "epoch": 0.3, + "learning_rate": 8.147773626905174e-08, + "logits/chosen": -2.0486083030700684, + "logits/rejected": -2.035212993621826, + "logps/chosen": -8.582940101623535, + "logps/rejected": -294.7561950683594, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025187397375702858, + "rewards/margins": 3.9048140048980713, + "rewards/rejected": -3.879626512527466, + "step": 5237 + }, + { + "epoch": 0.3, + "learning_rate": 8.147041365660715e-08, + "logits/chosen": -1.931024432182312, + "logits/rejected": -1.926708698272705, + "logps/chosen": -0.0018130458192899823, + "logps/rejected": -184.05059814453125, + "loss": 0.4176, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.681717837229371e-05, + "rewards/margins": 1.8524119853973389, + "rewards/rejected": -1.8523651361465454, + "step": 5238 + }, + { + "epoch": 0.3, + "learning_rate": 8.14630899261795e-08, + "logits/chosen": -2.001657724380493, + "logits/rejected": -1.999920129776001, + "logps/chosen": -0.0009137287852354348, + "logps/rejected": -150.08132934570312, + "loss": 0.5244, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.892110392684117e-05, + "rewards/margins": 0.8504148125648499, + "rewards/rejected": -0.8503158688545227, + "step": 5239 + }, + { + "epoch": 0.3, + "learning_rate": 8.145576507802903e-08, + "logits/chosen": -2.1208407878875732, + "logits/rejected": -2.1081435680389404, + "logps/chosen": -266.9698791503906, + "logps/rejected": -426.15216064453125, + "loss": 0.1063, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.445242404937744, + "rewards/margins": 1.6952698230743408, + "rewards/rejected": 0.7499725222587585, + "step": 5240 + }, + { + "epoch": 0.3, + "learning_rate": 8.14484391124159e-08, + "logits/chosen": -2.038745164871216, + "logits/rejected": -2.042100191116333, + "logps/chosen": -0.008633684366941452, + "logps/rejected": -113.13752746582031, + "loss": 0.743, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.256196062779054e-05, + "rewards/margins": -0.2105967253446579, + "rewards/rejected": 0.21050415933132172, + "step": 5241 + }, + { + "epoch": 0.31, + "learning_rate": 8.144111202960038e-08, + "logits/chosen": -1.982538104057312, + "logits/rejected": -1.9697248935699463, + "logps/chosen": -105.05622863769531, + "logps/rejected": -137.97398376464844, + "loss": 0.4636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8442581295967102, + "rewards/margins": 0.36348268389701843, + "rewards/rejected": 0.4807754456996918, + "step": 5242 + }, + { + "epoch": 0.31, + "learning_rate": 8.143378382984276e-08, + "logits/chosen": -1.8553186655044556, + "logits/rejected": -1.846974492073059, + "logps/chosen": -308.86102294921875, + "logps/rejected": -531.4315185546875, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.634350538253784, + "rewards/margins": 2.907336473464966, + "rewards/rejected": -0.2729858458042145, + "step": 5243 + }, + { + "epoch": 0.31, + "learning_rate": 8.14264545134034e-08, + "logits/chosen": -2.176187038421631, + "logits/rejected": -2.174455165863037, + "logps/chosen": -32.571014404296875, + "logps/rejected": -235.45443725585938, + "loss": 0.4236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06850586086511612, + "rewards/margins": 1.7106430530548096, + "rewards/rejected": -1.6421371698379517, + "step": 5244 + }, + { + "epoch": 0.31, + "learning_rate": 8.141912408054264e-08, + "logits/chosen": -1.881750464439392, + "logits/rejected": -1.8593820333480835, + "logps/chosen": -289.0082092285156, + "logps/rejected": -477.84271240234375, + "loss": 0.1706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.112432837486267, + "rewards/margins": 2.1446807384490967, + "rewards/rejected": -1.0322479009628296, + "step": 5245 + }, + { + "epoch": 0.31, + "learning_rate": 8.14117925315209e-08, + "logits/chosen": -2.069410800933838, + "logits/rejected": -2.065173625946045, + "logps/chosen": -42.38854217529297, + "logps/rejected": -176.6151885986328, + "loss": 0.5652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09388809651136398, + "rewards/margins": 0.4738929867744446, + "rewards/rejected": -0.3800048828125, + "step": 5246 + }, + { + "epoch": 0.31, + "learning_rate": 8.140445986659866e-08, + "logits/chosen": -1.9877384901046753, + "logits/rejected": -1.9771629571914673, + "logps/chosen": -36.348045349121094, + "logps/rejected": -249.77749633789062, + "loss": 0.4434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22299805283546448, + "rewards/margins": 1.913476586341858, + "rewards/rejected": -2.136474609375, + "step": 5247 + }, + { + "epoch": 0.31, + "learning_rate": 8.139712608603637e-08, + "logits/chosen": -2.0932507514953613, + "logits/rejected": -2.092440128326416, + "logps/chosen": -2.96382737159729, + "logps/rejected": -53.05175018310547, + "loss": 0.6967, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.022153759375214577, + "rewards/margins": -0.020867062732577324, + "rewards/rejected": -0.0012866974575445056, + "step": 5248 + }, + { + "epoch": 0.31, + "learning_rate": 8.138979119009459e-08, + "logits/chosen": -1.9474636316299438, + "logits/rejected": -1.966208815574646, + "logps/chosen": -238.38739013671875, + "logps/rejected": -447.5479736328125, + "loss": 0.0839, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6657685041427612, + "rewards/margins": 2.9631683826446533, + "rewards/rejected": -1.297399878501892, + "step": 5249 + }, + { + "epoch": 0.31, + "learning_rate": 8.138245517903388e-08, + "logits/chosen": -1.9511890411376953, + "logits/rejected": -1.9525647163391113, + "logps/chosen": -24.61754035949707, + "logps/rejected": -87.49639892578125, + "loss": 0.6833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15019360184669495, + "rewards/margins": 0.15948277711868286, + "rewards/rejected": -0.3096763789653778, + "step": 5250 + }, + { + "epoch": 0.31, + "learning_rate": 8.137511805311485e-08, + "logits/chosen": -2.171025514602661, + "logits/rejected": -2.154759645462036, + "logps/chosen": -53.33418655395508, + "logps/rejected": -354.3649597167969, + "loss": 0.1936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7132439017295837, + "rewards/margins": 3.5764124393463135, + "rewards/rejected": -2.863168478012085, + "step": 5251 + }, + { + "epoch": 0.31, + "learning_rate": 8.136777981259814e-08, + "logits/chosen": -2.1407370567321777, + "logits/rejected": -2.132113456726074, + "logps/chosen": -109.22479248046875, + "logps/rejected": -299.25372314453125, + "loss": 0.376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0411834716796875, + "rewards/margins": 1.9151763916015625, + "rewards/rejected": -1.873992919921875, + "step": 5252 + }, + { + "epoch": 0.31, + "learning_rate": 8.136044045774448e-08, + "logits/chosen": -1.9030652046203613, + "logits/rejected": -1.8686784505844116, + "logps/chosen": -294.4476318359375, + "logps/rejected": -460.544677734375, + "loss": 0.0737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7172454595565796, + "rewards/margins": 2.3844025135040283, + "rewards/rejected": -0.667156994342804, + "step": 5253 + }, + { + "epoch": 0.31, + "learning_rate": 8.135309998881457e-08, + "logits/chosen": -1.9277112483978271, + "logits/rejected": -1.9175219535827637, + "logps/chosen": -115.26510620117188, + "logps/rejected": -307.8497619628906, + "loss": 0.181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8998557925224304, + "rewards/margins": 2.3012261390686035, + "rewards/rejected": -1.4013702869415283, + "step": 5254 + }, + { + "epoch": 0.31, + "learning_rate": 8.134575840606917e-08, + "logits/chosen": -1.6295852661132812, + "logits/rejected": -1.6349492073059082, + "logps/chosen": -165.37973022460938, + "logps/rejected": -156.79678344726562, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0617462396621704, + "rewards/margins": 0.13860934972763062, + "rewards/rejected": 0.9231368899345398, + "step": 5255 + }, + { + "epoch": 0.31, + "learning_rate": 8.133841570976912e-08, + "logits/chosen": -2.215726852416992, + "logits/rejected": -2.1937832832336426, + "logps/chosen": -0.976125180721283, + "logps/rejected": -349.3445739746094, + "loss": 0.3842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024997515603899956, + "rewards/margins": 2.3887791633605957, + "rewards/rejected": -2.363781690597534, + "step": 5256 + }, + { + "epoch": 0.31, + "learning_rate": 8.133107190017525e-08, + "logits/chosen": -1.9173909425735474, + "logits/rejected": -1.908864974975586, + "logps/chosen": -133.46841430664062, + "logps/rejected": -344.20977783203125, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7779327630996704, + "rewards/margins": 1.516809105873108, + "rewards/rejected": -0.7388763427734375, + "step": 5257 + }, + { + "epoch": 0.31, + "learning_rate": 8.132372697754844e-08, + "logits/chosen": -2.1039345264434814, + "logits/rejected": -2.106433153152466, + "logps/chosen": -114.84953308105469, + "logps/rejected": -302.3692626953125, + "loss": 0.4894, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7596275210380554, + "rewards/margins": -0.00868302583694458, + "rewards/rejected": 0.768310546875, + "step": 5258 + }, + { + "epoch": 0.31, + "learning_rate": 8.131638094214963e-08, + "logits/chosen": -2.0788254737854004, + "logits/rejected": -2.078078031539917, + "logps/chosen": -0.20082813501358032, + "logps/rejected": -246.51266479492188, + "loss": 0.4069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006175136659294367, + "rewards/margins": 2.1517655849456787, + "rewards/rejected": -2.157940626144409, + "step": 5259 + }, + { + "epoch": 0.31, + "learning_rate": 8.130903379423978e-08, + "logits/chosen": -2.061370849609375, + "logits/rejected": -2.095674753189087, + "logps/chosen": -318.5053405761719, + "logps/rejected": -397.3914489746094, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09573974460363388, + "rewards/margins": 0.6953582763671875, + "rewards/rejected": -0.5996185541152954, + "step": 5260 + }, + { + "epoch": 0.31, + "learning_rate": 8.13016855340799e-08, + "logits/chosen": -2.033949613571167, + "logits/rejected": -2.027430772781372, + "logps/chosen": -176.53781127929688, + "logps/rejected": -308.198974609375, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8772674798965454, + "rewards/margins": 1.0788726806640625, + "rewards/rejected": -0.20160523056983948, + "step": 5261 + }, + { + "epoch": 0.31, + "learning_rate": 8.129433616193105e-08, + "logits/chosen": -2.0203261375427246, + "logits/rejected": -2.012075424194336, + "logps/chosen": -15.19672679901123, + "logps/rejected": -335.95721435546875, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3318207859992981, + "rewards/margins": 3.198910713195801, + "rewards/rejected": -2.8670899868011475, + "step": 5262 + }, + { + "epoch": 0.31, + "learning_rate": 8.128698567805429e-08, + "logits/chosen": -1.9014091491699219, + "logits/rejected": -1.888713002204895, + "logps/chosen": -234.70343017578125, + "logps/rejected": -468.52459716796875, + "loss": 0.1533, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9281342029571533, + "rewards/margins": 1.4935089349746704, + "rewards/rejected": 0.4346252381801605, + "step": 5263 + }, + { + "epoch": 0.31, + "learning_rate": 8.127963408271076e-08, + "logits/chosen": -2.000544548034668, + "logits/rejected": -1.9918923377990723, + "logps/chosen": -196.0545654296875, + "logps/rejected": -256.2536926269531, + "loss": 0.4442, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9553359746932983, + "rewards/margins": -0.25381624698638916, + "rewards/rejected": 2.2091522216796875, + "step": 5264 + }, + { + "epoch": 0.31, + "learning_rate": 8.127228137616162e-08, + "logits/chosen": -1.8890360593795776, + "logits/rejected": -1.9027752876281738, + "logps/chosen": -273.8908386230469, + "logps/rejected": -506.8070068359375, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0828704833984375, + "rewards/margins": 0.9614471197128296, + "rewards/rejected": 0.12142334133386612, + "step": 5265 + }, + { + "epoch": 0.31, + "learning_rate": 8.126492755866808e-08, + "logits/chosen": -2.2555582523345947, + "logits/rejected": -2.247392177581787, + "logps/chosen": -63.2494010925293, + "logps/rejected": -182.11029052734375, + "loss": 0.2717, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1595287322998047, + "rewards/margins": 0.9232540130615234, + "rewards/rejected": 0.23627471923828125, + "step": 5266 + }, + { + "epoch": 0.31, + "learning_rate": 8.125757263049138e-08, + "logits/chosen": -2.05713152885437, + "logits/rejected": -2.054908275604248, + "logps/chosen": -246.32733154296875, + "logps/rejected": -354.28204345703125, + "loss": 0.1311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9392212629318237, + "rewards/margins": 1.5118408203125, + "rewards/rejected": 0.42738038301467896, + "step": 5267 + }, + { + "epoch": 0.31, + "learning_rate": 8.125021659189281e-08, + "logits/chosen": -1.996910810470581, + "logits/rejected": -2.0172908306121826, + "logps/chosen": -154.9956512451172, + "logps/rejected": -392.3160400390625, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.312281847000122, + "rewards/margins": 3.9543685913085938, + "rewards/rejected": -2.6420867443084717, + "step": 5268 + }, + { + "epoch": 0.31, + "learning_rate": 8.124285944313368e-08, + "logits/chosen": -1.8928941488265991, + "logits/rejected": -1.885481595993042, + "logps/chosen": -144.367919921875, + "logps/rejected": -309.11151123046875, + "loss": 0.2269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3069199323654175, + "rewards/margins": 1.2669419050216675, + "rewards/rejected": 0.03997802734375, + "step": 5269 + }, + { + "epoch": 0.31, + "learning_rate": 8.123550118447535e-08, + "logits/chosen": -1.9043407440185547, + "logits/rejected": -1.90509033203125, + "logps/chosen": -213.24107360839844, + "logps/rejected": -423.11767578125, + "loss": 0.1092, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0982131958007812, + "rewards/margins": 1.6943435668945312, + "rewards/rejected": 0.40386962890625, + "step": 5270 + }, + { + "epoch": 0.31, + "learning_rate": 8.122814181617923e-08, + "logits/chosen": -1.8293334245681763, + "logits/rejected": -1.8387659788131714, + "logps/chosen": -245.70809936523438, + "logps/rejected": -317.72747802734375, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9044632315635681, + "rewards/margins": 0.6378738880157471, + "rewards/rejected": 0.26658937335014343, + "step": 5271 + }, + { + "epoch": 0.31, + "learning_rate": 8.122078133850676e-08, + "logits/chosen": -2.1075446605682373, + "logits/rejected": -2.101470947265625, + "logps/chosen": -0.00021051372459623963, + "logps/rejected": -120.36924743652344, + "loss": 0.527, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2419307495292742e-05, + "rewards/margins": 0.8318918347358704, + "rewards/rejected": -0.8318794369697571, + "step": 5272 + }, + { + "epoch": 0.31, + "learning_rate": 8.121341975171943e-08, + "logits/chosen": -2.099771499633789, + "logits/rejected": -2.0918028354644775, + "logps/chosen": -46.632118225097656, + "logps/rejected": -257.40582275390625, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27006837725639343, + "rewards/margins": 3.169217109680176, + "rewards/rejected": -2.89914870262146, + "step": 5273 + }, + { + "epoch": 0.31, + "learning_rate": 8.120605705607873e-08, + "logits/chosen": -1.8089656829833984, + "logits/rejected": -1.739477276802063, + "logps/chosen": -262.6173095703125, + "logps/rejected": -589.5556030273438, + "loss": 0.1849, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.117828369140625, + "rewards/margins": 1.021691918373108, + "rewards/rejected": 1.096136450767517, + "step": 5274 + }, + { + "epoch": 0.31, + "learning_rate": 8.119869325184625e-08, + "logits/chosen": -2.080690860748291, + "logits/rejected": -2.085862159729004, + "logps/chosen": -215.5083465576172, + "logps/rejected": -374.5185546875, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41402435302734375, + "rewards/margins": 0.7333786487579346, + "rewards/rejected": -0.31935426592826843, + "step": 5275 + }, + { + "epoch": 0.31, + "learning_rate": 8.119132833928359e-08, + "logits/chosen": -1.8373397588729858, + "logits/rejected": -1.7768678665161133, + "logps/chosen": -119.94224548339844, + "logps/rejected": -433.186279296875, + "loss": 0.1106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9150832891464233, + "rewards/margins": 1.8260329961776733, + "rewards/rejected": 0.08905029296875, + "step": 5276 + }, + { + "epoch": 0.31, + "learning_rate": 8.118396231865235e-08, + "logits/chosen": -1.9741467237472534, + "logits/rejected": -1.9496515989303589, + "logps/chosen": -15.96362018585205, + "logps/rejected": -325.26031494140625, + "loss": 0.3729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05652208253741264, + "rewards/margins": 2.3788671493530273, + "rewards/rejected": -2.322345018386841, + "step": 5277 + }, + { + "epoch": 0.31, + "learning_rate": 8.117659519021424e-08, + "logits/chosen": -1.909656047821045, + "logits/rejected": -1.9050638675689697, + "logps/chosen": -24.07928466796875, + "logps/rejected": -105.90584564208984, + "loss": 0.5151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2280004471540451, + "rewards/margins": 0.4902242422103882, + "rewards/rejected": -0.2622238099575043, + "step": 5278 + }, + { + "epoch": 0.31, + "learning_rate": 8.116922695423098e-08, + "logits/chosen": -2.1126537322998047, + "logits/rejected": -2.1100873947143555, + "logps/chosen": -0.53141188621521, + "logps/rejected": -200.4540252685547, + "loss": 0.459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028603076934814453, + "rewards/margins": 1.3761863708496094, + "rewards/rejected": -1.3790466785430908, + "step": 5279 + }, + { + "epoch": 0.31, + "learning_rate": 8.11618576109643e-08, + "logits/chosen": -2.0086636543273926, + "logits/rejected": -2.0009377002716064, + "logps/chosen": -313.519287109375, + "logps/rejected": -415.1130676269531, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0253663063049316, + "rewards/margins": 3.1755311489105225, + "rewards/rejected": -1.1501648426055908, + "step": 5280 + }, + { + "epoch": 0.31, + "learning_rate": 8.115448716067602e-08, + "logits/chosen": -1.9723808765411377, + "logits/rejected": -1.9676575660705566, + "logps/chosen": -41.59362030029297, + "logps/rejected": -373.26544189453125, + "loss": 0.4933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4228794276714325, + "rewards/margins": 2.530700206756592, + "rewards/rejected": -2.9535796642303467, + "step": 5281 + }, + { + "epoch": 0.31, + "learning_rate": 8.114711560362794e-08, + "logits/chosen": -1.892392873764038, + "logits/rejected": -1.8701788187026978, + "logps/chosen": -173.98333740234375, + "logps/rejected": -337.31182861328125, + "loss": 0.5775, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.813000500202179, + "rewards/margins": -0.37706905603408813, + "rewards/rejected": 1.190069556236267, + "step": 5282 + }, + { + "epoch": 0.31, + "learning_rate": 8.113974294008197e-08, + "logits/chosen": -1.976540446281433, + "logits/rejected": -1.973950743675232, + "logps/chosen": -0.04864782467484474, + "logps/rejected": -128.83583068847656, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009409350343048573, + "rewards/margins": 1.1434662342071533, + "rewards/rejected": -1.1340569257736206, + "step": 5283 + }, + { + "epoch": 0.31, + "learning_rate": 8.11323691703e-08, + "logits/chosen": -1.8717710971832275, + "logits/rejected": -1.8869599103927612, + "logps/chosen": -187.52464294433594, + "logps/rejected": -263.660400390625, + "loss": 0.194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4060226678848267, + "rewards/margins": 1.1587876081466675, + "rewards/rejected": 0.24723510444164276, + "step": 5284 + }, + { + "epoch": 0.31, + "learning_rate": 8.1124994294544e-08, + "logits/chosen": -2.002821922302246, + "logits/rejected": -2.000298023223877, + "logps/chosen": -16.778701782226562, + "logps/rejected": -165.81130981445312, + "loss": 0.6558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07683143764734268, + "rewards/margins": 0.021634291857481003, + "rewards/rejected": 0.05519714578986168, + "step": 5285 + }, + { + "epoch": 0.31, + "learning_rate": 8.111761831307597e-08, + "logits/chosen": -2.0164692401885986, + "logits/rejected": -2.0128939151763916, + "logps/chosen": -13.779020309448242, + "logps/rejected": -229.79693603515625, + "loss": 0.4628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012468910776078701, + "rewards/margins": 1.402264952659607, + "rewards/rejected": -1.41473388671875, + "step": 5286 + }, + { + "epoch": 0.31, + "learning_rate": 8.11102412261579e-08, + "logits/chosen": -2.100832939147949, + "logits/rejected": -2.107252597808838, + "logps/chosen": -21.063587188720703, + "logps/rejected": -159.75506591796875, + "loss": 0.6832, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3174285888671875, + "rewards/margins": -0.19193881750106812, + "rewards/rejected": 0.5093674063682556, + "step": 5287 + }, + { + "epoch": 0.31, + "learning_rate": 8.110286303405189e-08, + "logits/chosen": -2.0130088329315186, + "logits/rejected": -2.0350430011749268, + "logps/chosen": -219.228759765625, + "logps/rejected": -249.29661560058594, + "loss": 0.2474, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2875107526779175, + "rewards/margins": 0.9062515497207642, + "rewards/rejected": 0.38125917315483093, + "step": 5288 + }, + { + "epoch": 0.31, + "learning_rate": 8.109548373702006e-08, + "logits/chosen": -1.7278746366500854, + "logits/rejected": -1.7258148193359375, + "logps/chosen": -27.366682052612305, + "logps/rejected": -93.34564971923828, + "loss": 0.6327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08640880882740021, + "rewards/margins": 0.12810879945755005, + "rewards/rejected": -0.041699983179569244, + "step": 5289 + }, + { + "epoch": 0.31, + "learning_rate": 8.108810333532455e-08, + "logits/chosen": -1.9797627925872803, + "logits/rejected": -1.983944296836853, + "logps/chosen": -0.16960382461547852, + "logps/rejected": -69.25178527832031, + "loss": 0.6457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00012730360322166234, + "rewards/margins": 0.2646186053752899, + "rewards/rejected": -0.2644912898540497, + "step": 5290 + }, + { + "epoch": 0.31, + "learning_rate": 8.108072182922751e-08, + "logits/chosen": -2.0525567531585693, + "logits/rejected": -2.0566020011901855, + "logps/chosen": -213.52410888671875, + "logps/rejected": -264.44329833984375, + "loss": 0.228, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.728389024734497, + "rewards/margins": 1.096247911453247, + "rewards/rejected": 0.63214111328125, + "step": 5291 + }, + { + "epoch": 0.31, + "learning_rate": 8.107333921899122e-08, + "logits/chosen": -2.004788637161255, + "logits/rejected": -1.9724332094192505, + "logps/chosen": -234.44699096679688, + "logps/rejected": -289.903564453125, + "loss": 0.5286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11505432426929474, + "rewards/margins": 0.6597381830215454, + "rewards/rejected": -0.5446838736534119, + "step": 5292 + }, + { + "epoch": 0.31, + "learning_rate": 8.106595550487792e-08, + "logits/chosen": -2.1026368141174316, + "logits/rejected": -2.1017584800720215, + "logps/chosen": -0.553674042224884, + "logps/rejected": -140.83209228515625, + "loss": 0.5896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01739201322197914, + "rewards/margins": 0.4856521189212799, + "rewards/rejected": -0.5030441284179688, + "step": 5293 + }, + { + "epoch": 0.31, + "learning_rate": 8.105857068714992e-08, + "logits/chosen": -2.0356247425079346, + "logits/rejected": -1.9779757261276245, + "logps/chosen": -257.0831298828125, + "logps/rejected": -509.8844909667969, + "loss": 0.1271, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.157244920730591, + "rewards/margins": 1.4410797357559204, + "rewards/rejected": 0.7161651849746704, + "step": 5294 + }, + { + "epoch": 0.31, + "learning_rate": 8.105118476606956e-08, + "logits/chosen": -2.065298557281494, + "logits/rejected": -2.0736963748931885, + "logps/chosen": -23.297361373901367, + "logps/rejected": -190.32705688476562, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18702010810375214, + "rewards/margins": 0.6730918884277344, + "rewards/rejected": -0.48607179522514343, + "step": 5295 + }, + { + "epoch": 0.31, + "learning_rate": 8.104379774189922e-08, + "logits/chosen": -2.037858247756958, + "logits/rejected": -2.033860206604004, + "logps/chosen": -60.468772888183594, + "logps/rejected": -268.4707946777344, + "loss": 0.2131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4901328980922699, + "rewards/margins": 2.5851097106933594, + "rewards/rejected": -2.0949769020080566, + "step": 5296 + }, + { + "epoch": 0.31, + "learning_rate": 8.103640961490134e-08, + "logits/chosen": -2.0208582878112793, + "logits/rejected": -2.0062248706817627, + "logps/chosen": -263.51654052734375, + "logps/rejected": -289.6484375, + "loss": 0.7291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7894302606582642, + "rewards/margins": 0.29191434383392334, + "rewards/rejected": -1.0813446044921875, + "step": 5297 + }, + { + "epoch": 0.31, + "learning_rate": 8.102902038533839e-08, + "logits/chosen": -2.0239222049713135, + "logits/rejected": -2.0282747745513916, + "logps/chosen": -238.1011199951172, + "logps/rejected": -315.3323974609375, + "loss": 0.0714, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.752650499343872, + "rewards/margins": 2.2293899059295654, + "rewards/rejected": 0.5232605338096619, + "step": 5298 + }, + { + "epoch": 0.31, + "learning_rate": 8.102163005347285e-08, + "logits/chosen": -1.6215903759002686, + "logits/rejected": -1.6207979917526245, + "logps/chosen": -27.47701072692871, + "logps/rejected": -291.9002990722656, + "loss": 0.4059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008344650268554688, + "rewards/margins": 2.1438307762145996, + "rewards/rejected": -2.142996311187744, + "step": 5299 + }, + { + "epoch": 0.31, + "learning_rate": 8.101423861956727e-08, + "logits/chosen": -1.9440381526947021, + "logits/rejected": -1.9434398412704468, + "logps/chosen": -14.673561096191406, + "logps/rejected": -28.15333366394043, + "loss": 0.6884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15874910354614258, + "rewards/margins": 0.1981169879436493, + "rewards/rejected": -0.35686609148979187, + "step": 5300 + }, + { + "epoch": 0.31, + "learning_rate": 8.100684608388422e-08, + "logits/chosen": -2.163029432296753, + "logits/rejected": -2.1969008445739746, + "logps/chosen": -199.71493530273438, + "logps/rejected": -300.75555419921875, + "loss": 0.1771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5125824213027954, + "rewards/margins": 1.337823510169983, + "rewards/rejected": 0.1747589111328125, + "step": 5301 + }, + { + "epoch": 0.31, + "learning_rate": 8.099945244668634e-08, + "logits/chosen": -2.1707966327667236, + "logits/rejected": -2.1661243438720703, + "logps/chosen": -74.82991027832031, + "logps/rejected": -295.46697998046875, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10706787556409836, + "rewards/margins": 2.0980286598205566, + "rewards/rejected": -1.990960717201233, + "step": 5302 + }, + { + "epoch": 0.31, + "learning_rate": 8.099205770823625e-08, + "logits/chosen": -1.9759619235992432, + "logits/rejected": -1.9722988605499268, + "logps/chosen": -3.631401777267456, + "logps/rejected": -106.60736083984375, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25254273414611816, + "rewards/margins": 1.289769172668457, + "rewards/rejected": -1.5423119068145752, + "step": 5303 + }, + { + "epoch": 0.31, + "learning_rate": 8.098466186879667e-08, + "logits/chosen": -1.9901585578918457, + "logits/rejected": -1.9570919275283813, + "logps/chosen": -241.1614990234375, + "logps/rejected": -345.0640563964844, + "loss": 0.1773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9107544422149658, + "rewards/margins": 1.0970275402069092, + "rewards/rejected": 0.8137268424034119, + "step": 5304 + }, + { + "epoch": 0.31, + "learning_rate": 8.097726492863036e-08, + "logits/chosen": -1.7690659761428833, + "logits/rejected": -1.7131160497665405, + "logps/chosen": -257.39794921875, + "logps/rejected": -383.86431884765625, + "loss": 0.2082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4869415760040283, + "rewards/margins": 1.3050568103790283, + "rewards/rejected": 0.181884765625, + "step": 5305 + }, + { + "epoch": 0.31, + "learning_rate": 8.096986688800007e-08, + "logits/chosen": -1.9908074140548706, + "logits/rejected": -1.9859535694122314, + "logps/chosen": -35.7227783203125, + "logps/rejected": -305.90966796875, + "loss": 0.3671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19108963012695312, + "rewards/margins": 2.1332619190216064, + "rewards/rejected": -1.9421722888946533, + "step": 5306 + }, + { + "epoch": 0.31, + "learning_rate": 8.096246774716862e-08, + "logits/chosen": -2.098315954208374, + "logits/rejected": -2.0986289978027344, + "logps/chosen": -8.463661652058363e-05, + "logps/rejected": -50.856475830078125, + "loss": 0.6563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.680781906543416e-06, + "rewards/margins": 0.12908385694026947, + "rewards/rejected": -0.12908554077148438, + "step": 5307 + }, + { + "epoch": 0.31, + "learning_rate": 8.095506750639885e-08, + "logits/chosen": -2.0930335521698, + "logits/rejected": -2.0742621421813965, + "logps/chosen": -46.806861877441406, + "logps/rejected": -228.85311889648438, + "loss": 0.2743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4802864193916321, + "rewards/margins": 2.3251168727874756, + "rewards/rejected": -1.8448303937911987, + "step": 5308 + }, + { + "epoch": 0.31, + "learning_rate": 8.094766616595367e-08, + "logits/chosen": -2.1675045490264893, + "logits/rejected": -2.167055368423462, + "logps/chosen": -45.568878173828125, + "logps/rejected": -133.09954833984375, + "loss": 0.4739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23377227783203125, + "rewards/margins": 0.7034271359443665, + "rewards/rejected": -0.4696548581123352, + "step": 5309 + }, + { + "epoch": 0.31, + "learning_rate": 8.094026372609601e-08, + "logits/chosen": -2.2566463947296143, + "logits/rejected": -2.2519357204437256, + "logps/chosen": -30.145492553710938, + "logps/rejected": -139.75277709960938, + "loss": 0.928, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.231154441833496, + "rewards/margins": 0.5297106504440308, + "rewards/rejected": -1.7608650922775269, + "step": 5310 + }, + { + "epoch": 0.31, + "learning_rate": 8.093286018708882e-08, + "logits/chosen": -1.878273844718933, + "logits/rejected": -1.9313757419586182, + "logps/chosen": -297.78759765625, + "logps/rejected": -413.86163330078125, + "loss": 0.0834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6944092512130737, + "rewards/margins": 2.2188539505004883, + "rewards/rejected": -0.524444580078125, + "step": 5311 + }, + { + "epoch": 0.31, + "learning_rate": 8.092545554919516e-08, + "logits/chosen": -2.119131565093994, + "logits/rejected": -2.11799955368042, + "logps/chosen": -0.744813084602356, + "logps/rejected": -170.25747680664062, + "loss": 0.6213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027359653264284134, + "rewards/margins": 0.2916831076145172, + "rewards/rejected": -0.2643234431743622, + "step": 5312 + }, + { + "epoch": 0.31, + "learning_rate": 8.091804981267802e-08, + "logits/chosen": -2.087592840194702, + "logits/rejected": -2.084193468093872, + "logps/chosen": -0.0059606763534247875, + "logps/rejected": -99.30841827392578, + "loss": 0.6489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000423133751610294, + "rewards/margins": 0.18588362634181976, + "rewards/rejected": -0.18630675971508026, + "step": 5313 + }, + { + "epoch": 0.31, + "learning_rate": 8.091064297780053e-08, + "logits/chosen": -2.177849531173706, + "logits/rejected": -2.178328514099121, + "logps/chosen": -38.404720306396484, + "logps/rejected": -116.0364990234375, + "loss": 0.4427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1840595304965973, + "rewards/margins": 1.3309017419815063, + "rewards/rejected": -1.1468422412872314, + "step": 5314 + }, + { + "epoch": 0.31, + "learning_rate": 8.090323504482578e-08, + "logits/chosen": -1.9440315961837769, + "logits/rejected": -1.947458028793335, + "logps/chosen": -204.07325744628906, + "logps/rejected": -210.29092407226562, + "loss": 0.4819, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.320613145828247, + "rewards/margins": -0.22279655933380127, + "rewards/rejected": 1.5434097051620483, + "step": 5315 + }, + { + "epoch": 0.31, + "learning_rate": 8.089582601401697e-08, + "logits/chosen": -1.9881353378295898, + "logits/rejected": -1.9879788160324097, + "logps/chosen": -53.344810485839844, + "logps/rejected": -216.27293395996094, + "loss": 0.7129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6763927340507507, + "rewards/margins": 1.0047061443328857, + "rewards/rejected": -1.6810989379882812, + "step": 5316 + }, + { + "epoch": 0.31, + "learning_rate": 8.08884158856373e-08, + "logits/chosen": -2.0023391246795654, + "logits/rejected": -1.987327218055725, + "logps/chosen": -0.08322127908468246, + "logps/rejected": -245.55178833007812, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0057348888367414474, + "rewards/margins": 3.8623926639556885, + "rewards/rejected": -3.8681275844573975, + "step": 5317 + }, + { + "epoch": 0.31, + "learning_rate": 8.088100465995e-08, + "logits/chosen": -1.863685965538025, + "logits/rejected": -1.8782434463500977, + "logps/chosen": -239.14907836914062, + "logps/rejected": -510.8384704589844, + "loss": 0.1101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4603592157363892, + "rewards/margins": 2.1672775745391846, + "rewards/rejected": -0.7069183588027954, + "step": 5318 + }, + { + "epoch": 0.31, + "learning_rate": 8.087359233721835e-08, + "logits/chosen": -1.9222891330718994, + "logits/rejected": -1.9263355731964111, + "logps/chosen": -56.815895080566406, + "logps/rejected": -140.70826721191406, + "loss": 0.5313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028713226318359375, + "rewards/margins": 0.5831947326660156, + "rewards/rejected": -0.5544815063476562, + "step": 5319 + }, + { + "epoch": 0.31, + "learning_rate": 8.086617891770569e-08, + "logits/chosen": -1.8294814825057983, + "logits/rejected": -1.8223676681518555, + "logps/chosen": -254.24728393554688, + "logps/rejected": -418.22894287109375, + "loss": 0.1057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5355865955352783, + "rewards/margins": 2.2651093006134033, + "rewards/rejected": -0.729522705078125, + "step": 5320 + }, + { + "epoch": 0.31, + "learning_rate": 8.085876440167537e-08, + "logits/chosen": -1.9511592388153076, + "logits/rejected": -1.9385833740234375, + "logps/chosen": -77.5003662109375, + "logps/rejected": -323.59576416015625, + "loss": 0.254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7574615478515625, + "rewards/margins": 1.8072540760040283, + "rewards/rejected": -1.0497925281524658, + "step": 5321 + }, + { + "epoch": 0.31, + "learning_rate": 8.08513487893908e-08, + "logits/chosen": -1.846604585647583, + "logits/rejected": -1.8417384624481201, + "logps/chosen": -0.0012126038782298565, + "logps/rejected": -108.95532989501953, + "loss": 0.4132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001011149724945426, + "rewards/margins": 1.8877880573272705, + "rewards/rejected": -1.8878891468048096, + "step": 5322 + }, + { + "epoch": 0.31, + "learning_rate": 8.084393208111539e-08, + "logits/chosen": -2.0603508949279785, + "logits/rejected": -1.9709529876708984, + "logps/chosen": -202.43443298339844, + "logps/rejected": -291.18121337890625, + "loss": 0.225, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.256677269935608, + "rewards/margins": 1.3945496082305908, + "rewards/rejected": -0.13787232339382172, + "step": 5323 + }, + { + "epoch": 0.31, + "learning_rate": 8.083651427711265e-08, + "logits/chosen": -2.043367624282837, + "logits/rejected": -2.0410337448120117, + "logps/chosen": -35.880184173583984, + "logps/rejected": -151.41619873046875, + "loss": 0.4375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38472214341163635, + "rewards/margins": 0.5455131530761719, + "rewards/rejected": -0.16079102456569672, + "step": 5324 + }, + { + "epoch": 0.31, + "learning_rate": 8.082909537764608e-08, + "logits/chosen": -2.0216991901397705, + "logits/rejected": -2.021130084991455, + "logps/chosen": -12.702878952026367, + "logps/rejected": -89.10032653808594, + "loss": 0.4682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011197376064956188, + "rewards/margins": 1.25547456741333, + "rewards/rejected": -1.2442772388458252, + "step": 5325 + }, + { + "epoch": 0.31, + "learning_rate": 8.082167538297926e-08, + "logits/chosen": -2.001171350479126, + "logits/rejected": -2.0052080154418945, + "logps/chosen": -2.041372060775757, + "logps/rejected": -55.64129638671875, + "loss": 0.5945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10299248993396759, + "rewards/margins": 0.26284661889076233, + "rewards/rejected": -0.15985412895679474, + "step": 5326 + }, + { + "epoch": 0.31, + "learning_rate": 8.081425429337574e-08, + "logits/chosen": -2.0720531940460205, + "logits/rejected": -2.068774461746216, + "logps/chosen": -60.91212844848633, + "logps/rejected": -219.72669982910156, + "loss": 0.6252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18860435485839844, + "rewards/margins": 0.22886314988136292, + "rewards/rejected": -0.04025879129767418, + "step": 5327 + }, + { + "epoch": 0.31, + "learning_rate": 8.08068321090992e-08, + "logits/chosen": -2.199367046356201, + "logits/rejected": -2.1393191814422607, + "logps/chosen": -197.04757690429688, + "logps/rejected": -303.2185974121094, + "loss": 0.4735, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2887848615646362, + "rewards/margins": -0.21289968490600586, + "rewards/rejected": 1.501684546470642, + "step": 5328 + }, + { + "epoch": 0.31, + "learning_rate": 8.079940883041328e-08, + "logits/chosen": -1.9145431518554688, + "logits/rejected": -1.9180114269256592, + "logps/chosen": -0.0010470702545717359, + "logps/rejected": -367.55963134765625, + "loss": 0.3429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008832194143906236, + "rewards/margins": 6.273383617401123, + "rewards/rejected": -6.272500514984131, + "step": 5329 + }, + { + "epoch": 0.31, + "learning_rate": 8.079198445758171e-08, + "logits/chosen": -2.0333609580993652, + "logits/rejected": -2.0298314094543457, + "logps/chosen": -182.40966796875, + "logps/rejected": -215.69766235351562, + "loss": 0.2541, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0897506475448608, + "rewards/margins": 1.3580337762832642, + "rewards/rejected": -0.26828309893608093, + "step": 5330 + }, + { + "epoch": 0.31, + "learning_rate": 8.078455899086822e-08, + "logits/chosen": -2.0299150943756104, + "logits/rejected": -2.03210186958313, + "logps/chosen": -240.0385284423828, + "logps/rejected": -305.31719970703125, + "loss": 0.1109, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.049852132797241, + "rewards/margins": 1.7524217367172241, + "rewards/rejected": 0.2974304258823395, + "step": 5331 + }, + { + "epoch": 0.31, + "learning_rate": 8.077713243053661e-08, + "logits/chosen": -2.2031784057617188, + "logits/rejected": -2.1816697120666504, + "logps/chosen": -0.0011705758515745401, + "logps/rejected": -229.8609619140625, + "loss": 0.3589, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.765313497046009e-05, + "rewards/margins": 3.690800905227661, + "rewards/rejected": -3.690753221511841, + "step": 5332 + }, + { + "epoch": 0.31, + "learning_rate": 8.076970477685072e-08, + "logits/chosen": -1.8497933149337769, + "logits/rejected": -1.8379285335540771, + "logps/chosen": -78.37982177734375, + "logps/rejected": -265.66729736328125, + "loss": 0.258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43431854248046875, + "rewards/margins": 2.2833054065704346, + "rewards/rejected": -1.8489868640899658, + "step": 5333 + }, + { + "epoch": 0.31, + "learning_rate": 8.076227603007442e-08, + "logits/chosen": -2.074936628341675, + "logits/rejected": -2.0599186420440674, + "logps/chosen": -0.1098472848534584, + "logps/rejected": -269.1618347167969, + "loss": 0.3684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007287915330380201, + "rewards/margins": 3.5373027324676514, + "rewards/rejected": -3.544590711593628, + "step": 5334 + }, + { + "epoch": 0.31, + "learning_rate": 8.075484619047157e-08, + "logits/chosen": -2.0728096961975098, + "logits/rejected": -2.075269937515259, + "logps/chosen": -6.0795904573751613e-05, + "logps/rejected": -419.33392333984375, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.509949000450433e-07, + "rewards/margins": 7.574186325073242, + "rewards/rejected": -7.574185371398926, + "step": 5335 + }, + { + "epoch": 0.31, + "learning_rate": 8.074741525830616e-08, + "logits/chosen": -1.959497094154358, + "logits/rejected": -1.9733177423477173, + "logps/chosen": -55.55595016479492, + "logps/rejected": -189.25238037109375, + "loss": 0.4197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15737877786159515, + "rewards/margins": 1.057705283164978, + "rewards/rejected": -0.9003265500068665, + "step": 5336 + }, + { + "epoch": 0.31, + "learning_rate": 8.073998323384216e-08, + "logits/chosen": -1.8171550035476685, + "logits/rejected": -1.8206623792648315, + "logps/chosen": -17.04360008239746, + "logps/rejected": -110.99327087402344, + "loss": 0.5669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25180187821388245, + "rewards/margins": 0.8007105588912964, + "rewards/rejected": -1.0525124073028564, + "step": 5337 + }, + { + "epoch": 0.31, + "learning_rate": 8.073255011734358e-08, + "logits/chosen": -2.120744466781616, + "logits/rejected": -2.1175649166107178, + "logps/chosen": -1.1989988088607788, + "logps/rejected": -130.12515258789062, + "loss": 0.609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05363892391324043, + "rewards/margins": 0.4383845329284668, + "rewards/rejected": -0.4920234680175781, + "step": 5338 + }, + { + "epoch": 0.31, + "learning_rate": 8.072511590907451e-08, + "logits/chosen": -1.997828722000122, + "logits/rejected": -1.9920200109481812, + "logps/chosen": -186.2579345703125, + "logps/rejected": -297.2112731933594, + "loss": 0.2476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9669784903526306, + "rewards/margins": 1.572309970855713, + "rewards/rejected": -0.6053314208984375, + "step": 5339 + }, + { + "epoch": 0.31, + "learning_rate": 8.071768060929901e-08, + "logits/chosen": -2.0921213626861572, + "logits/rejected": -2.086264133453369, + "logps/chosen": -57.915771484375, + "logps/rejected": -217.64627075195312, + "loss": 0.522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28729936480522156, + "rewards/margins": 0.29705584049224854, + "rewards/rejected": -0.00975647009909153, + "step": 5340 + }, + { + "epoch": 0.31, + "learning_rate": 8.071024421828125e-08, + "logits/chosen": -1.9924793243408203, + "logits/rejected": -2.012812376022339, + "logps/chosen": -206.04635620117188, + "logps/rejected": -237.95481872558594, + "loss": 0.2271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5219467878341675, + "rewards/margins": 0.8839539289474487, + "rewards/rejected": 0.6379928588867188, + "step": 5341 + }, + { + "epoch": 0.31, + "learning_rate": 8.070280673628538e-08, + "logits/chosen": -1.9462428092956543, + "logits/rejected": -1.9501938819885254, + "logps/chosen": -150.34385681152344, + "logps/rejected": -184.97000122070312, + "loss": 0.3882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2540619373321533, + "rewards/margins": 0.2550445795059204, + "rewards/rejected": 0.9990173578262329, + "step": 5342 + }, + { + "epoch": 0.31, + "learning_rate": 8.069536816357565e-08, + "logits/chosen": -2.199343204498291, + "logits/rejected": -2.2089765071868896, + "logps/chosen": -0.004169884603470564, + "logps/rejected": -131.21615600585938, + "loss": 0.4132, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.655689164996147e-05, + "rewards/margins": 1.9477280378341675, + "rewards/rejected": -1.9476715326309204, + "step": 5343 + }, + { + "epoch": 0.31, + "learning_rate": 8.068792850041628e-08, + "logits/chosen": -1.885231614112854, + "logits/rejected": -1.8815802335739136, + "logps/chosen": -291.179931640625, + "logps/rejected": -303.3355712890625, + "loss": 0.1259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9837158918380737, + "rewards/margins": 1.9647828340530396, + "rewards/rejected": 0.01893310621380806, + "step": 5344 + }, + { + "epoch": 0.31, + "learning_rate": 8.06804877470716e-08, + "logits/chosen": -1.9568225145339966, + "logits/rejected": -1.9202855825424194, + "logps/chosen": -243.84942626953125, + "logps/rejected": -456.79278564453125, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2520172595977783, + "rewards/margins": 4.557055473327637, + "rewards/rejected": -2.3050384521484375, + "step": 5345 + }, + { + "epoch": 0.31, + "learning_rate": 8.067304590380589e-08, + "logits/chosen": -1.8970768451690674, + "logits/rejected": -1.863446831703186, + "logps/chosen": -193.80389404296875, + "logps/rejected": -309.4205322265625, + "loss": 0.1496, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6065338850021362, + "rewards/margins": 1.8999359607696533, + "rewards/rejected": -0.2934021055698395, + "step": 5346 + }, + { + "epoch": 0.31, + "learning_rate": 8.066560297088358e-08, + "logits/chosen": -1.9167191982269287, + "logits/rejected": -1.9013649225234985, + "logps/chosen": -240.27362060546875, + "logps/rejected": -335.6654968261719, + "loss": 0.1392, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2404602766036987, + "rewards/margins": 1.80558180809021, + "rewards/rejected": -0.5651214718818665, + "step": 5347 + }, + { + "epoch": 0.31, + "learning_rate": 8.065815894856902e-08, + "logits/chosen": -1.9696885347366333, + "logits/rejected": -1.9652302265167236, + "logps/chosen": -168.13522338867188, + "logps/rejected": -375.598876953125, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4972595274448395, + "rewards/margins": 1.1620301008224487, + "rewards/rejected": -0.6647705435752869, + "step": 5348 + }, + { + "epoch": 0.31, + "learning_rate": 8.065071383712671e-08, + "logits/chosen": -1.9900344610214233, + "logits/rejected": -1.9968047142028809, + "logps/chosen": -33.44236373901367, + "logps/rejected": -62.85871124267578, + "loss": 0.572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23470650613307953, + "rewards/margins": 0.10437965393066406, + "rewards/rejected": 0.13032685220241547, + "step": 5349 + }, + { + "epoch": 0.31, + "learning_rate": 8.064326763682112e-08, + "logits/chosen": -1.8837863206863403, + "logits/rejected": -1.867725133895874, + "logps/chosen": -198.0685272216797, + "logps/rejected": -359.9902038574219, + "loss": 0.0965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7548843622207642, + "rewards/margins": 2.3134384155273438, + "rewards/rejected": -0.5585541129112244, + "step": 5350 + }, + { + "epoch": 0.31, + "learning_rate": 8.063582034791676e-08, + "logits/chosen": -2.0064988136291504, + "logits/rejected": -2.000683069229126, + "logps/chosen": -46.29388427734375, + "logps/rejected": -170.3452911376953, + "loss": 0.5736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6429829001426697, + "rewards/margins": 2.8501811027526855, + "rewards/rejected": -3.4931640625, + "step": 5351 + }, + { + "epoch": 0.31, + "learning_rate": 8.062837197067822e-08, + "logits/chosen": -2.1163148880004883, + "logits/rejected": -2.115443468093872, + "logps/chosen": -2.5108261108398438, + "logps/rejected": -11.76185131072998, + "loss": 0.6716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04270157963037491, + "rewards/margins": 0.08014044910669327, + "rewards/rejected": -0.03743886947631836, + "step": 5352 + }, + { + "epoch": 0.31, + "learning_rate": 8.062092250537007e-08, + "logits/chosen": -2.1434717178344727, + "logits/rejected": -2.129674196243286, + "logps/chosen": -0.00012909962970297784, + "logps/rejected": -127.94879150390625, + "loss": 0.4837, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.22240622760728e-06, + "rewards/margins": 1.0490103960037231, + "rewards/rejected": -1.0490165948867798, + "step": 5353 + }, + { + "epoch": 0.31, + "learning_rate": 8.061347195225699e-08, + "logits/chosen": -1.9373258352279663, + "logits/rejected": -1.915753722190857, + "logps/chosen": -67.54967498779297, + "logps/rejected": -276.910888671875, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06115264818072319, + "rewards/margins": 3.3167357444763184, + "rewards/rejected": -3.3778884410858154, + "step": 5354 + }, + { + "epoch": 0.31, + "learning_rate": 8.060602031160362e-08, + "logits/chosen": -1.9858438968658447, + "logits/rejected": -1.9525374174118042, + "logps/chosen": -275.6903076171875, + "logps/rejected": -456.09698486328125, + "loss": 0.1864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.607312023639679, + "rewards/margins": 2.307421922683716, + "rewards/rejected": -1.700109839439392, + "step": 5355 + }, + { + "epoch": 0.31, + "learning_rate": 8.05985675836747e-08, + "logits/chosen": -1.9707797765731812, + "logits/rejected": -1.9705051183700562, + "logps/chosen": -41.55286407470703, + "logps/rejected": -249.109130859375, + "loss": 0.5333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49160194396972656, + "rewards/margins": 2.0775678157806396, + "rewards/rejected": -2.569169759750366, + "step": 5356 + }, + { + "epoch": 0.31, + "learning_rate": 8.059111376873499e-08, + "logits/chosen": -2.3083479404449463, + "logits/rejected": -2.3040709495544434, + "logps/chosen": -92.45854187011719, + "logps/rejected": -299.4847106933594, + "loss": 0.4505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09499740600585938, + "rewards/margins": 1.106127142906189, + "rewards/rejected": -1.0111297369003296, + "step": 5357 + }, + { + "epoch": 0.31, + "learning_rate": 8.058365886704928e-08, + "logits/chosen": -1.8880144357681274, + "logits/rejected": -1.892187476158142, + "logps/chosen": -40.32126998901367, + "logps/rejected": -312.8516845703125, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2923629879951477, + "rewards/margins": 1.898909091949463, + "rewards/rejected": -1.6065460443496704, + "step": 5358 + }, + { + "epoch": 0.31, + "learning_rate": 8.057620287888242e-08, + "logits/chosen": -1.8529328107833862, + "logits/rejected": -1.855383276939392, + "logps/chosen": -57.98030090332031, + "logps/rejected": -166.473388671875, + "loss": 0.4255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06127471849322319, + "rewards/margins": 1.168110728263855, + "rewards/rejected": -1.106835961341858, + "step": 5359 + }, + { + "epoch": 0.31, + "learning_rate": 8.056874580449927e-08, + "logits/chosen": -1.8458476066589355, + "logits/rejected": -1.8687175512313843, + "logps/chosen": -286.7544250488281, + "logps/rejected": -573.76220703125, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9737519025802612, + "rewards/margins": 2.745266914367676, + "rewards/rejected": -0.771514892578125, + "step": 5360 + }, + { + "epoch": 0.31, + "learning_rate": 8.056128764416472e-08, + "logits/chosen": -1.9950538873672485, + "logits/rejected": -2.049968957901001, + "logps/chosen": -152.65457153320312, + "logps/rejected": -228.24534606933594, + "loss": 0.7195, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4121841490268707, + "rewards/margins": -0.7328445911407471, + "rewards/rejected": 1.1450287103652954, + "step": 5361 + }, + { + "epoch": 0.31, + "learning_rate": 8.055382839814375e-08, + "logits/chosen": -1.9012620449066162, + "logits/rejected": -1.9155904054641724, + "logps/chosen": -202.0792999267578, + "logps/rejected": -390.06707763671875, + "loss": 0.0741, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.004948377609253, + "rewards/margins": 2.7776076793670654, + "rewards/rejected": -0.7726593017578125, + "step": 5362 + }, + { + "epoch": 0.31, + "learning_rate": 8.054636806670134e-08, + "logits/chosen": -2.004568576812744, + "logits/rejected": -1.999899983406067, + "logps/chosen": -64.06781768798828, + "logps/rejected": -206.86920166015625, + "loss": 0.213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.51717609167099, + "rewards/margins": 2.0425972938537598, + "rewards/rejected": -1.525421142578125, + "step": 5363 + }, + { + "epoch": 0.31, + "learning_rate": 8.05389066501025e-08, + "logits/chosen": -2.165344476699829, + "logits/rejected": -2.1695690155029297, + "logps/chosen": -0.002251499332487583, + "logps/rejected": -51.76209259033203, + "loss": 0.7105, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.8600217774510384e-05, + "rewards/margins": -0.08604279160499573, + "rewards/rejected": 0.08607139438390732, + "step": 5364 + }, + { + "epoch": 0.31, + "learning_rate": 8.053144414861235e-08, + "logits/chosen": -2.002556085586548, + "logits/rejected": -1.9973074197769165, + "logps/chosen": -217.83306884765625, + "logps/rejected": -350.1749267578125, + "loss": 0.1318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5798218250274658, + "rewards/margins": 2.10919189453125, + "rewards/rejected": -0.529370129108429, + "step": 5365 + }, + { + "epoch": 0.31, + "learning_rate": 8.052398056249594e-08, + "logits/chosen": -1.8976507186889648, + "logits/rejected": -1.8354748487472534, + "logps/chosen": -270.2835693359375, + "logps/rejected": -546.8359375, + "loss": 0.1951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4192169904708862, + "rewards/margins": 1.4468780755996704, + "rewards/rejected": -0.02766113355755806, + "step": 5366 + }, + { + "epoch": 0.31, + "learning_rate": 8.051651589201842e-08, + "logits/chosen": -1.8313605785369873, + "logits/rejected": -1.8319900035858154, + "logps/chosen": -18.72624969482422, + "logps/rejected": -202.03977966308594, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24058857560157776, + "rewards/margins": 2.636610746383667, + "rewards/rejected": -2.396022081375122, + "step": 5367 + }, + { + "epoch": 0.31, + "learning_rate": 8.0509050137445e-08, + "logits/chosen": -2.143033266067505, + "logits/rejected": -2.132742404937744, + "logps/chosen": -14.109434127807617, + "logps/rejected": -176.94163513183594, + "loss": 0.6772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.702053427696228, + "rewards/margins": 1.194517970085144, + "rewards/rejected": -1.896571397781372, + "step": 5368 + }, + { + "epoch": 0.31, + "learning_rate": 8.050158329904089e-08, + "logits/chosen": -1.9147056341171265, + "logits/rejected": -1.9183788299560547, + "logps/chosen": -12.92184829711914, + "logps/rejected": -187.2030792236328, + "loss": 0.4526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08399715274572372, + "rewards/margins": 1.6389719247817993, + "rewards/rejected": -1.7229690551757812, + "step": 5369 + }, + { + "epoch": 0.31, + "learning_rate": 8.049411537707133e-08, + "logits/chosen": -1.9033066034317017, + "logits/rejected": -1.9064275026321411, + "logps/chosen": -79.86708068847656, + "logps/rejected": -227.5655517578125, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5758285522460938, + "rewards/margins": 1.4649749994277954, + "rewards/rejected": -0.8891464471817017, + "step": 5370 + }, + { + "epoch": 0.31, + "learning_rate": 8.048664637180163e-08, + "logits/chosen": -2.0487306118011475, + "logits/rejected": -2.06673002243042, + "logps/chosen": -215.25381469726562, + "logps/rejected": -226.96249389648438, + "loss": 0.1636, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.231658935546875, + "rewards/margins": 1.3398864269256592, + "rewards/rejected": 0.891772449016571, + "step": 5371 + }, + { + "epoch": 0.31, + "learning_rate": 8.047917628349712e-08, + "logits/chosen": -2.0173258781433105, + "logits/rejected": -2.013331651687622, + "logps/chosen": -40.89665603637695, + "logps/rejected": -251.8299560546875, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5181255340576172, + "rewards/margins": 1.921275019645691, + "rewards/rejected": -1.4031494855880737, + "step": 5372 + }, + { + "epoch": 0.31, + "learning_rate": 8.047170511242319e-08, + "logits/chosen": -1.8790472745895386, + "logits/rejected": -1.8766764402389526, + "logps/chosen": -8.3778715133667, + "logps/rejected": -257.8722229003906, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0650092139840126, + "rewards/margins": 2.4008917808532715, + "rewards/rejected": -2.3358826637268066, + "step": 5373 + }, + { + "epoch": 0.31, + "learning_rate": 8.046423285884522e-08, + "logits/chosen": -1.8308874368667603, + "logits/rejected": -1.7917506694793701, + "logps/chosen": -281.56939697265625, + "logps/rejected": -406.03424072265625, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.383871555328369, + "rewards/margins": 3.681997776031494, + "rewards/rejected": -1.298126220703125, + "step": 5374 + }, + { + "epoch": 0.31, + "learning_rate": 8.04567595230287e-08, + "logits/chosen": -1.940342664718628, + "logits/rejected": -1.9257864952087402, + "logps/chosen": -143.81568908691406, + "logps/rejected": -445.74273681640625, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5049759149551392, + "rewards/margins": 2.3352553844451904, + "rewards/rejected": -0.830279529094696, + "step": 5375 + }, + { + "epoch": 0.31, + "learning_rate": 8.04492851052391e-08, + "logits/chosen": -1.9408739805221558, + "logits/rejected": -1.9374024868011475, + "logps/chosen": -13.536035537719727, + "logps/rejected": -147.3159637451172, + "loss": 0.9256, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4989950358867645, + "rewards/margins": -0.36205655336380005, + "rewards/rejected": -0.13693848252296448, + "step": 5376 + }, + { + "epoch": 0.31, + "learning_rate": 8.044180960574195e-08, + "logits/chosen": -2.0680580139160156, + "logits/rejected": -2.06030535697937, + "logps/chosen": -73.9992904663086, + "logps/rejected": -166.25039672851562, + "loss": 0.7507, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.044832613319158554, + "rewards/margins": -0.18610002100467682, + "rewards/rejected": 0.14126740396022797, + "step": 5377 + }, + { + "epoch": 0.31, + "learning_rate": 8.043433302480281e-08, + "logits/chosen": -2.1621978282928467, + "logits/rejected": -2.1432533264160156, + "logps/chosen": -13.921126365661621, + "logps/rejected": -248.41632080078125, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07752352207899094, + "rewards/margins": 3.7874996662139893, + "rewards/rejected": -3.7099761962890625, + "step": 5378 + }, + { + "epoch": 0.31, + "learning_rate": 8.042685536268729e-08, + "logits/chosen": -1.923829436302185, + "logits/rejected": -1.9278419017791748, + "logps/chosen": -116.23457336425781, + "logps/rejected": -255.212890625, + "loss": 0.1871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0644057989120483, + "rewards/margins": 1.9156951904296875, + "rewards/rejected": -0.8512893915176392, + "step": 5379 + }, + { + "epoch": 0.31, + "learning_rate": 8.041937661966105e-08, + "logits/chosen": -1.9433172941207886, + "logits/rejected": -1.9409737586975098, + "logps/chosen": -0.0031399475410580635, + "logps/rejected": -110.0119400024414, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00018571782857179642, + "rewards/margins": 2.3674252033233643, + "rewards/rejected": -2.367239475250244, + "step": 5380 + }, + { + "epoch": 0.31, + "learning_rate": 8.041189679598975e-08, + "logits/chosen": -2.1870670318603516, + "logits/rejected": -2.185814619064331, + "logps/chosen": -2.8033313751220703, + "logps/rejected": -133.5462188720703, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007662463467568159, + "rewards/margins": 2.3566112518310547, + "rewards/rejected": -2.3489487171173096, + "step": 5381 + }, + { + "epoch": 0.31, + "learning_rate": 8.04044158919391e-08, + "logits/chosen": -1.9483842849731445, + "logits/rejected": -1.9431076049804688, + "logps/chosen": -158.140380859375, + "logps/rejected": -181.8890380859375, + "loss": 0.5145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8833938837051392, + "rewards/margins": 0.10366058349609375, + "rewards/rejected": 0.7797333002090454, + "step": 5382 + }, + { + "epoch": 0.31, + "learning_rate": 8.039693390777488e-08, + "logits/chosen": -2.0121755599975586, + "logits/rejected": -2.0039656162261963, + "logps/chosen": -4.753663539886475, + "logps/rejected": -228.55706787109375, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0740668773651123, + "rewards/margins": 2.4148812294006348, + "rewards/rejected": -2.3408143520355225, + "step": 5383 + }, + { + "epoch": 0.31, + "learning_rate": 8.038945084376287e-08, + "logits/chosen": -1.888204574584961, + "logits/rejected": -1.8838499784469604, + "logps/chosen": -111.20818328857422, + "logps/rejected": -369.5804443359375, + "loss": 0.2679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9004692435264587, + "rewards/margins": 1.356813907623291, + "rewards/rejected": -0.4563446044921875, + "step": 5384 + }, + { + "epoch": 0.31, + "learning_rate": 8.038196670016893e-08, + "logits/chosen": -2.0678324699401855, + "logits/rejected": -2.0702388286590576, + "logps/chosen": -0.00037844537291675806, + "logps/rejected": -137.11968994140625, + "loss": 0.428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9475211956887506e-05, + "rewards/margins": 1.6347827911376953, + "rewards/rejected": -1.634802222251892, + "step": 5385 + }, + { + "epoch": 0.31, + "learning_rate": 8.037448147725891e-08, + "logits/chosen": -2.01768159866333, + "logits/rejected": -2.026831865310669, + "logps/chosen": -15.822285652160645, + "logps/rejected": -141.63613891601562, + "loss": 0.3613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06698980182409286, + "rewards/margins": 2.7147796154022217, + "rewards/rejected": -2.647789716720581, + "step": 5386 + }, + { + "epoch": 0.31, + "learning_rate": 8.036699517529875e-08, + "logits/chosen": -1.921701192855835, + "logits/rejected": -1.923299789428711, + "logps/chosen": -7.610167980194092, + "logps/rejected": -72.1038818359375, + "loss": 0.5594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13147464394569397, + "rewards/margins": 0.2882533669471741, + "rewards/rejected": -0.1567787230014801, + "step": 5387 + }, + { + "epoch": 0.31, + "learning_rate": 8.035950779455436e-08, + "logits/chosen": -2.0851681232452393, + "logits/rejected": -2.0815536975860596, + "logps/chosen": -19.39002799987793, + "logps/rejected": -153.65408325195312, + "loss": 0.613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2094411849975586, + "rewards/margins": 0.6388391852378845, + "rewards/rejected": -0.8482803702354431, + "step": 5388 + }, + { + "epoch": 0.31, + "learning_rate": 8.035201933529175e-08, + "logits/chosen": -1.8242897987365723, + "logits/rejected": -1.808672308921814, + "logps/chosen": -58.818763732910156, + "logps/rejected": -213.86044311523438, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7355926632881165, + "rewards/margins": 0.965313732624054, + "rewards/rejected": -0.2297210693359375, + "step": 5389 + }, + { + "epoch": 0.31, + "learning_rate": 8.034452979777694e-08, + "logits/chosen": -2.0290913581848145, + "logits/rejected": -1.9651466608047485, + "logps/chosen": -243.07591247558594, + "logps/rejected": -725.42236328125, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.287510633468628, + "rewards/margins": 5.251829624176025, + "rewards/rejected": -2.9643189907073975, + "step": 5390 + }, + { + "epoch": 0.31, + "learning_rate": 8.033703918227602e-08, + "logits/chosen": -2.0599584579467773, + "logits/rejected": -2.0597848892211914, + "logps/chosen": -6.696961402893066, + "logps/rejected": -142.87533569335938, + "loss": 0.4383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0581761859357357, + "rewards/margins": 2.452568292617798, + "rewards/rejected": -2.510744571685791, + "step": 5391 + }, + { + "epoch": 0.31, + "learning_rate": 8.032954748905507e-08, + "logits/chosen": -2.247217893600464, + "logits/rejected": -2.24794602394104, + "logps/chosen": -0.0019293176010251045, + "logps/rejected": -21.563518524169922, + "loss": 0.6963, + "rewards/accuracies": 0.0, + "rewards/chosen": 5.460647116706241e-06, + "rewards/margins": -0.012460589408874512, + "rewards/rejected": 0.012466049753129482, + "step": 5392 + }, + { + "epoch": 0.31, + "learning_rate": 8.032205471838023e-08, + "logits/chosen": -2.0197596549987793, + "logits/rejected": -2.022442102432251, + "logps/chosen": -176.54022216796875, + "logps/rejected": -345.0877685546875, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0953247547149658, + "rewards/margins": 0.5972626209259033, + "rewards/rejected": 0.4980621337890625, + "step": 5393 + }, + { + "epoch": 0.31, + "learning_rate": 8.031456087051768e-08, + "logits/chosen": -1.8595867156982422, + "logits/rejected": -1.8549911975860596, + "logps/chosen": -274.68353271484375, + "logps/rejected": -492.879150390625, + "loss": 0.0499, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4070098400115967, + "rewards/margins": 2.9044249057769775, + "rewards/rejected": -0.497415155172348, + "step": 5394 + }, + { + "epoch": 0.31, + "learning_rate": 8.030706594573365e-08, + "logits/chosen": -2.1358141899108887, + "logits/rejected": -2.114351749420166, + "logps/chosen": -40.135372161865234, + "logps/rejected": -199.84869384765625, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4114864468574524, + "rewards/margins": 2.5824763774871826, + "rewards/rejected": -2.170989990234375, + "step": 5395 + }, + { + "epoch": 0.31, + "learning_rate": 8.029956994429439e-08, + "logits/chosen": -2.0880377292633057, + "logits/rejected": -2.0737576484680176, + "logps/chosen": -218.0516357421875, + "logps/rejected": -415.1999206542969, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2807586193084717, + "rewards/margins": 3.704153537750244, + "rewards/rejected": -1.423394799232483, + "step": 5396 + }, + { + "epoch": 0.31, + "learning_rate": 8.02920728664662e-08, + "logits/chosen": -2.1890130043029785, + "logits/rejected": -2.190295696258545, + "logps/chosen": -17.180986404418945, + "logps/rejected": -190.57177734375, + "loss": 0.5928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5518123507499695, + "rewards/margins": 1.6063480377197266, + "rewards/rejected": -2.158160448074341, + "step": 5397 + }, + { + "epoch": 0.31, + "learning_rate": 8.028457471251539e-08, + "logits/chosen": -2.063375949859619, + "logits/rejected": -2.0587170124053955, + "logps/chosen": -6.890205258969218e-05, + "logps/rejected": -123.88368225097656, + "loss": 0.4787, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.5428840823879e-06, + "rewards/margins": 1.194414734840393, + "rewards/rejected": -1.1944092512130737, + "step": 5398 + }, + { + "epoch": 0.31, + "learning_rate": 8.027707548270836e-08, + "logits/chosen": -1.9838464260101318, + "logits/rejected": -1.9392644166946411, + "logps/chosen": -230.75802612304688, + "logps/rejected": -350.5561218261719, + "loss": 0.1884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.204554796218872, + "rewards/margins": 2.148829698562622, + "rewards/rejected": -0.94427490234375, + "step": 5399 + }, + { + "epoch": 0.31, + "learning_rate": 8.02695751773115e-08, + "logits/chosen": -2.1266801357269287, + "logits/rejected": -2.121732234954834, + "logps/chosen": -0.0007243541185744107, + "logps/rejected": -55.80399703979492, + "loss": 0.5067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0004756152047776e-05, + "rewards/margins": 0.973500669002533, + "rewards/rejected": -0.9735206961631775, + "step": 5400 + }, + { + "epoch": 0.31, + "learning_rate": 8.026207379659127e-08, + "logits/chosen": -2.058397054672241, + "logits/rejected": -2.0589711666107178, + "logps/chosen": -265.32525634765625, + "logps/rejected": -261.88043212890625, + "loss": 0.3921, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.830651879310608, + "rewards/margins": -0.01841127872467041, + "rewards/rejected": 1.8490631580352783, + "step": 5401 + }, + { + "epoch": 0.31, + "learning_rate": 8.025457134081415e-08, + "logits/chosen": -2.207674741744995, + "logits/rejected": -2.200709581375122, + "logps/chosen": -152.41177368164062, + "logps/rejected": -206.6953887939453, + "loss": 0.3921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.119659423828125, + "rewards/margins": 0.1983184814453125, + "rewards/rejected": 0.9213409423828125, + "step": 5402 + }, + { + "epoch": 0.31, + "learning_rate": 8.024706781024667e-08, + "logits/chosen": -2.008673667907715, + "logits/rejected": -2.006513833999634, + "logps/chosen": -24.981115341186523, + "logps/rejected": -320.1333923339844, + "loss": 0.3544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010079765692353249, + "rewards/margins": 2.8980040550231934, + "rewards/rejected": -2.8879241943359375, + "step": 5403 + }, + { + "epoch": 0.31, + "learning_rate": 8.023956320515539e-08, + "logits/chosen": -2.1966333389282227, + "logits/rejected": -2.1889278888702393, + "logps/chosen": -47.33724594116211, + "logps/rejected": -228.455810546875, + "loss": 0.3373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027135848999023438, + "rewards/margins": 3.3433544635772705, + "rewards/rejected": -3.316218614578247, + "step": 5404 + }, + { + "epoch": 0.31, + "learning_rate": 8.023205752580688e-08, + "logits/chosen": -2.0060765743255615, + "logits/rejected": -2.009282112121582, + "logps/chosen": -21.90241813659668, + "logps/rejected": -117.01522064208984, + "loss": 0.5907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036097146570682526, + "rewards/margins": 0.3494325578212738, + "rewards/rejected": -0.3133354187011719, + "step": 5405 + }, + { + "epoch": 0.31, + "learning_rate": 8.022455077246783e-08, + "logits/chosen": -1.9184998273849487, + "logits/rejected": -1.9158071279525757, + "logps/chosen": -49.84814453125, + "logps/rejected": -222.12620544433594, + "loss": 0.3985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6998012661933899, + "rewards/margins": 0.7254253625869751, + "rewards/rejected": -0.02562408521771431, + "step": 5406 + }, + { + "epoch": 0.31, + "learning_rate": 8.02170429454049e-08, + "logits/chosen": -2.0778133869171143, + "logits/rejected": -2.071683168411255, + "logps/chosen": -11.817996978759766, + "logps/rejected": -107.96524047851562, + "loss": 0.7554, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08403339236974716, + "rewards/margins": -0.30542927980422974, + "rewards/rejected": 0.22139587998390198, + "step": 5407 + }, + { + "epoch": 0.31, + "learning_rate": 8.020953404488477e-08, + "logits/chosen": -2.021770715713501, + "logits/rejected": -1.9945547580718994, + "logps/chosen": -47.146236419677734, + "logps/rejected": -392.9949951171875, + "loss": 0.2037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6750187277793884, + "rewards/margins": 2.5048649311065674, + "rewards/rejected": -1.8298462629318237, + "step": 5408 + }, + { + "epoch": 0.31, + "learning_rate": 8.020202407117425e-08, + "logits/chosen": -1.9632593393325806, + "logits/rejected": -1.9624016284942627, + "logps/chosen": -375.29833984375, + "logps/rejected": -527.6973876953125, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.93597412109375, + "rewards/margins": 3.7744507789611816, + "rewards/rejected": -0.8384765982627869, + "step": 5409 + }, + { + "epoch": 0.31, + "learning_rate": 8.019451302454008e-08, + "logits/chosen": -1.8974266052246094, + "logits/rejected": -1.9074026346206665, + "logps/chosen": -145.0186767578125, + "logps/rejected": -256.1869812011719, + "loss": 0.2265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1196777820587158, + "rewards/margins": 2.2553513050079346, + "rewards/rejected": -1.1356735229492188, + "step": 5410 + }, + { + "epoch": 0.31, + "learning_rate": 8.018700090524913e-08, + "logits/chosen": -1.9923272132873535, + "logits/rejected": -2.037508964538574, + "logps/chosen": -128.60833740234375, + "logps/rejected": -297.6609191894531, + "loss": 0.2499, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0241012573242188, + "rewards/margins": 0.9127761721611023, + "rewards/rejected": 0.11132507771253586, + "step": 5411 + }, + { + "epoch": 0.31, + "learning_rate": 8.017948771356823e-08, + "logits/chosen": -1.7052371501922607, + "logits/rejected": -1.7130628824234009, + "logps/chosen": -4.212507724761963, + "logps/rejected": -172.7503662109375, + "loss": 0.5959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02131064049899578, + "rewards/margins": 0.442365825176239, + "rewards/rejected": -0.46367645263671875, + "step": 5412 + }, + { + "epoch": 0.32, + "learning_rate": 8.017197344976431e-08, + "logits/chosen": -1.7960351705551147, + "logits/rejected": -1.7785282135009766, + "logps/chosen": -164.87411499023438, + "logps/rejected": -238.14483642578125, + "loss": 0.4332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.943023681640625, + "rewards/margins": 0.30224305391311646, + "rewards/rejected": 0.6407806277275085, + "step": 5413 + }, + { + "epoch": 0.32, + "learning_rate": 8.016445811410431e-08, + "logits/chosen": -2.083214521408081, + "logits/rejected": -2.0801758766174316, + "logps/chosen": -0.0002556838735472411, + "logps/rejected": -153.84622192382812, + "loss": 0.4133, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2182085760723567e-06, + "rewards/margins": 2.191702365875244, + "rewards/rejected": -2.1917054653167725, + "step": 5414 + }, + { + "epoch": 0.32, + "learning_rate": 8.015694170685518e-08, + "logits/chosen": -1.8197963237762451, + "logits/rejected": -1.7428098917007446, + "logps/chosen": -313.39996337890625, + "logps/rejected": -484.0771484375, + "loss": 0.1457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.67205810546875, + "rewards/margins": 1.8907378911972046, + "rewards/rejected": -0.21867981553077698, + "step": 5415 + }, + { + "epoch": 0.32, + "learning_rate": 8.0149424228284e-08, + "logits/chosen": -1.9960436820983887, + "logits/rejected": -1.9885684251785278, + "logps/chosen": -72.52507019042969, + "logps/rejected": -237.10409545898438, + "loss": 0.4373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5306587219238281, + "rewards/margins": 0.3922325074672699, + "rewards/rejected": 0.13842621445655823, + "step": 5416 + }, + { + "epoch": 0.32, + "learning_rate": 8.014190567865777e-08, + "logits/chosen": -1.750839114189148, + "logits/rejected": -1.6689828634262085, + "logps/chosen": -155.51589965820312, + "logps/rejected": -435.0484313964844, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0583771467208862, + "rewards/margins": 1.2003998756408691, + "rewards/rejected": -0.14202271401882172, + "step": 5417 + }, + { + "epoch": 0.32, + "learning_rate": 8.013438605824363e-08, + "logits/chosen": -1.8518381118774414, + "logits/rejected": -1.8566815853118896, + "logps/chosen": -58.80427932739258, + "logps/rejected": -101.4732894897461, + "loss": 0.5484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13531151413917542, + "rewards/margins": 0.3144809603691101, + "rewards/rejected": -0.1791694611310959, + "step": 5418 + }, + { + "epoch": 0.32, + "learning_rate": 8.012686536730868e-08, + "logits/chosen": -1.9133867025375366, + "logits/rejected": -1.9567738771438599, + "logps/chosen": -173.47183227539062, + "logps/rejected": -289.15081787109375, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6651428937911987, + "rewards/margins": 0.57464599609375, + "rewards/rejected": 1.0904968976974487, + "step": 5419 + }, + { + "epoch": 0.32, + "learning_rate": 8.011934360612012e-08, + "logits/chosen": -2.011650800704956, + "logits/rejected": -2.020275831222534, + "logps/chosen": -147.88169860839844, + "logps/rejected": -261.08209228515625, + "loss": 0.1755, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5361435413360596, + "rewards/margins": 1.2587753534317017, + "rewards/rejected": 0.2773681581020355, + "step": 5420 + }, + { + "epoch": 0.32, + "learning_rate": 8.011182077494514e-08, + "logits/chosen": -2.180131673812866, + "logits/rejected": -2.1883108615875244, + "logps/chosen": -255.22605895996094, + "logps/rejected": -432.4503479003906, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7133010625839233, + "rewards/margins": 3.371568202972412, + "rewards/rejected": -1.6582672595977783, + "step": 5421 + }, + { + "epoch": 0.32, + "learning_rate": 8.010429687405098e-08, + "logits/chosen": -2.0581982135772705, + "logits/rejected": -2.0662825107574463, + "logps/chosen": -2.296616554260254, + "logps/rejected": -90.03223419189453, + "loss": 0.615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05823097378015518, + "rewards/margins": 0.3043483793735504, + "rewards/rejected": -0.362579345703125, + "step": 5422 + }, + { + "epoch": 0.32, + "learning_rate": 8.009677190370495e-08, + "logits/chosen": -2.034649133682251, + "logits/rejected": -2.0340023040771484, + "logps/chosen": -10.104042053222656, + "logps/rejected": -91.8240966796875, + "loss": 0.4583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14512768387794495, + "rewards/margins": 1.0751135349273682, + "rewards/rejected": -0.9299858212471008, + "step": 5423 + }, + { + "epoch": 0.32, + "learning_rate": 8.008924586417436e-08, + "logits/chosen": -1.8780854940414429, + "logits/rejected": -1.8636716604232788, + "logps/chosen": -92.71931457519531, + "logps/rejected": -272.30596923828125, + "loss": 0.1613, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0690521001815796, + "rewards/margins": 2.254202365875244, + "rewards/rejected": -1.185150146484375, + "step": 5424 + }, + { + "epoch": 0.32, + "learning_rate": 8.008171875572658e-08, + "logits/chosen": -1.8817269802093506, + "logits/rejected": -1.872168779373169, + "logps/chosen": -11.957375526428223, + "logps/rejected": -129.58584594726562, + "loss": 0.4457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3381159007549286, + "rewards/margins": 0.900720477104187, + "rewards/rejected": -0.562604546546936, + "step": 5425 + }, + { + "epoch": 0.32, + "learning_rate": 8.007419057862899e-08, + "logits/chosen": -2.051340103149414, + "logits/rejected": -2.0898163318634033, + "logps/chosen": -183.35292053222656, + "logps/rejected": -295.1446533203125, + "loss": 0.2611, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4504013061523438, + "rewards/margins": 0.7417587041854858, + "rewards/rejected": 0.7086426019668579, + "step": 5426 + }, + { + "epoch": 0.32, + "learning_rate": 8.006666133314906e-08, + "logits/chosen": -2.1096932888031006, + "logits/rejected": -2.1008620262145996, + "logps/chosen": -57.974952697753906, + "logps/rejected": -183.80743408203125, + "loss": 0.4994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10031738132238388, + "rewards/margins": 0.5434845089912415, + "rewards/rejected": -0.443167120218277, + "step": 5427 + }, + { + "epoch": 0.32, + "learning_rate": 8.005913101955423e-08, + "logits/chosen": -2.1140103340148926, + "logits/rejected": -2.1916658878326416, + "logps/chosen": -356.24273681640625, + "logps/rejected": -318.737060546875, + "loss": 0.2096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3060548305511475, + "rewards/margins": 0.8188416957855225, + "rewards/rejected": 1.487213134765625, + "step": 5428 + }, + { + "epoch": 0.32, + "learning_rate": 8.005159963811205e-08, + "logits/chosen": -1.9831154346466064, + "logits/rejected": -1.97931706905365, + "logps/chosen": -179.28106689453125, + "logps/rejected": -333.46435546875, + "loss": 0.1764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5318145751953125, + "rewards/margins": 1.4346526861190796, + "rewards/rejected": 0.09716186672449112, + "step": 5429 + }, + { + "epoch": 0.32, + "learning_rate": 8.004406718909005e-08, + "logits/chosen": -1.9802359342575073, + "logits/rejected": -1.9769630432128906, + "logps/chosen": -1.1842496395111084, + "logps/rejected": -140.132568359375, + "loss": 0.4508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008626937866210938, + "rewards/margins": 1.2854655981063843, + "rewards/rejected": -1.2940925359725952, + "step": 5430 + }, + { + "epoch": 0.32, + "learning_rate": 8.003653367275582e-08, + "logits/chosen": -2.0350937843322754, + "logits/rejected": -2.0244407653808594, + "logps/chosen": -160.3411865234375, + "logps/rejected": -291.2123718261719, + "loss": 0.5722, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.4184296131134033, + "rewards/margins": -0.5215362310409546, + "rewards/rejected": 1.939965844154358, + "step": 5431 + }, + { + "epoch": 0.32, + "learning_rate": 8.002899908937698e-08, + "logits/chosen": -1.8801498413085938, + "logits/rejected": -1.873268485069275, + "logps/chosen": -272.1820068359375, + "logps/rejected": -422.63037109375, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9749878644943237, + "rewards/margins": 2.451080322265625, + "rewards/rejected": -0.47609254717826843, + "step": 5432 + }, + { + "epoch": 0.32, + "learning_rate": 8.00214634392212e-08, + "logits/chosen": -2.0182406902313232, + "logits/rejected": -2.0060081481933594, + "logps/chosen": -273.4949645996094, + "logps/rejected": -319.5574035644531, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.049774169921875, + "rewards/margins": 0.8044403195381165, + "rewards/rejected": 0.24533386528491974, + "step": 5433 + }, + { + "epoch": 0.32, + "learning_rate": 8.00139267225562e-08, + "logits/chosen": -2.151750087738037, + "logits/rejected": -2.122718095779419, + "logps/chosen": -128.9329833984375, + "logps/rejected": -369.2349853515625, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7722153067588806, + "rewards/margins": 0.7119461297988892, + "rewards/rejected": 0.06026916578412056, + "step": 5434 + }, + { + "epoch": 0.32, + "learning_rate": 8.00063889396497e-08, + "logits/chosen": -1.9796440601348877, + "logits/rejected": -1.9823551177978516, + "logps/chosen": -166.14483642578125, + "logps/rejected": -386.85211181640625, + "loss": 0.1482, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2038863897323608, + "rewards/margins": 1.5943100452423096, + "rewards/rejected": -0.39042359590530396, + "step": 5435 + }, + { + "epoch": 0.32, + "learning_rate": 7.99988500907695e-08, + "logits/chosen": -2.0719432830810547, + "logits/rejected": -2.058595657348633, + "logps/chosen": -3.7860631942749023, + "logps/rejected": -116.13821411132812, + "loss": 0.4961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02306680753827095, + "rewards/margins": 1.0056977272033691, + "rewards/rejected": -0.9826309084892273, + "step": 5436 + }, + { + "epoch": 0.32, + "learning_rate": 7.999131017618341e-08, + "logits/chosen": -2.021038293838501, + "logits/rejected": -2.020085334777832, + "logps/chosen": -216.70858764648438, + "logps/rejected": -303.66790771484375, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2721236944198608, + "rewards/margins": 0.6955611705780029, + "rewards/rejected": 0.5765625238418579, + "step": 5437 + }, + { + "epoch": 0.32, + "learning_rate": 7.998376919615927e-08, + "logits/chosen": -2.1032142639160156, + "logits/rejected": -2.0918214321136475, + "logps/chosen": -24.27553939819336, + "logps/rejected": -292.25, + "loss": 0.365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03170204162597656, + "rewards/margins": 2.915304183959961, + "rewards/rejected": -2.9470062255859375, + "step": 5438 + }, + { + "epoch": 0.32, + "learning_rate": 7.997622715096497e-08, + "logits/chosen": -2.0201449394226074, + "logits/rejected": -1.9922239780426025, + "logps/chosen": -249.75526428222656, + "logps/rejected": -388.251220703125, + "loss": 0.1396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7935227155685425, + "rewards/margins": 2.1143388748168945, + "rewards/rejected": -0.3208160400390625, + "step": 5439 + }, + { + "epoch": 0.32, + "learning_rate": 7.996868404086846e-08, + "logits/chosen": -2.0486464500427246, + "logits/rejected": -2.0420219898223877, + "logps/chosen": -1.8052749633789062, + "logps/rejected": -206.88023376464844, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.804134277947014e-06, + "rewards/margins": 3.3179807662963867, + "rewards/rejected": -3.3179855346679688, + "step": 5440 + }, + { + "epoch": 0.32, + "learning_rate": 7.996113986613771e-08, + "logits/chosen": -1.8713281154632568, + "logits/rejected": -1.8624101877212524, + "logps/chosen": -3.7669684388674796e-05, + "logps/rejected": -190.2657470703125, + "loss": 0.43, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.576198821519938e-07, + "rewards/margins": 1.7059811353683472, + "rewards/rejected": -1.7059814929962158, + "step": 5441 + }, + { + "epoch": 0.32, + "learning_rate": 7.99535946270407e-08, + "logits/chosen": -2.1121726036071777, + "logits/rejected": -2.0981802940368652, + "logps/chosen": -212.74952697753906, + "logps/rejected": -443.98516845703125, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9130294919013977, + "rewards/margins": 0.7049911618232727, + "rewards/rejected": 0.208038330078125, + "step": 5442 + }, + { + "epoch": 0.32, + "learning_rate": 7.99460483238455e-08, + "logits/chosen": -2.0779619216918945, + "logits/rejected": -2.0714023113250732, + "logps/chosen": -39.583770751953125, + "logps/rejected": -172.1643524169922, + "loss": 0.7638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5966469049453735, + "rewards/margins": 0.31335675716400146, + "rewards/rejected": -0.910003662109375, + "step": 5443 + }, + { + "epoch": 0.32, + "learning_rate": 7.993850095682018e-08, + "logits/chosen": -1.8865660429000854, + "logits/rejected": -1.883783221244812, + "logps/chosen": -9.226672409567982e-05, + "logps/rejected": -297.42620849609375, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.15219385938326e-08, + "rewards/margins": 4.615642070770264, + "rewards/rejected": -4.615642070770264, + "step": 5444 + }, + { + "epoch": 0.32, + "learning_rate": 7.993095252623286e-08, + "logits/chosen": -2.1138885021209717, + "logits/rejected": -2.107465982437134, + "logps/chosen": -7.001626491546631, + "logps/rejected": -124.7715072631836, + "loss": 0.5361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06245565414428711, + "rewards/margins": 0.6003150343894958, + "rewards/rejected": -0.5378593802452087, + "step": 5445 + }, + { + "epoch": 0.32, + "learning_rate": 7.99234030323517e-08, + "logits/chosen": -1.8910566568374634, + "logits/rejected": -1.89385187625885, + "logps/chosen": -81.35850524902344, + "logps/rejected": -180.0130615234375, + "loss": 0.2815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9961342215538025, + "rewards/margins": 1.3233604431152344, + "rewards/rejected": -0.3272262513637543, + "step": 5446 + }, + { + "epoch": 0.32, + "learning_rate": 7.991585247544489e-08, + "logits/chosen": -2.072671890258789, + "logits/rejected": -2.0635623931884766, + "logps/chosen": -323.178466796875, + "logps/rejected": -425.4762268066406, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8022186756134033, + "rewards/margins": 3.1295533180236816, + "rewards/rejected": -0.32733461260795593, + "step": 5447 + }, + { + "epoch": 0.32, + "learning_rate": 7.990830085578066e-08, + "logits/chosen": -2.138838052749634, + "logits/rejected": -2.1260249614715576, + "logps/chosen": -48.491859436035156, + "logps/rejected": -360.085693359375, + "loss": 0.3362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04274940490722656, + "rewards/margins": 3.6193783283233643, + "rewards/rejected": -3.662127733230591, + "step": 5448 + }, + { + "epoch": 0.32, + "learning_rate": 7.99007481736273e-08, + "logits/chosen": -2.139357805252075, + "logits/rejected": -2.1870555877685547, + "logps/chosen": -318.5427551269531, + "logps/rejected": -464.6499938964844, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9879578351974487, + "rewards/margins": 2.089016914367676, + "rewards/rejected": -0.1010589599609375, + "step": 5449 + }, + { + "epoch": 0.32, + "learning_rate": 7.98931944292531e-08, + "logits/chosen": -2.078848361968994, + "logits/rejected": -2.0643022060394287, + "logps/chosen": -58.582733154296875, + "logps/rejected": -158.70458984375, + "loss": 0.5123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42208176851272583, + "rewards/margins": 0.3053581416606903, + "rewards/rejected": 0.11672363430261612, + "step": 5450 + }, + { + "epoch": 0.32, + "learning_rate": 7.98856396229264e-08, + "logits/chosen": -1.955481767654419, + "logits/rejected": -1.9538309574127197, + "logps/chosen": -183.3341064453125, + "logps/rejected": -272.6770324707031, + "loss": 0.108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3056793212890625, + "rewards/margins": 2.396646022796631, + "rewards/rejected": -1.090966820716858, + "step": 5451 + }, + { + "epoch": 0.32, + "learning_rate": 7.98780837549156e-08, + "logits/chosen": -2.080718517303467, + "logits/rejected": -2.0866456031799316, + "logps/chosen": -205.79498291015625, + "logps/rejected": -304.74542236328125, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3648712635040283, + "rewards/margins": 0.2514312267303467, + "rewards/rejected": 2.1134400367736816, + "step": 5452 + }, + { + "epoch": 0.32, + "learning_rate": 7.987052682548914e-08, + "logits/chosen": -2.0442872047424316, + "logits/rejected": -2.039111614227295, + "logps/chosen": -0.0019320951541885734, + "logps/rejected": -149.42672729492188, + "loss": 0.43, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.474696419900283e-05, + "rewards/margins": 1.7068262100219727, + "rewards/rejected": -1.7068909406661987, + "step": 5453 + }, + { + "epoch": 0.32, + "learning_rate": 7.986296883491542e-08, + "logits/chosen": -2.0104689598083496, + "logits/rejected": -2.0127835273742676, + "logps/chosen": -2.3843283653259277, + "logps/rejected": -121.95307159423828, + "loss": 0.5851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013687443919479847, + "rewards/margins": 0.509959876537323, + "rewards/rejected": -0.5236473083496094, + "step": 5454 + }, + { + "epoch": 0.32, + "learning_rate": 7.985540978346299e-08, + "logits/chosen": -1.8890050649642944, + "logits/rejected": -1.890127420425415, + "logps/chosen": -292.23236083984375, + "logps/rejected": -385.13983154296875, + "loss": 0.0359, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0482118129730225, + "rewards/margins": 4.135183811187744, + "rewards/rejected": -2.0869719982147217, + "step": 5455 + }, + { + "epoch": 0.32, + "learning_rate": 7.984784967140036e-08, + "logits/chosen": -2.006464719772339, + "logits/rejected": -2.009989023208618, + "logps/chosen": -94.40083312988281, + "logps/rejected": -166.5946502685547, + "loss": 0.7828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6867912411689758, + "rewards/margins": 0.6636131405830383, + "rewards/rejected": -1.3504043817520142, + "step": 5456 + }, + { + "epoch": 0.32, + "learning_rate": 7.984028849899611e-08, + "logits/chosen": -2.0842809677124023, + "logits/rejected": -2.083524703979492, + "logps/chosen": -0.019824188202619553, + "logps/rejected": -104.68667602539062, + "loss": 0.5068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00025112219736911356, + "rewards/margins": 0.9730569124221802, + "rewards/rejected": -0.9728057980537415, + "step": 5457 + }, + { + "epoch": 0.32, + "learning_rate": 7.983272626651885e-08, + "logits/chosen": -2.0532217025756836, + "logits/rejected": -2.0103089809417725, + "logps/chosen": -196.13790893554688, + "logps/rejected": -513.929443359375, + "loss": 0.1903, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.715216040611267, + "rewards/margins": 1.074468970298767, + "rewards/rejected": 0.6407470703125, + "step": 5458 + }, + { + "epoch": 0.32, + "learning_rate": 7.982516297423721e-08, + "logits/chosen": -2.0676004886627197, + "logits/rejected": -2.058856964111328, + "logps/chosen": -193.63818359375, + "logps/rejected": -308.1009521484375, + "loss": 0.1225, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.76641845703125, + "rewards/margins": 1.7933533191680908, + "rewards/rejected": -0.02693481557071209, + "step": 5459 + }, + { + "epoch": 0.32, + "learning_rate": 7.98175986224199e-08, + "logits/chosen": -1.9565662145614624, + "logits/rejected": -1.949668526649475, + "logps/chosen": -23.489458084106445, + "logps/rejected": -151.2770538330078, + "loss": 0.4133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19592495262622833, + "rewards/margins": 1.160140037536621, + "rewards/rejected": -0.964215099811554, + "step": 5460 + }, + { + "epoch": 0.32, + "learning_rate": 7.981003321133563e-08, + "logits/chosen": -1.9735056161880493, + "logits/rejected": -1.9917941093444824, + "logps/chosen": -222.03604125976562, + "logps/rejected": -406.284912109375, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.519274950027466, + "rewards/margins": 4.528146743774414, + "rewards/rejected": -2.008871555328369, + "step": 5461 + }, + { + "epoch": 0.32, + "learning_rate": 7.980246674125318e-08, + "logits/chosen": -2.0719544887542725, + "logits/rejected": -2.067147970199585, + "logps/chosen": -0.0008171480149030685, + "logps/rejected": -177.1805877685547, + "loss": 0.4332, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.797002449980937e-05, + "rewards/margins": 1.6367371082305908, + "rewards/rejected": -1.6367950439453125, + "step": 5462 + }, + { + "epoch": 0.32, + "learning_rate": 7.979489921244133e-08, + "logits/chosen": -1.91212797164917, + "logits/rejected": -1.912876844406128, + "logps/chosen": -164.7399139404297, + "logps/rejected": -268.7174987792969, + "loss": 0.1961, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1484360694885254, + "rewards/margins": 0.9139053821563721, + "rewards/rejected": 1.2345306873321533, + "step": 5463 + }, + { + "epoch": 0.32, + "learning_rate": 7.97873306251689e-08, + "logits/chosen": -1.8572310209274292, + "logits/rejected": -1.856723666191101, + "logps/chosen": -220.8023681640625, + "logps/rejected": -423.2936706542969, + "loss": 0.0778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.901753306388855, + "rewards/margins": 2.1843948364257812, + "rewards/rejected": -0.28264161944389343, + "step": 5464 + }, + { + "epoch": 0.32, + "learning_rate": 7.97797609797048e-08, + "logits/chosen": -2.0120887756347656, + "logits/rejected": -2.008467674255371, + "logps/chosen": -206.85699462890625, + "logps/rejected": -304.12677001953125, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4012908935546875, + "rewards/margins": 0.6099914312362671, + "rewards/rejected": 0.7912994623184204, + "step": 5465 + }, + { + "epoch": 0.32, + "learning_rate": 7.977219027631789e-08, + "logits/chosen": -1.8897974491119385, + "logits/rejected": -1.8765685558319092, + "logps/chosen": -100.22972869873047, + "logps/rejected": -378.6684265136719, + "loss": 0.3488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13451842963695526, + "rewards/margins": 3.086538791656494, + "rewards/rejected": -3.221057176589966, + "step": 5466 + }, + { + "epoch": 0.32, + "learning_rate": 7.976461851527718e-08, + "logits/chosen": -2.211642026901245, + "logits/rejected": -2.2091429233551025, + "logps/chosen": -179.6531982421875, + "logps/rejected": -467.0286865234375, + "loss": 0.0835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8487457036972046, + "rewards/margins": 2.222311496734619, + "rewards/rejected": -0.373565673828125, + "step": 5467 + }, + { + "epoch": 0.32, + "learning_rate": 7.975704569685162e-08, + "logits/chosen": -1.9622609615325928, + "logits/rejected": -1.9381625652313232, + "logps/chosen": -249.0198974609375, + "logps/rejected": -517.7998657226562, + "loss": 0.1276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0371582508087158, + "rewards/margins": 2.71417236328125, + "rewards/rejected": -1.6770142316818237, + "step": 5468 + }, + { + "epoch": 0.32, + "learning_rate": 7.974947182131022e-08, + "logits/chosen": -2.1190738677978516, + "logits/rejected": -2.114917039871216, + "logps/chosen": -45.78622817993164, + "logps/rejected": -101.13296508789062, + "loss": 0.9615, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.022787094116211, + "rewards/margins": 0.19992411136627197, + "rewards/rejected": -1.222711205482483, + "step": 5469 + }, + { + "epoch": 0.32, + "learning_rate": 7.974189688892208e-08, + "logits/chosen": -2.1585943698883057, + "logits/rejected": -2.1539509296417236, + "logps/chosen": -0.6160789728164673, + "logps/rejected": -68.89976501464844, + "loss": 0.5714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018496591597795486, + "rewards/margins": 0.6013360619544983, + "rewards/rejected": -0.6198326349258423, + "step": 5470 + }, + { + "epoch": 0.32, + "learning_rate": 7.973432089995628e-08, + "logits/chosen": -2.1779277324676514, + "logits/rejected": -2.1803462505340576, + "logps/chosen": -0.0555102676153183, + "logps/rejected": -71.26642608642578, + "loss": 0.5724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0015290193259716034, + "rewards/margins": 0.43936851620674133, + "rewards/rejected": -0.4378395080566406, + "step": 5471 + }, + { + "epoch": 0.32, + "learning_rate": 7.972674385468195e-08, + "logits/chosen": -1.9027595520019531, + "logits/rejected": -1.8984084129333496, + "logps/chosen": -9.123554229736328, + "logps/rejected": -56.077606201171875, + "loss": 0.5693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019040679559111595, + "rewards/margins": 0.4991268217563629, + "rewards/rejected": -0.48008614778518677, + "step": 5472 + }, + { + "epoch": 0.32, + "learning_rate": 7.971916575336827e-08, + "logits/chosen": -1.9223926067352295, + "logits/rejected": -1.907444953918457, + "logps/chosen": -171.1526641845703, + "logps/rejected": -226.8009033203125, + "loss": 0.1144, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.078744649887085, + "rewards/margins": 2.035763740539551, + "rewards/rejected": 0.04298095777630806, + "step": 5473 + }, + { + "epoch": 0.32, + "learning_rate": 7.971158659628445e-08, + "logits/chosen": -1.9338923692703247, + "logits/rejected": -1.9299134016036987, + "logps/chosen": -37.89763641357422, + "logps/rejected": -203.12710571289062, + "loss": 0.3394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3586280941963196, + "rewards/margins": 1.8205316066741943, + "rewards/rejected": -1.46190345287323, + "step": 5474 + }, + { + "epoch": 0.32, + "learning_rate": 7.970400638369974e-08, + "logits/chosen": -2.0368704795837402, + "logits/rejected": -2.011169672012329, + "logps/chosen": -123.56816864013672, + "logps/rejected": -210.46832275390625, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0059700012207031, + "rewards/margins": 0.6905540227890015, + "rewards/rejected": 0.3154159486293793, + "step": 5475 + }, + { + "epoch": 0.32, + "learning_rate": 7.969642511588343e-08, + "logits/chosen": -1.9028335809707642, + "logits/rejected": -1.894168496131897, + "logps/chosen": -161.05075073242188, + "logps/rejected": -219.37326049804688, + "loss": 0.4159, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3908599615097046, + "rewards/margins": -0.03983163833618164, + "rewards/rejected": 1.4306915998458862, + "step": 5476 + }, + { + "epoch": 0.32, + "learning_rate": 7.968884279310484e-08, + "logits/chosen": -1.907633900642395, + "logits/rejected": -1.8938757181167603, + "logps/chosen": -228.776611328125, + "logps/rejected": -203.62258911132812, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8170532584190369, + "rewards/margins": 1.0131927728652954, + "rewards/rejected": -0.19613952934741974, + "step": 5477 + }, + { + "epoch": 0.32, + "learning_rate": 7.968125941563332e-08, + "logits/chosen": -2.0478103160858154, + "logits/rejected": -1.945837140083313, + "logps/chosen": -252.9212188720703, + "logps/rejected": -599.505859375, + "loss": 0.2216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5083816647529602, + "rewards/margins": 2.709162950515747, + "rewards/rejected": -2.2007813453674316, + "step": 5478 + }, + { + "epoch": 0.32, + "learning_rate": 7.967367498373827e-08, + "logits/chosen": -1.8313114643096924, + "logits/rejected": -1.8166043758392334, + "logps/chosen": -201.08929443359375, + "logps/rejected": -355.33575439453125, + "loss": 0.2714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46255189180374146, + "rewards/margins": 1.5050506591796875, + "rewards/rejected": -1.0424988269805908, + "step": 5479 + }, + { + "epoch": 0.32, + "learning_rate": 7.966608949768916e-08, + "logits/chosen": -1.8782516717910767, + "logits/rejected": -1.878225564956665, + "logps/chosen": -150.9268798828125, + "logps/rejected": -260.2692565917969, + "loss": 0.247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1747772693634033, + "rewards/margins": 0.9960907697677612, + "rewards/rejected": 0.17868652939796448, + "step": 5480 + }, + { + "epoch": 0.32, + "learning_rate": 7.965850295775542e-08, + "logits/chosen": -2.0338714122772217, + "logits/rejected": -2.040371894836426, + "logps/chosen": -154.8123779296875, + "logps/rejected": -230.73214721679688, + "loss": 1.2261, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3387787342071533, + "rewards/margins": -0.4419128894805908, + "rewards/rejected": -0.8968658447265625, + "step": 5481 + }, + { + "epoch": 0.32, + "learning_rate": 7.965091536420658e-08, + "logits/chosen": -2.094370126724243, + "logits/rejected": -2.0865328311920166, + "logps/chosen": -20.98237419128418, + "logps/rejected": -168.2481689453125, + "loss": 0.4875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00650444021448493, + "rewards/margins": 1.33611261844635, + "rewards/rejected": -1.3296082019805908, + "step": 5482 + }, + { + "epoch": 0.32, + "learning_rate": 7.964332671731219e-08, + "logits/chosen": -2.0558059215545654, + "logits/rejected": -2.0573983192443848, + "logps/chosen": -56.19148254394531, + "logps/rejected": -142.79742431640625, + "loss": 0.4438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23554496467113495, + "rewards/margins": 2.356663227081299, + "rewards/rejected": -2.59220814704895, + "step": 5483 + }, + { + "epoch": 0.32, + "learning_rate": 7.963573701734185e-08, + "logits/chosen": -2.1524062156677246, + "logits/rejected": -2.1519362926483154, + "logps/chosen": -0.18366064131259918, + "logps/rejected": -22.74776840209961, + "loss": 0.6992, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.020590877160429955, + "rewards/margins": -0.04623613506555557, + "rewards/rejected": 0.06682701408863068, + "step": 5484 + }, + { + "epoch": 0.32, + "learning_rate": 7.962814626456516e-08, + "logits/chosen": -2.112973213195801, + "logits/rejected": -2.1154556274414062, + "logps/chosen": -0.2813529372215271, + "logps/rejected": -270.9554443359375, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022639011964201927, + "rewards/margins": 5.261122703552246, + "rewards/rejected": -5.283761501312256, + "step": 5485 + }, + { + "epoch": 0.32, + "learning_rate": 7.962055445925175e-08, + "logits/chosen": -2.099778413772583, + "logits/rejected": -2.092541456222534, + "logps/chosen": -159.0563507080078, + "logps/rejected": -303.1883239746094, + "loss": 0.381, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3256195783615112, + "rewards/margins": 0.2111968994140625, + "rewards/rejected": 1.1144226789474487, + "step": 5486 + }, + { + "epoch": 0.32, + "learning_rate": 7.961296160167141e-08, + "logits/chosen": -1.9379013776779175, + "logits/rejected": -1.9221034049987793, + "logps/chosen": -80.84098815917969, + "logps/rejected": -287.5069274902344, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3773612976074219, + "rewards/margins": 1.3050682544708252, + "rewards/rejected": -0.9277068972587585, + "step": 5487 + }, + { + "epoch": 0.32, + "learning_rate": 7.960536769209378e-08, + "logits/chosen": -1.9861788749694824, + "logits/rejected": -1.9814773797988892, + "logps/chosen": -4.2869415283203125, + "logps/rejected": -315.10321044921875, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0233030803501606, + "rewards/margins": 3.49678635597229, + "rewards/rejected": -3.4734833240509033, + "step": 5488 + }, + { + "epoch": 0.32, + "learning_rate": 7.95977727307887e-08, + "logits/chosen": -2.1396687030792236, + "logits/rejected": -2.1323142051696777, + "logps/chosen": -61.933067321777344, + "logps/rejected": -222.620849609375, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20544281601905823, + "rewards/margins": 4.0455427169799805, + "rewards/rejected": -3.840100049972534, + "step": 5489 + }, + { + "epoch": 0.32, + "learning_rate": 7.959017671802594e-08, + "logits/chosen": -2.1298458576202393, + "logits/rejected": -2.1221415996551514, + "logps/chosen": -2.9617650508880615, + "logps/rejected": -138.2440185546875, + "loss": 0.6095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.043769191950559616, + "rewards/margins": 0.33252933621406555, + "rewards/rejected": -0.3762985169887543, + "step": 5490 + }, + { + "epoch": 0.32, + "learning_rate": 7.958257965407537e-08, + "logits/chosen": -2.0718283653259277, + "logits/rejected": -2.058347225189209, + "logps/chosen": -25.458209991455078, + "logps/rejected": -279.134521484375, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3334510922431946, + "rewards/margins": 2.5574042797088623, + "rewards/rejected": -2.2239532470703125, + "step": 5491 + }, + { + "epoch": 0.32, + "learning_rate": 7.957498153920687e-08, + "logits/chosen": -1.9602675437927246, + "logits/rejected": -1.965907335281372, + "logps/chosen": -86.68930053710938, + "logps/rejected": -270.9062805175781, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09705810993909836, + "rewards/margins": 4.3084821701049805, + "rewards/rejected": -4.405540466308594, + "step": 5492 + }, + { + "epoch": 0.32, + "learning_rate": 7.956738237369037e-08, + "logits/chosen": -2.1015377044677734, + "logits/rejected": -2.1050925254821777, + "logps/chosen": -21.600486755371094, + "logps/rejected": -149.31251525878906, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12798576056957245, + "rewards/margins": 1.807003378868103, + "rewards/rejected": -1.6790176630020142, + "step": 5493 + }, + { + "epoch": 0.32, + "learning_rate": 7.95597821577958e-08, + "logits/chosen": -2.2168025970458984, + "logits/rejected": -2.2122862339019775, + "logps/chosen": -29.2569637298584, + "logps/rejected": -132.8152313232422, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16082763671875, + "rewards/margins": 0.27344971895217896, + "rewards/rejected": -0.11262207478284836, + "step": 5494 + }, + { + "epoch": 0.32, + "learning_rate": 7.955218089179317e-08, + "logits/chosen": -2.136983871459961, + "logits/rejected": -2.141153573989868, + "logps/chosen": -0.00017285003559663892, + "logps/rejected": -43.412254333496094, + "loss": 0.6521, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962501603562487e-08, + "rewards/margins": 0.17080263793468475, + "rewards/rejected": -0.17080269753932953, + "step": 5495 + }, + { + "epoch": 0.32, + "learning_rate": 7.954457857595254e-08, + "logits/chosen": -1.7447110414505005, + "logits/rejected": -1.7203013896942139, + "logps/chosen": -255.50393676757812, + "logps/rejected": -604.5784912109375, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6811920404434204, + "rewards/margins": 3.0723297595977783, + "rewards/rejected": -1.391137719154358, + "step": 5496 + }, + { + "epoch": 0.32, + "learning_rate": 7.953697521054394e-08, + "logits/chosen": -1.8380047082901, + "logits/rejected": -1.8543317317962646, + "logps/chosen": -233.65625, + "logps/rejected": -370.93572998046875, + "loss": 0.0967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.851226806640625, + "rewards/margins": 2.304360866546631, + "rewards/rejected": -0.453134149312973, + "step": 5497 + }, + { + "epoch": 0.32, + "learning_rate": 7.95293707958375e-08, + "logits/chosen": -2.0815634727478027, + "logits/rejected": -2.060009717941284, + "logps/chosen": -107.21675109863281, + "logps/rejected": -261.79052734375, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.256201148033142, + "rewards/margins": 0.6038879156112671, + "rewards/rejected": 0.652313232421875, + "step": 5498 + }, + { + "epoch": 0.32, + "learning_rate": 7.952176533210338e-08, + "logits/chosen": -2.0075299739837646, + "logits/rejected": -1.9429875612258911, + "logps/chosen": -256.74908447265625, + "logps/rejected": -571.7098388671875, + "loss": 0.0804, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.154071092605591, + "rewards/margins": 2.5415101051330566, + "rewards/rejected": -0.38743898272514343, + "step": 5499 + }, + { + "epoch": 0.32, + "learning_rate": 7.951415881961175e-08, + "logits/chosen": -2.0687055587768555, + "logits/rejected": -2.0680599212646484, + "logps/chosen": -0.0007429198012687266, + "logps/rejected": -80.06358337402344, + "loss": 0.6785, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.309433741880639e-06, + "rewards/margins": 0.059378597885370255, + "rewards/rejected": -0.059377290308475494, + "step": 5500 + }, + { + "epoch": 0.32, + "learning_rate": 7.950655125863281e-08, + "logits/chosen": -2.0420491695404053, + "logits/rejected": -2.0389347076416016, + "logps/chosen": -100.779296875, + "logps/rejected": -232.7615966796875, + "loss": 0.7436, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.114680528640747, + "rewards/margins": 3.334149122238159, + "rewards/rejected": -4.448829650878906, + "step": 5501 + }, + { + "epoch": 0.32, + "learning_rate": 7.949894264943685e-08, + "logits/chosen": -2.165043592453003, + "logits/rejected": -2.1537013053894043, + "logps/chosen": -63.74998092651367, + "logps/rejected": -245.80645751953125, + "loss": 0.226, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1063587665557861, + "rewards/margins": 1.5857304334640503, + "rewards/rejected": -0.4793716371059418, + "step": 5502 + }, + { + "epoch": 0.32, + "learning_rate": 7.949133299229415e-08, + "logits/chosen": -1.9510077238082886, + "logits/rejected": -1.9700430631637573, + "logps/chosen": -236.94419860839844, + "logps/rejected": -351.7542419433594, + "loss": 0.2379, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0670791864395142, + "rewards/margins": 1.7127487659454346, + "rewards/rejected": -0.6456695795059204, + "step": 5503 + }, + { + "epoch": 0.32, + "learning_rate": 7.948372228747504e-08, + "logits/chosen": -1.9518053531646729, + "logits/rejected": -1.9503329992294312, + "logps/chosen": -67.68599700927734, + "logps/rejected": -225.11660766601562, + "loss": 0.2804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4886108338832855, + "rewards/margins": 2.021981716156006, + "rewards/rejected": -1.5333709716796875, + "step": 5504 + }, + { + "epoch": 0.32, + "learning_rate": 7.94761105352499e-08, + "logits/chosen": -2.049560308456421, + "logits/rejected": -2.0398247241973877, + "logps/chosen": -100.08670043945312, + "logps/rejected": -186.7952117919922, + "loss": 0.6486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43935471773147583, + "rewards/margins": 0.4330940246582031, + "rewards/rejected": -0.872448742389679, + "step": 5505 + }, + { + "epoch": 0.32, + "learning_rate": 7.946849773588915e-08, + "logits/chosen": -2.0490317344665527, + "logits/rejected": -2.0081264972686768, + "logps/chosen": -311.14971923828125, + "logps/rejected": -500.89556884765625, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.49700927734375, + "rewards/margins": 3.8668458461761475, + "rewards/rejected": -2.3698365688323975, + "step": 5506 + }, + { + "epoch": 0.32, + "learning_rate": 7.946088388966317e-08, + "logits/chosen": -1.9671989679336548, + "logits/rejected": -1.9629976749420166, + "logps/chosen": -39.888465881347656, + "logps/rejected": -148.2615966796875, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20263557136058807, + "rewards/margins": 2.139925003051758, + "rewards/rejected": -1.9372894763946533, + "step": 5507 + }, + { + "epoch": 0.32, + "learning_rate": 7.945326899684251e-08, + "logits/chosen": -1.9980779886245728, + "logits/rejected": -2.001007318496704, + "logps/chosen": -3.654240369796753, + "logps/rejected": -65.58966827392578, + "loss": 0.6963, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.13535769283771515, + "rewards/margins": -0.1386672705411911, + "rewards/rejected": 0.27402496337890625, + "step": 5508 + }, + { + "epoch": 0.32, + "learning_rate": 7.944565305769767e-08, + "logits/chosen": -1.9217325448989868, + "logits/rejected": -1.8788056373596191, + "logps/chosen": -157.16770935058594, + "logps/rejected": -274.27716064453125, + "loss": 0.4175, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.02965247631073, + "rewards/margins": 0.2889755964279175, + "rewards/rejected": 0.7406768798828125, + "step": 5509 + }, + { + "epoch": 0.32, + "learning_rate": 7.943803607249919e-08, + "logits/chosen": -2.180044174194336, + "logits/rejected": -2.172086715698242, + "logps/chosen": -64.004150390625, + "logps/rejected": -228.83338928222656, + "loss": 0.4072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08819504082202911, + "rewards/margins": 2.2817587852478027, + "rewards/rejected": -2.3699538707733154, + "step": 5510 + }, + { + "epoch": 0.32, + "learning_rate": 7.943041804151768e-08, + "logits/chosen": -1.8039069175720215, + "logits/rejected": -1.7917603254318237, + "logps/chosen": -259.4513854980469, + "logps/rejected": -427.8294677734375, + "loss": 0.1033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6615447998046875, + "rewards/margins": 2.0461151599884033, + "rewards/rejected": -0.38457033038139343, + "step": 5511 + }, + { + "epoch": 0.32, + "learning_rate": 7.942279896502375e-08, + "logits/chosen": -2.2396581172943115, + "logits/rejected": -2.243027448654175, + "logps/chosen": -72.06195068359375, + "logps/rejected": -141.79150390625, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42535400390625, + "rewards/margins": 1.8152283430099487, + "rewards/rejected": -1.3898743391036987, + "step": 5512 + }, + { + "epoch": 0.32, + "learning_rate": 7.941517884328809e-08, + "logits/chosen": -1.934755802154541, + "logits/rejected": -1.761405348777771, + "logps/chosen": -284.69708251953125, + "logps/rejected": -754.5686645507812, + "loss": 0.0695, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7677154541015625, + "rewards/margins": 2.330392360687256, + "rewards/rejected": 0.4373230040073395, + "step": 5513 + }, + { + "epoch": 0.32, + "learning_rate": 7.940755767658138e-08, + "logits/chosen": -1.932645320892334, + "logits/rejected": -1.8972381353378296, + "logps/chosen": -230.5398712158203, + "logps/rejected": -395.43377685546875, + "loss": 0.3805, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.7082810401916504, + "rewards/margins": -0.04834127426147461, + "rewards/rejected": 2.756622314453125, + "step": 5514 + }, + { + "epoch": 0.32, + "learning_rate": 7.939993546517439e-08, + "logits/chosen": -2.1343679428100586, + "logits/rejected": -2.1488265991210938, + "logps/chosen": -152.58580017089844, + "logps/rejected": -268.0959777832031, + "loss": 0.2063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6060043573379517, + "rewards/margins": 1.0184829235076904, + "rewards/rejected": 0.5875213742256165, + "step": 5515 + }, + { + "epoch": 0.32, + "learning_rate": 7.939231220933786e-08, + "logits/chosen": -2.1113171577453613, + "logits/rejected": -2.1365602016448975, + "logps/chosen": -224.28173828125, + "logps/rejected": -314.63397216796875, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7632492780685425, + "rewards/margins": 3.4426681995391846, + "rewards/rejected": -1.679418921470642, + "step": 5516 + }, + { + "epoch": 0.32, + "learning_rate": 7.938468790934264e-08, + "logits/chosen": -1.9354839324951172, + "logits/rejected": -1.9315022230148315, + "logps/chosen": -260.3179626464844, + "logps/rejected": -284.9731750488281, + "loss": 0.5099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.834094226360321, + "rewards/margins": 0.0027557015419006348, + "rewards/rejected": 0.8313385248184204, + "step": 5517 + }, + { + "epoch": 0.32, + "learning_rate": 7.937706256545958e-08, + "logits/chosen": -1.9693809747695923, + "logits/rejected": -1.793688178062439, + "logps/chosen": -230.76321411132812, + "logps/rejected": -650.978759765625, + "loss": 0.2669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.927722156047821, + "rewards/margins": 1.3914611339569092, + "rewards/rejected": -0.4637390077114105, + "step": 5518 + }, + { + "epoch": 0.32, + "learning_rate": 7.936943617795955e-08, + "logits/chosen": -2.046257972717285, + "logits/rejected": -2.049332618713379, + "logps/chosen": -11.802896499633789, + "logps/rejected": -42.91190719604492, + "loss": 0.5527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1339489072561264, + "rewards/margins": 0.41866379976272583, + "rewards/rejected": -0.2847149074077606, + "step": 5519 + }, + { + "epoch": 0.32, + "learning_rate": 7.936180874711347e-08, + "logits/chosen": -2.076047658920288, + "logits/rejected": -2.073021650314331, + "logps/chosen": -139.4862060546875, + "logps/rejected": -335.9223327636719, + "loss": 0.1438, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4478180408477783, + "rewards/margins": 1.4216675758361816, + "rewards/rejected": 0.02615051344037056, + "step": 5520 + }, + { + "epoch": 0.32, + "learning_rate": 7.935418027319233e-08, + "logits/chosen": -1.9374021291732788, + "logits/rejected": -1.9367941617965698, + "logps/chosen": -266.0606689453125, + "logps/rejected": -304.17559814453125, + "loss": 0.491, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.291949510574341, + "rewards/margins": -0.43085336685180664, + "rewards/rejected": 2.7228028774261475, + "step": 5521 + }, + { + "epoch": 0.32, + "learning_rate": 7.934655075646712e-08, + "logits/chosen": -1.8093245029449463, + "logits/rejected": -1.856408715248108, + "logps/chosen": -275.3763427734375, + "logps/rejected": -443.5203552246094, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.475665330886841, + "rewards/margins": 2.9456543922424316, + "rewards/rejected": -0.46998903155326843, + "step": 5522 + }, + { + "epoch": 0.32, + "learning_rate": 7.933892019720888e-08, + "logits/chosen": -1.8604011535644531, + "logits/rejected": -1.8691989183425903, + "logps/chosen": -0.00012838240945711732, + "logps/rejected": -166.4249267578125, + "loss": 0.4728, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.261086233891547e-06, + "rewards/margins": 1.2476015090942383, + "rewards/rejected": -1.2475922107696533, + "step": 5523 + }, + { + "epoch": 0.32, + "learning_rate": 7.933128859568869e-08, + "logits/chosen": -2.0119903087615967, + "logits/rejected": -2.0631957054138184, + "logps/chosen": -225.02862548828125, + "logps/rejected": -329.62554931640625, + "loss": 0.0967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7259628772735596, + "rewards/margins": 2.271885871887207, + "rewards/rejected": -0.5459228754043579, + "step": 5524 + }, + { + "epoch": 0.32, + "learning_rate": 7.932365595217764e-08, + "logits/chosen": -1.7905820608139038, + "logits/rejected": -1.7681818008422852, + "logps/chosen": -294.41473388671875, + "logps/rejected": -522.3060302734375, + "loss": 0.0513, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0482146739959717, + "rewards/margins": 4.263833522796631, + "rewards/rejected": -2.215618848800659, + "step": 5525 + }, + { + "epoch": 0.32, + "learning_rate": 7.931602226694689e-08, + "logits/chosen": -1.9796807765960693, + "logits/rejected": -1.9794572591781616, + "logps/chosen": -39.348419189453125, + "logps/rejected": -146.2327880859375, + "loss": 0.5805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037612151354551315, + "rewards/margins": 0.5513893365859985, + "rewards/rejected": -0.589001476764679, + "step": 5526 + }, + { + "epoch": 0.32, + "learning_rate": 7.930838754026763e-08, + "logits/chosen": -2.213340997695923, + "logits/rejected": -2.2165448665618896, + "logps/chosen": -0.00991764385253191, + "logps/rejected": -161.34535217285156, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009295615600422025, + "rewards/margins": 2.313324451446533, + "rewards/rejected": -2.312394857406616, + "step": 5527 + }, + { + "epoch": 0.32, + "learning_rate": 7.930075177241109e-08, + "logits/chosen": -2.056572675704956, + "logits/rejected": -2.0543673038482666, + "logps/chosen": -35.730960845947266, + "logps/rejected": -241.5478515625, + "loss": 0.5158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11750679463148117, + "rewards/margins": 1.001714825630188, + "rewards/rejected": -1.119221568107605, + "step": 5528 + }, + { + "epoch": 0.32, + "learning_rate": 7.929311496364852e-08, + "logits/chosen": -2.121051073074341, + "logits/rejected": -2.120499610900879, + "logps/chosen": -51.67934036254883, + "logps/rejected": -278.6812438964844, + "loss": 0.4045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07442283630371094, + "rewards/margins": 1.7432712316513062, + "rewards/rejected": -1.817694067955017, + "step": 5529 + }, + { + "epoch": 0.32, + "learning_rate": 7.928547711425123e-08, + "logits/chosen": -1.6884979009628296, + "logits/rejected": -1.6383343935012817, + "logps/chosen": -185.23727416992188, + "logps/rejected": -355.93133544921875, + "loss": 0.1792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5998581647872925, + "rewards/margins": 1.541121006011963, + "rewards/rejected": 0.05873718485236168, + "step": 5530 + }, + { + "epoch": 0.32, + "learning_rate": 7.927783822449052e-08, + "logits/chosen": -1.9973087310791016, + "logits/rejected": -1.9827309846878052, + "logps/chosen": -208.05484008789062, + "logps/rejected": -252.92066955566406, + "loss": 0.1017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.54217529296875, + "rewards/margins": 1.8638137578964233, + "rewards/rejected": 0.6783615350723267, + "step": 5531 + }, + { + "epoch": 0.32, + "learning_rate": 7.92701982946378e-08, + "logits/chosen": -1.9642951488494873, + "logits/rejected": -1.9530736207962036, + "logps/chosen": -6.388402462005615, + "logps/rejected": -165.78826904296875, + "loss": 0.4282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23637652397155762, + "rewards/margins": 1.1900523900985718, + "rewards/rejected": -0.9536758661270142, + "step": 5532 + }, + { + "epoch": 0.32, + "learning_rate": 7.926255732496445e-08, + "logits/chosen": -2.0164945125579834, + "logits/rejected": -2.0156140327453613, + "logps/chosen": -0.00019609183073043823, + "logps/rejected": -149.24630737304688, + "loss": 0.3839, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9800794436596334e-06, + "rewards/margins": 2.7707369327545166, + "rewards/rejected": -2.770739793777466, + "step": 5533 + }, + { + "epoch": 0.32, + "learning_rate": 7.925491531574193e-08, + "logits/chosen": -2.081953287124634, + "logits/rejected": -2.0803141593933105, + "logps/chosen": -1.1379408836364746, + "logps/rejected": -103.12759399414062, + "loss": 0.546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08137442916631699, + "rewards/margins": 0.8427192568778992, + "rewards/rejected": -0.9240936636924744, + "step": 5534 + }, + { + "epoch": 0.32, + "learning_rate": 7.924727226724173e-08, + "logits/chosen": -1.7685251235961914, + "logits/rejected": -1.7081776857376099, + "logps/chosen": -307.7901611328125, + "logps/rejected": -417.240478515625, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.181646704673767, + "rewards/margins": 1.218042016029358, + "rewards/rejected": -0.03639526292681694, + "step": 5535 + }, + { + "epoch": 0.32, + "learning_rate": 7.923962817973534e-08, + "logits/chosen": -2.0784521102905273, + "logits/rejected": -2.0641679763793945, + "logps/chosen": -3.337835005368106e-05, + "logps/rejected": -287.8361511230469, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4570149409773876e-07, + "rewards/margins": 5.316702842712402, + "rewards/rejected": -5.316702365875244, + "step": 5536 + }, + { + "epoch": 0.32, + "learning_rate": 7.923198305349433e-08, + "logits/chosen": -2.0600178241729736, + "logits/rejected": -2.0641767978668213, + "logps/chosen": -12.956056594848633, + "logps/rejected": -134.566650390625, + "loss": 0.5734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050331976264715195, + "rewards/margins": 0.5576439499855042, + "rewards/rejected": -0.507311999797821, + "step": 5537 + }, + { + "epoch": 0.32, + "learning_rate": 7.92243368887903e-08, + "logits/chosen": -1.9831695556640625, + "logits/rejected": -1.96681809425354, + "logps/chosen": -2.443769153614994e-05, + "logps/rejected": -204.25320434570312, + "loss": 0.4086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.788099410759969e-07, + "rewards/margins": 2.0230894088745117, + "rewards/rejected": -2.023089647293091, + "step": 5538 + }, + { + "epoch": 0.32, + "learning_rate": 7.921668968589487e-08, + "logits/chosen": -2.2571847438812256, + "logits/rejected": -2.2391154766082764, + "logps/chosen": -50.556549072265625, + "logps/rejected": -322.83978271484375, + "loss": 1.0921, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6871055364608765, + "rewards/margins": 0.7789162397384644, + "rewards/rejected": -2.466021776199341, + "step": 5539 + }, + { + "epoch": 0.32, + "learning_rate": 7.92090414450797e-08, + "logits/chosen": -2.0414011478424072, + "logits/rejected": -2.031106948852539, + "logps/chosen": -81.7243423461914, + "logps/rejected": -393.33319091796875, + "loss": 1.1147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7229194641113281, + "rewards/margins": 0.978731632232666, + "rewards/rejected": -2.701651096343994, + "step": 5540 + }, + { + "epoch": 0.32, + "learning_rate": 7.92013921666165e-08, + "logits/chosen": -2.0886499881744385, + "logits/rejected": -2.0854315757751465, + "logps/chosen": -18.5389461517334, + "logps/rejected": -141.51393127441406, + "loss": 0.4633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1528032273054123, + "rewards/margins": 0.9252703189849854, + "rewards/rejected": -0.7724670767784119, + "step": 5541 + }, + { + "epoch": 0.32, + "learning_rate": 7.919374185077703e-08, + "logits/chosen": -2.0352988243103027, + "logits/rejected": -1.9357906579971313, + "logps/chosen": -262.9400329589844, + "logps/rejected": -524.372802734375, + "loss": 0.1079, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.467883348464966, + "rewards/margins": 1.821069359779358, + "rewards/rejected": 0.6468139886856079, + "step": 5542 + }, + { + "epoch": 0.32, + "learning_rate": 7.918609049783303e-08, + "logits/chosen": -2.1059157848358154, + "logits/rejected": -2.110673189163208, + "logps/chosen": -37.475738525390625, + "logps/rejected": -102.66656494140625, + "loss": 0.4543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6709785461425781, + "rewards/margins": 0.33431777358055115, + "rewards/rejected": 0.336660772562027, + "step": 5543 + }, + { + "epoch": 0.32, + "learning_rate": 7.917843810805633e-08, + "logits/chosen": -2.1781210899353027, + "logits/rejected": -2.179640054702759, + "logps/chosen": -7.603002071380615, + "logps/rejected": -103.04817199707031, + "loss": 0.4901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08692803233861923, + "rewards/margins": 0.9962755441665649, + "rewards/rejected": -0.9093475341796875, + "step": 5544 + }, + { + "epoch": 0.32, + "learning_rate": 7.91707846817188e-08, + "logits/chosen": -2.1381330490112305, + "logits/rejected": -2.1403448581695557, + "logps/chosen": -29.039810180664062, + "logps/rejected": -292.8501892089844, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6697353720664978, + "rewards/margins": 1.4546871185302734, + "rewards/rejected": -0.7849518060684204, + "step": 5545 + }, + { + "epoch": 0.32, + "learning_rate": 7.916313021909229e-08, + "logits/chosen": -2.203542709350586, + "logits/rejected": -2.1920270919799805, + "logps/chosen": -37.44334030151367, + "logps/rejected": -139.8886260986328, + "loss": 0.5282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03142395243048668, + "rewards/margins": 0.6964523196220398, + "rewards/rejected": -0.6650283932685852, + "step": 5546 + }, + { + "epoch": 0.32, + "learning_rate": 7.915547472044874e-08, + "logits/chosen": -2.065725803375244, + "logits/rejected": -2.0622737407684326, + "logps/chosen": -69.48881530761719, + "logps/rejected": -260.9477233886719, + "loss": 0.2718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4462127685546875, + "rewards/margins": 2.5386948585510254, + "rewards/rejected": -2.092482089996338, + "step": 5547 + }, + { + "epoch": 0.32, + "learning_rate": 7.914781818606011e-08, + "logits/chosen": -2.2057759761810303, + "logits/rejected": -2.2019550800323486, + "logps/chosen": -96.56575012207031, + "logps/rejected": -289.1039733886719, + "loss": 0.363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2122848480939865, + "rewards/margins": 2.019548177719116, + "rewards/rejected": -1.8072632551193237, + "step": 5548 + }, + { + "epoch": 0.32, + "learning_rate": 7.914016061619842e-08, + "logits/chosen": -2.258249044418335, + "logits/rejected": -2.245671510696411, + "logps/chosen": -210.19549560546875, + "logps/rejected": -351.51885986328125, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3703445196151733, + "rewards/margins": 0.3141646385192871, + "rewards/rejected": 1.0561798810958862, + "step": 5549 + }, + { + "epoch": 0.32, + "learning_rate": 7.913250201113568e-08, + "logits/chosen": -1.9745486974716187, + "logits/rejected": -1.963130235671997, + "logps/chosen": -36.64590835571289, + "logps/rejected": -214.6938018798828, + "loss": 0.6148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5264903903007507, + "rewards/margins": 1.412489652633667, + "rewards/rejected": -1.9389801025390625, + "step": 5550 + }, + { + "epoch": 0.32, + "learning_rate": 7.912484237114396e-08, + "logits/chosen": -2.0233569145202637, + "logits/rejected": -2.030914783477783, + "logps/chosen": -0.0005030851461924613, + "logps/rejected": -160.15106201171875, + "loss": 0.4155, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.432829518918879e-05, + "rewards/margins": 1.9086976051330566, + "rewards/rejected": -1.9086532592773438, + "step": 5551 + }, + { + "epoch": 0.32, + "learning_rate": 7.911718169649537e-08, + "logits/chosen": -2.057960271835327, + "logits/rejected": -2.014662504196167, + "logps/chosen": -211.33950805664062, + "logps/rejected": -377.2041015625, + "loss": 0.1911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2802581787109375, + "rewards/margins": 1.6333649158477783, + "rewards/rejected": -0.35310670733451843, + "step": 5552 + }, + { + "epoch": 0.32, + "learning_rate": 7.910951998746206e-08, + "logits/chosen": -2.1023542881011963, + "logits/rejected": -2.100550889968872, + "logps/chosen": -132.5167236328125, + "logps/rejected": -216.46224975585938, + "loss": 0.4713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.916668713092804, + "rewards/margins": 0.06257021427154541, + "rewards/rejected": 0.8540984988212585, + "step": 5553 + }, + { + "epoch": 0.32, + "learning_rate": 7.910185724431622e-08, + "logits/chosen": -2.0952019691467285, + "logits/rejected": -2.0959606170654297, + "logps/chosen": -4.916807174682617, + "logps/rejected": -94.06163787841797, + "loss": 0.4277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08959140628576279, + "rewards/margins": 1.1266367435455322, + "rewards/rejected": -1.0370453596115112, + "step": 5554 + }, + { + "epoch": 0.32, + "learning_rate": 7.909419346733007e-08, + "logits/chosen": -1.9085220098495483, + "logits/rejected": -1.8337113857269287, + "logps/chosen": -198.17514038085938, + "logps/rejected": -347.1480712890625, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0374237298965454, + "rewards/margins": 0.6967865228652954, + "rewards/rejected": 0.34063720703125, + "step": 5555 + }, + { + "epoch": 0.32, + "learning_rate": 7.908652865677583e-08, + "logits/chosen": -1.90081787109375, + "logits/rejected": -1.9039500951766968, + "logps/chosen": -83.74239349365234, + "logps/rejected": -166.8936309814453, + "loss": 0.2493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6188187003135681, + "rewards/margins": 2.279888868331909, + "rewards/rejected": -1.6610702276229858, + "step": 5556 + }, + { + "epoch": 0.32, + "learning_rate": 7.907886281292582e-08, + "logits/chosen": -1.8618006706237793, + "logits/rejected": -1.845359206199646, + "logps/chosen": -193.61798095703125, + "logps/rejected": -326.60430908203125, + "loss": 0.0672, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8765274286270142, + "rewards/margins": 2.4154372215270996, + "rewards/rejected": -0.538909912109375, + "step": 5557 + }, + { + "epoch": 0.32, + "learning_rate": 7.907119593605236e-08, + "logits/chosen": -1.9981114864349365, + "logits/rejected": -2.003351926803589, + "logps/chosen": -222.71243286132812, + "logps/rejected": -395.833740234375, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0499818325042725, + "rewards/margins": 3.576669454574585, + "rewards/rejected": -1.5266876220703125, + "step": 5558 + }, + { + "epoch": 0.32, + "learning_rate": 7.906352802642782e-08, + "logits/chosen": -2.1681606769561768, + "logits/rejected": -2.1682050228118896, + "logps/chosen": -74.46195983886719, + "logps/rejected": -140.34800720214844, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.871856689453125, + "rewards/margins": 0.6754501461982727, + "rewards/rejected": 0.1964065581560135, + "step": 5559 + }, + { + "epoch": 0.32, + "learning_rate": 7.90558590843246e-08, + "logits/chosen": -2.0290257930755615, + "logits/rejected": -2.029468297958374, + "logps/chosen": -16.177576065063477, + "logps/rejected": -370.1044921875, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18865565955638885, + "rewards/margins": 5.71576452255249, + "rewards/rejected": -5.527108669281006, + "step": 5560 + }, + { + "epoch": 0.32, + "learning_rate": 7.904818911001515e-08, + "logits/chosen": -2.0974490642547607, + "logits/rejected": -2.0960400104522705, + "logps/chosen": -40.31595230102539, + "logps/rejected": -247.882080078125, + "loss": 0.3287, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15057869255542755, + "rewards/margins": 2.179356813430786, + "rewards/rejected": -2.028778076171875, + "step": 5561 + }, + { + "epoch": 0.32, + "learning_rate": 7.90405181037719e-08, + "logits/chosen": -1.8561372756958008, + "logits/rejected": -1.9137296676635742, + "logps/chosen": -194.1295928955078, + "logps/rejected": -450.56207275390625, + "loss": 0.1478, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0845321416854858, + "rewards/margins": 2.6913833618164062, + "rewards/rejected": -1.6068512201309204, + "step": 5562 + }, + { + "epoch": 0.32, + "learning_rate": 7.903284606586741e-08, + "logits/chosen": -2.1129395961761475, + "logits/rejected": -2.109274387359619, + "logps/chosen": -0.002036110032349825, + "logps/rejected": -204.56576538085938, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.554885137826204e-05, + "rewards/margins": 3.2355613708496094, + "rewards/rejected": -3.2355058193206787, + "step": 5563 + }, + { + "epoch": 0.32, + "learning_rate": 7.902517299657423e-08, + "logits/chosen": -1.7630356550216675, + "logits/rejected": -1.7595914602279663, + "logps/chosen": -143.91758728027344, + "logps/rejected": -228.8102264404297, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1794403791427612, + "rewards/margins": 0.7676438093185425, + "rewards/rejected": 0.41179656982421875, + "step": 5564 + }, + { + "epoch": 0.32, + "learning_rate": 7.901749889616491e-08, + "logits/chosen": -2.0212063789367676, + "logits/rejected": -2.0157530307769775, + "logps/chosen": -51.11988067626953, + "logps/rejected": -213.3558349609375, + "loss": 0.6169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3757919371128082, + "rewards/margins": 1.220240831375122, + "rewards/rejected": -1.596032738685608, + "step": 5565 + }, + { + "epoch": 0.32, + "learning_rate": 7.90098237649121e-08, + "logits/chosen": -2.0515170097351074, + "logits/rejected": -2.0413455963134766, + "logps/chosen": -48.84662628173828, + "logps/rejected": -165.25242614746094, + "loss": 0.9272, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5787311792373657, + "rewards/margins": -0.24169352650642395, + "rewards/rejected": -0.3370376527309418, + "step": 5566 + }, + { + "epoch": 0.32, + "learning_rate": 7.900214760308845e-08, + "logits/chosen": -1.9147820472717285, + "logits/rejected": -1.9180442094802856, + "logps/chosen": -217.67950439453125, + "logps/rejected": -288.1610107421875, + "loss": 0.2485, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1769317388534546, + "rewards/margins": 0.9737365245819092, + "rewards/rejected": 0.20319519937038422, + "step": 5567 + }, + { + "epoch": 0.32, + "learning_rate": 7.899447041096664e-08, + "logits/chosen": -2.075817823410034, + "logits/rejected": -2.077711820602417, + "logps/chosen": -79.78286743164062, + "logps/rejected": -210.27841186523438, + "loss": 0.3889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.137471005320549, + "rewards/margins": 2.5153183937072754, + "rewards/rejected": -2.37784743309021, + "step": 5568 + }, + { + "epoch": 0.32, + "learning_rate": 7.898679218881941e-08, + "logits/chosen": -2.0777745246887207, + "logits/rejected": -2.0697877407073975, + "logps/chosen": -12.790825843811035, + "logps/rejected": -207.91729736328125, + "loss": 0.3732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017435168847441673, + "rewards/margins": 2.0562121868133545, + "rewards/rejected": -2.0387771129608154, + "step": 5569 + }, + { + "epoch": 0.32, + "learning_rate": 7.897911293691955e-08, + "logits/chosen": -1.965298056602478, + "logits/rejected": -1.954578161239624, + "logps/chosen": -0.00010454427683725953, + "logps/rejected": -252.64437866210938, + "loss": 0.4111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.360217422392452e-06, + "rewards/margins": 1.8104300498962402, + "rewards/rejected": -1.8104324340820312, + "step": 5570 + }, + { + "epoch": 0.32, + "learning_rate": 7.897143265553983e-08, + "logits/chosen": -1.851787805557251, + "logits/rejected": -1.8464921712875366, + "logps/chosen": -184.82769775390625, + "logps/rejected": -361.56695556640625, + "loss": 0.1154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8614654541015625, + "rewards/margins": 2.000195264816284, + "rewards/rejected": -0.13872985541820526, + "step": 5571 + }, + { + "epoch": 0.32, + "learning_rate": 7.896375134495312e-08, + "logits/chosen": -1.968790054321289, + "logits/rejected": -1.9549208879470825, + "logps/chosen": -42.055057525634766, + "logps/rejected": -159.71798706054688, + "loss": 0.5013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15619201958179474, + "rewards/margins": 0.8812805414199829, + "rewards/rejected": -0.7250885367393494, + "step": 5572 + }, + { + "epoch": 0.32, + "learning_rate": 7.895606900543228e-08, + "logits/chosen": -2.1664206981658936, + "logits/rejected": -2.153877019882202, + "logps/chosen": -65.14823913574219, + "logps/rejected": -151.4716796875, + "loss": 0.4212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049695588648319244, + "rewards/margins": 1.0838783979415894, + "rewards/rejected": -1.0341827869415283, + "step": 5573 + }, + { + "epoch": 0.32, + "learning_rate": 7.894838563725022e-08, + "logits/chosen": -2.0481271743774414, + "logits/rejected": -2.034529685974121, + "logps/chosen": -104.5532455444336, + "logps/rejected": -227.99545288085938, + "loss": 0.4206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9171043634414673, + "rewards/margins": 0.36366504430770874, + "rewards/rejected": 0.5534393191337585, + "step": 5574 + }, + { + "epoch": 0.32, + "learning_rate": 7.89407012406799e-08, + "logits/chosen": -2.0932400226593018, + "logits/rejected": -2.0890228748321533, + "logps/chosen": -177.7610626220703, + "logps/rejected": -336.03204345703125, + "loss": 0.1628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3932541608810425, + "rewards/margins": 1.5797715187072754, + "rewards/rejected": -0.18651734292507172, + "step": 5575 + }, + { + "epoch": 0.32, + "learning_rate": 7.893301581599434e-08, + "logits/chosen": -1.8330373764038086, + "logits/rejected": -1.8074313402175903, + "logps/chosen": -339.80328369140625, + "logps/rejected": -556.844970703125, + "loss": 0.1181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3861054182052612, + "rewards/margins": 2.2450103759765625, + "rewards/rejected": -0.858905017375946, + "step": 5576 + }, + { + "epoch": 0.32, + "learning_rate": 7.89253293634665e-08, + "logits/chosen": -1.9712883234024048, + "logits/rejected": -1.976061224937439, + "logps/chosen": -11.94697380065918, + "logps/rejected": -202.6505584716797, + "loss": 0.4106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12237387150526047, + "rewards/margins": 1.2649565935134888, + "rewards/rejected": -1.1425827741622925, + "step": 5577 + }, + { + "epoch": 0.32, + "learning_rate": 7.89176418833695e-08, + "logits/chosen": -1.9320906400680542, + "logits/rejected": -1.959030270576477, + "logps/chosen": -164.26747131347656, + "logps/rejected": -211.0833282470703, + "loss": 0.344, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4110549688339233, + "rewards/margins": 0.24166250228881836, + "rewards/rejected": 1.169392466545105, + "step": 5578 + }, + { + "epoch": 0.32, + "learning_rate": 7.890995337597636e-08, + "logits/chosen": -2.0249481201171875, + "logits/rejected": -1.990050196647644, + "logps/chosen": -171.1121826171875, + "logps/rejected": -371.6322021484375, + "loss": 0.0857, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4896423816680908, + "rewards/margins": 2.4809937477111816, + "rewards/rejected": -0.991351306438446, + "step": 5579 + }, + { + "epoch": 0.32, + "learning_rate": 7.89022638415603e-08, + "logits/chosen": -2.127019166946411, + "logits/rejected": -2.129431962966919, + "logps/chosen": -0.0026540961116552353, + "logps/rejected": -173.50416564941406, + "loss": 0.4811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002209602389484644, + "rewards/margins": 1.1948102712631226, + "rewards/rejected": -1.1945892572402954, + "step": 5580 + }, + { + "epoch": 0.32, + "learning_rate": 7.889457328039445e-08, + "logits/chosen": -1.9315885305404663, + "logits/rejected": -1.9441382884979248, + "logps/chosen": -176.9389190673828, + "logps/rejected": -269.5310363769531, + "loss": 0.6007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17298126220703125, + "rewards/margins": 0.3568161129951477, + "rewards/rejected": -0.18383483588695526, + "step": 5581 + }, + { + "epoch": 0.32, + "learning_rate": 7.888688169275201e-08, + "logits/chosen": -2.0118794441223145, + "logits/rejected": -2.008223533630371, + "logps/chosen": -38.65023422241211, + "logps/rejected": -192.87423706054688, + "loss": 0.3713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7986294031143188, + "rewards/margins": 0.840989351272583, + "rewards/rejected": -0.04235992580652237, + "step": 5582 + }, + { + "epoch": 0.32, + "learning_rate": 7.887918907890623e-08, + "logits/chosen": -2.0456435680389404, + "logits/rejected": -2.0534303188323975, + "logps/chosen": -61.478515625, + "logps/rejected": -175.9072723388672, + "loss": 0.4666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45970916748046875, + "rewards/margins": 0.543536365032196, + "rewards/rejected": -0.08382721245288849, + "step": 5583 + }, + { + "epoch": 0.32, + "learning_rate": 7.887149543913039e-08, + "logits/chosen": -1.7787646055221558, + "logits/rejected": -1.7826838493347168, + "logps/chosen": -55.94346618652344, + "logps/rejected": -161.4716339111328, + "loss": 0.2493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8378875851631165, + "rewards/margins": 1.7819000482559204, + "rewards/rejected": -0.944012463092804, + "step": 5584 + }, + { + "epoch": 0.33, + "learning_rate": 7.886380077369781e-08, + "logits/chosen": -2.052882671356201, + "logits/rejected": -2.037447929382324, + "logps/chosen": -147.69729614257812, + "logps/rejected": -210.7259063720703, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.859686255455017, + "rewards/margins": 2.2052993774414062, + "rewards/rejected": -0.3456130921840668, + "step": 5585 + }, + { + "epoch": 0.33, + "learning_rate": 7.885610508288184e-08, + "logits/chosen": -1.8296191692352295, + "logits/rejected": -1.7907601594924927, + "logps/chosen": -221.909423828125, + "logps/rejected": -353.09832763671875, + "loss": 0.2273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6863571405410767, + "rewards/margins": 0.7509109377861023, + "rewards/rejected": 0.9354462027549744, + "step": 5586 + }, + { + "epoch": 0.33, + "learning_rate": 7.884840836695587e-08, + "logits/chosen": -1.9553263187408447, + "logits/rejected": -2.0627782344818115, + "logps/chosen": -396.9024963378906, + "logps/rejected": -393.1151123046875, + "loss": 0.2252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6369232535362244, + "rewards/margins": 1.0782196521759033, + "rewards/rejected": -0.44129639863967896, + "step": 5587 + }, + { + "epoch": 0.33, + "learning_rate": 7.88407106261933e-08, + "logits/chosen": -2.06882381439209, + "logits/rejected": -2.067904472351074, + "logps/chosen": -1.913090467453003, + "logps/rejected": -303.4942626953125, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10119926929473877, + "rewards/margins": 5.7809038162231445, + "rewards/rejected": -5.882102966308594, + "step": 5588 + }, + { + "epoch": 0.33, + "learning_rate": 7.883301186086763e-08, + "logits/chosen": -2.009157180786133, + "logits/rejected": -1.997391939163208, + "logps/chosen": -28.25756072998047, + "logps/rejected": -272.8411865234375, + "loss": 0.4292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33731499314308167, + "rewards/margins": 2.223482131958008, + "rewards/rejected": -2.5607972145080566, + "step": 5589 + }, + { + "epoch": 0.33, + "learning_rate": 7.882531207125234e-08, + "logits/chosen": -2.0062172412872314, + "logits/rejected": -2.002363681793213, + "logps/chosen": -37.71521759033203, + "logps/rejected": -117.19525146484375, + "loss": 0.5619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1249263808131218, + "rewards/margins": 0.7464252710342407, + "rewards/rejected": -0.8713516592979431, + "step": 5590 + }, + { + "epoch": 0.33, + "learning_rate": 7.881761125762097e-08, + "logits/chosen": -2.0615453720092773, + "logits/rejected": -2.052985191345215, + "logps/chosen": -313.5445861816406, + "logps/rejected": -347.0541076660156, + "loss": 0.1633, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9929230213165283, + "rewards/margins": 1.4207825660705566, + "rewards/rejected": 0.5721405148506165, + "step": 5591 + }, + { + "epoch": 0.33, + "learning_rate": 7.880990942024709e-08, + "logits/chosen": -1.6347407102584839, + "logits/rejected": -1.6300749778747559, + "logps/chosen": -14.381223678588867, + "logps/rejected": -270.6618347167969, + "loss": 0.3821, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025498200207948685, + "rewards/margins": 2.9216668605804443, + "rewards/rejected": -2.947165012359619, + "step": 5592 + }, + { + "epoch": 0.33, + "learning_rate": 7.88022065594043e-08, + "logits/chosen": -1.9818261861801147, + "logits/rejected": -2.0261170864105225, + "logps/chosen": -139.60748291015625, + "logps/rejected": -312.3561096191406, + "loss": 0.2664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.998944103717804, + "rewards/margins": 0.8638213872909546, + "rewards/rejected": 0.13512268662452698, + "step": 5593 + }, + { + "epoch": 0.33, + "learning_rate": 7.879450267536625e-08, + "logits/chosen": -1.9851526021957397, + "logits/rejected": -1.9698127508163452, + "logps/chosen": -43.6359748840332, + "logps/rejected": -233.86878967285156, + "loss": 0.2235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6185920834541321, + "rewards/margins": 2.8398430347442627, + "rewards/rejected": -2.2212510108947754, + "step": 5594 + }, + { + "epoch": 0.33, + "learning_rate": 7.87867977684066e-08, + "logits/chosen": -2.0466628074645996, + "logits/rejected": -2.0259926319122314, + "logps/chosen": -78.54841613769531, + "logps/rejected": -258.6253356933594, + "loss": 0.2495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6476890444755554, + "rewards/margins": 2.0921638011932373, + "rewards/rejected": -1.4444748163223267, + "step": 5595 + }, + { + "epoch": 0.33, + "learning_rate": 7.877909183879909e-08, + "logits/chosen": -1.8728023767471313, + "logits/rejected": -1.8367607593536377, + "logps/chosen": -235.9365234375, + "logps/rejected": -623.641357421875, + "loss": 0.2142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3426544666290283, + "rewards/margins": 1.2953277826309204, + "rewards/rejected": 0.04732666164636612, + "step": 5596 + }, + { + "epoch": 0.33, + "learning_rate": 7.877138488681748e-08, + "logits/chosen": -2.107616424560547, + "logits/rejected": -2.10595440864563, + "logps/chosen": -23.55430030822754, + "logps/rejected": -177.33340454101562, + "loss": 0.5439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16010475158691406, + "rewards/margins": 0.5019229650497437, + "rewards/rejected": -0.341818243265152, + "step": 5597 + }, + { + "epoch": 0.33, + "learning_rate": 7.876367691273551e-08, + "logits/chosen": -1.9868826866149902, + "logits/rejected": -2.0367255210876465, + "logps/chosen": -238.54318237304688, + "logps/rejected": -305.2786865234375, + "loss": 0.2668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6730362176895142, + "rewards/margins": 0.5983779430389404, + "rewards/rejected": 1.0746582746505737, + "step": 5598 + }, + { + "epoch": 0.33, + "learning_rate": 7.875596791682707e-08, + "logits/chosen": -1.8305584192276, + "logits/rejected": -1.8142682313919067, + "logps/chosen": -191.97491455078125, + "logps/rejected": -342.60809326171875, + "loss": 0.1157, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7637802362442017, + "rewards/margins": 1.9629989862442017, + "rewards/rejected": -0.19921875, + "step": 5599 + }, + { + "epoch": 0.33, + "learning_rate": 7.874825789936597e-08, + "logits/chosen": -1.9526728391647339, + "logits/rejected": -1.945265769958496, + "logps/chosen": -42.89496612548828, + "logps/rejected": -111.61983489990234, + "loss": 0.8849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8561779260635376, + "rewards/margins": 0.21875035762786865, + "rewards/rejected": -1.0749282836914062, + "step": 5600 + }, + { + "epoch": 0.33, + "learning_rate": 7.874054686062612e-08, + "logits/chosen": -2.015096664428711, + "logits/rejected": -2.0043387413024902, + "logps/chosen": -169.4374542236328, + "logps/rejected": -266.3227233886719, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.12517249584198, + "rewards/margins": 0.6120957136154175, + "rewards/rejected": 0.5130767822265625, + "step": 5601 + }, + { + "epoch": 0.33, + "learning_rate": 7.873283480088146e-08, + "logits/chosen": -1.9624029397964478, + "logits/rejected": -1.973084568977356, + "logps/chosen": -248.7281036376953, + "logps/rejected": -411.2677307128906, + "loss": 0.2317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7648056745529175, + "rewards/margins": 0.97044837474823, + "rewards/rejected": 0.7943572998046875, + "step": 5602 + }, + { + "epoch": 0.33, + "learning_rate": 7.872512172040598e-08, + "logits/chosen": -1.9893563985824585, + "logits/rejected": -1.9881377220153809, + "logps/chosen": -2.7060435968451202e-05, + "logps/rejected": -153.8162841796875, + "loss": 0.4799, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.88756995764561e-07, + "rewards/margins": 1.0523113012313843, + "rewards/rejected": -1.0523117780685425, + "step": 5603 + }, + { + "epoch": 0.33, + "learning_rate": 7.871740761947365e-08, + "logits/chosen": -1.9308555126190186, + "logits/rejected": -1.9291213750839233, + "logps/chosen": -187.7697296142578, + "logps/rejected": -624.510009765625, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8174911737442017, + "rewards/margins": 4.735154628753662, + "rewards/rejected": -2.91766357421875, + "step": 5604 + }, + { + "epoch": 0.33, + "learning_rate": 7.870969249835855e-08, + "logits/chosen": -1.891762614250183, + "logits/rejected": -1.8494600057601929, + "logps/chosen": -222.81130981445312, + "logps/rejected": -357.9285888671875, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6303374767303467, + "rewards/margins": 2.9198334217071533, + "rewards/rejected": -0.2894958555698395, + "step": 5605 + }, + { + "epoch": 0.33, + "learning_rate": 7.870197635733472e-08, + "logits/chosen": -2.064178466796875, + "logits/rejected": -2.1076319217681885, + "logps/chosen": -183.100341796875, + "logps/rejected": -229.55230712890625, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.062963843345642, + "rewards/margins": 0.5883727669715881, + "rewards/rejected": 0.47459107637405396, + "step": 5606 + }, + { + "epoch": 0.33, + "learning_rate": 7.869425919667629e-08, + "logits/chosen": -1.9190257787704468, + "logits/rejected": -1.9027715921401978, + "logps/chosen": -197.9126739501953, + "logps/rejected": -349.40728759765625, + "loss": 0.0937, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5046494007110596, + "rewards/margins": 2.391618490219116, + "rewards/rejected": -0.8869690299034119, + "step": 5607 + }, + { + "epoch": 0.33, + "learning_rate": 7.868654101665743e-08, + "logits/chosen": -2.0407423973083496, + "logits/rejected": -2.041041612625122, + "logps/chosen": -3.7821128368377686, + "logps/rejected": -28.244966506958008, + "loss": 0.6278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061440300196409225, + "rewards/margins": 0.16381819546222687, + "rewards/rejected": -0.10237789154052734, + "step": 5608 + }, + { + "epoch": 0.33, + "learning_rate": 7.86788218175523e-08, + "logits/chosen": -1.9465899467468262, + "logits/rejected": -1.9479262828826904, + "logps/chosen": -14.166304588317871, + "logps/rejected": -67.90938568115234, + "loss": 0.5397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23992204666137695, + "rewards/margins": 0.40433090925216675, + "rewards/rejected": -0.164408877491951, + "step": 5609 + }, + { + "epoch": 0.33, + "learning_rate": 7.867110159963514e-08, + "logits/chosen": -2.0265614986419678, + "logits/rejected": -2.025118589401245, + "logps/chosen": -0.0009829014306887984, + "logps/rejected": -180.2052459716797, + "loss": 0.4067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9557833840954117e-05, + "rewards/margins": 2.053544044494629, + "rewards/rejected": -2.0535736083984375, + "step": 5610 + }, + { + "epoch": 0.33, + "learning_rate": 7.866338036318021e-08, + "logits/chosen": -1.9820345640182495, + "logits/rejected": -1.9933933019638062, + "logps/chosen": -242.7664794921875, + "logps/rejected": -355.96185302734375, + "loss": 0.1756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1702972650527954, + "rewards/margins": 1.5417816638946533, + "rewards/rejected": -0.3714843690395355, + "step": 5611 + }, + { + "epoch": 0.33, + "learning_rate": 7.865565810846179e-08, + "logits/chosen": -1.9004255533218384, + "logits/rejected": -1.8994932174682617, + "logps/chosen": -4.9711127281188965, + "logps/rejected": -243.00645446777344, + "loss": 0.3093, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2572479844093323, + "rewards/margins": 2.573870897293091, + "rewards/rejected": -2.3166229724884033, + "step": 5612 + }, + { + "epoch": 0.33, + "learning_rate": 7.864793483575424e-08, + "logits/chosen": -1.7786531448364258, + "logits/rejected": -1.7884819507598877, + "logps/chosen": -268.6097412109375, + "logps/rejected": -314.52081298828125, + "loss": 0.1366, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0951569080352783, + "rewards/margins": 1.44451904296875, + "rewards/rejected": 0.6506378054618835, + "step": 5613 + }, + { + "epoch": 0.33, + "learning_rate": 7.86402105453319e-08, + "logits/chosen": -1.8237909078598022, + "logits/rejected": -1.821457862854004, + "logps/chosen": -37.19786071777344, + "logps/rejected": -93.99501037597656, + "loss": 0.6147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09956588596105576, + "rewards/margins": 0.04781951755285263, + "rewards/rejected": 0.051746368408203125, + "step": 5614 + }, + { + "epoch": 0.33, + "learning_rate": 7.863248523746919e-08, + "logits/chosen": -2.006096839904785, + "logits/rejected": -1.9813313484191895, + "logps/chosen": -192.28228759765625, + "logps/rejected": -405.11822509765625, + "loss": 0.0545, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1222214698791504, + "rewards/margins": 4.206089973449707, + "rewards/rejected": -2.0838685035705566, + "step": 5615 + }, + { + "epoch": 0.33, + "learning_rate": 7.862475891244055e-08, + "logits/chosen": -1.9111822843551636, + "logits/rejected": -1.9475820064544678, + "logps/chosen": -240.09017944335938, + "logps/rejected": -352.51953125, + "loss": 0.2, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6416015625, + "rewards/margins": 1.101409912109375, + "rewards/rejected": 0.540191650390625, + "step": 5616 + }, + { + "epoch": 0.33, + "learning_rate": 7.861703157052046e-08, + "logits/chosen": -1.735983967781067, + "logits/rejected": -1.73362398147583, + "logps/chosen": -5.009181976318359, + "logps/rejected": -38.75404357910156, + "loss": 0.5967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12440667301416397, + "rewards/margins": 0.28505998849868774, + "rewards/rejected": -0.16065330803394318, + "step": 5617 + }, + { + "epoch": 0.33, + "learning_rate": 7.860930321198343e-08, + "logits/chosen": -2.1437785625457764, + "logits/rejected": -2.1527514457702637, + "logps/chosen": -23.77657699584961, + "logps/rejected": -136.46786499023438, + "loss": 0.5307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08284168690443039, + "rewards/margins": 0.4717104136943817, + "rewards/rejected": -0.3888687193393707, + "step": 5618 + }, + { + "epoch": 0.33, + "learning_rate": 7.860157383710399e-08, + "logits/chosen": -2.166900873184204, + "logits/rejected": -2.1641287803649902, + "logps/chosen": -0.00011264934437349439, + "logps/rejected": -275.6257629394531, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5493424143642187e-07, + "rewards/margins": 4.815182685852051, + "rewards/rejected": -4.815182685852051, + "step": 5619 + }, + { + "epoch": 0.33, + "learning_rate": 7.859384344615675e-08, + "logits/chosen": -1.92112135887146, + "logits/rejected": -1.903666615486145, + "logps/chosen": -166.79678344726562, + "logps/rejected": -325.09869384765625, + "loss": 0.3663, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0299056768417358, + "rewards/margins": 0.38668668270111084, + "rewards/rejected": 0.643218994140625, + "step": 5620 + }, + { + "epoch": 0.33, + "learning_rate": 7.858611203941632e-08, + "logits/chosen": -1.8492647409439087, + "logits/rejected": -1.811147928237915, + "logps/chosen": -166.66085815429688, + "logps/rejected": -413.822998046875, + "loss": 0.2621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.768890380859375, + "rewards/margins": 1.5209870338439941, + "rewards/rejected": -0.7520965933799744, + "step": 5621 + }, + { + "epoch": 0.33, + "learning_rate": 7.857837961715737e-08, + "logits/chosen": -1.8399556875228882, + "logits/rejected": -1.8241398334503174, + "logps/chosen": -146.78111267089844, + "logps/rejected": -267.3396911621094, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7365753054618835, + "rewards/margins": 0.9733611941337585, + "rewards/rejected": -0.236785888671875, + "step": 5622 + }, + { + "epoch": 0.33, + "learning_rate": 7.857064617965456e-08, + "logits/chosen": -2.189375877380371, + "logits/rejected": -2.1886932849884033, + "logps/chosen": -30.01118278503418, + "logps/rejected": -162.66024780273438, + "loss": 0.3759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2059667557477951, + "rewards/margins": 1.3877111673355103, + "rewards/rejected": -1.1817444562911987, + "step": 5623 + }, + { + "epoch": 0.33, + "learning_rate": 7.856291172718266e-08, + "logits/chosen": -1.9469282627105713, + "logits/rejected": -1.8856858015060425, + "logps/chosen": -202.36190795898438, + "logps/rejected": -337.6478271484375, + "loss": 0.4459, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2122986316680908, + "rewards/margins": 0.24373477697372437, + "rewards/rejected": 0.9685638546943665, + "step": 5624 + }, + { + "epoch": 0.33, + "learning_rate": 7.855517626001644e-08, + "logits/chosen": -1.9540467262268066, + "logits/rejected": -1.9623775482177734, + "logps/chosen": -171.8836669921875, + "logps/rejected": -181.42388916015625, + "loss": 0.2692, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.332209825515747, + "rewards/margins": 1.0353866815567017, + "rewards/rejected": 0.296823114156723, + "step": 5625 + }, + { + "epoch": 0.33, + "learning_rate": 7.854743977843066e-08, + "logits/chosen": -2.055574655532837, + "logits/rejected": -2.0492351055145264, + "logps/chosen": -2.078347682952881, + "logps/rejected": -198.6976776123047, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05231025442481041, + "rewards/margins": 1.367475986480713, + "rewards/rejected": -1.3151657581329346, + "step": 5626 + }, + { + "epoch": 0.33, + "learning_rate": 7.853970228270018e-08, + "logits/chosen": -1.916161060333252, + "logits/rejected": -1.9162200689315796, + "logps/chosen": -11.368353843688965, + "logps/rejected": -77.87828063964844, + "loss": 0.5515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13753461837768555, + "rewards/margins": 0.3571314811706543, + "rewards/rejected": -0.21959686279296875, + "step": 5627 + }, + { + "epoch": 0.33, + "learning_rate": 7.853196377309986e-08, + "logits/chosen": -2.083780288696289, + "logits/rejected": -2.076164722442627, + "logps/chosen": -72.10946655273438, + "logps/rejected": -243.64968872070312, + "loss": 0.1901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8221549987792969, + "rewards/margins": 2.7400474548339844, + "rewards/rejected": -1.9178924560546875, + "step": 5628 + }, + { + "epoch": 0.33, + "learning_rate": 7.852422424990463e-08, + "logits/chosen": -1.864011526107788, + "logits/rejected": -1.8661128282546997, + "logps/chosen": -78.43170928955078, + "logps/rejected": -205.62242126464844, + "loss": 0.5333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4444335997104645, + "rewards/margins": 0.2983291745185852, + "rewards/rejected": 0.14610444009304047, + "step": 5629 + }, + { + "epoch": 0.33, + "learning_rate": 7.851648371338943e-08, + "logits/chosen": -1.9478795528411865, + "logits/rejected": -1.9452964067459106, + "logps/chosen": -53.67592239379883, + "logps/rejected": -285.7978515625, + "loss": 0.3423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42516669631004333, + "rewards/margins": 1.4535877704620361, + "rewards/rejected": -1.0284210443496704, + "step": 5630 + }, + { + "epoch": 0.33, + "learning_rate": 7.850874216382924e-08, + "logits/chosen": -2.004946708679199, + "logits/rejected": -1.9992785453796387, + "logps/chosen": -32.96067428588867, + "logps/rejected": -175.23452758789062, + "loss": 0.4209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2196098417043686, + "rewards/margins": 2.7462410926818848, + "rewards/rejected": -2.965850830078125, + "step": 5631 + }, + { + "epoch": 0.33, + "learning_rate": 7.850099960149908e-08, + "logits/chosen": -2.0182085037231445, + "logits/rejected": -2.0133025646209717, + "logps/chosen": -143.30401611328125, + "logps/rejected": -244.15219116210938, + "loss": 0.1716, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8162888288497925, + "rewards/margins": 1.0852737426757812, + "rewards/rejected": 0.7310150265693665, + "step": 5632 + }, + { + "epoch": 0.33, + "learning_rate": 7.849325602667401e-08, + "logits/chosen": -1.8735971450805664, + "logits/rejected": -1.8460056781768799, + "logps/chosen": -225.5977783203125, + "logps/rejected": -408.7833251953125, + "loss": 0.0725, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5588746070861816, + "rewards/margins": 2.278653144836426, + "rewards/rejected": 0.280221551656723, + "step": 5633 + }, + { + "epoch": 0.33, + "learning_rate": 7.84855114396291e-08, + "logits/chosen": -1.995428204536438, + "logits/rejected": -1.981326699256897, + "logps/chosen": -175.37783813476562, + "logps/rejected": -248.6640625, + "loss": 0.5215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1693771332502365, + "rewards/margins": 0.198741152882576, + "rewards/rejected": -0.02936401404440403, + "step": 5634 + }, + { + "epoch": 0.33, + "learning_rate": 7.847776584063948e-08, + "logits/chosen": -1.957243800163269, + "logits/rejected": -1.9636387825012207, + "logps/chosen": -16.508996963500977, + "logps/rejected": -118.76606750488281, + "loss": 0.5764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1334686279296875, + "rewards/margins": 0.3730270266532898, + "rewards/rejected": -0.2395584136247635, + "step": 5635 + }, + { + "epoch": 0.33, + "learning_rate": 7.847001922998031e-08, + "logits/chosen": -2.03344464302063, + "logits/rejected": -2.0537269115448, + "logps/chosen": -183.22933959960938, + "logps/rejected": -172.16287231445312, + "loss": 0.6916, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2561081051826477, + "rewards/margins": -0.08751371502876282, + "rewards/rejected": 0.3436218202114105, + "step": 5636 + }, + { + "epoch": 0.33, + "learning_rate": 7.846227160792682e-08, + "logits/chosen": -1.975610375404358, + "logits/rejected": -1.9791936874389648, + "logps/chosen": -29.13778305053711, + "logps/rejected": -61.674381256103516, + "loss": 0.695, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04289283975958824, + "rewards/margins": -0.08240470290184021, + "rewards/rejected": 0.12529754638671875, + "step": 5637 + }, + { + "epoch": 0.33, + "learning_rate": 7.84545229747542e-08, + "logits/chosen": -1.958722710609436, + "logits/rejected": -1.9493687152862549, + "logps/chosen": -0.2691246271133423, + "logps/rejected": -165.45932006835938, + "loss": 0.3869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009804526343941689, + "rewards/margins": 2.5413124561309814, + "rewards/rejected": -2.551116943359375, + "step": 5638 + }, + { + "epoch": 0.33, + "learning_rate": 7.844677333073775e-08, + "logits/chosen": -2.020549774169922, + "logits/rejected": -2.0066447257995605, + "logps/chosen": -153.20106506347656, + "logps/rejected": -281.60491943359375, + "loss": 0.4573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38620302081108093, + "rewards/margins": 0.31737977266311646, + "rewards/rejected": 0.06882324069738388, + "step": 5639 + }, + { + "epoch": 0.33, + "learning_rate": 7.843902267615274e-08, + "logits/chosen": -2.014300584793091, + "logits/rejected": -2.0379886627197266, + "logps/chosen": -316.7666320800781, + "logps/rejected": -512.511962890625, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.715405225753784, + "rewards/margins": 2.398724317550659, + "rewards/rejected": 0.316680908203125, + "step": 5640 + }, + { + "epoch": 0.33, + "learning_rate": 7.843127101127456e-08, + "logits/chosen": -2.2136785984039307, + "logits/rejected": -2.203127384185791, + "logps/chosen": -71.86579895019531, + "logps/rejected": -245.18589782714844, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.095563530921936, + "rewards/margins": 0.7166374325752258, + "rewards/rejected": 0.3789260983467102, + "step": 5641 + }, + { + "epoch": 0.33, + "learning_rate": 7.842351833637854e-08, + "logits/chosen": -1.938659906387329, + "logits/rejected": -1.964895486831665, + "logps/chosen": -190.5745391845703, + "logps/rejected": -279.3113708496094, + "loss": 0.4549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5427612662315369, + "rewards/margins": 0.46692508459091187, + "rewards/rejected": 0.075836181640625, + "step": 5642 + }, + { + "epoch": 0.33, + "learning_rate": 7.841576465174014e-08, + "logits/chosen": -1.909908652305603, + "logits/rejected": -1.884399652481079, + "logps/chosen": -328.75299072265625, + "logps/rejected": -481.3004150390625, + "loss": 0.079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9585144519805908, + "rewards/margins": 2.2364869117736816, + "rewards/rejected": -0.27797242999076843, + "step": 5643 + }, + { + "epoch": 0.33, + "learning_rate": 7.840800995763477e-08, + "logits/chosen": -1.9382736682891846, + "logits/rejected": -1.775390386581421, + "logps/chosen": -223.4002685546875, + "logps/rejected": -615.857177734375, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4927337169647217, + "rewards/margins": 3.4405486583709717, + "rewards/rejected": -0.94781494140625, + "step": 5644 + }, + { + "epoch": 0.33, + "learning_rate": 7.840025425433792e-08, + "logits/chosen": -1.955151915550232, + "logits/rejected": -1.9578449726104736, + "logps/chosen": -0.009416748769581318, + "logps/rejected": -190.48873901367188, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005667777149938047, + "rewards/margins": 2.1144509315490723, + "rewards/rejected": -2.1150176525115967, + "step": 5645 + }, + { + "epoch": 0.33, + "learning_rate": 7.839249754212514e-08, + "logits/chosen": -2.1499505043029785, + "logits/rejected": -2.144726276397705, + "logps/chosen": -27.48911476135254, + "logps/rejected": -143.51254272460938, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0600683689117432, + "rewards/margins": 1.0022467374801636, + "rewards/rejected": 0.05782165750861168, + "step": 5646 + }, + { + "epoch": 0.33, + "learning_rate": 7.838473982127194e-08, + "logits/chosen": -2.145484447479248, + "logits/rejected": -2.149003744125366, + "logps/chosen": -0.0002513943472877145, + "logps/rejected": -263.386962890625, + "loss": 0.3551, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.648434810405888e-07, + "rewards/margins": 4.002215385437012, + "rewards/rejected": -4.00221586227417, + "step": 5647 + }, + { + "epoch": 0.33, + "learning_rate": 7.837698109205395e-08, + "logits/chosen": -1.8602814674377441, + "logits/rejected": -1.8531519174575806, + "logps/chosen": -29.789052963256836, + "logps/rejected": -270.6565856933594, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6734827160835266, + "rewards/margins": 1.926342248916626, + "rewards/rejected": -1.2528594732284546, + "step": 5648 + }, + { + "epoch": 0.33, + "learning_rate": 7.836922135474677e-08, + "logits/chosen": -1.9801030158996582, + "logits/rejected": -1.9848487377166748, + "logps/chosen": -0.0020718679297715425, + "logps/rejected": -120.39363861083984, + "loss": 0.508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014393244055099785, + "rewards/margins": 0.9565027952194214, + "rewards/rejected": -0.956646740436554, + "step": 5649 + }, + { + "epoch": 0.33, + "learning_rate": 7.83614606096261e-08, + "logits/chosen": -1.9760115146636963, + "logits/rejected": -2.006995439529419, + "logps/chosen": -321.7935791015625, + "logps/rejected": -439.23846435546875, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8556731939315796, + "rewards/margins": 4.438992500305176, + "rewards/rejected": -2.5833191871643066, + "step": 5650 + }, + { + "epoch": 0.33, + "learning_rate": 7.83536988569676e-08, + "logits/chosen": -1.9563030004501343, + "logits/rejected": -1.9342644214630127, + "logps/chosen": -233.90847778320312, + "logps/rejected": -420.7567138671875, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3526153564453125, + "rewards/margins": 2.0810883045196533, + "rewards/rejected": 0.27152711153030396, + "step": 5651 + }, + { + "epoch": 0.33, + "learning_rate": 7.834593609704702e-08, + "logits/chosen": -2.0564956665039062, + "logits/rejected": -2.048065423965454, + "logps/chosen": -1.7885972261428833, + "logps/rejected": -206.1906280517578, + "loss": 0.3865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0184013731777668, + "rewards/margins": 2.350574493408203, + "rewards/rejected": -2.3321731090545654, + "step": 5652 + }, + { + "epoch": 0.33, + "learning_rate": 7.833817233014016e-08, + "logits/chosen": -2.114326000213623, + "logits/rejected": -2.0952260494232178, + "logps/chosen": -211.9183349609375, + "logps/rejected": -299.14630126953125, + "loss": 0.2251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8733795285224915, + "rewards/margins": 1.0689423084259033, + "rewards/rejected": -0.19556275010108948, + "step": 5653 + }, + { + "epoch": 0.33, + "learning_rate": 7.833040755652278e-08, + "logits/chosen": -2.023320436477661, + "logits/rejected": -2.0137879848480225, + "logps/chosen": -79.98127746582031, + "logps/rejected": -462.61151123046875, + "loss": 0.4104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38812562823295593, + "rewards/margins": 6.807882785797119, + "rewards/rejected": -7.196008205413818, + "step": 5654 + }, + { + "epoch": 0.33, + "learning_rate": 7.832264177647076e-08, + "logits/chosen": -1.9288265705108643, + "logits/rejected": -1.9161311388015747, + "logps/chosen": -28.082780838012695, + "logps/rejected": -255.6118927001953, + "loss": 0.4924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08301983028650284, + "rewards/margins": 0.7993894815444946, + "rewards/rejected": -0.71636962890625, + "step": 5655 + }, + { + "epoch": 0.33, + "learning_rate": 7.831487499025995e-08, + "logits/chosen": -1.8999247550964355, + "logits/rejected": -1.8886165618896484, + "logps/chosen": -47.42329025268555, + "logps/rejected": -235.64093017578125, + "loss": 0.3889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08393249660730362, + "rewards/margins": 3.3386642932891846, + "rewards/rejected": -3.4225966930389404, + "step": 5656 + }, + { + "epoch": 0.33, + "learning_rate": 7.830710719816629e-08, + "logits/chosen": -2.049553632736206, + "logits/rejected": -2.0463714599609375, + "logps/chosen": -47.17871856689453, + "logps/rejected": -231.88119506835938, + "loss": 0.4995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20065613090991974, + "rewards/margins": 1.1443008184432983, + "rewards/rejected": -1.3449569940567017, + "step": 5657 + }, + { + "epoch": 0.33, + "learning_rate": 7.829933840046572e-08, + "logits/chosen": -2.1480159759521484, + "logits/rejected": -2.1269118785858154, + "logps/chosen": -15.292133331298828, + "logps/rejected": -238.97210693359375, + "loss": 0.4403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2023095190525055, + "rewards/margins": 2.9130117893218994, + "rewards/rejected": -3.115321397781372, + "step": 5658 + }, + { + "epoch": 0.33, + "learning_rate": 7.82915685974342e-08, + "logits/chosen": -1.9242130517959595, + "logits/rejected": -1.9227718114852905, + "logps/chosen": -52.432830810546875, + "logps/rejected": -236.69671630859375, + "loss": 0.4239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49387818574905396, + "rewards/margins": 0.6433868408203125, + "rewards/rejected": -0.14950866997241974, + "step": 5659 + }, + { + "epoch": 0.33, + "learning_rate": 7.82837977893478e-08, + "logits/chosen": -2.1503913402557373, + "logits/rejected": -2.0884926319122314, + "logps/chosen": -187.38278198242188, + "logps/rejected": -487.511474609375, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.608123779296875, + "rewards/margins": 4.240221977233887, + "rewards/rejected": -2.632098436355591, + "step": 5660 + }, + { + "epoch": 0.33, + "learning_rate": 7.827602597648253e-08, + "logits/chosen": -1.8093078136444092, + "logits/rejected": -1.7787389755249023, + "logps/chosen": -354.907958984375, + "logps/rejected": -531.8509521484375, + "loss": 0.1323, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3515747785568237, + "rewards/margins": 1.7721314430236816, + "rewards/rejected": -0.4205566346645355, + "step": 5661 + }, + { + "epoch": 0.33, + "learning_rate": 7.826825315911452e-08, + "logits/chosen": -2.036656141281128, + "logits/rejected": -2.021962881088257, + "logps/chosen": -293.53033447265625, + "logps/rejected": -477.8037414550781, + "loss": 0.0661, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5579774379730225, + "rewards/margins": 2.16025710105896, + "rewards/rejected": 0.3977203369140625, + "step": 5662 + }, + { + "epoch": 0.33, + "learning_rate": 7.826047933751988e-08, + "logits/chosen": -2.0464131832122803, + "logits/rejected": -2.0443947315216064, + "logps/chosen": -6.3520894050598145, + "logps/rejected": -288.5361328125, + "loss": 0.3596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002276468323543668, + "rewards/margins": 4.411722183227539, + "rewards/rejected": -4.413998603820801, + "step": 5663 + }, + { + "epoch": 0.33, + "learning_rate": 7.825270451197476e-08, + "logits/chosen": -2.060051202774048, + "logits/rejected": -2.0318846702575684, + "logps/chosen": -217.3189697265625, + "logps/rejected": -297.82232666015625, + "loss": 0.4284, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6659058332443237, + "rewards/margins": -0.1386169195175171, + "rewards/rejected": 1.8045227527618408, + "step": 5664 + }, + { + "epoch": 0.33, + "learning_rate": 7.824492868275541e-08, + "logits/chosen": -2.151324510574341, + "logits/rejected": -2.1518056392669678, + "logps/chosen": -0.012095378711819649, + "logps/rejected": -172.60894775390625, + "loss": 0.4102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007305597537197173, + "rewards/margins": 1.9584201574325562, + "rewards/rejected": -1.9591506719589233, + "step": 5665 + }, + { + "epoch": 0.33, + "learning_rate": 7.823715185013799e-08, + "logits/chosen": -2.1016290187835693, + "logits/rejected": -2.091264486312866, + "logps/chosen": -71.51254272460938, + "logps/rejected": -203.2666015625, + "loss": 0.1231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0751632452011108, + "rewards/margins": 3.5975661277770996, + "rewards/rejected": -2.5224030017852783, + "step": 5666 + }, + { + "epoch": 0.33, + "learning_rate": 7.822937401439886e-08, + "logits/chosen": -1.9558769464492798, + "logits/rejected": -2.0076375007629395, + "logps/chosen": -263.89593505859375, + "logps/rejected": -310.45623779296875, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6043641567230225, + "rewards/margins": 4.978970527648926, + "rewards/rejected": -2.3746063709259033, + "step": 5667 + }, + { + "epoch": 0.33, + "learning_rate": 7.822159517581425e-08, + "logits/chosen": -1.790424108505249, + "logits/rejected": -1.821027159690857, + "logps/chosen": -219.2900848388672, + "logps/rejected": -282.09503173828125, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3159500360488892, + "rewards/margins": 0.6324234008789062, + "rewards/rejected": 0.6835266351699829, + "step": 5668 + }, + { + "epoch": 0.33, + "learning_rate": 7.821381533466054e-08, + "logits/chosen": -1.8685270547866821, + "logits/rejected": -1.8680843114852905, + "logps/chosen": -22.988082885742188, + "logps/rejected": -137.34088134765625, + "loss": 0.3792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1475517302751541, + "rewards/margins": 1.719770073890686, + "rewards/rejected": -1.5722182989120483, + "step": 5669 + }, + { + "epoch": 0.33, + "learning_rate": 7.82060344912141e-08, + "logits/chosen": -1.975864052772522, + "logits/rejected": -1.9715608358383179, + "logps/chosen": -121.39739990234375, + "logps/rejected": -417.8148498535156, + "loss": 0.1183, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3466110229492188, + "rewards/margins": 2.3631699085235596, + "rewards/rejected": -1.0165588855743408, + "step": 5670 + }, + { + "epoch": 0.33, + "learning_rate": 7.819825264575134e-08, + "logits/chosen": -2.167961835861206, + "logits/rejected": -2.172037363052368, + "logps/chosen": -21.366531372070312, + "logps/rejected": -127.41041564941406, + "loss": 0.4812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030792808160185814, + "rewards/margins": 0.9329339861869812, + "rewards/rejected": -0.963726818561554, + "step": 5671 + }, + { + "epoch": 0.33, + "learning_rate": 7.819046979854871e-08, + "logits/chosen": -2.0029962062835693, + "logits/rejected": -2.0015981197357178, + "logps/chosen": -0.23702843487262726, + "logps/rejected": -126.54141235351562, + "loss": 0.4022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007055725436657667, + "rewards/margins": 2.0374596118927, + "rewards/rejected": -2.044515371322632, + "step": 5672 + }, + { + "epoch": 0.33, + "learning_rate": 7.818268594988271e-08, + "logits/chosen": -2.1004638671875, + "logits/rejected": -2.1010546684265137, + "logps/chosen": -0.0004431765410117805, + "logps/rejected": -158.34033203125, + "loss": 0.3728, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.665146894287318e-05, + "rewards/margins": 2.854696035385132, + "rewards/rejected": -2.854722738265991, + "step": 5673 + }, + { + "epoch": 0.33, + "learning_rate": 7.817490110002985e-08, + "logits/chosen": -1.8944664001464844, + "logits/rejected": -1.8940989971160889, + "logps/chosen": -92.92858123779297, + "logps/rejected": -271.53204345703125, + "loss": 0.7846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9020576477050781, + "rewards/margins": 0.6412467956542969, + "rewards/rejected": -1.543304443359375, + "step": 5674 + }, + { + "epoch": 0.33, + "learning_rate": 7.816711524926666e-08, + "logits/chosen": -2.1481752395629883, + "logits/rejected": -2.1360602378845215, + "logps/chosen": -30.846792221069336, + "logps/rejected": -194.3652801513672, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14591923356056213, + "rewards/margins": 1.502347707748413, + "rewards/rejected": -1.3564285039901733, + "step": 5675 + }, + { + "epoch": 0.33, + "learning_rate": 7.815932839786979e-08, + "logits/chosen": -1.954802393913269, + "logits/rejected": -2.0002763271331787, + "logps/chosen": -214.26341247558594, + "logps/rejected": -402.21502685546875, + "loss": 0.106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5679641962051392, + "rewards/margins": 2.248368740081787, + "rewards/rejected": -0.6804046630859375, + "step": 5676 + }, + { + "epoch": 0.33, + "learning_rate": 7.815154054611581e-08, + "logits/chosen": -1.8049501180648804, + "logits/rejected": -1.8081450462341309, + "logps/chosen": -232.9539794921875, + "logps/rejected": -323.3746337890625, + "loss": 0.1049, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.502246141433716, + "rewards/margins": 1.6653320789337158, + "rewards/rejected": 0.8369140625, + "step": 5677 + }, + { + "epoch": 0.33, + "learning_rate": 7.814375169428142e-08, + "logits/chosen": -1.8711841106414795, + "logits/rejected": -1.9079586267471313, + "logps/chosen": -283.0153503417969, + "logps/rejected": -487.63140869140625, + "loss": 0.0722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0849335193634033, + "rewards/margins": 3.7665834426879883, + "rewards/rejected": -2.681649923324585, + "step": 5678 + }, + { + "epoch": 0.33, + "learning_rate": 7.81359618426433e-08, + "logits/chosen": -1.9394102096557617, + "logits/rejected": -1.9374563694000244, + "logps/chosen": -5.744750499725342, + "logps/rejected": -151.30984497070312, + "loss": 0.7234, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02417759969830513, + "rewards/margins": -0.19426876306533813, + "rewards/rejected": 0.21844635903835297, + "step": 5679 + }, + { + "epoch": 0.33, + "learning_rate": 7.81281709914782e-08, + "logits/chosen": -1.9590281248092651, + "logits/rejected": -1.9622589349746704, + "logps/chosen": -193.2054443359375, + "logps/rejected": -342.5291442871094, + "loss": 0.1933, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0860397815704346, + "rewards/margins": 0.9241684675216675, + "rewards/rejected": 1.161871314048767, + "step": 5680 + }, + { + "epoch": 0.33, + "learning_rate": 7.812037914106284e-08, + "logits/chosen": -2.098054885864258, + "logits/rejected": -2.096993923187256, + "logps/chosen": -148.75106811523438, + "logps/rejected": -393.4966735839844, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.599116563796997, + "rewards/margins": 2.385441780090332, + "rewards/rejected": -0.7863250970840454, + "step": 5681 + }, + { + "epoch": 0.33, + "learning_rate": 7.81125862916741e-08, + "logits/chosen": -1.8493860960006714, + "logits/rejected": -1.8488636016845703, + "logps/chosen": -0.7146191596984863, + "logps/rejected": -61.24586486816406, + "loss": 0.4835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020163238048553467, + "rewards/margins": 1.198026418685913, + "rewards/rejected": -1.2181895971298218, + "step": 5682 + }, + { + "epoch": 0.33, + "learning_rate": 7.810479244358877e-08, + "logits/chosen": -1.760310411453247, + "logits/rejected": -1.7715007066726685, + "logps/chosen": -0.34687045216560364, + "logps/rejected": -142.35513305664062, + "loss": 0.4321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009855486452579498, + "rewards/margins": 1.62839674949646, + "rewards/rejected": -1.6382522583007812, + "step": 5683 + }, + { + "epoch": 0.33, + "learning_rate": 7.809699759708372e-08, + "logits/chosen": -1.9618703126907349, + "logits/rejected": -1.9593228101730347, + "logps/chosen": -20.473827362060547, + "logps/rejected": -76.78498840332031, + "loss": 0.6009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004092979710549116, + "rewards/margins": 0.3528173565864563, + "rewards/rejected": -0.348724365234375, + "step": 5684 + }, + { + "epoch": 0.33, + "learning_rate": 7.808920175243589e-08, + "logits/chosen": -1.8230738639831543, + "logits/rejected": -1.8222620487213135, + "logps/chosen": -0.00020288614905439317, + "logps/rejected": -123.05632019042969, + "loss": 0.4871, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.43418786694383e-07, + "rewards/margins": 1.124668002128601, + "rewards/rejected": -1.1246674060821533, + "step": 5685 + }, + { + "epoch": 0.33, + "learning_rate": 7.808140490992221e-08, + "logits/chosen": -1.9358313083648682, + "logits/rejected": -1.9285551309585571, + "logps/chosen": -91.68878936767578, + "logps/rejected": -338.5647277832031, + "loss": 0.2263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4951629638671875, + "rewards/margins": 4.679223537445068, + "rewards/rejected": -4.184060573577881, + "step": 5686 + }, + { + "epoch": 0.33, + "learning_rate": 7.807360706981967e-08, + "logits/chosen": -1.8698374032974243, + "logits/rejected": -1.8655781745910645, + "logps/chosen": -24.004297256469727, + "logps/rejected": -340.7525634765625, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3075176179409027, + "rewards/margins": 2.5181009769439697, + "rewards/rejected": -2.210583448410034, + "step": 5687 + }, + { + "epoch": 0.33, + "learning_rate": 7.80658082324053e-08, + "logits/chosen": -2.132894277572632, + "logits/rejected": -2.1172170639038086, + "logps/chosen": -102.80261993408203, + "logps/rejected": -376.32159423828125, + "loss": 0.7612, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1968231201171875, + "rewards/margins": 2.2147858142852783, + "rewards/rejected": -3.411608934402466, + "step": 5688 + }, + { + "epoch": 0.33, + "learning_rate": 7.805800839795612e-08, + "logits/chosen": -1.9115488529205322, + "logits/rejected": -1.8931339979171753, + "logps/chosen": -249.52456665039062, + "logps/rejected": -436.6490783691406, + "loss": 0.244, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7805756330490112, + "rewards/margins": 0.6332947015762329, + "rewards/rejected": 1.1472809314727783, + "step": 5689 + }, + { + "epoch": 0.33, + "learning_rate": 7.805020756674923e-08, + "logits/chosen": -2.034827947616577, + "logits/rejected": -2.0347182750701904, + "logps/chosen": -35.138282775878906, + "logps/rejected": -158.4499053955078, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40553781390190125, + "rewards/margins": 1.5937336683273315, + "rewards/rejected": -1.188195824623108, + "step": 5690 + }, + { + "epoch": 0.33, + "learning_rate": 7.804240573906176e-08, + "logits/chosen": -2.2560267448425293, + "logits/rejected": -2.2559447288513184, + "logps/chosen": -4.185329437255859, + "logps/rejected": -133.26649475097656, + "loss": 0.4751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10087838023900986, + "rewards/margins": 1.3562262058258057, + "rewards/rejected": -1.4571045637130737, + "step": 5691 + }, + { + "epoch": 0.33, + "learning_rate": 7.803460291517087e-08, + "logits/chosen": -2.051086902618408, + "logits/rejected": -2.0261006355285645, + "logps/chosen": -74.38294982910156, + "logps/rejected": -322.53326416015625, + "loss": 0.1404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9550308585166931, + "rewards/margins": 4.533751010894775, + "rewards/rejected": -3.5787200927734375, + "step": 5692 + }, + { + "epoch": 0.33, + "learning_rate": 7.802679909535375e-08, + "logits/chosen": -2.0634186267852783, + "logits/rejected": -2.0611062049865723, + "logps/chosen": -284.158447265625, + "logps/rejected": -502.4092712402344, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7499420642852783, + "rewards/margins": 3.3632874488830566, + "rewards/rejected": -1.6133453845977783, + "step": 5693 + }, + { + "epoch": 0.33, + "learning_rate": 7.801899427988765e-08, + "logits/chosen": -2.1222267150878906, + "logits/rejected": -2.13334059715271, + "logps/chosen": -211.31588745117188, + "logps/rejected": -294.66790771484375, + "loss": 0.1473, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4801971912384033, + "rewards/margins": 1.5438263416290283, + "rewards/rejected": -0.063629150390625, + "step": 5694 + }, + { + "epoch": 0.33, + "learning_rate": 7.801118846904979e-08, + "logits/chosen": -1.968623399734497, + "logits/rejected": -1.9711886644363403, + "logps/chosen": -0.005775200203061104, + "logps/rejected": -111.80046081542969, + "loss": 0.4949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003151950368192047, + "rewards/margins": 1.1517233848571777, + "rewards/rejected": -1.15203857421875, + "step": 5695 + }, + { + "epoch": 0.33, + "learning_rate": 7.800338166311752e-08, + "logits/chosen": -1.981510877609253, + "logits/rejected": -1.9860626459121704, + "logps/chosen": -7.128742694854736, + "logps/rejected": -131.01760864257812, + "loss": 0.4236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11229157447814941, + "rewards/margins": 1.4560812711715698, + "rewards/rejected": -1.3437896966934204, + "step": 5696 + }, + { + "epoch": 0.33, + "learning_rate": 7.799557386236815e-08, + "logits/chosen": -1.9393088817596436, + "logits/rejected": -1.9174230098724365, + "logps/chosen": -203.1879119873047, + "logps/rejected": -405.8439636230469, + "loss": 0.1187, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5526901483535767, + "rewards/margins": 1.8890488147735596, + "rewards/rejected": -0.3363586366176605, + "step": 5697 + }, + { + "epoch": 0.33, + "learning_rate": 7.798776506707906e-08, + "logits/chosen": -1.9290802478790283, + "logits/rejected": -1.9033379554748535, + "logps/chosen": -60.96312713623047, + "logps/rejected": -309.1189880371094, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8805946707725525, + "rewards/margins": 1.3191871643066406, + "rewards/rejected": -0.4385925233364105, + "step": 5698 + }, + { + "epoch": 0.33, + "learning_rate": 7.797995527752764e-08, + "logits/chosen": -1.9062281847000122, + "logits/rejected": -1.9060314893722534, + "logps/chosen": -209.66375732421875, + "logps/rejected": -488.3933410644531, + "loss": 0.0399, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.68157958984375, + "rewards/margins": 4.739889621734619, + "rewards/rejected": -3.058310031890869, + "step": 5699 + }, + { + "epoch": 0.33, + "learning_rate": 7.797214449399136e-08, + "logits/chosen": -1.9385337829589844, + "logits/rejected": -1.926106333732605, + "logps/chosen": -16.50495719909668, + "logps/rejected": -183.47747802734375, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1417820006608963, + "rewards/margins": 2.095421314239502, + "rewards/rejected": -1.953639268875122, + "step": 5700 + }, + { + "epoch": 0.33, + "learning_rate": 7.796433271674766e-08, + "logits/chosen": -2.1198747158050537, + "logits/rejected": -2.0949723720550537, + "logps/chosen": -71.65750885009766, + "logps/rejected": -335.03973388671875, + "loss": 0.1583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8486213684082031, + "rewards/margins": 6.361786842346191, + "rewards/rejected": -5.513165473937988, + "step": 5701 + }, + { + "epoch": 0.33, + "learning_rate": 7.795651994607408e-08, + "logits/chosen": -1.8167165517807007, + "logits/rejected": -1.7577464580535889, + "logps/chosen": -290.33319091796875, + "logps/rejected": -474.9066162109375, + "loss": 0.2232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3773376941680908, + "rewards/margins": 0.90802001953125, + "rewards/rejected": 0.46931764483451843, + "step": 5702 + }, + { + "epoch": 0.33, + "learning_rate": 7.794870618224818e-08, + "logits/chosen": -1.9951794147491455, + "logits/rejected": -1.9973734617233276, + "logps/chosen": -89.48196411132812, + "logps/rejected": -257.30828857421875, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38921815156936646, + "rewards/margins": 1.0394073724746704, + "rewards/rejected": -0.650189220905304, + "step": 5703 + }, + { + "epoch": 0.33, + "learning_rate": 7.79408914255475e-08, + "logits/chosen": -2.0415992736816406, + "logits/rejected": -2.0404858589172363, + "logps/chosen": -36.833492279052734, + "logps/rejected": -183.94622802734375, + "loss": 0.3694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1418144255876541, + "rewards/margins": 1.2274452447891235, + "rewards/rejected": -1.0856307744979858, + "step": 5704 + }, + { + "epoch": 0.33, + "learning_rate": 7.79330756762497e-08, + "logits/chosen": -1.9931942224502563, + "logits/rejected": -1.9552724361419678, + "logps/chosen": -74.12879180908203, + "logps/rejected": -234.6748809814453, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5794822573661804, + "rewards/margins": 2.079871416091919, + "rewards/rejected": -1.5003890991210938, + "step": 5705 + }, + { + "epoch": 0.33, + "learning_rate": 7.79252589346324e-08, + "logits/chosen": -1.911439299583435, + "logits/rejected": -1.9583656787872314, + "logps/chosen": -180.00009155273438, + "logps/rejected": -268.6083984375, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.230828881263733, + "rewards/margins": 0.27611082792282104, + "rewards/rejected": 0.9547180533409119, + "step": 5706 + }, + { + "epoch": 0.33, + "learning_rate": 7.791744120097332e-08, + "logits/chosen": -1.9889217615127563, + "logits/rejected": -1.9893523454666138, + "logps/chosen": -82.77427673339844, + "logps/rejected": -219.26438903808594, + "loss": 0.2664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5350891351699829, + "rewards/margins": 2.029737949371338, + "rewards/rejected": -1.494648814201355, + "step": 5707 + }, + { + "epoch": 0.33, + "learning_rate": 7.790962247555015e-08, + "logits/chosen": -1.975878119468689, + "logits/rejected": -1.9748873710632324, + "logps/chosen": -12.044288635253906, + "logps/rejected": -213.7215118408203, + "loss": 0.3541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0794304832816124, + "rewards/margins": 2.8808298110961914, + "rewards/rejected": -2.8013992309570312, + "step": 5708 + }, + { + "epoch": 0.33, + "learning_rate": 7.790180275864069e-08, + "logits/chosen": -1.8936113119125366, + "logits/rejected": -1.8791334629058838, + "logps/chosen": -51.664695739746094, + "logps/rejected": -201.8315887451172, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21036453545093536, + "rewards/margins": 1.1552711725234985, + "rewards/rejected": -0.9449066519737244, + "step": 5709 + }, + { + "epoch": 0.33, + "learning_rate": 7.78939820505227e-08, + "logits/chosen": -2.0544939041137695, + "logits/rejected": -2.0342774391174316, + "logps/chosen": -0.002316380152478814, + "logps/rejected": -120.89060974121094, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012042155867675319, + "rewards/margins": 0.8408891558647156, + "rewards/rejected": -0.8410095572471619, + "step": 5710 + }, + { + "epoch": 0.33, + "learning_rate": 7.788616035147404e-08, + "logits/chosen": -1.9302918910980225, + "logits/rejected": -1.9778263568878174, + "logps/chosen": -158.12130737304688, + "logps/rejected": -257.85247802734375, + "loss": 0.1286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2286423444747925, + "rewards/margins": 2.1428329944610596, + "rewards/rejected": -0.9141907095909119, + "step": 5711 + }, + { + "epoch": 0.33, + "learning_rate": 7.787833766177254e-08, + "logits/chosen": -2.081669807434082, + "logits/rejected": -2.0920281410217285, + "logps/chosen": -35.89552688598633, + "logps/rejected": -109.67777252197266, + "loss": 0.4211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06108589097857475, + "rewards/margins": 0.992923378944397, + "rewards/rejected": -0.9318374991416931, + "step": 5712 + }, + { + "epoch": 0.33, + "learning_rate": 7.787051398169613e-08, + "logits/chosen": -1.8554695844650269, + "logits/rejected": -1.88869309425354, + "logps/chosen": -250.87481689453125, + "logps/rejected": -395.0123596191406, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.017974853515625, + "rewards/margins": 2.2934632301330566, + "rewards/rejected": -0.2754882872104645, + "step": 5713 + }, + { + "epoch": 0.33, + "learning_rate": 7.786268931152273e-08, + "logits/chosen": -2.112973213195801, + "logits/rejected": -2.108922243118286, + "logps/chosen": -0.04446878284215927, + "logps/rejected": -180.3126678466797, + "loss": 0.3713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002764875767752528, + "rewards/margins": 2.892364501953125, + "rewards/rejected": -2.895129442214966, + "step": 5714 + }, + { + "epoch": 0.33, + "learning_rate": 7.785486365153032e-08, + "logits/chosen": -1.8201178312301636, + "logits/rejected": -1.8169060945510864, + "logps/chosen": -1.3534313440322876, + "logps/rejected": -156.3335723876953, + "loss": 0.4607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12653988599777222, + "rewards/margins": 1.0043063163757324, + "rewards/rejected": -0.8777664303779602, + "step": 5715 + }, + { + "epoch": 0.33, + "learning_rate": 7.784703700199689e-08, + "logits/chosen": -2.174950122833252, + "logits/rejected": -2.170349359512329, + "logps/chosen": -45.933937072753906, + "logps/rejected": -187.76605224609375, + "loss": 0.422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027017975226044655, + "rewards/margins": 1.1339393854141235, + "rewards/rejected": -1.1069214344024658, + "step": 5716 + }, + { + "epoch": 0.33, + "learning_rate": 7.783920936320049e-08, + "logits/chosen": -1.7192189693450928, + "logits/rejected": -1.712654709815979, + "logps/chosen": -30.92757797241211, + "logps/rejected": -204.8477783203125, + "loss": 0.4817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29020196199417114, + "rewards/margins": 0.5655529499053955, + "rewards/rejected": -0.275350958108902, + "step": 5717 + }, + { + "epoch": 0.33, + "learning_rate": 7.783138073541921e-08, + "logits/chosen": -1.8322622776031494, + "logits/rejected": -1.8725847005844116, + "logps/chosen": -331.4547424316406, + "logps/rejected": -467.66729736328125, + "loss": 0.0942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2564667463302612, + "rewards/margins": 3.471060276031494, + "rewards/rejected": -2.2145936489105225, + "step": 5718 + }, + { + "epoch": 0.33, + "learning_rate": 7.782355111893114e-08, + "logits/chosen": -2.0434036254882812, + "logits/rejected": -2.000542163848877, + "logps/chosen": -159.0563201904297, + "logps/rejected": -418.4630126953125, + "loss": 0.0985, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.35186767578125, + "rewards/margins": 2.7830810546875, + "rewards/rejected": -1.43121337890625, + "step": 5719 + }, + { + "epoch": 0.33, + "learning_rate": 7.781572051401444e-08, + "logits/chosen": -1.8574950695037842, + "logits/rejected": -1.822930097579956, + "logps/chosen": -161.4508056640625, + "logps/rejected": -258.3277282714844, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8333587646484375, + "rewards/margins": 1.1396957635879517, + "rewards/rejected": -0.3063369691371918, + "step": 5720 + }, + { + "epoch": 0.33, + "learning_rate": 7.780788892094727e-08, + "logits/chosen": -1.9811806678771973, + "logits/rejected": -1.9754416942596436, + "logps/chosen": -128.2549591064453, + "logps/rejected": -227.78372192382812, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7463882565498352, + "rewards/margins": 0.430368036031723, + "rewards/rejected": 0.3160202205181122, + "step": 5721 + }, + { + "epoch": 0.33, + "learning_rate": 7.780005634000788e-08, + "logits/chosen": -2.1601829528808594, + "logits/rejected": -2.1505284309387207, + "logps/chosen": -13.636957168579102, + "logps/rejected": -208.10720825195312, + "loss": 0.4008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2967107892036438, + "rewards/margins": 1.1996084451675415, + "rewards/rejected": -0.9028976559638977, + "step": 5722 + }, + { + "epoch": 0.33, + "learning_rate": 7.779222277147451e-08, + "logits/chosen": -2.10449481010437, + "logits/rejected": -2.067033529281616, + "logps/chosen": -151.37469482421875, + "logps/rejected": -253.49380493164062, + "loss": 0.3, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2262862920761108, + "rewards/margins": 0.8613418340682983, + "rewards/rejected": 0.3649444580078125, + "step": 5723 + }, + { + "epoch": 0.33, + "learning_rate": 7.778438821562543e-08, + "logits/chosen": -1.9048210382461548, + "logits/rejected": -1.911328911781311, + "logps/chosen": -63.518428802490234, + "logps/rejected": -289.6789855957031, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05965690687298775, + "rewards/margins": 1.8089275360107422, + "rewards/rejected": -1.7492706775665283, + "step": 5724 + }, + { + "epoch": 0.33, + "learning_rate": 7.777655267273898e-08, + "logits/chosen": -2.036513566970825, + "logits/rejected": -2.023899555206299, + "logps/chosen": -0.30776387453079224, + "logps/rejected": -175.42190551757812, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005146530456840992, + "rewards/margins": 3.7597384452819824, + "rewards/rejected": -3.7648849487304688, + "step": 5725 + }, + { + "epoch": 0.33, + "learning_rate": 7.776871614309349e-08, + "logits/chosen": -1.8154112100601196, + "logits/rejected": -1.7938393354415894, + "logps/chosen": -180.38552856445312, + "logps/rejected": -326.7135925292969, + "loss": 0.4644, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2865371704101562, + "rewards/margins": -0.20024573802947998, + "rewards/rejected": 1.4867829084396362, + "step": 5726 + }, + { + "epoch": 0.33, + "learning_rate": 7.776087862696738e-08, + "logits/chosen": -1.8757621049880981, + "logits/rejected": -1.8809781074523926, + "logps/chosen": -48.00052261352539, + "logps/rejected": -186.89956665039062, + "loss": 0.5276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07847213745117188, + "rewards/margins": 0.4462471008300781, + "rewards/rejected": -0.52471923828125, + "step": 5727 + }, + { + "epoch": 0.33, + "learning_rate": 7.775304012463907e-08, + "logits/chosen": -1.9105647802352905, + "logits/rejected": -1.9104564189910889, + "logps/chosen": -24.772342681884766, + "logps/rejected": -156.91357421875, + "loss": 0.6322, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5166160464286804, + "rewards/margins": -0.24873733520507812, + "rewards/rejected": 0.7653533816337585, + "step": 5728 + }, + { + "epoch": 0.33, + "learning_rate": 7.774520063638701e-08, + "logits/chosen": -2.0029265880584717, + "logits/rejected": -2.0002188682556152, + "logps/chosen": -48.61072540283203, + "logps/rejected": -146.15753173828125, + "loss": 0.3664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41714173555374146, + "rewards/margins": 1.171630859375, + "rewards/rejected": -0.7544891238212585, + "step": 5729 + }, + { + "epoch": 0.33, + "learning_rate": 7.773736016248972e-08, + "logits/chosen": -2.083292007446289, + "logits/rejected": -2.0781283378601074, + "logps/chosen": -0.49329259991645813, + "logps/rejected": -237.00534057617188, + "loss": 0.3697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019308356568217278, + "rewards/margins": 3.358058452606201, + "rewards/rejected": -3.377366781234741, + "step": 5730 + }, + { + "epoch": 0.33, + "learning_rate": 7.772951870322571e-08, + "logits/chosen": -1.9298396110534668, + "logits/rejected": -1.9314731359481812, + "logps/chosen": -35.06330490112305, + "logps/rejected": -123.13423919677734, + "loss": 0.5474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024314118549227715, + "rewards/margins": 0.7270148992538452, + "rewards/rejected": -0.7027007937431335, + "step": 5731 + }, + { + "epoch": 0.33, + "learning_rate": 7.772167625887357e-08, + "logits/chosen": -2.1348495483398438, + "logits/rejected": -2.1347906589508057, + "logps/chosen": -5.400110603659414e-05, + "logps/rejected": -141.06451416015625, + "loss": 0.3547, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.052977544688474e-07, + "rewards/margins": 4.087026596069336, + "rewards/rejected": -4.087027072906494, + "step": 5732 + }, + { + "epoch": 0.33, + "learning_rate": 7.771383282971188e-08, + "logits/chosen": -2.065448045730591, + "logits/rejected": -2.0619969367980957, + "logps/chosen": -245.85824584960938, + "logps/rejected": -270.49407958984375, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.675915539264679, + "rewards/margins": 0.6191467642784119, + "rewards/rejected": 0.05676880106329918, + "step": 5733 + }, + { + "epoch": 0.33, + "learning_rate": 7.770598841601927e-08, + "logits/chosen": -1.9493277072906494, + "logits/rejected": -1.9136998653411865, + "logps/chosen": -160.59072875976562, + "logps/rejected": -383.8934631347656, + "loss": 0.0912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8767273426055908, + "rewards/margins": 2.323935031890869, + "rewards/rejected": -0.44720765948295593, + "step": 5734 + }, + { + "epoch": 0.33, + "learning_rate": 7.769814301807444e-08, + "logits/chosen": -1.8735753297805786, + "logits/rejected": -1.8908909559249878, + "logps/chosen": -241.5240020751953, + "logps/rejected": -282.2982177734375, + "loss": 0.2095, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6605148315429688, + "rewards/margins": 1.049098253250122, + "rewards/rejected": 0.6114166378974915, + "step": 5735 + }, + { + "epoch": 0.33, + "learning_rate": 7.769029663615609e-08, + "logits/chosen": -1.9615118503570557, + "logits/rejected": -1.9799811840057373, + "logps/chosen": -247.14736938476562, + "logps/rejected": -285.37982177734375, + "loss": 0.1123, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7705200910568237, + "rewards/margins": 1.9462860822677612, + "rewards/rejected": -0.1757659912109375, + "step": 5736 + }, + { + "epoch": 0.33, + "learning_rate": 7.768244927054297e-08, + "logits/chosen": -1.8616199493408203, + "logits/rejected": -1.8799937963485718, + "logps/chosen": -254.03829956054688, + "logps/rejected": -466.61907958984375, + "loss": 0.0624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6645355224609375, + "rewards/margins": 2.946328639984131, + "rewards/rejected": -1.281793236732483, + "step": 5737 + }, + { + "epoch": 0.33, + "learning_rate": 7.767460092151382e-08, + "logits/chosen": -2.134960651397705, + "logits/rejected": -2.1152260303497314, + "logps/chosen": -6.64429235458374, + "logps/rejected": -203.48175048828125, + "loss": 0.3767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05126018449664116, + "rewards/margins": 2.486079692840576, + "rewards/rejected": -2.537339925765991, + "step": 5738 + }, + { + "epoch": 0.33, + "learning_rate": 7.766675158934749e-08, + "logits/chosen": -1.9258040189743042, + "logits/rejected": -1.9509224891662598, + "logps/chosen": -69.37124633789062, + "logps/rejected": -160.8153076171875, + "loss": 0.5902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031281281262636185, + "rewards/margins": 0.34818342328071594, + "rewards/rejected": -0.379464715719223, + "step": 5739 + }, + { + "epoch": 0.33, + "learning_rate": 7.765890127432277e-08, + "logits/chosen": -1.9827204942703247, + "logits/rejected": -1.9831331968307495, + "logps/chosen": -0.014145172201097012, + "logps/rejected": -199.10733032226562, + "loss": 0.3888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019706245802808553, + "rewards/margins": 2.1473278999328613, + "rewards/rejected": -2.1475250720977783, + "step": 5740 + }, + { + "epoch": 0.33, + "learning_rate": 7.765104997671863e-08, + "logits/chosen": -1.9714411497116089, + "logits/rejected": -1.9166892766952515, + "logps/chosen": -241.3373565673828, + "logps/rejected": -367.9820861816406, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5922653675079346, + "rewards/margins": 1.6699539422988892, + "rewards/rejected": -0.07768859714269638, + "step": 5741 + }, + { + "epoch": 0.33, + "learning_rate": 7.76431976968139e-08, + "logits/chosen": -1.9321928024291992, + "logits/rejected": -1.9422134160995483, + "logps/chosen": -34.49147415161133, + "logps/rejected": -115.70915222167969, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34134483337402344, + "rewards/margins": 1.9200741052627563, + "rewards/rejected": -1.578729271888733, + "step": 5742 + }, + { + "epoch": 0.33, + "learning_rate": 7.763534443488761e-08, + "logits/chosen": -2.1919825077056885, + "logits/rejected": -2.1849827766418457, + "logps/chosen": -200.2227783203125, + "logps/rejected": -378.443359375, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0400390625, + "rewards/margins": 4.268533229827881, + "rewards/rejected": -1.2284942865371704, + "step": 5743 + }, + { + "epoch": 0.33, + "learning_rate": 7.762749019121868e-08, + "logits/chosen": -2.094372034072876, + "logits/rejected": -2.098891496658325, + "logps/chosen": -17.117820739746094, + "logps/rejected": -169.22434997558594, + "loss": 0.5274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18768034875392914, + "rewards/margins": 1.0496197938919067, + "rewards/rejected": -1.237300157546997, + "step": 5744 + }, + { + "epoch": 0.33, + "learning_rate": 7.761963496608617e-08, + "logits/chosen": -2.148237943649292, + "logits/rejected": -2.1355538368225098, + "logps/chosen": -0.2018527239561081, + "logps/rejected": -104.57929229736328, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005785717163234949, + "rewards/margins": 0.45518460869789124, + "rewards/rejected": -0.46097031235694885, + "step": 5745 + }, + { + "epoch": 0.33, + "learning_rate": 7.761177875976912e-08, + "logits/chosen": -1.9199434518814087, + "logits/rejected": -1.9239532947540283, + "logps/chosen": -30.094135284423828, + "logps/rejected": -185.01800537109375, + "loss": 0.4292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09702301025390625, + "rewards/margins": 1.1384689807891846, + "rewards/rejected": -1.0414459705352783, + "step": 5746 + }, + { + "epoch": 0.33, + "learning_rate": 7.760392157254662e-08, + "logits/chosen": -1.9425972700119019, + "logits/rejected": -1.9367996454238892, + "logps/chosen": -55.73783874511719, + "logps/rejected": -165.49655151367188, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.184545874595642, + "rewards/margins": 3.2186217308044434, + "rewards/rejected": -2.034075975418091, + "step": 5747 + }, + { + "epoch": 0.33, + "learning_rate": 7.75960634046978e-08, + "logits/chosen": -2.0109405517578125, + "logits/rejected": -1.9974802732467651, + "logps/chosen": -26.460309982299805, + "logps/rejected": -233.49014282226562, + "loss": 0.3074, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3260534405708313, + "rewards/margins": 2.3487906455993652, + "rewards/rejected": -2.0227372646331787, + "step": 5748 + }, + { + "epoch": 0.33, + "learning_rate": 7.758820425650182e-08, + "logits/chosen": -2.061009168624878, + "logits/rejected": -2.072794198989868, + "logps/chosen": -108.52538299560547, + "logps/rejected": -233.95899963378906, + "loss": 0.8293, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5609893798828125, + "rewards/margins": -0.31564027070999146, + "rewards/rejected": -0.24534912407398224, + "step": 5749 + }, + { + "epoch": 0.33, + "learning_rate": 7.758034412823789e-08, + "logits/chosen": -1.9795013666152954, + "logits/rejected": -1.969549298286438, + "logps/chosen": -15.323360443115234, + "logps/rejected": -206.1663818359375, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7496278882026672, + "rewards/margins": 1.9175004959106445, + "rewards/rejected": -1.167872667312622, + "step": 5750 + }, + { + "epoch": 0.33, + "learning_rate": 7.75724830201852e-08, + "logits/chosen": -2.093867301940918, + "logits/rejected": -2.095867156982422, + "logps/chosen": -0.0002472266787663102, + "logps/rejected": -98.38519287109375, + "loss": 0.5962, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.084394327539485e-05, + "rewards/margins": 0.432628870010376, + "rewards/rejected": -0.432608038187027, + "step": 5751 + }, + { + "epoch": 0.33, + "learning_rate": 7.756462093262306e-08, + "logits/chosen": -1.9469400644302368, + "logits/rejected": -1.9413208961486816, + "logps/chosen": -8.501975059509277, + "logps/rejected": -195.5281219482422, + "loss": 0.3942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07489156723022461, + "rewards/margins": 2.3947813510894775, + "rewards/rejected": -2.319889783859253, + "step": 5752 + }, + { + "epoch": 0.33, + "learning_rate": 7.755675786583076e-08, + "logits/chosen": -2.103802442550659, + "logits/rejected": -2.102968692779541, + "logps/chosen": -29.70543098449707, + "logps/rejected": -107.41983032226562, + "loss": 0.4189, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5051027536392212, + "rewards/margins": 0.6241747140884399, + "rewards/rejected": -0.11907196044921875, + "step": 5753 + }, + { + "epoch": 0.33, + "learning_rate": 7.75488938200876e-08, + "logits/chosen": -2.03629207611084, + "logits/rejected": -2.034252643585205, + "logps/chosen": -75.42180633544922, + "logps/rejected": -320.50811767578125, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07794189453125, + "rewards/margins": 3.2980194091796875, + "rewards/rejected": -3.3759613037109375, + "step": 5754 + }, + { + "epoch": 0.33, + "learning_rate": 7.754102879567299e-08, + "logits/chosen": -2.0376784801483154, + "logits/rejected": -2.022852659225464, + "logps/chosen": -59.7224235534668, + "logps/rejected": -301.2159423828125, + "loss": 0.5558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0733921080827713, + "rewards/margins": 0.4328281283378601, + "rewards/rejected": -0.35943603515625, + "step": 5755 + }, + { + "epoch": 0.33, + "learning_rate": 7.753316279286633e-08, + "logits/chosen": -1.9675074815750122, + "logits/rejected": -2.0236704349517822, + "logps/chosen": -192.35401916503906, + "logps/rejected": -344.7499694824219, + "loss": 0.1185, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4852402210235596, + "rewards/margins": 1.722639560699463, + "rewards/rejected": -0.23739929497241974, + "step": 5756 + }, + { + "epoch": 0.34, + "learning_rate": 7.752529581194704e-08, + "logits/chosen": -2.1820359230041504, + "logits/rejected": -2.1767687797546387, + "logps/chosen": -4.511853218078613, + "logps/rejected": -142.96519470214844, + "loss": 0.382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2507143020629883, + "rewards/margins": 1.5512590408325195, + "rewards/rejected": -1.3005447387695312, + "step": 5757 + }, + { + "epoch": 0.34, + "learning_rate": 7.751742785319459e-08, + "logits/chosen": -1.8991864919662476, + "logits/rejected": -1.8874000310897827, + "logps/chosen": -313.2176208496094, + "logps/rejected": -443.6020202636719, + "loss": 0.0515, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.20688796043396, + "rewards/margins": 2.8466310501098633, + "rewards/rejected": -0.6397430300712585, + "step": 5758 + }, + { + "epoch": 0.34, + "learning_rate": 7.750955891688853e-08, + "logits/chosen": -1.799546480178833, + "logits/rejected": -1.8317310810089111, + "logps/chosen": -184.87391662597656, + "logps/rejected": -358.1748962402344, + "loss": 0.1166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.686457872390747, + "rewards/margins": 1.7785660028457642, + "rewards/rejected": -0.09210815280675888, + "step": 5759 + }, + { + "epoch": 0.34, + "learning_rate": 7.750168900330835e-08, + "logits/chosen": -1.9337043762207031, + "logits/rejected": -1.9783153533935547, + "logps/chosen": -231.75836181640625, + "logps/rejected": -348.4617004394531, + "loss": 0.1175, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7812988758087158, + "rewards/margins": 1.7021270990371704, + "rewards/rejected": 0.07917175441980362, + "step": 5760 + }, + { + "epoch": 0.34, + "learning_rate": 7.749381811273366e-08, + "logits/chosen": -1.9578778743743896, + "logits/rejected": -1.9488682746887207, + "logps/chosen": -49.82451629638672, + "logps/rejected": -181.03811645507812, + "loss": 0.7331, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.24497070908546448, + "rewards/margins": -0.005992129445075989, + "rewards/rejected": -0.2389785796403885, + "step": 5761 + }, + { + "epoch": 0.34, + "learning_rate": 7.748594624544406e-08, + "logits/chosen": -2.0304341316223145, + "logits/rejected": -2.0253305435180664, + "logps/chosen": -247.6778564453125, + "logps/rejected": -293.60601806640625, + "loss": 0.3714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7981902956962585, + "rewards/margins": 0.25731807947158813, + "rewards/rejected": 0.5408722162246704, + "step": 5762 + }, + { + "epoch": 0.34, + "learning_rate": 7.747807340171919e-08, + "logits/chosen": -1.965282917022705, + "logits/rejected": -2.0089118480682373, + "logps/chosen": -254.3644256591797, + "logps/rejected": -309.82550048828125, + "loss": 0.1277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6072219610214233, + "rewards/margins": 1.8790115118026733, + "rewards/rejected": -0.27178955078125, + "step": 5763 + }, + { + "epoch": 0.34, + "learning_rate": 7.747019958183876e-08, + "logits/chosen": -2.1875815391540527, + "logits/rejected": -2.1857125759124756, + "logps/chosen": -0.1656089872121811, + "logps/rejected": -94.33164978027344, + "loss": 0.5555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0060990676283836365, + "rewards/margins": 0.7104172706604004, + "rewards/rejected": -0.7043182253837585, + "step": 5764 + }, + { + "epoch": 0.34, + "learning_rate": 7.746232478608246e-08, + "logits/chosen": -1.851984977722168, + "logits/rejected": -1.822619080543518, + "logps/chosen": -206.81881713867188, + "logps/rejected": -400.82354736328125, + "loss": 0.057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9561554193496704, + "rewards/margins": 2.9478180408477783, + "rewards/rejected": -0.9916626214981079, + "step": 5765 + }, + { + "epoch": 0.34, + "learning_rate": 7.745444901473006e-08, + "logits/chosen": -2.2601423263549805, + "logits/rejected": -2.2448174953460693, + "logps/chosen": -43.419368743896484, + "logps/rejected": -315.004638671875, + "loss": 0.1781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5051830410957336, + "rewards/margins": 3.8984570503234863, + "rewards/rejected": -3.3932740688323975, + "step": 5766 + }, + { + "epoch": 0.34, + "learning_rate": 7.744657226806133e-08, + "logits/chosen": -2.1106631755828857, + "logits/rejected": -2.108114719390869, + "logps/chosen": -77.33611297607422, + "logps/rejected": -257.17413330078125, + "loss": 0.3975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11749496310949326, + "rewards/margins": 3.2115516662597656, + "rewards/rejected": -3.3290467262268066, + "step": 5767 + }, + { + "epoch": 0.34, + "learning_rate": 7.74386945463561e-08, + "logits/chosen": -2.2067320346832275, + "logits/rejected": -2.212827682495117, + "logps/chosen": -39.29949951171875, + "logps/rejected": -300.693115234375, + "loss": 0.3993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18312035501003265, + "rewards/margins": 1.5442653894424438, + "rewards/rejected": -1.36114501953125, + "step": 5768 + }, + { + "epoch": 0.34, + "learning_rate": 7.743081584989422e-08, + "logits/chosen": -2.0200846195220947, + "logits/rejected": -1.9982343912124634, + "logps/chosen": -244.3157958984375, + "logps/rejected": -383.5723876953125, + "loss": 0.1272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2731446027755737, + "rewards/margins": 2.1867921352386475, + "rewards/rejected": -0.913647472858429, + "step": 5769 + }, + { + "epoch": 0.34, + "learning_rate": 7.742293617895557e-08, + "logits/chosen": -1.9156289100646973, + "logits/rejected": -1.901908040046692, + "logps/chosen": -187.59683227539062, + "logps/rejected": -449.5717468261719, + "loss": 0.3951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08088531345129013, + "rewards/margins": 5.986414909362793, + "rewards/rejected": -6.067300319671631, + "step": 5770 + }, + { + "epoch": 0.34, + "learning_rate": 7.74150555338201e-08, + "logits/chosen": -1.9999451637268066, + "logits/rejected": -1.9640735387802124, + "logps/chosen": -209.28138732910156, + "logps/rejected": -460.0908508300781, + "loss": 0.0887, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0505263805389404, + "rewards/margins": 1.9944655895233154, + "rewards/rejected": 0.056060791015625, + "step": 5771 + }, + { + "epoch": 0.34, + "learning_rate": 7.740717391476777e-08, + "logits/chosen": -1.9299129247665405, + "logits/rejected": -1.9339970350265503, + "logps/chosen": -0.02159961499273777, + "logps/rejected": -30.585235595703125, + "loss": 0.7104, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.172662036377005e-05, + "rewards/margins": -0.08252741396427155, + "rewards/rejected": 0.08249568939208984, + "step": 5772 + }, + { + "epoch": 0.34, + "learning_rate": 7.739929132207855e-08, + "logits/chosen": -2.061300039291382, + "logits/rejected": -2.0553293228149414, + "logps/chosen": -69.25225067138672, + "logps/rejected": -228.639892578125, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031620025634765625, + "rewards/margins": 2.8221535682678223, + "rewards/rejected": -2.7905335426330566, + "step": 5773 + }, + { + "epoch": 0.34, + "learning_rate": 7.739140775603245e-08, + "logits/chosen": -2.0059919357299805, + "logits/rejected": -2.016249418258667, + "logps/chosen": -178.61485290527344, + "logps/rejected": -317.0888671875, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.63786780834198, + "rewards/margins": 0.41703033447265625, + "rewards/rejected": 1.2208374738693237, + "step": 5774 + }, + { + "epoch": 0.34, + "learning_rate": 7.738352321690959e-08, + "logits/chosen": -1.9237016439437866, + "logits/rejected": -1.9421292543411255, + "logps/chosen": -291.124755859375, + "logps/rejected": -310.45208740234375, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.414434790611267, + "rewards/margins": 0.1994689702987671, + "rewards/rejected": 1.2149658203125, + "step": 5775 + }, + { + "epoch": 0.34, + "learning_rate": 7.737563770499003e-08, + "logits/chosen": -1.974164366722107, + "logits/rejected": -1.9706954956054688, + "logps/chosen": -18.822656631469727, + "logps/rejected": -163.4324188232422, + "loss": 0.4267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017783356830477715, + "rewards/margins": 1.7446448802947998, + "rewards/rejected": -1.7624282836914062, + "step": 5776 + }, + { + "epoch": 0.34, + "learning_rate": 7.736775122055392e-08, + "logits/chosen": -2.047340154647827, + "logits/rejected": -2.039865493774414, + "logps/chosen": -139.55795288085938, + "logps/rejected": -175.98464965820312, + "loss": 0.6223, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.365325927734375, + "rewards/margins": -0.18339842557907104, + "rewards/rejected": 0.548724353313446, + "step": 5777 + }, + { + "epoch": 0.34, + "learning_rate": 7.735986376388141e-08, + "logits/chosen": -1.889477014541626, + "logits/rejected": -1.8586673736572266, + "logps/chosen": -203.14645385742188, + "logps/rejected": -438.30499267578125, + "loss": 0.2032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5597809553146362, + "rewards/margins": 1.3224060535430908, + "rewards/rejected": 0.23737488687038422, + "step": 5778 + }, + { + "epoch": 0.34, + "learning_rate": 7.735197533525271e-08, + "logits/chosen": -2.0154223442077637, + "logits/rejected": -2.0031049251556396, + "logps/chosen": -153.0586700439453, + "logps/rejected": -450.2355651855469, + "loss": 0.1306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0882034301757812, + "rewards/margins": 3.979283094406128, + "rewards/rejected": -2.8910796642303467, + "step": 5779 + }, + { + "epoch": 0.34, + "learning_rate": 7.734408593494804e-08, + "logits/chosen": -2.0534942150115967, + "logits/rejected": -2.032317876815796, + "logps/chosen": -156.41812133789062, + "logps/rejected": -347.4356994628906, + "loss": 0.5276, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.774066150188446, + "rewards/margins": -0.07143557071685791, + "rewards/rejected": 0.845501720905304, + "step": 5780 + }, + { + "epoch": 0.34, + "learning_rate": 7.733619556324769e-08, + "logits/chosen": -1.807442545890808, + "logits/rejected": -1.8102070093154907, + "logps/chosen": -4.883060932159424, + "logps/rejected": -276.5231628417969, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08641057461500168, + "rewards/margins": 4.012996196746826, + "rewards/rejected": -4.099406719207764, + "step": 5781 + }, + { + "epoch": 0.34, + "learning_rate": 7.732830422043197e-08, + "logits/chosen": -2.0836105346679688, + "logits/rejected": -2.0845141410827637, + "logps/chosen": -36.02543640136719, + "logps/rejected": -142.78875732421875, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30453261733055115, + "rewards/margins": 1.9930329322814941, + "rewards/rejected": -1.6885002851486206, + "step": 5782 + }, + { + "epoch": 0.34, + "learning_rate": 7.73204119067812e-08, + "logits/chosen": -2.0165534019470215, + "logits/rejected": -2.0488195419311523, + "logps/chosen": -251.84487915039062, + "logps/rejected": -331.5468444824219, + "loss": 0.1094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6146332025527954, + "rewards/margins": 2.0307891368865967, + "rewards/rejected": -0.41615602374076843, + "step": 5783 + }, + { + "epoch": 0.34, + "learning_rate": 7.731251862257575e-08, + "logits/chosen": -2.1539082527160645, + "logits/rejected": -2.1225998401641846, + "logps/chosen": -51.91172790527344, + "logps/rejected": -281.4169921875, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6381942629814148, + "rewards/margins": 2.8398208618164062, + "rewards/rejected": -2.2016265392303467, + "step": 5784 + }, + { + "epoch": 0.34, + "learning_rate": 7.730462436809605e-08, + "logits/chosen": -1.7462151050567627, + "logits/rejected": -1.7914284467697144, + "logps/chosen": -285.2491455078125, + "logps/rejected": -346.1894226074219, + "loss": 0.0474, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.622222900390625, + "rewards/margins": 2.5692138671875, + "rewards/rejected": 0.053009033203125, + "step": 5785 + }, + { + "epoch": 0.34, + "learning_rate": 7.729672914362253e-08, + "logits/chosen": -1.9547641277313232, + "logits/rejected": -1.9372421503067017, + "logps/chosen": -120.63359069824219, + "logps/rejected": -317.14984130859375, + "loss": 0.2434, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.18114173412323, + "rewards/margins": 0.801228404045105, + "rewards/rejected": 0.379913330078125, + "step": 5786 + }, + { + "epoch": 0.34, + "learning_rate": 7.728883294943567e-08, + "logits/chosen": -2.07954740524292, + "logits/rejected": -2.0708136558532715, + "logps/chosen": -0.011186007410287857, + "logps/rejected": -191.04165649414062, + "loss": 0.3788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007291212095879018, + "rewards/margins": 2.7153735160827637, + "rewards/rejected": -2.7161026000976562, + "step": 5787 + }, + { + "epoch": 0.34, + "learning_rate": 7.728093578581598e-08, + "logits/chosen": -2.016021966934204, + "logits/rejected": -2.0014772415161133, + "logps/chosen": -91.9870376586914, + "logps/rejected": -143.4513702392578, + "loss": 0.435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3642425537109375, + "rewards/margins": 0.9377166628837585, + "rewards/rejected": -0.573474109172821, + "step": 5788 + }, + { + "epoch": 0.34, + "learning_rate": 7.727303765304399e-08, + "logits/chosen": -2.1196577548980713, + "logits/rejected": -2.0984017848968506, + "logps/chosen": -5.638523361994885e-05, + "logps/rejected": -388.7808837890625, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6450445627924637e-06, + "rewards/margins": 8.438194274902344, + "rewards/rejected": -8.438196182250977, + "step": 5789 + }, + { + "epoch": 0.34, + "learning_rate": 7.726513855140031e-08, + "logits/chosen": -1.7048327922821045, + "logits/rejected": -1.680076241493225, + "logps/chosen": -193.6065673828125, + "logps/rejected": -475.2994079589844, + "loss": 0.0722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6550995111465454, + "rewards/margins": 3.100970506668091, + "rewards/rejected": -1.4458709955215454, + "step": 5790 + }, + { + "epoch": 0.34, + "learning_rate": 7.725723848116553e-08, + "logits/chosen": -1.9532196521759033, + "logits/rejected": -1.948675513267517, + "logps/chosen": -20.508113861083984, + "logps/rejected": -182.1065673828125, + "loss": 0.4071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36385536193847656, + "rewards/margins": 1.0365512371063232, + "rewards/rejected": -0.6726959347724915, + "step": 5791 + }, + { + "epoch": 0.34, + "learning_rate": 7.72493374426203e-08, + "logits/chosen": -2.0267629623413086, + "logits/rejected": -2.0204052925109863, + "logps/chosen": -202.55422973632812, + "logps/rejected": -338.14892578125, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5247070789337158, + "rewards/margins": 1.0067565441131592, + "rewards/rejected": 0.5179504752159119, + "step": 5792 + }, + { + "epoch": 0.34, + "learning_rate": 7.724143543604535e-08, + "logits/chosen": -2.303464651107788, + "logits/rejected": -2.2934556007385254, + "logps/chosen": -17.25004005432129, + "logps/rejected": -257.4496154785156, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05027618631720543, + "rewards/margins": 4.354739665985107, + "rewards/rejected": -4.3044633865356445, + "step": 5793 + }, + { + "epoch": 0.34, + "learning_rate": 7.723353246172132e-08, + "logits/chosen": -2.035940170288086, + "logits/rejected": -2.0341897010803223, + "logps/chosen": -0.08364802598953247, + "logps/rejected": -33.93785858154297, + "loss": 0.5889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001196616911329329, + "rewards/margins": 0.49405762553215027, + "rewards/rejected": -0.4928610026836395, + "step": 5794 + }, + { + "epoch": 0.34, + "learning_rate": 7.722562851992903e-08, + "logits/chosen": -2.1451780796051025, + "logits/rejected": -2.136080741882324, + "logps/chosen": -73.85276794433594, + "logps/rejected": -238.9022674560547, + "loss": 0.2167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8574463129043579, + "rewards/margins": 1.7813537120819092, + "rewards/rejected": -0.923907458782196, + "step": 5795 + }, + { + "epoch": 0.34, + "learning_rate": 7.721772361094923e-08, + "logits/chosen": -2.0757062435150146, + "logits/rejected": -2.067859649658203, + "logps/chosen": -179.4982147216797, + "logps/rejected": -285.82427978515625, + "loss": 0.1748, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8941574096679688, + "rewards/margins": 1.1758407354354858, + "rewards/rejected": 0.7183166742324829, + "step": 5796 + }, + { + "epoch": 0.34, + "learning_rate": 7.720981773506276e-08, + "logits/chosen": -2.118731737136841, + "logits/rejected": -2.1176793575286865, + "logps/chosen": -171.43634033203125, + "logps/rejected": -266.79541015625, + "loss": 0.1005, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.001046895980835, + "rewards/margins": 2.1338624954223633, + "rewards/rejected": -0.13281555473804474, + "step": 5797 + }, + { + "epoch": 0.34, + "learning_rate": 7.720191089255046e-08, + "logits/chosen": -2.0889174938201904, + "logits/rejected": -2.0795180797576904, + "logps/chosen": -20.810714721679688, + "logps/rejected": -109.73094940185547, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28690701723098755, + "rewards/margins": 2.1211583614349365, + "rewards/rejected": -1.8342514038085938, + "step": 5798 + }, + { + "epoch": 0.34, + "learning_rate": 7.719400308369324e-08, + "logits/chosen": -2.113877058029175, + "logits/rejected": -2.0963408946990967, + "logps/chosen": -81.60810852050781, + "logps/rejected": -340.9546203613281, + "loss": 0.5179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35034486651420593, + "rewards/margins": 1.5945616960525513, + "rewards/rejected": -1.9449065923690796, + "step": 5799 + }, + { + "epoch": 0.34, + "learning_rate": 7.718609430877199e-08, + "logits/chosen": -2.2334797382354736, + "logits/rejected": -2.2093470096588135, + "logps/chosen": -54.98353958129883, + "logps/rejected": -401.19195556640625, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6175991296768188, + "rewards/margins": 3.749288558959961, + "rewards/rejected": -3.1316895484924316, + "step": 5800 + }, + { + "epoch": 0.34, + "learning_rate": 7.71781845680677e-08, + "logits/chosen": -2.1502742767333984, + "logits/rejected": -2.1413087844848633, + "logps/chosen": -10.23912239074707, + "logps/rejected": -121.39685821533203, + "loss": 0.5859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06707239151000977, + "rewards/margins": 0.38066568970680237, + "rewards/rejected": -0.3135932981967926, + "step": 5801 + }, + { + "epoch": 0.34, + "learning_rate": 7.717027386186135e-08, + "logits/chosen": -1.985654354095459, + "logits/rejected": -1.9957573413848877, + "logps/chosen": -204.35971069335938, + "logps/rejected": -436.87054443359375, + "loss": 0.089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.745965600013733, + "rewards/margins": 2.3296172618865967, + "rewards/rejected": -0.5836517214775085, + "step": 5802 + }, + { + "epoch": 0.34, + "learning_rate": 7.716236219043396e-08, + "logits/chosen": -2.0165092945098877, + "logits/rejected": -1.9672983884811401, + "logps/chosen": -206.218505859375, + "logps/rejected": -506.0361633300781, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9556000232696533, + "rewards/margins": 4.02899169921875, + "rewards/rejected": -2.0733916759490967, + "step": 5803 + }, + { + "epoch": 0.34, + "learning_rate": 7.715444955406661e-08, + "logits/chosen": -2.265470504760742, + "logits/rejected": -2.246526002883911, + "logps/chosen": -14.56362533569336, + "logps/rejected": -267.6419677734375, + "loss": 0.4361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1993490308523178, + "rewards/margins": 2.4324047565460205, + "rewards/rejected": -2.63175368309021, + "step": 5804 + }, + { + "epoch": 0.34, + "learning_rate": 7.714653595304037e-08, + "logits/chosen": -1.9498475790023804, + "logits/rejected": -1.9277732372283936, + "logps/chosen": -6.430161476135254, + "logps/rejected": -167.66275024414062, + "loss": 0.6542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006718635559082031, + "rewards/margins": 0.10814867168664932, + "rewards/rejected": -0.10747680813074112, + "step": 5805 + }, + { + "epoch": 0.34, + "learning_rate": 7.71386213876364e-08, + "logits/chosen": -1.9909330606460571, + "logits/rejected": -1.985737681388855, + "logps/chosen": -368.29351806640625, + "logps/rejected": -445.27337646484375, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5686248540878296, + "rewards/margins": 0.44046318531036377, + "rewards/rejected": 1.1281616687774658, + "step": 5806 + }, + { + "epoch": 0.34, + "learning_rate": 7.713070585813585e-08, + "logits/chosen": -2.0996644496917725, + "logits/rejected": -2.105142593383789, + "logps/chosen": -244.12741088867188, + "logps/rejected": -376.5247802734375, + "loss": 0.0898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7796906232833862, + "rewards/margins": 2.1893341541290283, + "rewards/rejected": -0.4096435606479645, + "step": 5807 + }, + { + "epoch": 0.34, + "learning_rate": 7.712278936481991e-08, + "logits/chosen": -2.033087730407715, + "logits/rejected": -2.0262577533721924, + "logps/chosen": -61.944610595703125, + "logps/rejected": -216.20938110351562, + "loss": 0.2635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7850921750068665, + "rewards/margins": 1.280828833580017, + "rewards/rejected": -0.495736688375473, + "step": 5808 + }, + { + "epoch": 0.34, + "learning_rate": 7.711487190796982e-08, + "logits/chosen": -1.95692777633667, + "logits/rejected": -1.9552825689315796, + "logps/chosen": -31.9063720703125, + "logps/rejected": -224.9639892578125, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22058753669261932, + "rewards/margins": 1.856404423713684, + "rewards/rejected": -1.6358169317245483, + "step": 5809 + }, + { + "epoch": 0.34, + "learning_rate": 7.710695348786684e-08, + "logits/chosen": -1.850721836090088, + "logits/rejected": -1.8533246517181396, + "logps/chosen": -205.08856201171875, + "logps/rejected": -323.4383239746094, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.185662865638733, + "rewards/margins": 0.6859344244003296, + "rewards/rejected": 0.49972841143608093, + "step": 5810 + }, + { + "epoch": 0.34, + "learning_rate": 7.70990341047923e-08, + "logits/chosen": -1.9430240392684937, + "logits/rejected": -1.9616607427597046, + "logps/chosen": -213.22125244140625, + "logps/rejected": -516.209228515625, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5490968227386475, + "rewards/margins": 6.061032295227051, + "rewards/rejected": -3.5119354724884033, + "step": 5811 + }, + { + "epoch": 0.34, + "learning_rate": 7.709111375902748e-08, + "logits/chosen": -2.0837137699127197, + "logits/rejected": -2.090161085128784, + "logps/chosen": -0.10962028056383133, + "logps/rejected": -85.64649963378906, + "loss": 0.5339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008996634744107723, + "rewards/margins": 0.8811469674110413, + "rewards/rejected": -0.8901435732841492, + "step": 5812 + }, + { + "epoch": 0.34, + "learning_rate": 7.708319245085381e-08, + "logits/chosen": -2.229933261871338, + "logits/rejected": -2.2080423831939697, + "logps/chosen": -48.71434783935547, + "logps/rejected": -355.2697448730469, + "loss": 0.2409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47772979736328125, + "rewards/margins": 3.8338303565979004, + "rewards/rejected": -3.356100559234619, + "step": 5813 + }, + { + "epoch": 0.34, + "learning_rate": 7.707527018055265e-08, + "logits/chosen": -2.126919746398926, + "logits/rejected": -2.132620096206665, + "logps/chosen": -37.540584564208984, + "logps/rejected": -192.884033203125, + "loss": 0.2991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3841384947299957, + "rewards/margins": 1.9346587657928467, + "rewards/rejected": -1.5505203008651733, + "step": 5814 + }, + { + "epoch": 0.34, + "learning_rate": 7.706734694840546e-08, + "logits/chosen": -2.019639492034912, + "logits/rejected": -2.017789125442505, + "logps/chosen": -0.00787153746932745, + "logps/rejected": -185.66305541992188, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030635271104983985, + "rewards/margins": 2.6809420585632324, + "rewards/rejected": -2.681248426437378, + "step": 5815 + }, + { + "epoch": 0.34, + "learning_rate": 7.70594227546937e-08, + "logits/chosen": -2.0006792545318604, + "logits/rejected": -1.9993233680725098, + "logps/chosen": -0.0003717669169418514, + "logps/rejected": -249.09756469726562, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.551716756395763e-06, + "rewards/margins": 5.5761566162109375, + "rewards/rejected": -5.5761613845825195, + "step": 5816 + }, + { + "epoch": 0.34, + "learning_rate": 7.705149759969889e-08, + "logits/chosen": -1.8779064416885376, + "logits/rejected": -1.865249752998352, + "logps/chosen": -384.11456298828125, + "logps/rejected": -563.07080078125, + "loss": 0.1478, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2819154262542725, + "rewards/margins": 1.2680543661117554, + "rewards/rejected": 1.013861060142517, + "step": 5817 + }, + { + "epoch": 0.34, + "learning_rate": 7.704357148370258e-08, + "logits/chosen": -2.0524864196777344, + "logits/rejected": -2.0347492694854736, + "logps/chosen": -36.429222106933594, + "logps/rejected": -421.16015625, + "loss": 0.2437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31280097365379333, + "rewards/margins": 5.001307964324951, + "rewards/rejected": -4.688507080078125, + "step": 5818 + }, + { + "epoch": 0.34, + "learning_rate": 7.703564440698629e-08, + "logits/chosen": -2.190634250640869, + "logits/rejected": -2.1661810874938965, + "logps/chosen": -171.9925994873047, + "logps/rejected": -400.37689208984375, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9168899655342102, + "rewards/margins": 5.356663703918457, + "rewards/rejected": -4.4397735595703125, + "step": 5819 + }, + { + "epoch": 0.34, + "learning_rate": 7.702771636983168e-08, + "logits/chosen": -2.1822333335876465, + "logits/rejected": -2.1892478466033936, + "logps/chosen": -71.26136779785156, + "logps/rejected": -232.3485870361328, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04825439676642418, + "rewards/margins": 1.7170883417129517, + "rewards/rejected": -1.6688339710235596, + "step": 5820 + }, + { + "epoch": 0.34, + "learning_rate": 7.701978737252038e-08, + "logits/chosen": -2.1426455974578857, + "logits/rejected": -2.131110906600952, + "logps/chosen": -0.8999044895172119, + "logps/rejected": -193.94113159179688, + "loss": 0.3594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05081070587038994, + "rewards/margins": 2.9566290378570557, + "rewards/rejected": -2.905818223953247, + "step": 5821 + }, + { + "epoch": 0.34, + "learning_rate": 7.701185741533407e-08, + "logits/chosen": -1.9223523139953613, + "logits/rejected": -1.912212610244751, + "logps/chosen": -107.11662292480469, + "logps/rejected": -215.8353271484375, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4795334339141846, + "rewards/margins": 0.25785374641418457, + "rewards/rejected": 1.2216796875, + "step": 5822 + }, + { + "epoch": 0.34, + "learning_rate": 7.700392649855445e-08, + "logits/chosen": -1.8510907888412476, + "logits/rejected": -1.829896330833435, + "logps/chosen": -4.0443878173828125, + "logps/rejected": -539.379638671875, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02420978620648384, + "rewards/margins": 7.743374347686768, + "rewards/rejected": -7.767584323883057, + "step": 5823 + }, + { + "epoch": 0.34, + "learning_rate": 7.699599462246327e-08, + "logits/chosen": -2.281219005584717, + "logits/rejected": -2.274242401123047, + "logps/chosen": -3.773308515548706, + "logps/rejected": -142.13587951660156, + "loss": 0.4321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09853680431842804, + "rewards/margins": 1.4068925380706787, + "rewards/rejected": -1.308355689048767, + "step": 5824 + }, + { + "epoch": 0.34, + "learning_rate": 7.698806178734231e-08, + "logits/chosen": -2.073253631591797, + "logits/rejected": -2.0760369300842285, + "logps/chosen": -57.624507904052734, + "logps/rejected": -222.6075897216797, + "loss": 0.5289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39367714524269104, + "rewards/margins": 0.19038887321949005, + "rewards/rejected": 0.203288272023201, + "step": 5825 + }, + { + "epoch": 0.34, + "learning_rate": 7.698012799347338e-08, + "logits/chosen": -2.1597368717193604, + "logits/rejected": -2.0592458248138428, + "logps/chosen": -99.41544342041016, + "logps/rejected": -347.6920471191406, + "loss": 0.3803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030381012707948685, + "rewards/margins": 2.732375383377075, + "rewards/rejected": -2.76275634765625, + "step": 5826 + }, + { + "epoch": 0.34, + "learning_rate": 7.697219324113832e-08, + "logits/chosen": -2.315209150314331, + "logits/rejected": -2.308593273162842, + "logps/chosen": -0.0007952406303957105, + "logps/rejected": -170.08123779296875, + "loss": 0.4235, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9663078748853877e-05, + "rewards/margins": 1.8578317165374756, + "rewards/rejected": -1.8578613996505737, + "step": 5827 + }, + { + "epoch": 0.34, + "learning_rate": 7.696425753061903e-08, + "logits/chosen": -2.0924570560455322, + "logits/rejected": -2.0907034873962402, + "logps/chosen": -11.629609107971191, + "logps/rejected": -211.06912231445312, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11237706989049911, + "rewards/margins": 2.8739662170410156, + "rewards/rejected": -2.7615890502929688, + "step": 5828 + }, + { + "epoch": 0.34, + "learning_rate": 7.695632086219743e-08, + "logits/chosen": -2.1163365840911865, + "logits/rejected": -1.9797685146331787, + "logps/chosen": -265.41448974609375, + "logps/rejected": -620.2064208984375, + "loss": 0.0413, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.512835741043091, + "rewards/margins": 3.544830322265625, + "rewards/rejected": -1.0319947004318237, + "step": 5829 + }, + { + "epoch": 0.34, + "learning_rate": 7.694838323615544e-08, + "logits/chosen": -1.8705312013626099, + "logits/rejected": -1.8419967889785767, + "logps/chosen": -194.6997528076172, + "logps/rejected": -277.3133544921875, + "loss": 0.1968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.117427110671997, + "rewards/margins": 1.666651964187622, + "rewards/rejected": -0.549224853515625, + "step": 5830 + }, + { + "epoch": 0.34, + "learning_rate": 7.694044465277506e-08, + "logits/chosen": -1.925034523010254, + "logits/rejected": -1.9336514472961426, + "logps/chosen": -335.667236328125, + "logps/rejected": -458.0195007324219, + "loss": 0.0347, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4165070056915283, + "rewards/margins": 3.2565248012542725, + "rewards/rejected": -0.8400177359580994, + "step": 5831 + }, + { + "epoch": 0.34, + "learning_rate": 7.693250511233831e-08, + "logits/chosen": -2.0867385864257812, + "logits/rejected": -2.0914559364318848, + "logps/chosen": -47.4204216003418, + "logps/rejected": -235.18624877929688, + "loss": 0.6857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28146591782569885, + "rewards/margins": 0.32635727524757385, + "rewards/rejected": -0.6078231930732727, + "step": 5832 + }, + { + "epoch": 0.34, + "learning_rate": 7.692456461512723e-08, + "logits/chosen": -1.826473355293274, + "logits/rejected": -1.8360527753829956, + "logps/chosen": -221.98696899414062, + "logps/rejected": -441.21087646484375, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.242626905441284, + "rewards/margins": 3.0190765857696533, + "rewards/rejected": -0.7764496207237244, + "step": 5833 + }, + { + "epoch": 0.34, + "learning_rate": 7.691662316142392e-08, + "logits/chosen": -1.6324032545089722, + "logits/rejected": -1.6121371984481812, + "logps/chosen": -249.72848510742188, + "logps/rejected": -317.58154296875, + "loss": 0.0915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8158477544784546, + "rewards/margins": 2.5568268299102783, + "rewards/rejected": -0.740979015827179, + "step": 5834 + }, + { + "epoch": 0.34, + "learning_rate": 7.69086807515105e-08, + "logits/chosen": -2.0207197666168213, + "logits/rejected": -2.0493319034576416, + "logps/chosen": -228.70753479003906, + "logps/rejected": -375.1235656738281, + "loss": 0.1562, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4982436895370483, + "rewards/margins": 1.36225426197052, + "rewards/rejected": 0.13598938286304474, + "step": 5835 + }, + { + "epoch": 0.34, + "learning_rate": 7.690073738566909e-08, + "logits/chosen": -2.0823142528533936, + "logits/rejected": -2.0627574920654297, + "logps/chosen": -190.01626586914062, + "logps/rejected": -350.7606201171875, + "loss": 0.1903, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2858428955078125, + "rewards/margins": 0.9063689708709717, + "rewards/rejected": 1.3794739246368408, + "step": 5836 + }, + { + "epoch": 0.34, + "learning_rate": 7.689279306418191e-08, + "logits/chosen": -2.028080701828003, + "logits/rejected": -2.030791997909546, + "logps/chosen": -7.311126708984375, + "logps/rejected": -24.074750900268555, + "loss": 0.6452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0330502986907959, + "rewards/margins": 0.08472257107496262, + "rewards/rejected": -0.11777286976575851, + "step": 5837 + }, + { + "epoch": 0.34, + "learning_rate": 7.68848477873312e-08, + "logits/chosen": -2.0604264736175537, + "logits/rejected": -2.0446598529815674, + "logps/chosen": -76.06351470947266, + "logps/rejected": -245.3406524658203, + "loss": 0.623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7035980224609375, + "rewards/margins": 1.5005600452423096, + "rewards/rejected": -2.204158067703247, + "step": 5838 + }, + { + "epoch": 0.34, + "learning_rate": 7.687690155539917e-08, + "logits/chosen": -2.005272150039673, + "logits/rejected": -2.0020596981048584, + "logps/chosen": -15.028373718261719, + "logps/rejected": -85.87306213378906, + "loss": 0.5336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11408920586109161, + "rewards/margins": 0.6551742553710938, + "rewards/rejected": -0.5410850644111633, + "step": 5839 + }, + { + "epoch": 0.34, + "learning_rate": 7.686895436866812e-08, + "logits/chosen": -1.9347732067108154, + "logits/rejected": -1.9282269477844238, + "logps/chosen": -162.57977294921875, + "logps/rejected": -230.36929321289062, + "loss": 0.1005, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5795395374298096, + "rewards/margins": 1.852949619293213, + "rewards/rejected": 0.7265899777412415, + "step": 5840 + }, + { + "epoch": 0.34, + "learning_rate": 7.686100622742038e-08, + "logits/chosen": -1.9310725927352905, + "logits/rejected": -1.9185149669647217, + "logps/chosen": -193.53396606445312, + "logps/rejected": -288.2518310546875, + "loss": 0.4469, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6552520990371704, + "rewards/margins": -0.2191985845565796, + "rewards/rejected": 1.87445068359375, + "step": 5841 + }, + { + "epoch": 0.34, + "learning_rate": 7.685305713193833e-08, + "logits/chosen": -2.1305947303771973, + "logits/rejected": -2.133808135986328, + "logps/chosen": -16.919143676757812, + "logps/rejected": -125.47389221191406, + "loss": 0.315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40139466524124146, + "rewards/margins": 1.5968232154846191, + "rewards/rejected": -1.195428490638733, + "step": 5842 + }, + { + "epoch": 0.34, + "learning_rate": 7.68451070825043e-08, + "logits/chosen": -1.8666408061981201, + "logits/rejected": -1.8483567237854004, + "logps/chosen": -204.03726196289062, + "logps/rejected": -351.2290344238281, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7865096926689148, + "rewards/margins": 0.3328567445278168, + "rewards/rejected": 0.453652948141098, + "step": 5843 + }, + { + "epoch": 0.34, + "learning_rate": 7.683715607940077e-08, + "logits/chosen": -2.0650105476379395, + "logits/rejected": -2.062765598297119, + "logps/chosen": -0.6726765632629395, + "logps/rejected": -93.75032806396484, + "loss": 0.3672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10833382606506348, + "rewards/margins": 2.2111454010009766, + "rewards/rejected": -2.102811574935913, + "step": 5844 + }, + { + "epoch": 0.34, + "learning_rate": 7.68292041229102e-08, + "logits/chosen": -2.0404484272003174, + "logits/rejected": -2.042937755584717, + "logps/chosen": -14.758200645446777, + "logps/rejected": -168.8516082763672, + "loss": 0.4478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11804475635290146, + "rewards/margins": 1.291331171989441, + "rewards/rejected": -1.1732864379882812, + "step": 5845 + }, + { + "epoch": 0.34, + "learning_rate": 7.682125121331505e-08, + "logits/chosen": -2.014130115509033, + "logits/rejected": -2.0430307388305664, + "logps/chosen": -277.11785888671875, + "logps/rejected": -401.76202392578125, + "loss": 0.0728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.672393798828125, + "rewards/margins": 2.395855665206909, + "rewards/rejected": -0.723461925983429, + "step": 5846 + }, + { + "epoch": 0.34, + "learning_rate": 7.681329735089787e-08, + "logits/chosen": -2.0828442573547363, + "logits/rejected": -2.0720412731170654, + "logps/chosen": -21.12602996826172, + "logps/rejected": -186.6608123779297, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34300366044044495, + "rewards/margins": 1.0475553274154663, + "rewards/rejected": -0.7045516967773438, + "step": 5847 + }, + { + "epoch": 0.34, + "learning_rate": 7.68053425359412e-08, + "logits/chosen": -2.062459945678711, + "logits/rejected": -2.0609707832336426, + "logps/chosen": -42.232852935791016, + "logps/rejected": -261.4515380859375, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04974785074591637, + "rewards/margins": 1.1385570764541626, + "rewards/rejected": -1.0888092517852783, + "step": 5848 + }, + { + "epoch": 0.34, + "learning_rate": 7.679738676872764e-08, + "logits/chosen": -1.8872015476226807, + "logits/rejected": -1.88056218624115, + "logps/chosen": -196.45394897460938, + "logps/rejected": -344.1559753417969, + "loss": 0.0717, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.191540479660034, + "rewards/margins": 2.612649440765381, + "rewards/rejected": -0.42110902070999146, + "step": 5849 + }, + { + "epoch": 0.34, + "learning_rate": 7.678943004953983e-08, + "logits/chosen": -2.1440422534942627, + "logits/rejected": -2.1282384395599365, + "logps/chosen": -203.08139038085938, + "logps/rejected": -342.2777099609375, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9492645263671875, + "rewards/margins": 0.5970062017440796, + "rewards/rejected": 0.3522582948207855, + "step": 5850 + }, + { + "epoch": 0.34, + "learning_rate": 7.678147237866041e-08, + "logits/chosen": -1.8614263534545898, + "logits/rejected": -1.902004599571228, + "logps/chosen": -194.13314819335938, + "logps/rejected": -269.50054931640625, + "loss": 0.1098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.271658420562744, + "rewards/margins": 1.8430848121643066, + "rewards/rejected": 0.4285736083984375, + "step": 5851 + }, + { + "epoch": 0.34, + "learning_rate": 7.677351375637209e-08, + "logits/chosen": -2.016692638397217, + "logits/rejected": -2.021681547164917, + "logps/chosen": -19.273658752441406, + "logps/rejected": -181.421875, + "loss": 0.4979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1580747663974762, + "rewards/margins": 1.2733914852142334, + "rewards/rejected": -1.1153167486190796, + "step": 5852 + }, + { + "epoch": 0.34, + "learning_rate": 7.676555418295762e-08, + "logits/chosen": -2.0364317893981934, + "logits/rejected": -2.0308070182800293, + "logps/chosen": -8.778203010559082, + "logps/rejected": -235.7122039794922, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008194446563720703, + "rewards/margins": 3.6687428951263428, + "rewards/rejected": -3.660548448562622, + "step": 5853 + }, + { + "epoch": 0.34, + "learning_rate": 7.675759365869972e-08, + "logits/chosen": -1.762514591217041, + "logits/rejected": -1.7510782480239868, + "logps/chosen": -58.49107360839844, + "logps/rejected": -254.34925842285156, + "loss": 0.2413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.729046642780304, + "rewards/margins": 1.778639316558838, + "rewards/rejected": -1.0495926141738892, + "step": 5854 + }, + { + "epoch": 0.34, + "learning_rate": 7.674963218388122e-08, + "logits/chosen": -2.176483392715454, + "logits/rejected": -2.1776537895202637, + "logps/chosen": -84.77882385253906, + "logps/rejected": -208.13201904296875, + "loss": 0.4145, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0801223516464233, + "rewards/margins": 0.2747268080711365, + "rewards/rejected": 0.8053955435752869, + "step": 5855 + }, + { + "epoch": 0.34, + "learning_rate": 7.674166975878493e-08, + "logits/chosen": -2.1017637252807617, + "logits/rejected": -1.9708400964736938, + "logps/chosen": -165.258056640625, + "logps/rejected": -463.59375, + "loss": 0.1459, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8624130487442017, + "rewards/margins": 1.7634201049804688, + "rewards/rejected": 0.09899292141199112, + "step": 5856 + }, + { + "epoch": 0.34, + "learning_rate": 7.673370638369373e-08, + "logits/chosen": -2.069211959838867, + "logits/rejected": -2.0909230709075928, + "logps/chosen": -258.41595458984375, + "logps/rejected": -294.41851806640625, + "loss": 0.198, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.883880615234375, + "rewards/margins": 0.9354614019393921, + "rewards/rejected": 0.9484192132949829, + "step": 5857 + }, + { + "epoch": 0.34, + "learning_rate": 7.672574205889051e-08, + "logits/chosen": -1.9691128730773926, + "logits/rejected": -1.93325936794281, + "logps/chosen": -245.40548706054688, + "logps/rejected": -530.914306640625, + "loss": 0.1483, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2978973388671875, + "rewards/margins": 1.3118804693222046, + "rewards/rejected": 0.9860168695449829, + "step": 5858 + }, + { + "epoch": 0.34, + "learning_rate": 7.671777678465819e-08, + "logits/chosen": -1.9289093017578125, + "logits/rejected": -1.9152336120605469, + "logps/chosen": -79.80775451660156, + "logps/rejected": -240.0255584716797, + "loss": 0.5593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5045501589775085, + "rewards/margins": 1.1041762828826904, + "rewards/rejected": -1.6087265014648438, + "step": 5859 + }, + { + "epoch": 0.34, + "learning_rate": 7.670981056127977e-08, + "logits/chosen": -1.9843720197677612, + "logits/rejected": -2.0029194355010986, + "logps/chosen": -217.24771118164062, + "logps/rejected": -254.26844787597656, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1234023571014404, + "rewards/margins": 3.523043632507324, + "rewards/rejected": -1.3996413946151733, + "step": 5860 + }, + { + "epoch": 0.34, + "learning_rate": 7.67018433890382e-08, + "logits/chosen": -1.9782068729400635, + "logits/rejected": -1.9559309482574463, + "logps/chosen": -179.03533935546875, + "logps/rejected": -455.31524658203125, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3676483631134033, + "rewards/margins": 3.6436095237731934, + "rewards/rejected": -1.2759612798690796, + "step": 5861 + }, + { + "epoch": 0.34, + "learning_rate": 7.669387526821655e-08, + "logits/chosen": -1.9715920686721802, + "logits/rejected": -1.958681344985962, + "logps/chosen": -36.937660217285156, + "logps/rejected": -106.8884506225586, + "loss": 0.5406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05757179483771324, + "rewards/margins": 0.5173503756523132, + "rewards/rejected": -0.4597786068916321, + "step": 5862 + }, + { + "epoch": 0.34, + "learning_rate": 7.668590619909789e-08, + "logits/chosen": -1.7162261009216309, + "logits/rejected": -1.7539889812469482, + "logps/chosen": -207.1234130859375, + "logps/rejected": -231.61325073242188, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9288924932479858, + "rewards/margins": 1.5035033226013184, + "rewards/rejected": 0.4253891110420227, + "step": 5863 + }, + { + "epoch": 0.34, + "learning_rate": 7.66779361819653e-08, + "logits/chosen": -2.0681612491607666, + "logits/rejected": -2.0691447257995605, + "logps/chosen": -45.386390686035156, + "logps/rejected": -173.2733154296875, + "loss": 0.3397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45678672194480896, + "rewards/margins": 1.4580166339874268, + "rewards/rejected": -1.0012298822402954, + "step": 5864 + }, + { + "epoch": 0.34, + "learning_rate": 7.666996521710192e-08, + "logits/chosen": -2.075183629989624, + "logits/rejected": -2.080209970474243, + "logps/chosen": -18.317068099975586, + "logps/rejected": -154.1390838623047, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16056708991527557, + "rewards/margins": 2.2387776374816895, + "rewards/rejected": -2.0782105922698975, + "step": 5865 + }, + { + "epoch": 0.34, + "learning_rate": 7.666199330479091e-08, + "logits/chosen": -1.8696032762527466, + "logits/rejected": -1.8694181442260742, + "logps/chosen": -225.1002197265625, + "logps/rejected": -390.4557800292969, + "loss": 0.2696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8962234854698181, + "rewards/margins": 1.2930099964141846, + "rewards/rejected": -0.39678651094436646, + "step": 5866 + }, + { + "epoch": 0.34, + "learning_rate": 7.665402044531549e-08, + "logits/chosen": -2.0888559818267822, + "logits/rejected": -2.081357479095459, + "logps/chosen": -8.777945518493652, + "logps/rejected": -126.5098876953125, + "loss": 0.7045, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.006283664610236883, + "rewards/margins": -0.06540212780237198, + "rewards/rejected": 0.071685791015625, + "step": 5867 + }, + { + "epoch": 0.34, + "learning_rate": 7.664604663895889e-08, + "logits/chosen": -2.177757501602173, + "logits/rejected": -2.178802251815796, + "logps/chosen": -5.878324031829834, + "logps/rejected": -98.25706481933594, + "loss": 0.5328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04986844211816788, + "rewards/margins": 0.6329784989356995, + "rewards/rejected": -0.5831100344657898, + "step": 5868 + }, + { + "epoch": 0.34, + "learning_rate": 7.663807188600436e-08, + "logits/chosen": -2.1767518520355225, + "logits/rejected": -2.175023317337036, + "logps/chosen": -3.6850690841674805, + "logps/rejected": -35.53559112548828, + "loss": 0.6757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019963575527071953, + "rewards/margins": 0.01885249651968479, + "rewards/rejected": -0.038816072046756744, + "step": 5869 + }, + { + "epoch": 0.34, + "learning_rate": 7.663009618673521e-08, + "logits/chosen": -2.045064926147461, + "logits/rejected": -2.0445425510406494, + "logps/chosen": -37.524635314941406, + "logps/rejected": -167.48016357421875, + "loss": 0.1817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0733238458633423, + "rewards/margins": 2.8966941833496094, + "rewards/rejected": -1.823370337486267, + "step": 5870 + }, + { + "epoch": 0.34, + "learning_rate": 7.66221195414348e-08, + "logits/chosen": -2.069498062133789, + "logits/rejected": -2.0682284832000732, + "logps/chosen": -25.309396743774414, + "logps/rejected": -51.85863494873047, + "loss": 0.6921, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.045313455164432526, + "rewards/margins": -0.10766734927892685, + "rewards/rejected": 0.15298080444335938, + "step": 5871 + }, + { + "epoch": 0.34, + "learning_rate": 7.661414195038647e-08, + "logits/chosen": -2.0700695514678955, + "logits/rejected": -2.0480523109436035, + "logps/chosen": -223.915283203125, + "logps/rejected": -402.7518615722656, + "loss": 0.062, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9156906604766846, + "rewards/margins": 2.4159438610076904, + "rewards/rejected": 0.499746710062027, + "step": 5872 + }, + { + "epoch": 0.34, + "learning_rate": 7.660616341387363e-08, + "logits/chosen": -1.9525229930877686, + "logits/rejected": -1.953113079071045, + "logps/chosen": -0.0005191550008021295, + "logps/rejected": -64.50652313232422, + "loss": 0.4587, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3522810554131866e-05, + "rewards/margins": 1.3203339576721191, + "rewards/rejected": -1.320367455482483, + "step": 5873 + }, + { + "epoch": 0.34, + "learning_rate": 7.659818393217974e-08, + "logits/chosen": -1.9688547849655151, + "logits/rejected": -1.9508750438690186, + "logps/chosen": -132.49301147460938, + "logps/rejected": -374.0046081542969, + "loss": 0.0624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6649353504180908, + "rewards/margins": 3.7563934326171875, + "rewards/rejected": -2.0914580821990967, + "step": 5874 + }, + { + "epoch": 0.34, + "learning_rate": 7.659020350558823e-08, + "logits/chosen": -2.0139009952545166, + "logits/rejected": -1.938643217086792, + "logps/chosen": -181.99606323242188, + "logps/rejected": -540.57275390625, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.521325707435608, + "rewards/margins": 3.6180176734924316, + "rewards/rejected": -2.096691846847534, + "step": 5875 + }, + { + "epoch": 0.34, + "learning_rate": 7.658222213438264e-08, + "logits/chosen": -2.1422135829925537, + "logits/rejected": -2.1486308574676514, + "logps/chosen": -8.937361717224121, + "logps/rejected": -128.05886840820312, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23456774652004242, + "rewards/margins": 1.8266935348510742, + "rewards/rejected": -1.5921257734298706, + "step": 5876 + }, + { + "epoch": 0.34, + "learning_rate": 7.657423981884647e-08, + "logits/chosen": -1.9806180000305176, + "logits/rejected": -1.9825537204742432, + "logps/chosen": -19.52418327331543, + "logps/rejected": -181.4717254638672, + "loss": 0.3819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003746032889466733, + "rewards/margins": 2.2375786304473877, + "rewards/rejected": -2.2379531860351562, + "step": 5877 + }, + { + "epoch": 0.34, + "learning_rate": 7.656625655926334e-08, + "logits/chosen": -1.9717369079589844, + "logits/rejected": -1.972496509552002, + "logps/chosen": -60.78155517578125, + "logps/rejected": -167.88186645507812, + "loss": 0.4981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5542671084403992, + "rewards/margins": 0.43430861830711365, + "rewards/rejected": 0.11995849758386612, + "step": 5878 + }, + { + "epoch": 0.34, + "learning_rate": 7.655827235591681e-08, + "logits/chosen": -1.9815452098846436, + "logits/rejected": -1.9806958436965942, + "logps/chosen": -5.566938853007741e-05, + "logps/rejected": -184.84738159179688, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.907195894546021e-07, + "rewards/margins": 2.4781436920166016, + "rewards/rejected": -2.4781434535980225, + "step": 5879 + }, + { + "epoch": 0.34, + "learning_rate": 7.655028720909056e-08, + "logits/chosen": -2.017361640930176, + "logits/rejected": -2.030500650405884, + "logps/chosen": -12.584043502807617, + "logps/rejected": -114.7193374633789, + "loss": 0.5079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04065695032477379, + "rewards/margins": 0.8528814911842346, + "rewards/rejected": -0.8122245669364929, + "step": 5880 + }, + { + "epoch": 0.34, + "learning_rate": 7.654230111906821e-08, + "logits/chosen": -2.077188491821289, + "logits/rejected": -2.07065486907959, + "logps/chosen": -315.67413330078125, + "logps/rejected": -374.0101623535156, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.635357618331909, + "rewards/margins": 4.587652683258057, + "rewards/rejected": -1.952294945716858, + "step": 5881 + }, + { + "epoch": 0.34, + "learning_rate": 7.65343140861335e-08, + "logits/chosen": -1.9174680709838867, + "logits/rejected": -1.920731782913208, + "logps/chosen": -23.689861297607422, + "logps/rejected": -252.423828125, + "loss": 0.2913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41093921661376953, + "rewards/margins": 2.5574984550476074, + "rewards/rejected": -2.146559238433838, + "step": 5882 + }, + { + "epoch": 0.34, + "learning_rate": 7.652632611057016e-08, + "logits/chosen": -2.129957914352417, + "logits/rejected": -2.1160051822662354, + "logps/chosen": -0.0052864220924675465, + "logps/rejected": -243.8983154296875, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011630905646597967, + "rewards/margins": 4.982006549835205, + "rewards/rejected": -4.982122898101807, + "step": 5883 + }, + { + "epoch": 0.34, + "learning_rate": 7.651833719266196e-08, + "logits/chosen": -1.9762213230133057, + "logits/rejected": -1.9836705923080444, + "logps/chosen": -14.014605522155762, + "logps/rejected": -53.35739517211914, + "loss": 0.6153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06912584602832794, + "rewards/margins": 0.42812108993530273, + "rewards/rejected": -0.49724695086479187, + "step": 5884 + }, + { + "epoch": 0.34, + "learning_rate": 7.651034733269272e-08, + "logits/chosen": -2.154618263244629, + "logits/rejected": -2.1543760299682617, + "logps/chosen": -8.499457908328623e-05, + "logps/rejected": -309.3175048828125, + "loss": 0.3406, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1947151910571847e-06, + "rewards/margins": 5.847451686859131, + "rewards/rejected": -5.847455024719238, + "step": 5885 + }, + { + "epoch": 0.34, + "learning_rate": 7.650235653094624e-08, + "logits/chosen": -1.8611432313919067, + "logits/rejected": -1.8636912107467651, + "logps/chosen": -0.9741644859313965, + "logps/rejected": -112.57364654541016, + "loss": 0.4743, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.814697322468419e-07, + "rewards/margins": 1.2325992584228516, + "rewards/rejected": -1.232598900794983, + "step": 5886 + }, + { + "epoch": 0.34, + "learning_rate": 7.649436478770645e-08, + "logits/chosen": -1.8824536800384521, + "logits/rejected": -1.882252812385559, + "logps/chosen": -123.61654663085938, + "logps/rejected": -306.47857666015625, + "loss": 0.2348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.74334716796875, + "rewards/margins": 2.387326240539551, + "rewards/rejected": -1.6439789533615112, + "step": 5887 + }, + { + "epoch": 0.34, + "learning_rate": 7.648637210325721e-08, + "logits/chosen": -2.0768275260925293, + "logits/rejected": -2.0778305530548096, + "logps/chosen": -175.04995727539062, + "logps/rejected": -268.7830505371094, + "loss": 0.1926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4671982526779175, + "rewards/margins": 1.1783523559570312, + "rewards/rejected": 0.28884583711624146, + "step": 5888 + }, + { + "epoch": 0.34, + "learning_rate": 7.647837847788248e-08, + "logits/chosen": -2.0355594158172607, + "logits/rejected": -2.035869598388672, + "logps/chosen": -8.498129844665527, + "logps/rejected": -175.68798828125, + "loss": 0.4248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06555777043104172, + "rewards/margins": 1.4252043962478638, + "rewards/rejected": -1.3596466779708862, + "step": 5889 + }, + { + "epoch": 0.34, + "learning_rate": 7.647038391186621e-08, + "logits/chosen": -2.0802836418151855, + "logits/rejected": -2.0740723609924316, + "logps/chosen": -53.39097595214844, + "logps/rejected": -146.0169677734375, + "loss": 0.384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021251678466796875, + "rewards/margins": 2.006240129470825, + "rewards/rejected": -1.9849884510040283, + "step": 5890 + }, + { + "epoch": 0.34, + "learning_rate": 7.646238840549244e-08, + "logits/chosen": -1.9256306886672974, + "logits/rejected": -1.9141128063201904, + "logps/chosen": -0.43117716908454895, + "logps/rejected": -111.09880828857422, + "loss": 0.6253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024922894313931465, + "rewards/margins": 0.30893558263778687, + "rewards/rejected": -0.3338584899902344, + "step": 5891 + }, + { + "epoch": 0.34, + "learning_rate": 7.645439195904519e-08, + "logits/chosen": -2.0374372005462646, + "logits/rejected": -2.0154852867126465, + "logps/chosen": -26.622100830078125, + "logps/rejected": -292.5628967285156, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39932480454444885, + "rewards/margins": 2.1508872509002686, + "rewards/rejected": -1.751562476158142, + "step": 5892 + }, + { + "epoch": 0.34, + "learning_rate": 7.644639457280853e-08, + "logits/chosen": -2.0233333110809326, + "logits/rejected": -2.0153822898864746, + "logps/chosen": -0.3925497233867645, + "logps/rejected": -86.28781127929688, + "loss": 0.5144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007295468356460333, + "rewards/margins": 0.8393773436546326, + "rewards/rejected": -0.8466728329658508, + "step": 5893 + }, + { + "epoch": 0.34, + "learning_rate": 7.643839624706656e-08, + "logits/chosen": -2.188258409500122, + "logits/rejected": -2.15995192527771, + "logps/chosen": -66.21524810791016, + "logps/rejected": -218.8034210205078, + "loss": 0.1252, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.08977210521698, + "rewards/margins": 3.403181552886963, + "rewards/rejected": -2.3134095668792725, + "step": 5894 + }, + { + "epoch": 0.34, + "learning_rate": 7.643039698210345e-08, + "logits/chosen": -2.0024678707122803, + "logits/rejected": -2.0046370029449463, + "logps/chosen": -67.77999877929688, + "logps/rejected": -257.03704833984375, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08665543049573898, + "rewards/margins": 2.944765567779541, + "rewards/rejected": -3.031420946121216, + "step": 5895 + }, + { + "epoch": 0.34, + "learning_rate": 7.642239677820334e-08, + "logits/chosen": -1.9655942916870117, + "logits/rejected": -1.9113149642944336, + "logps/chosen": -198.91824340820312, + "logps/rejected": -373.049072265625, + "loss": 0.0743, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0436036586761475, + "rewards/margins": 3.7714357376098633, + "rewards/rejected": -1.7278320789337158, + "step": 5896 + }, + { + "epoch": 0.34, + "learning_rate": 7.641439563565046e-08, + "logits/chosen": -1.983126163482666, + "logits/rejected": -1.897290825843811, + "logps/chosen": -203.6786346435547, + "logps/rejected": -552.75634765625, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8823318481445312, + "rewards/margins": 5.656214714050293, + "rewards/rejected": -3.773883104324341, + "step": 5897 + }, + { + "epoch": 0.34, + "learning_rate": 7.640639355472905e-08, + "logits/chosen": -1.715583086013794, + "logits/rejected": -1.7172691822052002, + "logps/chosen": -9.992185592651367, + "logps/rejected": -118.10963439941406, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5435722470283508, + "rewards/margins": 1.4319465160369873, + "rewards/rejected": -0.8883743286132812, + "step": 5898 + }, + { + "epoch": 0.34, + "learning_rate": 7.639839053572337e-08, + "logits/chosen": -1.9576314687728882, + "logits/rejected": -1.952453374862671, + "logps/chosen": -23.723684310913086, + "logps/rejected": -235.46878051757812, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2903434932231903, + "rewards/margins": 2.0874810218811035, + "rewards/rejected": -1.7971374988555908, + "step": 5899 + }, + { + "epoch": 0.34, + "learning_rate": 7.639038657891773e-08, + "logits/chosen": -2.1146926879882812, + "logits/rejected": -2.100288152694702, + "logps/chosen": -22.700824737548828, + "logps/rejected": -282.51629638671875, + "loss": 0.4466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03603649139404297, + "rewards/margins": 1.4142378568649292, + "rewards/rejected": -1.3782013654708862, + "step": 5900 + }, + { + "epoch": 0.34, + "learning_rate": 7.638238168459645e-08, + "logits/chosen": -1.9906561374664307, + "logits/rejected": -1.912828803062439, + "logps/chosen": -317.6167297363281, + "logps/rejected": -445.3923034667969, + "loss": 0.113, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.561178684234619, + "rewards/margins": 1.7487335205078125, + "rewards/rejected": 0.8124451041221619, + "step": 5901 + }, + { + "epoch": 0.34, + "learning_rate": 7.637437585304393e-08, + "logits/chosen": -1.9378026723861694, + "logits/rejected": -1.8958708047866821, + "logps/chosen": -312.3940124511719, + "logps/rejected": -538.46484375, + "loss": 0.1728, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5451323986053467, + "rewards/margins": 1.0003081560134888, + "rewards/rejected": 1.544824242591858, + "step": 5902 + }, + { + "epoch": 0.34, + "learning_rate": 7.636636908454458e-08, + "logits/chosen": -1.9892187118530273, + "logits/rejected": -1.9868862628936768, + "logps/chosen": -17.375112533569336, + "logps/rejected": -125.76164245605469, + "loss": 0.4743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06250381469726562, + "rewards/margins": 1.10515296459198, + "rewards/rejected": -1.0426491498947144, + "step": 5903 + }, + { + "epoch": 0.34, + "learning_rate": 7.63583613793828e-08, + "logits/chosen": -1.8879027366638184, + "logits/rejected": -1.882631778717041, + "logps/chosen": -17.589296340942383, + "logps/rejected": -127.60694885253906, + "loss": 0.3841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1722080260515213, + "rewards/margins": 1.5024845600128174, + "rewards/rejected": -1.3302764892578125, + "step": 5904 + }, + { + "epoch": 0.34, + "learning_rate": 7.635035273784313e-08, + "logits/chosen": -2.075744867324829, + "logits/rejected": -2.0762205123901367, + "logps/chosen": -3.9958250522613525, + "logps/rejected": -172.1851043701172, + "loss": 0.4228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04869060590863228, + "rewards/margins": 1.9872591495513916, + "rewards/rejected": -2.03594970703125, + "step": 5905 + }, + { + "epoch": 0.34, + "learning_rate": 7.634234316021002e-08, + "logits/chosen": -1.8782414197921753, + "logits/rejected": -1.8845295906066895, + "logps/chosen": -31.875165939331055, + "logps/rejected": -274.4798889160156, + "loss": 0.3845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09999790042638779, + "rewards/margins": 1.9894068241119385, + "rewards/rejected": -1.8894089460372925, + "step": 5906 + }, + { + "epoch": 0.34, + "learning_rate": 7.633433264676802e-08, + "logits/chosen": -2.115795373916626, + "logits/rejected": -2.1122195720672607, + "logps/chosen": -0.001269325497560203, + "logps/rejected": -299.7154235839844, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.307405495434068e-05, + "rewards/margins": 3.925421714782715, + "rewards/rejected": -3.92545485496521, + "step": 5907 + }, + { + "epoch": 0.34, + "learning_rate": 7.632632119780172e-08, + "logits/chosen": -1.9992847442626953, + "logits/rejected": -2.0009307861328125, + "logps/chosen": -271.1891174316406, + "logps/rejected": -570.233642578125, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.017620801925659, + "rewards/margins": 3.8514280319213867, + "rewards/rejected": -1.833807349205017, + "step": 5908 + }, + { + "epoch": 0.34, + "learning_rate": 7.631830881359571e-08, + "logits/chosen": -1.989074468612671, + "logits/rejected": -1.9956998825073242, + "logps/chosen": -280.6017761230469, + "logps/rejected": -447.14361572265625, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2912018299102783, + "rewards/margins": 2.9778687953948975, + "rewards/rejected": -0.6866669058799744, + "step": 5909 + }, + { + "epoch": 0.34, + "learning_rate": 7.631029549443464e-08, + "logits/chosen": -1.9376649856567383, + "logits/rejected": -1.9414706230163574, + "logps/chosen": -2.6995081901550293, + "logps/rejected": -125.2762222290039, + "loss": 0.7218, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.22659607231616974, + "rewards/margins": -0.00897979736328125, + "rewards/rejected": -0.2176162749528885, + "step": 5910 + }, + { + "epoch": 0.34, + "learning_rate": 7.630228124060317e-08, + "logits/chosen": -2.155839443206787, + "logits/rejected": -2.1534600257873535, + "logps/chosen": -10.275067329406738, + "logps/rejected": -134.85342407226562, + "loss": 0.4083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09164943546056747, + "rewards/margins": 1.9253301620483398, + "rewards/rejected": -1.8336807489395142, + "step": 5911 + }, + { + "epoch": 0.34, + "learning_rate": 7.629426605238602e-08, + "logits/chosen": -1.7273417711257935, + "logits/rejected": -1.7075912952423096, + "logps/chosen": -295.3280029296875, + "logps/rejected": -450.57879638671875, + "loss": 0.1134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.659722924232483, + "rewards/margins": 2.095672607421875, + "rewards/rejected": -0.4359497129917145, + "step": 5912 + }, + { + "epoch": 0.34, + "learning_rate": 7.62862499300679e-08, + "logits/chosen": -1.6806532144546509, + "logits/rejected": -1.6604522466659546, + "logps/chosen": -194.93565368652344, + "logps/rejected": -279.6467590332031, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2199081182479858, + "rewards/margins": 0.9967696666717529, + "rewards/rejected": 0.22313843667507172, + "step": 5913 + }, + { + "epoch": 0.34, + "learning_rate": 7.627823287393363e-08, + "logits/chosen": -2.1740949153900146, + "logits/rejected": -2.167480945587158, + "logps/chosen": -27.056114196777344, + "logps/rejected": -148.5023193359375, + "loss": 0.5705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22769318521022797, + "rewards/margins": 0.35059356689453125, + "rewards/rejected": -0.12290038913488388, + "step": 5914 + }, + { + "epoch": 0.34, + "learning_rate": 7.627021488426796e-08, + "logits/chosen": -2.1133201122283936, + "logits/rejected": -2.1107325553894043, + "logps/chosen": -11.417171478271484, + "logps/rejected": -122.77896118164062, + "loss": 0.476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04456024244427681, + "rewards/margins": 0.992535412311554, + "rewards/rejected": -0.9479751586914062, + "step": 5915 + }, + { + "epoch": 0.34, + "learning_rate": 7.626219596135577e-08, + "logits/chosen": -1.925719141960144, + "logits/rejected": -1.8714138269424438, + "logps/chosen": -286.9852294921875, + "logps/rejected": -434.2956848144531, + "loss": 0.0471, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.335736036300659, + "rewards/margins": 3.3618345260620117, + "rewards/rejected": -1.026098608970642, + "step": 5916 + }, + { + "epoch": 0.34, + "learning_rate": 7.625417610548192e-08, + "logits/chosen": -2.0647943019866943, + "logits/rejected": -2.061337947845459, + "logps/chosen": -0.0004133559705223888, + "logps/rejected": -109.62776184082031, + "loss": 0.5443, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.673741381746368e-07, + "rewards/margins": 0.7234765291213989, + "rewards/rejected": -0.7234771847724915, + "step": 5917 + }, + { + "epoch": 0.34, + "learning_rate": 7.62461553169313e-08, + "logits/chosen": -1.9900003671646118, + "logits/rejected": -1.991687536239624, + "logps/chosen": -47.842041015625, + "logps/rejected": -133.83108520507812, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6075416803359985, + "rewards/margins": 1.873358964920044, + "rewards/rejected": -1.2658172845840454, + "step": 5918 + }, + { + "epoch": 0.34, + "learning_rate": 7.623813359598888e-08, + "logits/chosen": -2.170050859451294, + "logits/rejected": -2.154236078262329, + "logps/chosen": -185.73629760742188, + "logps/rejected": -340.786865234375, + "loss": 0.1593, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.685083031654358, + "rewards/margins": 1.4273254871368408, + "rewards/rejected": 0.2577575743198395, + "step": 5919 + }, + { + "epoch": 0.34, + "learning_rate": 7.623011094293959e-08, + "logits/chosen": -1.9629102945327759, + "logits/rejected": -1.9540634155273438, + "logps/chosen": -0.49756282567977905, + "logps/rejected": -147.06161499023438, + "loss": 0.3844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011011905036866665, + "rewards/margins": 2.359912395477295, + "rewards/rejected": -2.348900556564331, + "step": 5920 + }, + { + "epoch": 0.34, + "learning_rate": 7.622208735806845e-08, + "logits/chosen": -1.8625216484069824, + "logits/rejected": -1.8598296642303467, + "logps/chosen": -0.006596801802515984, + "logps/rejected": -83.67089080810547, + "loss": 0.4923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031828205101192, + "rewards/margins": 1.0733495950698853, + "rewards/rejected": -1.0736678838729858, + "step": 5921 + }, + { + "epoch": 0.34, + "learning_rate": 7.621406284166053e-08, + "logits/chosen": -1.9713425636291504, + "logits/rejected": -1.9782383441925049, + "logps/chosen": -175.1077880859375, + "logps/rejected": -287.8095397949219, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.258032202720642, + "rewards/margins": 0.6195067763328552, + "rewards/rejected": 0.6385254263877869, + "step": 5922 + }, + { + "epoch": 0.34, + "learning_rate": 7.620603739400085e-08, + "logits/chosen": -1.8955841064453125, + "logits/rejected": -1.8949635028839111, + "logps/chosen": -7.773158550262451, + "logps/rejected": -100.39403533935547, + "loss": 0.7086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035512831062078476, + "rewards/margins": 0.0175229050219059, + "rewards/rejected": -0.053035736083984375, + "step": 5923 + }, + { + "epoch": 0.34, + "learning_rate": 7.619801101537454e-08, + "logits/chosen": -2.054595708847046, + "logits/rejected": -2.0350465774536133, + "logps/chosen": -198.50711059570312, + "logps/rejected": -388.19207763671875, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2522430419921875, + "rewards/margins": 2.2791504859924316, + "rewards/rejected": -0.02690734900534153, + "step": 5924 + }, + { + "epoch": 0.34, + "learning_rate": 7.618998370606673e-08, + "logits/chosen": -1.9724398851394653, + "logits/rejected": -1.970810890197754, + "logps/chosen": -19.834484100341797, + "logps/rejected": -320.7884216308594, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5018558502197266, + "rewards/margins": 3.2779757976531982, + "rewards/rejected": -2.7761199474334717, + "step": 5925 + }, + { + "epoch": 0.34, + "learning_rate": 7.618195546636259e-08, + "logits/chosen": -2.090109348297119, + "logits/rejected": -2.0951342582702637, + "logps/chosen": -13.465919494628906, + "logps/rejected": -142.6231231689453, + "loss": 0.4331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0431608222424984, + "rewards/margins": 1.9214143753051758, + "rewards/rejected": -1.964575171470642, + "step": 5926 + }, + { + "epoch": 0.34, + "learning_rate": 7.617392629654732e-08, + "logits/chosen": -2.0897669792175293, + "logits/rejected": -2.0876481533050537, + "logps/chosen": -114.0362548828125, + "logps/rejected": -321.7526550292969, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.53298419713974, + "rewards/margins": 2.135260820388794, + "rewards/rejected": -1.6022766828536987, + "step": 5927 + }, + { + "epoch": 0.34, + "learning_rate": 7.616589619690615e-08, + "logits/chosen": -2.1248199939727783, + "logits/rejected": -2.1000165939331055, + "logps/chosen": -132.3280792236328, + "logps/rejected": -306.71337890625, + "loss": 0.2434, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2839096784591675, + "rewards/margins": 0.9942063093185425, + "rewards/rejected": 0.289703369140625, + "step": 5928 + }, + { + "epoch": 0.35, + "learning_rate": 7.615786516772438e-08, + "logits/chosen": -2.1651594638824463, + "logits/rejected": -2.1539409160614014, + "logps/chosen": -40.08074951171875, + "logps/rejected": -274.82568359375, + "loss": 0.3102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005524826236069202, + "rewards/margins": 3.305630922317505, + "rewards/rejected": -3.3111557960510254, + "step": 5929 + }, + { + "epoch": 0.35, + "learning_rate": 7.614983320928725e-08, + "logits/chosen": -1.9150164127349854, + "logits/rejected": -1.994032382965088, + "logps/chosen": -191.53756713867188, + "logps/rejected": -209.53956604003906, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1990631818771362, + "rewards/margins": 0.07816171646118164, + "rewards/rejected": 1.1209014654159546, + "step": 5930 + }, + { + "epoch": 0.35, + "learning_rate": 7.614180032188017e-08, + "logits/chosen": -2.208489179611206, + "logits/rejected": -2.1947028636932373, + "logps/chosen": -0.9273979067802429, + "logps/rejected": -122.23738098144531, + "loss": 0.5052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022785771638154984, + "rewards/margins": 1.0676409006118774, + "rewards/rejected": -1.0904266834259033, + "step": 5931 + }, + { + "epoch": 0.35, + "learning_rate": 7.613376650578845e-08, + "logits/chosen": -1.8805770874023438, + "logits/rejected": -1.8758445978164673, + "logps/chosen": -76.0776596069336, + "logps/rejected": -152.88873291015625, + "loss": 0.383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6923934817314148, + "rewards/margins": 0.6420837044715881, + "rewards/rejected": 0.05030975490808487, + "step": 5932 + }, + { + "epoch": 0.35, + "learning_rate": 7.612573176129751e-08, + "logits/chosen": -2.054610252380371, + "logits/rejected": -2.056159734725952, + "logps/chosen": -40.636131286621094, + "logps/rejected": -116.59098052978516, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4867737293243408, + "rewards/margins": 0.44604265689849854, + "rewards/rejected": 1.0407310724258423, + "step": 5933 + }, + { + "epoch": 0.35, + "learning_rate": 7.611769608869277e-08, + "logits/chosen": -1.8069921731948853, + "logits/rejected": -1.7926220893859863, + "logps/chosen": -170.8680419921875, + "logps/rejected": -338.06365966796875, + "loss": 0.4292, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.562646508216858, + "rewards/margins": -0.154327392578125, + "rewards/rejected": 1.716973900794983, + "step": 5934 + }, + { + "epoch": 0.35, + "learning_rate": 7.610965948825973e-08, + "logits/chosen": -1.882267713546753, + "logits/rejected": -1.9189834594726562, + "logps/chosen": -157.76809692382812, + "logps/rejected": -368.66900634765625, + "loss": 0.1443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9140853881835938, + "rewards/margins": 4.975227355957031, + "rewards/rejected": -4.0611419677734375, + "step": 5935 + }, + { + "epoch": 0.35, + "learning_rate": 7.610162196028386e-08, + "logits/chosen": -2.105104923248291, + "logits/rejected": -2.1075925827026367, + "logps/chosen": -77.72321319580078, + "logps/rejected": -125.53622436523438, + "loss": 0.418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02606811560690403, + "rewards/margins": 1.894551157951355, + "rewards/rejected": -1.920619249343872, + "step": 5936 + }, + { + "epoch": 0.35, + "learning_rate": 7.60935835050507e-08, + "logits/chosen": -1.825972557067871, + "logits/rejected": -1.8254626989364624, + "logps/chosen": -9.37527847290039, + "logps/rejected": -256.7534484863281, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14999209344387054, + "rewards/margins": 5.257594108581543, + "rewards/rejected": -5.107602119445801, + "step": 5937 + }, + { + "epoch": 0.35, + "learning_rate": 7.608554412284582e-08, + "logits/chosen": -1.8247555494308472, + "logits/rejected": -1.8255867958068848, + "logps/chosen": -0.7036718726158142, + "logps/rejected": -116.48390197753906, + "loss": 0.4183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0208293329924345, + "rewards/margins": 1.7404627799987793, + "rewards/rejected": -1.7196334600448608, + "step": 5938 + }, + { + "epoch": 0.35, + "learning_rate": 7.607750381395483e-08, + "logits/chosen": -2.0925326347351074, + "logits/rejected": -2.096539258956909, + "logps/chosen": -21.590801239013672, + "logps/rejected": -128.146240234375, + "loss": 0.5138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39192238450050354, + "rewards/margins": 0.42762643098831177, + "rewards/rejected": -0.03570404276251793, + "step": 5939 + }, + { + "epoch": 0.35, + "learning_rate": 7.606946257866332e-08, + "logits/chosen": -2.040799617767334, + "logits/rejected": -2.033553123474121, + "logps/chosen": -1.4789564609527588, + "logps/rejected": -237.3445587158203, + "loss": 0.3997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09258097410202026, + "rewards/margins": 2.8304827213287354, + "rewards/rejected": -2.9230637550354004, + "step": 5940 + }, + { + "epoch": 0.35, + "learning_rate": 7.606142041725699e-08, + "logits/chosen": -1.970546007156372, + "logits/rejected": -1.9687339067459106, + "logps/chosen": -32.19198226928711, + "logps/rejected": -188.0133056640625, + "loss": 0.4954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23310700058937073, + "rewards/margins": 0.6312652826309204, + "rewards/rejected": -0.3981582820415497, + "step": 5941 + }, + { + "epoch": 0.35, + "learning_rate": 7.605337733002152e-08, + "logits/chosen": -2.0679514408111572, + "logits/rejected": -2.051481246948242, + "logps/chosen": -57.44353103637695, + "logps/rejected": -344.6342468261719, + "loss": 0.2319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6656810641288757, + "rewards/margins": 2.565088987350464, + "rewards/rejected": -1.899407982826233, + "step": 5942 + }, + { + "epoch": 0.35, + "learning_rate": 7.604533331724267e-08, + "logits/chosen": -1.9565377235412598, + "logits/rejected": -1.956813931465149, + "logps/chosen": -18.158021926879883, + "logps/rejected": -115.65438842773438, + "loss": 0.5741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1288398802280426, + "rewards/margins": 0.28335875272750854, + "rewards/rejected": -0.15451888740062714, + "step": 5943 + }, + { + "epoch": 0.35, + "learning_rate": 7.603728837920615e-08, + "logits/chosen": -1.9296655654907227, + "logits/rejected": -1.8559954166412354, + "logps/chosen": -208.94984436035156, + "logps/rejected": -377.5078125, + "loss": 0.4044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2538131475448608, + "rewards/margins": 0.21847379207611084, + "rewards/rejected": 1.03533935546875, + "step": 5944 + }, + { + "epoch": 0.35, + "learning_rate": 7.60292425161978e-08, + "logits/chosen": -2.059753656387329, + "logits/rejected": -2.0551624298095703, + "logps/chosen": -164.847900390625, + "logps/rejected": -268.6327819824219, + "loss": 0.1043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.678472876548767, + "rewards/margins": 2.1930603981018066, + "rewards/rejected": -0.51458740234375, + "step": 5945 + }, + { + "epoch": 0.35, + "learning_rate": 7.602119572850343e-08, + "logits/chosen": -1.9869945049285889, + "logits/rejected": -1.9753670692443848, + "logps/chosen": -0.011523013934493065, + "logps/rejected": -241.96038818359375, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009162881760857999, + "rewards/margins": 2.3911752700805664, + "rewards/rejected": -2.3920915126800537, + "step": 5946 + }, + { + "epoch": 0.35, + "learning_rate": 7.60131480164089e-08, + "logits/chosen": -2.0263264179229736, + "logits/rejected": -2.0100414752960205, + "logps/chosen": -65.90865325927734, + "logps/rejected": -392.53314208984375, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3086807429790497, + "rewards/margins": 6.614863395690918, + "rewards/rejected": -6.306182861328125, + "step": 5947 + }, + { + "epoch": 0.35, + "learning_rate": 7.600509938020012e-08, + "logits/chosen": -1.9159018993377686, + "logits/rejected": -1.8962821960449219, + "logps/chosen": -312.7836608886719, + "logps/rejected": -440.7016906738281, + "loss": 0.332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4289276599884033, + "rewards/margins": 0.17767333984375, + "rewards/rejected": 1.2512543201446533, + "step": 5948 + }, + { + "epoch": 0.35, + "learning_rate": 7.599704982016299e-08, + "logits/chosen": -2.001314878463745, + "logits/rejected": -2.0123045444488525, + "logps/chosen": -208.64181518554688, + "logps/rejected": -285.2091979980469, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.644989013671875, + "rewards/margins": 1.5978851318359375, + "rewards/rejected": 1.0471038818359375, + "step": 5949 + }, + { + "epoch": 0.35, + "learning_rate": 7.59889993365835e-08, + "logits/chosen": -2.07137393951416, + "logits/rejected": -2.0783872604370117, + "logps/chosen": -0.0009445769246667624, + "logps/rejected": -194.12979125976562, + "loss": 0.5165, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.134327875566669e-05, + "rewards/margins": 0.9025762677192688, + "rewards/rejected": -0.9026275873184204, + "step": 5950 + }, + { + "epoch": 0.35, + "learning_rate": 7.598094792974762e-08, + "logits/chosen": -1.9192709922790527, + "logits/rejected": -1.9447340965270996, + "logps/chosen": -252.2304229736328, + "logps/rejected": -508.2310485839844, + "loss": 0.0564, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5329086780548096, + "rewards/margins": 5.9762773513793945, + "rewards/rejected": -4.443368434906006, + "step": 5951 + }, + { + "epoch": 0.35, + "learning_rate": 7.597289559994138e-08, + "logits/chosen": -2.0571439266204834, + "logits/rejected": -2.017738103866577, + "logps/chosen": -221.19747924804688, + "logps/rejected": -389.5035400390625, + "loss": 0.2575, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1465301513671875, + "rewards/margins": 0.5302459001541138, + "rewards/rejected": 1.6162842512130737, + "step": 5952 + }, + { + "epoch": 0.35, + "learning_rate": 7.596484234745085e-08, + "logits/chosen": -2.07270884513855, + "logits/rejected": -2.0665087699890137, + "logps/chosen": -246.6454620361328, + "logps/rejected": -392.9364013671875, + "loss": 0.2478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8694046139717102, + "rewards/margins": 1.2185866832733154, + "rewards/rejected": -0.34918212890625, + "step": 5953 + }, + { + "epoch": 0.35, + "learning_rate": 7.59567881725621e-08, + "logits/chosen": -2.083066701889038, + "logits/rejected": -2.0742175579071045, + "logps/chosen": -66.4847640991211, + "logps/rejected": -173.83462524414062, + "loss": 0.3111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46987611055374146, + "rewards/margins": 2.2086334228515625, + "rewards/rejected": -1.7387573719024658, + "step": 5954 + }, + { + "epoch": 0.35, + "learning_rate": 7.594873307556127e-08, + "logits/chosen": -2.1764419078826904, + "logits/rejected": -2.16392183303833, + "logps/chosen": -54.53840255737305, + "logps/rejected": -166.29275512695312, + "loss": 0.41, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4185188412666321, + "rewards/margins": 0.8214988708496094, + "rewards/rejected": -0.4029800593852997, + "step": 5955 + }, + { + "epoch": 0.35, + "learning_rate": 7.594067705673453e-08, + "logits/chosen": -2.0815961360931396, + "logits/rejected": -2.0768239498138428, + "logps/chosen": -68.10420227050781, + "logps/rejected": -180.42288208007812, + "loss": 0.2494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5476104617118835, + "rewards/margins": 2.6284685134887695, + "rewards/rejected": -2.080857992172241, + "step": 5956 + }, + { + "epoch": 0.35, + "learning_rate": 7.593262011636803e-08, + "logits/chosen": -2.128035545349121, + "logits/rejected": -2.111781358718872, + "logps/chosen": -63.27232360839844, + "logps/rejected": -460.54229736328125, + "loss": 0.4423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25862160325050354, + "rewards/margins": 5.252721786499023, + "rewards/rejected": -5.511343479156494, + "step": 5957 + }, + { + "epoch": 0.35, + "learning_rate": 7.592456225474802e-08, + "logits/chosen": -1.9899678230285645, + "logits/rejected": -1.9684005975723267, + "logps/chosen": -257.37127685546875, + "logps/rejected": -373.40240478515625, + "loss": 0.282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.154089331626892, + "rewards/margins": 0.9834655523300171, + "rewards/rejected": 0.170623779296875, + "step": 5958 + }, + { + "epoch": 0.35, + "learning_rate": 7.591650347216074e-08, + "logits/chosen": -1.983771562576294, + "logits/rejected": -1.986945629119873, + "logps/chosen": -6.649330139160156, + "logps/rejected": -178.85177612304688, + "loss": 0.4352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061647653579711914, + "rewards/margins": 1.8351556062698364, + "rewards/rejected": -1.8968032598495483, + "step": 5959 + }, + { + "epoch": 0.35, + "learning_rate": 7.590844376889249e-08, + "logits/chosen": -1.9547022581100464, + "logits/rejected": -1.9615591764450073, + "logps/chosen": -33.83641815185547, + "logps/rejected": -149.6097412109375, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14084167778491974, + "rewards/margins": 2.1800339221954346, + "rewards/rejected": -2.0391921997070312, + "step": 5960 + }, + { + "epoch": 0.35, + "learning_rate": 7.59003831452296e-08, + "logits/chosen": -2.012650966644287, + "logits/rejected": -1.9930827617645264, + "logps/chosen": -194.19993591308594, + "logps/rejected": -461.79974365234375, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1794464588165283, + "rewards/margins": 4.263827323913574, + "rewards/rejected": -3.084381103515625, + "step": 5961 + }, + { + "epoch": 0.35, + "learning_rate": 7.58923216014584e-08, + "logits/chosen": -2.1310629844665527, + "logits/rejected": -2.1218347549438477, + "logps/chosen": -11.554170608520508, + "logps/rejected": -145.34144592285156, + "loss": 0.5686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04812917858362198, + "rewards/margins": 0.5776200294494629, + "rewards/rejected": -0.6257492303848267, + "step": 5962 + }, + { + "epoch": 0.35, + "learning_rate": 7.588425913786527e-08, + "logits/chosen": -1.933471918106079, + "logits/rejected": -1.931958556175232, + "logps/chosen": -129.8822021484375, + "logps/rejected": -334.0507507324219, + "loss": 0.1614, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1120331287384033, + "rewards/margins": 2.6224091053009033, + "rewards/rejected": -1.5103759765625, + "step": 5963 + }, + { + "epoch": 0.35, + "learning_rate": 7.587619575473665e-08, + "logits/chosen": -1.9599430561065674, + "logits/rejected": -1.9599050283432007, + "logps/chosen": -36.101070404052734, + "logps/rejected": -145.40740966796875, + "loss": 0.4136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19193534553050995, + "rewards/margins": 1.371094822883606, + "rewards/rejected": -1.1791595220565796, + "step": 5964 + }, + { + "epoch": 0.35, + "learning_rate": 7.586813145235899e-08, + "logits/chosen": -2.128443479537964, + "logits/rejected": -2.1355881690979004, + "logps/chosen": -60.519309997558594, + "logps/rejected": -317.2705078125, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8947243094444275, + "rewards/margins": 3.0839455127716064, + "rewards/rejected": -2.189221143722534, + "step": 5965 + }, + { + "epoch": 0.35, + "learning_rate": 7.586006623101877e-08, + "logits/chosen": -1.998047113418579, + "logits/rejected": -1.9946017265319824, + "logps/chosen": -0.00018857694522012025, + "logps/rejected": -306.37152099609375, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.714554835227318e-06, + "rewards/margins": 5.072962760925293, + "rewards/rejected": -5.072972297668457, + "step": 5966 + }, + { + "epoch": 0.35, + "learning_rate": 7.585200009100248e-08, + "logits/chosen": -2.0707015991210938, + "logits/rejected": -2.0762739181518555, + "logps/chosen": -0.005046600475907326, + "logps/rejected": -176.5784149169922, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005377184716053307, + "rewards/margins": 3.1362416744232178, + "rewards/rejected": -3.1357040405273438, + "step": 5967 + }, + { + "epoch": 0.35, + "learning_rate": 7.584393303259671e-08, + "logits/chosen": -2.0700314044952393, + "logits/rejected": -2.0709400177001953, + "logps/chosen": -22.429006576538086, + "logps/rejected": -108.27606201171875, + "loss": 0.6534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0477331168949604, + "rewards/margins": 0.09817790985107422, + "rewards/rejected": -0.050444792956113815, + "step": 5968 + }, + { + "epoch": 0.35, + "learning_rate": 7.583586505608801e-08, + "logits/chosen": -1.8090386390686035, + "logits/rejected": -1.7897758483886719, + "logps/chosen": -0.9070006012916565, + "logps/rejected": -244.82070922851562, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004929006099700928, + "rewards/margins": 4.9254913330078125, + "rewards/rejected": -4.9259843826293945, + "step": 5969 + }, + { + "epoch": 0.35, + "learning_rate": 7.582779616176301e-08, + "logits/chosen": -1.9119548797607422, + "logits/rejected": -1.9061541557312012, + "logps/chosen": -4.08881860494148e-05, + "logps/rejected": -84.75110626220703, + "loss": 0.687, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.788099410759969e-07, + "rewards/margins": 0.0387251041829586, + "rewards/rejected": -0.03872528299689293, + "step": 5970 + }, + { + "epoch": 0.35, + "learning_rate": 7.581972634990837e-08, + "logits/chosen": -1.9350076913833618, + "logits/rejected": -1.9374765157699585, + "logps/chosen": -3.4391379356384277, + "logps/rejected": -223.26573181152344, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04749863222241402, + "rewards/margins": 5.562798976898193, + "rewards/rejected": -5.515300273895264, + "step": 5971 + }, + { + "epoch": 0.35, + "learning_rate": 7.581165562081073e-08, + "logits/chosen": -2.032179117202759, + "logits/rejected": -2.020198345184326, + "logps/chosen": -166.65689086914062, + "logps/rejected": -243.1913604736328, + "loss": 0.1598, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8032684326171875, + "rewards/margins": 1.3287140130996704, + "rewards/rejected": 0.4745544493198395, + "step": 5972 + }, + { + "epoch": 0.35, + "learning_rate": 7.580358397475684e-08, + "logits/chosen": -2.1056137084960938, + "logits/rejected": -2.0976622104644775, + "logps/chosen": -4.39877767348662e-05, + "logps/rejected": -165.55059814453125, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.536670830812e-08, + "rewards/margins": 2.5955779552459717, + "rewards/rejected": -2.5955779552459717, + "step": 5973 + }, + { + "epoch": 0.35, + "learning_rate": 7.579551141203339e-08, + "logits/chosen": -1.7232277393341064, + "logits/rejected": -1.7514283657073975, + "logps/chosen": -324.40875244140625, + "logps/rejected": -396.103271484375, + "loss": 0.7513, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.19595642387866974, + "rewards/margins": -0.609942615032196, + "rewards/rejected": 0.4139862060546875, + "step": 5974 + }, + { + "epoch": 0.35, + "learning_rate": 7.578743793292724e-08, + "logits/chosen": -2.0535097122192383, + "logits/rejected": -2.0496230125427246, + "logps/chosen": -20.646821975708008, + "logps/rejected": -146.03704833984375, + "loss": 0.4914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2072601318359375, + "rewards/margins": 1.7172882556915283, + "rewards/rejected": -1.9245483875274658, + "step": 5975 + }, + { + "epoch": 0.35, + "learning_rate": 7.577936353772514e-08, + "logits/chosen": -1.927513837814331, + "logits/rejected": -1.8947168588638306, + "logps/chosen": -241.40663146972656, + "logps/rejected": -407.8540344238281, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6459823846817017, + "rewards/margins": 2.172602891921997, + "rewards/rejected": -0.5266205072402954, + "step": 5976 + }, + { + "epoch": 0.35, + "learning_rate": 7.577128822671395e-08, + "logits/chosen": -1.9710239171981812, + "logits/rejected": -1.946768879890442, + "logps/chosen": -244.54429626464844, + "logps/rejected": -359.589599609375, + "loss": 0.3478, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2415298223495483, + "rewards/margins": 0.4042434096336365, + "rewards/rejected": 0.8372864127159119, + "step": 5977 + }, + { + "epoch": 0.35, + "learning_rate": 7.576321200018053e-08, + "logits/chosen": -1.962989091873169, + "logits/rejected": -1.9365990161895752, + "logps/chosen": -262.7642822265625, + "logps/rejected": -350.96563720703125, + "loss": 0.3591, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.657678246498108, + "rewards/margins": 0.14166259765625, + "rewards/rejected": 1.516015648841858, + "step": 5978 + }, + { + "epoch": 0.35, + "learning_rate": 7.57551348584118e-08, + "logits/chosen": -1.86220121383667, + "logits/rejected": -1.8106119632720947, + "logps/chosen": -244.7213897705078, + "logps/rejected": -316.72991943359375, + "loss": 0.3958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7811172604560852, + "rewards/margins": 0.48657989501953125, + "rewards/rejected": 0.29453736543655396, + "step": 5979 + }, + { + "epoch": 0.35, + "learning_rate": 7.57470568016947e-08, + "logits/chosen": -1.7981023788452148, + "logits/rejected": -1.7891379594802856, + "logps/chosen": -196.5313262939453, + "logps/rejected": -342.22454833984375, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5018600225448608, + "rewards/margins": 0.8319076299667358, + "rewards/rejected": 0.669952392578125, + "step": 5980 + }, + { + "epoch": 0.35, + "learning_rate": 7.573897783031622e-08, + "logits/chosen": -2.015277147293091, + "logits/rejected": -2.022562026977539, + "logps/chosen": -38.397674560546875, + "logps/rejected": -141.43658447265625, + "loss": 0.4081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27973976731300354, + "rewards/margins": 1.216464638710022, + "rewards/rejected": -0.936724841594696, + "step": 5981 + }, + { + "epoch": 0.35, + "learning_rate": 7.573089794456333e-08, + "logits/chosen": -2.0545246601104736, + "logits/rejected": -2.0396780967712402, + "logps/chosen": -0.011010818183422089, + "logps/rejected": -358.63201904296875, + "loss": 0.3488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005137330736033618, + "rewards/margins": 7.4862542152404785, + "rewards/rejected": -7.486767768859863, + "step": 5982 + }, + { + "epoch": 0.35, + "learning_rate": 7.572281714472308e-08, + "logits/chosen": -1.9687144756317139, + "logits/rejected": -1.9939463138580322, + "logps/chosen": -197.78392028808594, + "logps/rejected": -341.080322265625, + "loss": 0.179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4812637567520142, + "rewards/margins": 1.5502792596817017, + "rewards/rejected": -0.0690155029296875, + "step": 5983 + }, + { + "epoch": 0.35, + "learning_rate": 7.571473543108253e-08, + "logits/chosen": -1.9236334562301636, + "logits/rejected": -1.9198569059371948, + "logps/chosen": -3.5733389854431152, + "logps/rejected": -96.48812866210938, + "loss": 0.4714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007857585325837135, + "rewards/margins": 1.1163520812988281, + "rewards/rejected": -1.1242096424102783, + "step": 5984 + }, + { + "epoch": 0.35, + "learning_rate": 7.570665280392882e-08, + "logits/chosen": -1.9761439561843872, + "logits/rejected": -2.009624719619751, + "logps/chosen": -253.9605712890625, + "logps/rejected": -307.86419677734375, + "loss": 0.0514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3937714099884033, + "rewards/margins": 2.645590305328369, + "rewards/rejected": -0.25181886553764343, + "step": 5985 + }, + { + "epoch": 0.35, + "learning_rate": 7.569856926354903e-08, + "logits/chosen": -1.6283361911773682, + "logits/rejected": -1.6278547048568726, + "logps/chosen": -234.85101318359375, + "logps/rejected": -428.1448974609375, + "loss": 0.1423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.698400855064392, + "rewards/margins": 1.455480933189392, + "rewards/rejected": 0.242919921875, + "step": 5986 + }, + { + "epoch": 0.35, + "learning_rate": 7.569048481023037e-08, + "logits/chosen": -2.014888286590576, + "logits/rejected": -1.8778386116027832, + "logps/chosen": -183.430908203125, + "logps/rejected": -431.2618408203125, + "loss": 0.2292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.21893310546875, + "rewards/margins": 1.4197814464569092, + "rewards/rejected": -0.20084838569164276, + "step": 5987 + }, + { + "epoch": 0.35, + "learning_rate": 7.568239944426003e-08, + "logits/chosen": -2.038140058517456, + "logits/rejected": -2.06485915184021, + "logps/chosen": -230.7406768798828, + "logps/rejected": -488.6229553222656, + "loss": 0.1087, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3160812854766846, + "rewards/margins": 2.318068027496338, + "rewards/rejected": -1.0019867420196533, + "step": 5988 + }, + { + "epoch": 0.35, + "learning_rate": 7.567431316592521e-08, + "logits/chosen": -1.8135915994644165, + "logits/rejected": -1.8520830869674683, + "logps/chosen": -140.83932495117188, + "logps/rejected": -422.2833251953125, + "loss": 0.1006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3998397588729858, + "rewards/margins": 2.341719150543213, + "rewards/rejected": -0.9418792724609375, + "step": 5989 + }, + { + "epoch": 0.35, + "learning_rate": 7.566622597551321e-08, + "logits/chosen": -2.184408664703369, + "logits/rejected": -2.1738812923431396, + "logps/chosen": -9.096702575683594, + "logps/rejected": -138.4510040283203, + "loss": 0.7244, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08651409298181534, + "rewards/margins": -0.1756090223789215, + "rewards/rejected": 0.26212310791015625, + "step": 5990 + }, + { + "epoch": 0.35, + "learning_rate": 7.565813787331132e-08, + "logits/chosen": -2.1194376945495605, + "logits/rejected": -2.125896453857422, + "logps/chosen": -3.9616222381591797, + "logps/rejected": -135.94949340820312, + "loss": 0.4336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1006234660744667, + "rewards/margins": 1.242937684059143, + "rewards/rejected": -1.1423141956329346, + "step": 5991 + }, + { + "epoch": 0.35, + "learning_rate": 7.565004885960688e-08, + "logits/chosen": -1.9192898273468018, + "logits/rejected": -1.9530564546585083, + "logps/chosen": -250.69650268554688, + "logps/rejected": -358.0498352050781, + "loss": 0.1415, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6387481689453125, + "rewards/margins": 1.4993774890899658, + "rewards/rejected": 0.13937072455883026, + "step": 5992 + }, + { + "epoch": 0.35, + "learning_rate": 7.564195893468722e-08, + "logits/chosen": -2.0115532875061035, + "logits/rejected": -2.012779712677002, + "logps/chosen": -255.8961181640625, + "logps/rejected": -471.2277526855469, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.892688035964966, + "rewards/margins": 3.855917453765869, + "rewards/rejected": -0.9632293581962585, + "step": 5993 + }, + { + "epoch": 0.35, + "learning_rate": 7.563386809883976e-08, + "logits/chosen": -1.9630175828933716, + "logits/rejected": -1.9667856693267822, + "logps/chosen": -7.6401801109313965, + "logps/rejected": -201.48008728027344, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024363374337553978, + "rewards/margins": 5.171403408050537, + "rewards/rejected": -5.147039890289307, + "step": 5994 + }, + { + "epoch": 0.35, + "learning_rate": 7.562577635235189e-08, + "logits/chosen": -2.0699574947357178, + "logits/rejected": -2.039393424987793, + "logps/chosen": -169.38525390625, + "logps/rejected": -374.0459899902344, + "loss": 0.1154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.542199730873108, + "rewards/margins": 2.857846260070801, + "rewards/rejected": -1.3156464099884033, + "step": 5995 + }, + { + "epoch": 0.35, + "learning_rate": 7.561768369551111e-08, + "logits/chosen": -2.0898079872131348, + "logits/rejected": -2.0900416374206543, + "logps/chosen": -13.401803970336914, + "logps/rejected": -47.80769348144531, + "loss": 0.5606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06367893517017365, + "rewards/margins": 0.7597754001617432, + "rewards/rejected": -0.8234543204307556, + "step": 5996 + }, + { + "epoch": 0.35, + "learning_rate": 7.56095901286049e-08, + "logits/chosen": -1.932287573814392, + "logits/rejected": -1.9052072763442993, + "logps/chosen": -291.4140625, + "logps/rejected": -435.33929443359375, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.431439161300659, + "rewards/margins": 3.1917357444763184, + "rewards/rejected": -0.760296642780304, + "step": 5997 + }, + { + "epoch": 0.35, + "learning_rate": 7.560149565192079e-08, + "logits/chosen": -1.8859984874725342, + "logits/rejected": -1.9018067121505737, + "logps/chosen": -92.09916687011719, + "logps/rejected": -247.07461547851562, + "loss": 0.4309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.199147030711174, + "rewards/margins": 0.836682140827179, + "rewards/rejected": -0.6375350952148438, + "step": 5998 + }, + { + "epoch": 0.35, + "learning_rate": 7.55934002657463e-08, + "logits/chosen": -2.025202751159668, + "logits/rejected": -2.018845558166504, + "logps/chosen": -80.21835327148438, + "logps/rejected": -171.14907836914062, + "loss": 0.5449, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5397254824638367, + "rewards/margins": -0.08438646793365479, + "rewards/rejected": 0.6241119503974915, + "step": 5999 + }, + { + "epoch": 0.35, + "learning_rate": 7.558530397036907e-08, + "logits/chosen": -2.051696300506592, + "logits/rejected": -2.057955026626587, + "logps/chosen": -240.52871704101562, + "logps/rejected": -359.39935302734375, + "loss": 0.1325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.087426781654358, + "rewards/margins": 1.9666199684143066, + "rewards/rejected": -0.879193127155304, + "step": 6000 + }, + { + "epoch": 0.35, + "learning_rate": 7.557720676607666e-08, + "logits/chosen": -1.8427783250808716, + "logits/rejected": -1.7928686141967773, + "logps/chosen": -197.34942626953125, + "logps/rejected": -335.22857666015625, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.504730224609375, + "rewards/margins": 0.2577788829803467, + "rewards/rejected": 1.2469513416290283, + "step": 6001 + }, + { + "epoch": 0.35, + "learning_rate": 7.556910865315678e-08, + "logits/chosen": -2.173743963241577, + "logits/rejected": -2.1637182235717773, + "logps/chosen": -29.165882110595703, + "logps/rejected": -176.94281005859375, + "loss": 0.4016, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1055019423365593, + "rewards/margins": 0.9745266437530518, + "rewards/rejected": -0.8690246939659119, + "step": 6002 + }, + { + "epoch": 0.35, + "learning_rate": 7.556100963189708e-08, + "logits/chosen": -2.019460678100586, + "logits/rejected": -2.020827054977417, + "logps/chosen": -5.92449426651001, + "logps/rejected": -149.74319458007812, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16131983697414398, + "rewards/margins": 2.7530388832092285, + "rewards/rejected": -2.591719150543213, + "step": 6003 + }, + { + "epoch": 0.35, + "learning_rate": 7.555290970258528e-08, + "logits/chosen": -1.8977211713790894, + "logits/rejected": -1.888584852218628, + "logps/chosen": -120.90972137451172, + "logps/rejected": -234.09754943847656, + "loss": 0.2714, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2237647771835327, + "rewards/margins": 0.8046287298202515, + "rewards/rejected": 0.41913604736328125, + "step": 6004 + }, + { + "epoch": 0.35, + "learning_rate": 7.554480886550913e-08, + "logits/chosen": -2.117643356323242, + "logits/rejected": -2.1325955390930176, + "logps/chosen": -249.83990478515625, + "logps/rejected": -343.4306335449219, + "loss": 0.257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2806519269943237, + "rewards/margins": 1.0738251209259033, + "rewards/rejected": 0.20682679116725922, + "step": 6005 + }, + { + "epoch": 0.35, + "learning_rate": 7.553670712095643e-08, + "logits/chosen": -1.9888455867767334, + "logits/rejected": -1.9640079736709595, + "logps/chosen": -200.46751403808594, + "logps/rejected": -371.71630859375, + "loss": 0.1003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.426049828529358, + "rewards/margins": 2.774646043777466, + "rewards/rejected": -1.348596215248108, + "step": 6006 + }, + { + "epoch": 0.35, + "learning_rate": 7.552860446921498e-08, + "logits/chosen": -2.076663017272949, + "logits/rejected": -2.0661849975585938, + "logps/chosen": -29.65858268737793, + "logps/rejected": -243.3520965576172, + "loss": 0.1529, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2331335544586182, + "rewards/margins": 2.487760066986084, + "rewards/rejected": -1.2546265125274658, + "step": 6007 + }, + { + "epoch": 0.35, + "learning_rate": 7.552050091057263e-08, + "logits/chosen": -2.0730528831481934, + "logits/rejected": -2.0743775367736816, + "logps/chosen": -2.5695817470550537, + "logps/rejected": -112.01922607421875, + "loss": 0.5539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004875302314758301, + "rewards/margins": 0.6536172032356262, + "rewards/rejected": -0.6487419009208679, + "step": 6008 + }, + { + "epoch": 0.35, + "learning_rate": 7.551239644531724e-08, + "logits/chosen": -1.923666000366211, + "logits/rejected": -1.8724873065948486, + "logps/chosen": -248.22653198242188, + "logps/rejected": -419.01373291015625, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.881439208984375, + "rewards/margins": 1.2255675792694092, + "rewards/rejected": -0.34412842988967896, + "step": 6009 + }, + { + "epoch": 0.35, + "learning_rate": 7.550429107373673e-08, + "logits/chosen": -2.2201876640319824, + "logits/rejected": -2.220639944076538, + "logps/chosen": -27.559654235839844, + "logps/rejected": -187.5872039794922, + "loss": 0.3817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23069344460964203, + "rewards/margins": 1.151442289352417, + "rewards/rejected": -0.9207488894462585, + "step": 6010 + }, + { + "epoch": 0.35, + "learning_rate": 7.549618479611905e-08, + "logits/chosen": -2.046776533126831, + "logits/rejected": -2.0522849559783936, + "logps/chosen": -0.06565149128437042, + "logps/rejected": -185.10968017578125, + "loss": 0.3859, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006015158724039793, + "rewards/margins": 2.541015625, + "rewards/rejected": -2.5470306873321533, + "step": 6011 + }, + { + "epoch": 0.35, + "learning_rate": 7.548807761275218e-08, + "logits/chosen": -2.0191073417663574, + "logits/rejected": -2.0180447101593018, + "logps/chosen": -5.17357730132062e-05, + "logps/rejected": -173.054931640625, + "loss": 0.4266, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6690538018337975e-07, + "rewards/margins": 1.7462648153305054, + "rewards/rejected": -1.7462646961212158, + "step": 6012 + }, + { + "epoch": 0.35, + "learning_rate": 7.547996952392411e-08, + "logits/chosen": -1.9538062810897827, + "logits/rejected": -1.9455777406692505, + "logps/chosen": -98.2568359375, + "logps/rejected": -296.75860595703125, + "loss": 0.3681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2972206175327301, + "rewards/margins": 1.3844887018203735, + "rewards/rejected": -1.0872681140899658, + "step": 6013 + }, + { + "epoch": 0.35, + "learning_rate": 7.547186052992288e-08, + "logits/chosen": -2.113163471221924, + "logits/rejected": -2.069150686264038, + "logps/chosen": -69.30264282226562, + "logps/rejected": -389.2831726074219, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10349350422620773, + "rewards/margins": 2.796445608139038, + "rewards/rejected": -2.8999390602111816, + "step": 6014 + }, + { + "epoch": 0.35, + "learning_rate": 7.546375063103657e-08, + "logits/chosen": -1.9150663614273071, + "logits/rejected": -1.9160016775131226, + "logps/chosen": -54.21245574951172, + "logps/rejected": -241.82684326171875, + "loss": 0.4022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2208843231201172, + "rewards/margins": 1.684556245803833, + "rewards/rejected": -1.4636719226837158, + "step": 6015 + }, + { + "epoch": 0.35, + "learning_rate": 7.545563982755329e-08, + "logits/chosen": -2.082780122756958, + "logits/rejected": -2.181615114212036, + "logps/chosen": -224.04818725585938, + "logps/rejected": -275.6152038574219, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0189682245254517, + "rewards/margins": 0.3496292233467102, + "rewards/rejected": 0.6693390011787415, + "step": 6016 + }, + { + "epoch": 0.35, + "learning_rate": 7.544752811976116e-08, + "logits/chosen": -2.1194307804107666, + "logits/rejected": -2.1200144290924072, + "logps/chosen": -9.071631939150393e-05, + "logps/rejected": -171.3452911376953, + "loss": 0.3858, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.015860784216784e-06, + "rewards/margins": 2.4524168968200684, + "rewards/rejected": -2.4524199962615967, + "step": 6017 + }, + { + "epoch": 0.35, + "learning_rate": 7.543941550794835e-08, + "logits/chosen": -1.9527175426483154, + "logits/rejected": -1.9479857683181763, + "logps/chosen": -52.373661041259766, + "logps/rejected": -299.1431884765625, + "loss": 0.3995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14809493720531464, + "rewards/margins": 2.989316701889038, + "rewards/rejected": -3.137411594390869, + "step": 6018 + }, + { + "epoch": 0.35, + "learning_rate": 7.543130199240309e-08, + "logits/chosen": -1.9818495512008667, + "logits/rejected": -1.9812705516815186, + "logps/chosen": -186.33921813964844, + "logps/rejected": -392.0498046875, + "loss": 0.0819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9862655401229858, + "rewards/margins": 2.4256882667541504, + "rewards/rejected": -0.439422607421875, + "step": 6019 + }, + { + "epoch": 0.35, + "learning_rate": 7.542318757341355e-08, + "logits/chosen": -1.7763652801513672, + "logits/rejected": -1.7791638374328613, + "logps/chosen": -5.1322340965271, + "logps/rejected": -68.41578674316406, + "loss": 0.707, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012349414639174938, + "rewards/margins": -0.010043811053037643, + "rewards/rejected": -0.0023056031204760075, + "step": 6020 + }, + { + "epoch": 0.35, + "learning_rate": 7.541507225126804e-08, + "logits/chosen": -1.8012303113937378, + "logits/rejected": -1.8068366050720215, + "logps/chosen": -200.97845458984375, + "logps/rejected": -358.48175048828125, + "loss": 0.0876, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8653199672698975, + "rewards/margins": 2.0022339820861816, + "rewards/rejected": 0.863085925579071, + "step": 6021 + }, + { + "epoch": 0.35, + "learning_rate": 7.540695602625485e-08, + "logits/chosen": -2.02925181388855, + "logits/rejected": -2.008653402328491, + "logps/chosen": -97.40825653076172, + "logps/rejected": -233.45053100585938, + "loss": 0.3314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8233665823936462, + "rewards/margins": 0.6782646179199219, + "rewards/rejected": 0.14510193467140198, + "step": 6022 + }, + { + "epoch": 0.35, + "learning_rate": 7.539883889866231e-08, + "logits/chosen": -1.859113335609436, + "logits/rejected": -1.8874516487121582, + "logps/chosen": -226.97463989257812, + "logps/rejected": -335.408935546875, + "loss": 0.1681, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7942383289337158, + "rewards/margins": 1.446130394935608, + "rewards/rejected": 0.3481079041957855, + "step": 6023 + }, + { + "epoch": 0.35, + "learning_rate": 7.539072086877875e-08, + "logits/chosen": -2.039473533630371, + "logits/rejected": -2.020906925201416, + "logps/chosen": -252.42324829101562, + "logps/rejected": -385.42205810546875, + "loss": 0.528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5774399042129517, + "rewards/margins": 0.6694167852401733, + "rewards/rejected": -1.246856689453125, + "step": 6024 + }, + { + "epoch": 0.35, + "learning_rate": 7.538260193689261e-08, + "logits/chosen": -1.9793142080307007, + "logits/rejected": -1.9811584949493408, + "logps/chosen": -1.317612648010254, + "logps/rejected": -165.38900756835938, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009411633014678955, + "rewards/margins": 2.715890407562256, + "rewards/rejected": -2.7064788341522217, + "step": 6025 + }, + { + "epoch": 0.35, + "learning_rate": 7.537448210329228e-08, + "logits/chosen": -1.9531067609786987, + "logits/rejected": -1.9429175853729248, + "logps/chosen": -39.74726104736328, + "logps/rejected": -89.809326171875, + "loss": 0.4676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3887489438056946, + "rewards/margins": 0.6286018490791321, + "rewards/rejected": -0.2398529052734375, + "step": 6026 + }, + { + "epoch": 0.35, + "learning_rate": 7.536636136826624e-08, + "logits/chosen": -2.0495574474334717, + "logits/rejected": -2.0576395988464355, + "logps/chosen": -11.198308944702148, + "logps/rejected": -88.02747344970703, + "loss": 0.4622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003019237658008933, + "rewards/margins": 1.116936206817627, + "rewards/rejected": -1.1199554204940796, + "step": 6027 + }, + { + "epoch": 0.35, + "learning_rate": 7.535823973210294e-08, + "logits/chosen": -2.0155367851257324, + "logits/rejected": -1.9934358596801758, + "logps/chosen": -115.2501220703125, + "logps/rejected": -274.6551513671875, + "loss": 0.1112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6646164655685425, + "rewards/margins": 2.311793565750122, + "rewards/rejected": -0.6471771597862244, + "step": 6028 + }, + { + "epoch": 0.35, + "learning_rate": 7.535011719509093e-08, + "logits/chosen": -2.046706199645996, + "logits/rejected": -2.0485470294952393, + "logps/chosen": -1.3348912000656128, + "logps/rejected": -35.794189453125, + "loss": 0.7034, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03454923257231712, + "rewards/margins": -0.029337212443351746, + "rewards/rejected": -0.005212021060287952, + "step": 6029 + }, + { + "epoch": 0.35, + "learning_rate": 7.534199375751876e-08, + "logits/chosen": -2.142735481262207, + "logits/rejected": -2.14304256439209, + "logps/chosen": -15.351341247558594, + "logps/rejected": -125.09440612792969, + "loss": 0.4523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029645634815096855, + "rewards/margins": 1.441501498222351, + "rewards/rejected": -1.4711471796035767, + "step": 6030 + }, + { + "epoch": 0.35, + "learning_rate": 7.533386941967501e-08, + "logits/chosen": -1.8705214262008667, + "logits/rejected": -1.8710947036743164, + "logps/chosen": -29.482376098632812, + "logps/rejected": -163.3400115966797, + "loss": 0.2038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.996930718421936, + "rewards/margins": 2.3218071460723877, + "rewards/rejected": -1.3248764276504517, + "step": 6031 + }, + { + "epoch": 0.35, + "learning_rate": 7.532574418184829e-08, + "logits/chosen": -2.0709266662597656, + "logits/rejected": -2.059091567993164, + "logps/chosen": -70.67478942871094, + "logps/rejected": -281.7115783691406, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4425705075263977, + "rewards/margins": 2.4649429321289062, + "rewards/rejected": -2.0223724842071533, + "step": 6032 + }, + { + "epoch": 0.35, + "learning_rate": 7.531761804432726e-08, + "logits/chosen": -2.063051700592041, + "logits/rejected": -2.060462713241577, + "logps/chosen": -42.1901969909668, + "logps/rejected": -132.44427490234375, + "loss": 0.8171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8836372494697571, + "rewards/margins": 0.5541557669639587, + "rewards/rejected": -1.4377930164337158, + "step": 6033 + }, + { + "epoch": 0.35, + "learning_rate": 7.53094910074006e-08, + "logits/chosen": -2.021609306335449, + "logits/rejected": -2.011192560195923, + "logps/chosen": -30.997087478637695, + "logps/rejected": -257.8570861816406, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2961336076259613, + "rewards/margins": 2.1032090187072754, + "rewards/rejected": -1.8070755004882812, + "step": 6034 + }, + { + "epoch": 0.35, + "learning_rate": 7.530136307135702e-08, + "logits/chosen": -1.861358642578125, + "logits/rejected": -1.8645316362380981, + "logps/chosen": -5.149763819645159e-05, + "logps/rejected": -190.0651397705078, + "loss": 0.3406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1920700444534305e-06, + "rewards/margins": 4.681813716888428, + "rewards/rejected": -4.681814670562744, + "step": 6035 + }, + { + "epoch": 0.35, + "learning_rate": 7.529323423648526e-08, + "logits/chosen": -1.9323925971984863, + "logits/rejected": -1.8886831998825073, + "logps/chosen": -276.92633056640625, + "logps/rejected": -450.8399658203125, + "loss": 0.2303, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.462506055831909, + "rewards/margins": 0.6317260265350342, + "rewards/rejected": 1.830780029296875, + "step": 6036 + }, + { + "epoch": 0.35, + "learning_rate": 7.528510450307408e-08, + "logits/chosen": -1.9684057235717773, + "logits/rejected": -1.9321379661560059, + "logps/chosen": -203.09268188476562, + "logps/rejected": -391.389404296875, + "loss": 0.4565, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.0150818824768066, + "rewards/margins": -0.3246033191680908, + "rewards/rejected": 2.3396852016448975, + "step": 6037 + }, + { + "epoch": 0.35, + "learning_rate": 7.527697387141233e-08, + "logits/chosen": -1.9591583013534546, + "logits/rejected": -1.9584732055664062, + "logps/chosen": -15.000086784362793, + "logps/rejected": -118.33900451660156, + "loss": 0.491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012494183145463467, + "rewards/margins": 1.071207880973816, + "rewards/rejected": -1.0837020874023438, + "step": 6038 + }, + { + "epoch": 0.35, + "learning_rate": 7.526884234178881e-08, + "logits/chosen": -1.9897700548171997, + "logits/rejected": -1.9960442781448364, + "logps/chosen": -12.51091480255127, + "logps/rejected": -156.47848510742188, + "loss": 0.5119, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20715036988258362, + "rewards/margins": 1.2969863414764404, + "rewards/rejected": -1.5041366815567017, + "step": 6039 + }, + { + "epoch": 0.35, + "learning_rate": 7.526070991449242e-08, + "logits/chosen": -2.099611282348633, + "logits/rejected": -2.065531015396118, + "logps/chosen": -181.95602416992188, + "logps/rejected": -533.170654296875, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.623376488685608, + "rewards/margins": 3.8463377952575684, + "rewards/rejected": -2.22296142578125, + "step": 6040 + }, + { + "epoch": 0.35, + "learning_rate": 7.525257658981203e-08, + "logits/chosen": -1.9915151596069336, + "logits/rejected": -1.9917412996292114, + "logps/chosen": -0.33642685413360596, + "logps/rejected": -89.2266845703125, + "loss": 0.4893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01601494662463665, + "rewards/margins": 0.9318215250968933, + "rewards/rejected": -0.9158065915107727, + "step": 6041 + }, + { + "epoch": 0.35, + "learning_rate": 7.524444236803662e-08, + "logits/chosen": -1.7933439016342163, + "logits/rejected": -1.8391138315200806, + "logps/chosen": -290.0068359375, + "logps/rejected": -462.94573974609375, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6491029262542725, + "rewards/margins": 1.8969697952270508, + "rewards/rejected": 0.7521331906318665, + "step": 6042 + }, + { + "epoch": 0.35, + "learning_rate": 7.523630724945511e-08, + "logits/chosen": -2.119408130645752, + "logits/rejected": -2.1093320846557617, + "logps/chosen": -7.5610527992248535, + "logps/rejected": -100.018798828125, + "loss": 0.6202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007892608642578125, + "rewards/margins": 0.2698158323764801, + "rewards/rejected": -0.2777084410190582, + "step": 6043 + }, + { + "epoch": 0.35, + "learning_rate": 7.522817123435654e-08, + "logits/chosen": -1.7899763584136963, + "logits/rejected": -1.7943514585494995, + "logps/chosen": -325.7225646972656, + "logps/rejected": -379.97515869140625, + "loss": 0.2097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5903412103652954, + "rewards/margins": 1.268707275390625, + "rewards/rejected": 0.321633905172348, + "step": 6044 + }, + { + "epoch": 0.35, + "learning_rate": 7.522003432302991e-08, + "logits/chosen": -2.000629186630249, + "logits/rejected": -1.995307207107544, + "logps/chosen": -17.653736114501953, + "logps/rejected": -187.71847534179688, + "loss": 0.5301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22343015670776367, + "rewards/margins": 1.0992703437805176, + "rewards/rejected": -1.3227005004882812, + "step": 6045 + }, + { + "epoch": 0.35, + "learning_rate": 7.521189651576432e-08, + "logits/chosen": -1.7209831476211548, + "logits/rejected": -1.693906307220459, + "logps/chosen": -250.1800537109375, + "logps/rejected": -407.09521484375, + "loss": 0.1148, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.337927222251892, + "rewards/margins": 2.295785427093506, + "rewards/rejected": -0.9578582644462585, + "step": 6046 + }, + { + "epoch": 0.35, + "learning_rate": 7.520375781284881e-08, + "logits/chosen": -2.0182301998138428, + "logits/rejected": -2.013789653778076, + "logps/chosen": -28.1614933013916, + "logps/rejected": -283.0511779785156, + "loss": 0.3424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20651035010814667, + "rewards/margins": 2.3432717323303223, + "rewards/rejected": -2.136761426925659, + "step": 6047 + }, + { + "epoch": 0.35, + "learning_rate": 7.519561821457255e-08, + "logits/chosen": -2.0566699504852295, + "logits/rejected": -2.0698928833007812, + "logps/chosen": -132.48052978515625, + "logps/rejected": -253.65390014648438, + "loss": 0.4824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19471435248851776, + "rewards/margins": 1.3182175159454346, + "rewards/rejected": -1.5129318237304688, + "step": 6048 + }, + { + "epoch": 0.35, + "learning_rate": 7.518747772122469e-08, + "logits/chosen": -1.973338007926941, + "logits/rejected": -1.976763129234314, + "logps/chosen": -150.41317749023438, + "logps/rejected": -242.58547973632812, + "loss": 0.3768, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0755401849746704, + "rewards/margins": 0.16591185331344604, + "rewards/rejected": 0.9096283316612244, + "step": 6049 + }, + { + "epoch": 0.35, + "learning_rate": 7.517933633309441e-08, + "logits/chosen": -2.026528835296631, + "logits/rejected": -2.022447109222412, + "logps/chosen": -176.1175537109375, + "logps/rejected": -298.658447265625, + "loss": 0.1928, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.532629370689392, + "rewards/margins": 1.3421508073806763, + "rewards/rejected": 0.19047851860523224, + "step": 6050 + }, + { + "epoch": 0.35, + "learning_rate": 7.517119405047094e-08, + "logits/chosen": -1.9758464097976685, + "logits/rejected": -1.9593459367752075, + "logps/chosen": -26.865663528442383, + "logps/rejected": -167.18338012695312, + "loss": 0.4248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1868761032819748, + "rewards/margins": 1.223875641822815, + "rewards/rejected": -1.0369995832443237, + "step": 6051 + }, + { + "epoch": 0.35, + "learning_rate": 7.516305087364353e-08, + "logits/chosen": -2.0010528564453125, + "logits/rejected": -1.9740952253341675, + "logps/chosen": -239.22605895996094, + "logps/rejected": -299.6893310546875, + "loss": 0.1535, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2214157581329346, + "rewards/margins": 2.1819534301757812, + "rewards/rejected": -0.9605377316474915, + "step": 6052 + }, + { + "epoch": 0.35, + "learning_rate": 7.515490680290148e-08, + "logits/chosen": -1.9402800798416138, + "logits/rejected": -1.938107967376709, + "logps/chosen": -5.558653831481934, + "logps/rejected": -89.59542846679688, + "loss": 0.5978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06776457279920578, + "rewards/margins": 0.42018309235572815, + "rewards/rejected": -0.3524185121059418, + "step": 6053 + }, + { + "epoch": 0.35, + "learning_rate": 7.514676183853408e-08, + "logits/chosen": -1.8993563652038574, + "logits/rejected": -1.896959900856018, + "logps/chosen": -212.88995361328125, + "logps/rejected": -288.8865661621094, + "loss": 0.1262, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1783294677734375, + "rewards/margins": 1.697241187095642, + "rewards/rejected": 0.481088250875473, + "step": 6054 + }, + { + "epoch": 0.35, + "learning_rate": 7.51386159808307e-08, + "logits/chosen": -1.8389215469360352, + "logits/rejected": -1.8442175388336182, + "logps/chosen": -2.025031089782715, + "logps/rejected": -158.8173065185547, + "loss": 0.4349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010326719842851162, + "rewards/margins": 1.617702841758728, + "rewards/rejected": -1.6073760986328125, + "step": 6055 + }, + { + "epoch": 0.35, + "learning_rate": 7.513046923008069e-08, + "logits/chosen": -1.9088490009307861, + "logits/rejected": -1.9069725275039673, + "logps/chosen": -24.55821990966797, + "logps/rejected": -141.8319091796875, + "loss": 0.3008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20978966355323792, + "rewards/margins": 3.165619373321533, + "rewards/rejected": -2.955829620361328, + "step": 6056 + }, + { + "epoch": 0.35, + "learning_rate": 7.512232158657352e-08, + "logits/chosen": -1.9354021549224854, + "logits/rejected": -1.9121780395507812, + "logps/chosen": -206.06747436523438, + "logps/rejected": -317.5863342285156, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7468719482421875, + "rewards/margins": 0.5224425792694092, + "rewards/rejected": 1.2244293689727783, + "step": 6057 + }, + { + "epoch": 0.35, + "learning_rate": 7.511417305059858e-08, + "logits/chosen": -1.9306309223175049, + "logits/rejected": -1.9325494766235352, + "logps/chosen": -0.00014793027366977185, + "logps/rejected": -158.4411163330078, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.330693279072875e-06, + "rewards/margins": 3.2724878787994385, + "rewards/rejected": -3.2724952697753906, + "step": 6058 + }, + { + "epoch": 0.35, + "learning_rate": 7.510602362244538e-08, + "logits/chosen": -2.158561944961548, + "logits/rejected": -2.154336929321289, + "logps/chosen": -28.08939552307129, + "logps/rejected": -179.38670349121094, + "loss": 0.3594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30129948258399963, + "rewards/margins": 1.4667565822601318, + "rewards/rejected": -1.1654571294784546, + "step": 6059 + }, + { + "epoch": 0.35, + "learning_rate": 7.509787330240342e-08, + "logits/chosen": -1.987817645072937, + "logits/rejected": -1.9850565195083618, + "logps/chosen": -80.14339447021484, + "logps/rejected": -138.70021057128906, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13609619438648224, + "rewards/margins": 2.0773727893829346, + "rewards/rejected": -1.9412765502929688, + "step": 6060 + }, + { + "epoch": 0.35, + "learning_rate": 7.508972209076222e-08, + "logits/chosen": -1.8890589475631714, + "logits/rejected": -1.8825124502182007, + "logps/chosen": -42.963626861572266, + "logps/rejected": -316.0018615722656, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2596317231655121, + "rewards/margins": 2.9090640544891357, + "rewards/rejected": -2.649432420730591, + "step": 6061 + }, + { + "epoch": 0.35, + "learning_rate": 7.508156998781137e-08, + "logits/chosen": -1.9200668334960938, + "logits/rejected": -1.9152225255966187, + "logps/chosen": -0.0010487906401976943, + "logps/rejected": -184.31936645507812, + "loss": 0.344, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.033730354218278e-05, + "rewards/margins": 4.03305196762085, + "rewards/rejected": -4.033082485198975, + "step": 6062 + }, + { + "epoch": 0.35, + "learning_rate": 7.507341699384046e-08, + "logits/chosen": -2.041492223739624, + "logits/rejected": -2.035438060760498, + "logps/chosen": -181.07058715820312, + "logps/rejected": -332.6248779296875, + "loss": 0.0914, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8703339099884033, + "rewards/margins": 2.0503082275390625, + "rewards/rejected": -0.17997436225414276, + "step": 6063 + }, + { + "epoch": 0.35, + "learning_rate": 7.506526310913913e-08, + "logits/chosen": -1.9561898708343506, + "logits/rejected": -1.8101649284362793, + "logps/chosen": -263.684814453125, + "logps/rejected": -689.226318359375, + "loss": 0.13, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.325262427330017, + "rewards/margins": 2.4733338356018066, + "rewards/rejected": -1.1480712890625, + "step": 6064 + }, + { + "epoch": 0.35, + "learning_rate": 7.505710833399706e-08, + "logits/chosen": -1.8544036149978638, + "logits/rejected": -1.8518152236938477, + "logps/chosen": -37.421783447265625, + "logps/rejected": -149.9658660888672, + "loss": 0.4392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035011291038244963, + "rewards/margins": 1.7852424383163452, + "rewards/rejected": -1.7887436151504517, + "step": 6065 + }, + { + "epoch": 0.35, + "learning_rate": 7.504895266870392e-08, + "logits/chosen": -1.77660071849823, + "logits/rejected": -1.8083875179290771, + "logps/chosen": -292.8583679199219, + "logps/rejected": -421.7486877441406, + "loss": 0.2176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4521759748458862, + "rewards/margins": 1.1102997064590454, + "rewards/rejected": 0.34187623858451843, + "step": 6066 + }, + { + "epoch": 0.35, + "learning_rate": 7.504079611354947e-08, + "logits/chosen": -1.911067008972168, + "logits/rejected": -1.90903639793396, + "logps/chosen": -306.68768310546875, + "logps/rejected": -335.0575866699219, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.961901843547821, + "rewards/margins": 0.12908935546875, + "rewards/rejected": 0.832812488079071, + "step": 6067 + }, + { + "epoch": 0.35, + "learning_rate": 7.503263866882344e-08, + "logits/chosen": -2.0136332511901855, + "logits/rejected": -1.997830867767334, + "logps/chosen": -121.01741027832031, + "logps/rejected": -254.3206024169922, + "loss": 0.3572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6795700192451477, + "rewards/margins": 0.29021912813186646, + "rewards/rejected": 0.38935089111328125, + "step": 6068 + }, + { + "epoch": 0.35, + "learning_rate": 7.502448033481564e-08, + "logits/chosen": -1.9443217515945435, + "logits/rejected": -1.9556387662887573, + "logps/chosen": -89.00007629394531, + "logps/rejected": -347.74395751953125, + "loss": 0.6963, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2574424743652344, + "rewards/margins": 4.86937952041626, + "rewards/rejected": -6.126821994781494, + "step": 6069 + }, + { + "epoch": 0.35, + "learning_rate": 7.50163211118159e-08, + "logits/chosen": -2.1146674156188965, + "logits/rejected": -2.113960027694702, + "logps/chosen": -6.276942729949951, + "logps/rejected": -78.95211791992188, + "loss": 0.4063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05333442613482475, + "rewards/margins": 2.208958864212036, + "rewards/rejected": -2.2622933387756348, + "step": 6070 + }, + { + "epoch": 0.35, + "learning_rate": 7.500816100011404e-08, + "logits/chosen": -1.8860317468643188, + "logits/rejected": -1.9107691049575806, + "logps/chosen": -218.91099548339844, + "logps/rejected": -393.57879638671875, + "loss": 0.111, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0198562145233154, + "rewards/margins": 1.7557265758514404, + "rewards/rejected": 0.264129638671875, + "step": 6071 + }, + { + "epoch": 0.35, + "learning_rate": 7.5e-08, + "logits/chosen": -2.0217785835266113, + "logits/rejected": -2.01588773727417, + "logps/chosen": -38.04139709472656, + "logps/rejected": -328.85504150390625, + "loss": 0.7188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9152629971504211, + "rewards/margins": 1.6362354755401611, + "rewards/rejected": -2.5514984130859375, + "step": 6072 + }, + { + "epoch": 0.35, + "learning_rate": 7.499183811176366e-08, + "logits/chosen": -2.020195484161377, + "logits/rejected": -1.9947949647903442, + "logps/chosen": -0.373386025428772, + "logps/rejected": -319.1619873046875, + "loss": 0.4028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009634516201913357, + "rewards/margins": 2.086254835128784, + "rewards/rejected": -2.0958893299102783, + "step": 6073 + }, + { + "epoch": 0.35, + "learning_rate": 7.498367533569499e-08, + "logits/chosen": -2.0395960807800293, + "logits/rejected": -2.0333850383758545, + "logps/chosen": -0.001356203225441277, + "logps/rejected": -121.92807006835938, + "loss": 0.6236, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0932849515229464e-05, + "rewards/margins": 0.3002017140388489, + "rewards/rejected": -0.3002426326274872, + "step": 6074 + }, + { + "epoch": 0.35, + "learning_rate": 7.497551167208395e-08, + "logits/chosen": -1.9038808345794678, + "logits/rejected": -1.9769009351730347, + "logps/chosen": -259.98577880859375, + "logps/rejected": -349.0497741699219, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.899041771888733, + "rewards/margins": 3.255697727203369, + "rewards/rejected": -1.3566559553146362, + "step": 6075 + }, + { + "epoch": 0.35, + "learning_rate": 7.496734712122057e-08, + "logits/chosen": -2.0939574241638184, + "logits/rejected": -2.094459056854248, + "logps/chosen": -23.824068069458008, + "logps/rejected": -106.27192687988281, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15445823967456818, + "rewards/margins": 0.34934043884277344, + "rewards/rejected": -0.19488219916820526, + "step": 6076 + }, + { + "epoch": 0.35, + "learning_rate": 7.49591816833949e-08, + "logits/chosen": -1.8745228052139282, + "logits/rejected": -1.8698519468307495, + "logps/chosen": -38.81749725341797, + "logps/rejected": -198.05563354492188, + "loss": 0.4445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19541816413402557, + "rewards/margins": 1.1492420434951782, + "rewards/rejected": -0.9538238644599915, + "step": 6077 + }, + { + "epoch": 0.35, + "learning_rate": 7.495101535889701e-08, + "logits/chosen": -2.1079747676849365, + "logits/rejected": -2.0876851081848145, + "logps/chosen": -314.9645690917969, + "logps/rejected": -510.451171875, + "loss": 0.4879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8384338617324829, + "rewards/margins": 4.272510051727295, + "rewards/rejected": -5.110943794250488, + "step": 6078 + }, + { + "epoch": 0.35, + "learning_rate": 7.494284814801699e-08, + "logits/chosen": -2.074366331100464, + "logits/rejected": -2.090414047241211, + "logps/chosen": -184.1396484375, + "logps/rejected": -438.65216064453125, + "loss": 0.0936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6490906476974487, + "rewards/margins": 2.4284698963165283, + "rewards/rejected": -0.7793793082237244, + "step": 6079 + }, + { + "epoch": 0.35, + "learning_rate": 7.493468005104502e-08, + "logits/chosen": -1.8599433898925781, + "logits/rejected": -1.8617215156555176, + "logps/chosen": -27.43207359313965, + "logps/rejected": -110.28903198242188, + "loss": 0.6733, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.23123379051685333, + "rewards/margins": -0.1460893601179123, + "rewards/rejected": 0.3773231506347656, + "step": 6080 + }, + { + "epoch": 0.35, + "learning_rate": 7.492651106827122e-08, + "logits/chosen": -2.1884636878967285, + "logits/rejected": -2.1672394275665283, + "logps/chosen": -170.09823608398438, + "logps/rejected": -246.23670959472656, + "loss": 0.3846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5568420886993408, + "rewards/margins": 0.04845130443572998, + "rewards/rejected": 1.5083907842636108, + "step": 6081 + }, + { + "epoch": 0.35, + "learning_rate": 7.491834119998583e-08, + "logits/chosen": -2.080841302871704, + "logits/rejected": -2.082277536392212, + "logps/chosen": -0.0072221457958221436, + "logps/rejected": -182.94711303710938, + "loss": 0.4632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00021938821009825915, + "rewards/margins": 1.2257168292999268, + "rewards/rejected": -1.2254974842071533, + "step": 6082 + }, + { + "epoch": 0.35, + "learning_rate": 7.491017044647907e-08, + "logits/chosen": -1.9020501375198364, + "logits/rejected": -1.8936476707458496, + "logps/chosen": -0.0005604381440207362, + "logps/rejected": -173.27569580078125, + "loss": 0.4271, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2394267944036983e-05, + "rewards/margins": 1.7375956773757935, + "rewards/rejected": -1.737573266029358, + "step": 6083 + }, + { + "epoch": 0.35, + "learning_rate": 7.490199880804123e-08, + "logits/chosen": -2.133103609085083, + "logits/rejected": -2.1171607971191406, + "logps/chosen": -131.6661834716797, + "logps/rejected": -486.6811218261719, + "loss": 0.12, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1246460676193237, + "rewards/margins": 3.5543150901794434, + "rewards/rejected": -2.429669141769409, + "step": 6084 + }, + { + "epoch": 0.35, + "learning_rate": 7.489382628496254e-08, + "logits/chosen": -2.049036979675293, + "logits/rejected": -2.022057294845581, + "logps/chosen": -283.1402587890625, + "logps/rejected": -341.82733154296875, + "loss": 0.094, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.34613037109375, + "rewards/margins": 1.9007294178009033, + "rewards/rejected": 0.44540101289749146, + "step": 6085 + }, + { + "epoch": 0.35, + "learning_rate": 7.488565287753341e-08, + "logits/chosen": -2.0763182640075684, + "logits/rejected": -2.0645878314971924, + "logps/chosen": -3.588152685551904e-05, + "logps/rejected": -71.47699737548828, + "loss": 0.6713, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2186434850700607e-07, + "rewards/margins": 0.0875384658575058, + "rewards/rejected": -0.08753814548254013, + "step": 6086 + }, + { + "epoch": 0.35, + "learning_rate": 7.487747858604414e-08, + "logits/chosen": -2.0351288318634033, + "logits/rejected": -2.0279462337493896, + "logps/chosen": -41.75483322143555, + "logps/rejected": -124.13923645019531, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9603420495986938, + "rewards/margins": 1.0707340240478516, + "rewards/rejected": -0.1103919968008995, + "step": 6087 + }, + { + "epoch": 0.35, + "learning_rate": 7.486930341078514e-08, + "logits/chosen": -2.161841630935669, + "logits/rejected": -2.093294382095337, + "logps/chosen": -173.9170379638672, + "logps/rejected": -460.86285400390625, + "loss": 0.2555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9388839602470398, + "rewards/margins": 1.174626111984253, + "rewards/rejected": -0.23574219644069672, + "step": 6088 + }, + { + "epoch": 0.35, + "learning_rate": 7.486112735204683e-08, + "logits/chosen": -1.894842267036438, + "logits/rejected": -1.8919447660446167, + "logps/chosen": -285.8438720703125, + "logps/rejected": -479.75091552734375, + "loss": 0.0911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.315399169921875, + "rewards/margins": 2.1646149158477783, + "rewards/rejected": -0.8492156863212585, + "step": 6089 + }, + { + "epoch": 0.35, + "learning_rate": 7.48529504101197e-08, + "logits/chosen": -1.926084041595459, + "logits/rejected": -1.9128313064575195, + "logps/chosen": -196.11846923828125, + "logps/rejected": -203.40438842773438, + "loss": 0.2178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5443085432052612, + "rewards/margins": 1.0859253406524658, + "rewards/rejected": 0.458383172750473, + "step": 6090 + }, + { + "epoch": 0.35, + "learning_rate": 7.484477258529417e-08, + "logits/chosen": -1.78756844997406, + "logits/rejected": -1.7815475463867188, + "logps/chosen": -0.06515393406152725, + "logps/rejected": -175.98280334472656, + "loss": 0.4746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0030969330109655857, + "rewards/margins": 1.0754648447036743, + "rewards/rejected": -1.0723679065704346, + "step": 6091 + }, + { + "epoch": 0.35, + "learning_rate": 7.48365938778608e-08, + "logits/chosen": -1.8578088283538818, + "logits/rejected": -1.7849016189575195, + "logps/chosen": -215.5966339111328, + "logps/rejected": -414.3985595703125, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9003586173057556, + "rewards/margins": 0.8901779651641846, + "rewards/rejected": 0.010180664248764515, + "step": 6092 + }, + { + "epoch": 0.35, + "learning_rate": 7.482841428811014e-08, + "logits/chosen": -2.068557024002075, + "logits/rejected": -2.071903944015503, + "logps/chosen": -12.080622673034668, + "logps/rejected": -171.19488525390625, + "loss": 0.3964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11818952858448029, + "rewards/margins": 1.4439494609832764, + "rewards/rejected": -1.3257598876953125, + "step": 6093 + }, + { + "epoch": 0.35, + "learning_rate": 7.482023381633272e-08, + "logits/chosen": -2.0224082469940186, + "logits/rejected": -2.00285005569458, + "logps/chosen": -0.012758114375174046, + "logps/rejected": -230.99212646484375, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008027717703953385, + "rewards/margins": 3.031202554702759, + "rewards/rejected": -3.0320053100585938, + "step": 6094 + }, + { + "epoch": 0.35, + "learning_rate": 7.481205246281922e-08, + "logits/chosen": -2.059178113937378, + "logits/rejected": -2.058049440383911, + "logps/chosen": -0.012812424451112747, + "logps/rejected": -96.54158020019531, + "loss": 0.452, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.155213486636057e-05, + "rewards/margins": 1.3206089735031128, + "rewards/rejected": -1.3205474615097046, + "step": 6095 + }, + { + "epoch": 0.35, + "learning_rate": 7.480387022786022e-08, + "logits/chosen": -1.7695804834365845, + "logits/rejected": -1.680212140083313, + "logps/chosen": -310.9941711425781, + "logps/rejected": -437.31646728515625, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2417938709259033, + "rewards/margins": 0.7008301019668579, + "rewards/rejected": 0.5409637689590454, + "step": 6096 + }, + { + "epoch": 0.35, + "learning_rate": 7.479568711174644e-08, + "logits/chosen": -1.917246699333191, + "logits/rejected": -1.9134529829025269, + "logps/chosen": -38.852272033691406, + "logps/rejected": -190.33120727539062, + "loss": 0.3502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28033754229545593, + "rewards/margins": 1.8565689325332642, + "rewards/rejected": -1.5762313604354858, + "step": 6097 + }, + { + "epoch": 0.35, + "learning_rate": 7.478750311476856e-08, + "logits/chosen": -1.802294135093689, + "logits/rejected": -1.8104500770568848, + "logps/chosen": -174.18905639648438, + "logps/rejected": -334.25823974609375, + "loss": 0.1368, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1804412603378296, + "rewards/margins": 2.208444118499756, + "rewards/rejected": -1.0280029773712158, + "step": 6098 + }, + { + "epoch": 0.35, + "learning_rate": 7.477931823721732e-08, + "logits/chosen": -1.953040599822998, + "logits/rejected": -1.9374921321868896, + "logps/chosen": -34.52568054199219, + "logps/rejected": -289.67181396484375, + "loss": 0.411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23101425170898438, + "rewards/margins": 1.5532448291778564, + "rewards/rejected": -1.7842590808868408, + "step": 6099 + }, + { + "epoch": 0.35, + "learning_rate": 7.477113247938348e-08, + "logits/chosen": -1.9384788274765015, + "logits/rejected": -1.930985450744629, + "logps/chosen": -3.7669684388674796e-05, + "logps/rejected": -202.3048858642578, + "loss": 0.4387, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.940427846937382e-07, + "rewards/margins": 1.4458978176116943, + "rewards/rejected": -1.4458969831466675, + "step": 6100 + }, + { + "epoch": 0.36, + "learning_rate": 7.476294584155786e-08, + "logits/chosen": -1.997768521308899, + "logits/rejected": -1.9680756330490112, + "logps/chosen": -193.52041625976562, + "logps/rejected": -450.5580139160156, + "loss": 0.226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.801257312297821, + "rewards/margins": 1.2226715087890625, + "rewards/rejected": -0.42141419649124146, + "step": 6101 + }, + { + "epoch": 0.36, + "learning_rate": 7.475475832403127e-08, + "logits/chosen": -1.8431191444396973, + "logits/rejected": -1.859541893005371, + "logps/chosen": -57.150672912597656, + "logps/rejected": -185.09364318847656, + "loss": 0.1961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.184954047203064, + "rewards/margins": 2.0380606651306152, + "rewards/rejected": -0.853106677532196, + "step": 6102 + }, + { + "epoch": 0.36, + "learning_rate": 7.474656992709458e-08, + "logits/chosen": -1.971725583076477, + "logits/rejected": -1.968183994293213, + "logps/chosen": -27.434518814086914, + "logps/rejected": -156.98736572265625, + "loss": 0.3516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24095992743968964, + "rewards/margins": 2.2186150550842285, + "rewards/rejected": -1.977655053138733, + "step": 6103 + }, + { + "epoch": 0.36, + "learning_rate": 7.473838065103867e-08, + "logits/chosen": -2.013201951980591, + "logits/rejected": -2.0111725330352783, + "logps/chosen": -15.105857849121094, + "logps/rejected": -119.23609924316406, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49501878023147583, + "rewards/margins": 0.9646857976913452, + "rewards/rejected": -0.4696670472621918, + "step": 6104 + }, + { + "epoch": 0.36, + "learning_rate": 7.473019049615448e-08, + "logits/chosen": -1.9922258853912354, + "logits/rejected": -1.991465449333191, + "logps/chosen": -114.82304382324219, + "logps/rejected": -152.22171020507812, + "loss": 0.4603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7220062613487244, + "rewards/margins": 0.21446990966796875, + "rewards/rejected": 0.5075363516807556, + "step": 6105 + }, + { + "epoch": 0.36, + "learning_rate": 7.472199946273294e-08, + "logits/chosen": -1.9832468032836914, + "logits/rejected": -2.040217161178589, + "logps/chosen": -257.7615966796875, + "logps/rejected": -261.25933837890625, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.14231276512146, + "rewards/margins": 2.47263503074646, + "rewards/rejected": -0.330322265625, + "step": 6106 + }, + { + "epoch": 0.36, + "learning_rate": 7.471380755106507e-08, + "logits/chosen": -1.9516650438308716, + "logits/rejected": -1.9498533010482788, + "logps/chosen": -21.21544075012207, + "logps/rejected": -68.2873764038086, + "loss": 0.8566, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0706433057785034, + "rewards/margins": 0.8406646251678467, + "rewards/rejected": -1.91130793094635, + "step": 6107 + }, + { + "epoch": 0.36, + "learning_rate": 7.470561476144185e-08, + "logits/chosen": -2.168790340423584, + "logits/rejected": -2.162726879119873, + "logps/chosen": -14.153135299682617, + "logps/rejected": -127.91637420654297, + "loss": 0.4923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07850408554077148, + "rewards/margins": 0.9557280540466309, + "rewards/rejected": -0.8772239685058594, + "step": 6108 + }, + { + "epoch": 0.36, + "learning_rate": 7.469742109415436e-08, + "logits/chosen": -1.9684514999389648, + "logits/rejected": -1.9462114572525024, + "logps/chosen": -82.80943298339844, + "logps/rejected": -368.1133728027344, + "loss": 0.149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7108200192451477, + "rewards/margins": 5.1515398025512695, + "rewards/rejected": -4.4407196044921875, + "step": 6109 + }, + { + "epoch": 0.36, + "learning_rate": 7.468922654949365e-08, + "logits/chosen": -2.109761953353882, + "logits/rejected": -2.116060256958008, + "logps/chosen": -17.191896438598633, + "logps/rejected": -103.79975128173828, + "loss": 0.5536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20029611885547638, + "rewards/margins": 0.9928626418113708, + "rewards/rejected": -1.193158745765686, + "step": 6110 + }, + { + "epoch": 0.36, + "learning_rate": 7.468103112775084e-08, + "logits/chosen": -2.205540418624878, + "logits/rejected": -2.204878330230713, + "logps/chosen": -30.20345687866211, + "logps/rejected": -246.07147216796875, + "loss": 0.5258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4383527934551239, + "rewards/margins": 1.7047045230865479, + "rewards/rejected": -2.143057346343994, + "step": 6111 + }, + { + "epoch": 0.36, + "learning_rate": 7.46728348292171e-08, + "logits/chosen": -1.8589028120040894, + "logits/rejected": -1.8562557697296143, + "logps/chosen": -19.70831871032715, + "logps/rejected": -150.83395385742188, + "loss": 0.5154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12023621052503586, + "rewards/margins": 0.7182571887969971, + "rewards/rejected": -0.5980209708213806, + "step": 6112 + }, + { + "epoch": 0.36, + "learning_rate": 7.466463765418356e-08, + "logits/chosen": -2.023815870285034, + "logits/rejected": -2.0208332538604736, + "logps/chosen": -44.48652267456055, + "logps/rejected": -124.953369140625, + "loss": 0.2374, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0614169836044312, + "rewards/margins": 1.3884646892547607, + "rewards/rejected": -0.327047735452652, + "step": 6113 + }, + { + "epoch": 0.36, + "learning_rate": 7.465643960294146e-08, + "logits/chosen": -1.9438303709030151, + "logits/rejected": -1.9146603345870972, + "logps/chosen": -253.17062377929688, + "logps/rejected": -380.0073547363281, + "loss": 0.3281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8248016238212585, + "rewards/margins": 0.396157830953598, + "rewards/rejected": 0.4286437928676605, + "step": 6114 + }, + { + "epoch": 0.36, + "learning_rate": 7.464824067578199e-08, + "logits/chosen": -1.9464342594146729, + "logits/rejected": -1.9423253536224365, + "logps/chosen": -25.96591567993164, + "logps/rejected": -243.32952880859375, + "loss": 0.214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7014538049697876, + "rewards/margins": 2.0544657707214355, + "rewards/rejected": -1.3530120849609375, + "step": 6115 + }, + { + "epoch": 0.36, + "learning_rate": 7.464004087299646e-08, + "logits/chosen": -2.067514419555664, + "logits/rejected": -1.9916023015975952, + "logps/chosen": -174.16522216796875, + "logps/rejected": -357.023193359375, + "loss": 0.1001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3030471801757812, + "rewards/margins": 2.093531847000122, + "rewards/rejected": 0.20951537787914276, + "step": 6116 + }, + { + "epoch": 0.36, + "learning_rate": 7.463184019487616e-08, + "logits/chosen": -1.9272129535675049, + "logits/rejected": -1.9166063070297241, + "logps/chosen": -4.107766151428223, + "logps/rejected": -122.13825988769531, + "loss": 0.4605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006532955449074507, + "rewards/margins": 1.3567529916763306, + "rewards/rejected": -1.3632858991622925, + "step": 6117 + }, + { + "epoch": 0.36, + "learning_rate": 7.462363864171239e-08, + "logits/chosen": -2.0863704681396484, + "logits/rejected": -2.0857040882110596, + "logps/chosen": -13.875516891479492, + "logps/rejected": -220.99591064453125, + "loss": 0.3402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04997396469116211, + "rewards/margins": 4.387223720550537, + "rewards/rejected": -4.337249755859375, + "step": 6118 + }, + { + "epoch": 0.36, + "learning_rate": 7.461543621379653e-08, + "logits/chosen": -1.9472538232803345, + "logits/rejected": -1.9546611309051514, + "logps/chosen": -13.686504364013672, + "logps/rejected": -105.24623107910156, + "loss": 0.6325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031019974499940872, + "rewards/margins": 0.21268653869628906, + "rewards/rejected": -0.24370650947093964, + "step": 6119 + }, + { + "epoch": 0.36, + "learning_rate": 7.460723291142e-08, + "logits/chosen": -2.018009901046753, + "logits/rejected": -2.0162851810455322, + "logps/chosen": -0.022840304300189018, + "logps/rejected": -136.59365844726562, + "loss": 0.525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012259772047400475, + "rewards/margins": 0.8445853590965271, + "rewards/rejected": -0.8433594107627869, + "step": 6120 + }, + { + "epoch": 0.36, + "learning_rate": 7.459902873487415e-08, + "logits/chosen": -2.209178924560547, + "logits/rejected": -2.1870956420898438, + "logps/chosen": -47.632606506347656, + "logps/rejected": -308.8419189453125, + "loss": 0.3954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45492133498191833, + "rewards/margins": 1.0560382604599, + "rewards/rejected": -0.601116955280304, + "step": 6121 + }, + { + "epoch": 0.36, + "learning_rate": 7.45908236844505e-08, + "logits/chosen": -1.9843089580535889, + "logits/rejected": -2.018357515335083, + "logps/chosen": -199.89483642578125, + "logps/rejected": -218.64105224609375, + "loss": 0.4462, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.298028588294983, + "rewards/margins": -0.2111145257949829, + "rewards/rejected": 1.5091431140899658, + "step": 6122 + }, + { + "epoch": 0.36, + "learning_rate": 7.458261776044049e-08, + "logits/chosen": -1.8550986051559448, + "logits/rejected": -1.841036319732666, + "logps/chosen": -250.601318359375, + "logps/rejected": -379.084228515625, + "loss": 0.1284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8442718982696533, + "rewards/margins": 1.7248810529708862, + "rewards/rejected": 0.11939086765050888, + "step": 6123 + }, + { + "epoch": 0.36, + "learning_rate": 7.457441096313566e-08, + "logits/chosen": -2.124688148498535, + "logits/rejected": -2.1189749240875244, + "logps/chosen": -13.962477684020996, + "logps/rejected": -326.5071716308594, + "loss": 0.3035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10958804935216904, + "rewards/margins": 5.035872936248779, + "rewards/rejected": -4.9262847900390625, + "step": 6124 + }, + { + "epoch": 0.36, + "learning_rate": 7.456620329282755e-08, + "logits/chosen": -1.8255480527877808, + "logits/rejected": -1.8008300065994263, + "logps/chosen": -209.22215270996094, + "logps/rejected": -278.0638122558594, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9241364002227783, + "rewards/margins": 0.2688506841659546, + "rewards/rejected": 1.6552857160568237, + "step": 6125 + }, + { + "epoch": 0.36, + "learning_rate": 7.455799474980772e-08, + "logits/chosen": -1.9281296730041504, + "logits/rejected": -1.9269098043441772, + "logps/chosen": -195.94871520996094, + "logps/rejected": -256.84722900390625, + "loss": 0.3085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6642547845840454, + "rewards/margins": 0.5468047857284546, + "rewards/rejected": 1.1174499988555908, + "step": 6126 + }, + { + "epoch": 0.36, + "learning_rate": 7.45497853343678e-08, + "logits/chosen": -2.0268454551696777, + "logits/rejected": -2.0135843753814697, + "logps/chosen": -30.504846572875977, + "logps/rejected": -138.88278198242188, + "loss": 0.4195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02090778388082981, + "rewards/margins": 1.5622637271881104, + "rewards/rejected": -1.5413559675216675, + "step": 6127 + }, + { + "epoch": 0.36, + "learning_rate": 7.45415750467994e-08, + "logits/chosen": -2.0054309368133545, + "logits/rejected": -2.0034728050231934, + "logps/chosen": -9.552716255187988, + "logps/rejected": -145.40289306640625, + "loss": 0.4071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14495030045509338, + "rewards/margins": 1.515409231185913, + "rewards/rejected": -1.370458960533142, + "step": 6128 + }, + { + "epoch": 0.36, + "learning_rate": 7.453336388739423e-08, + "logits/chosen": -1.9808505773544312, + "logits/rejected": -1.9530171155929565, + "logps/chosen": -180.95703125, + "logps/rejected": -316.3002014160156, + "loss": 0.2222, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0040695667266846, + "rewards/margins": 0.7905594110488892, + "rewards/rejected": 1.2135101556777954, + "step": 6129 + }, + { + "epoch": 0.36, + "learning_rate": 7.452515185644397e-08, + "logits/chosen": -1.748042345046997, + "logits/rejected": -1.7475026845932007, + "logps/chosen": -8.029036521911621, + "logps/rejected": -62.43751525878906, + "loss": 0.6782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18373523652553558, + "rewards/margins": 0.2670910358428955, + "rewards/rejected": -0.4508262574672699, + "step": 6130 + }, + { + "epoch": 0.36, + "learning_rate": 7.451693895424034e-08, + "logits/chosen": -1.9325120449066162, + "logits/rejected": -1.9289019107818604, + "logps/chosen": -40.070945739746094, + "logps/rejected": -274.2608337402344, + "loss": 0.4077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39915695786476135, + "rewards/margins": 0.8375908136367798, + "rewards/rejected": -0.43843385577201843, + "step": 6131 + }, + { + "epoch": 0.36, + "learning_rate": 7.450872518107511e-08, + "logits/chosen": -1.8596315383911133, + "logits/rejected": -1.8635765314102173, + "logps/chosen": -16.83977508544922, + "logps/rejected": -166.6214599609375, + "loss": 0.4077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1592928022146225, + "rewards/margins": 2.1168289184570312, + "rewards/rejected": -2.2761216163635254, + "step": 6132 + }, + { + "epoch": 0.36, + "learning_rate": 7.450051053724009e-08, + "logits/chosen": -1.7978509664535522, + "logits/rejected": -1.786154866218567, + "logps/chosen": -317.86376953125, + "logps/rejected": -482.98260498046875, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7899841666221619, + "rewards/margins": 0.7197876572608948, + "rewards/rejected": 0.07019653171300888, + "step": 6133 + }, + { + "epoch": 0.36, + "learning_rate": 7.449229502302707e-08, + "logits/chosen": -1.8652470111846924, + "logits/rejected": -1.8713454008102417, + "logps/chosen": -67.92404174804688, + "logps/rejected": -229.969970703125, + "loss": 0.4128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5314010977745056, + "rewards/margins": 0.6738144159317017, + "rewards/rejected": -0.14241333305835724, + "step": 6134 + }, + { + "epoch": 0.36, + "learning_rate": 7.448407863872794e-08, + "logits/chosen": -1.9900239706039429, + "logits/rejected": -1.9783989191055298, + "logps/chosen": -100.87860107421875, + "logps/rejected": -265.222900390625, + "loss": 0.2531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5903610587120056, + "rewards/margins": 2.799682855606079, + "rewards/rejected": -2.2093217372894287, + "step": 6135 + }, + { + "epoch": 0.36, + "learning_rate": 7.447586138463457e-08, + "logits/chosen": -1.9282768964767456, + "logits/rejected": -1.9279119968414307, + "logps/chosen": -161.8370361328125, + "logps/rejected": -322.63323974609375, + "loss": 0.1405, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8109451532363892, + "rewards/margins": 1.5791457891464233, + "rewards/rejected": 0.23179931938648224, + "step": 6136 + }, + { + "epoch": 0.36, + "learning_rate": 7.446764326103888e-08, + "logits/chosen": -2.2261440753936768, + "logits/rejected": -2.210697889328003, + "logps/chosen": -37.9983024597168, + "logps/rejected": -152.69737243652344, + "loss": 0.4567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04722099378705025, + "rewards/margins": 1.4994755983352661, + "rewards/rejected": -1.5466965436935425, + "step": 6137 + }, + { + "epoch": 0.36, + "learning_rate": 7.445942426823282e-08, + "logits/chosen": -2.0035645961761475, + "logits/rejected": -1.9887040853500366, + "logps/chosen": -0.45838791131973267, + "logps/rejected": -289.6691589355469, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0400393083691597, + "rewards/margins": 5.310905933380127, + "rewards/rejected": -5.270866394042969, + "step": 6138 + }, + { + "epoch": 0.36, + "learning_rate": 7.445120440650836e-08, + "logits/chosen": -1.9947595596313477, + "logits/rejected": -1.9224027395248413, + "logps/chosen": -383.43621826171875, + "logps/rejected": -498.42803955078125, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6405091285705566, + "rewards/margins": 1.6185669898986816, + "rewards/rejected": 1.021942138671875, + "step": 6139 + }, + { + "epoch": 0.36, + "learning_rate": 7.444298367615753e-08, + "logits/chosen": -2.2087032794952393, + "logits/rejected": -2.191908836364746, + "logps/chosen": -21.697345733642578, + "logps/rejected": -186.091796875, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5235439538955688, + "rewards/margins": 2.57377290725708, + "rewards/rejected": -2.0502288341522217, + "step": 6140 + }, + { + "epoch": 0.36, + "learning_rate": 7.443476207747235e-08, + "logits/chosen": -1.9464775323867798, + "logits/rejected": -1.944554090499878, + "logps/chosen": -82.49669647216797, + "logps/rejected": -276.68536376953125, + "loss": 0.5191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6051902770996094, + "rewards/margins": 3.570301055908203, + "rewards/rejected": -4.1754913330078125, + "step": 6141 + }, + { + "epoch": 0.36, + "learning_rate": 7.442653961074488e-08, + "logits/chosen": -1.7991793155670166, + "logits/rejected": -1.7988853454589844, + "logps/chosen": -0.061918023973703384, + "logps/rejected": -51.19512176513672, + "loss": 0.6931, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004340921528637409, + "rewards/margins": 0.002286352217197418, + "rewards/rejected": -0.006627273745834827, + "step": 6142 + }, + { + "epoch": 0.36, + "learning_rate": 7.441831627626725e-08, + "logits/chosen": -1.8034368753433228, + "logits/rejected": -1.8483752012252808, + "logps/chosen": -196.80862426757812, + "logps/rejected": -312.50799560546875, + "loss": 0.096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.011065721511841, + "rewards/margins": 1.9607453346252441, + "rewards/rejected": 0.05032043531537056, + "step": 6143 + }, + { + "epoch": 0.36, + "learning_rate": 7.44100920743316e-08, + "logits/chosen": -1.9262707233428955, + "logits/rejected": -1.9143987894058228, + "logps/chosen": -6.501962661743164, + "logps/rejected": -177.8445587158203, + "loss": 0.4118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01249008160084486, + "rewards/margins": 2.0222909450531006, + "rewards/rejected": -2.034780979156494, + "step": 6144 + }, + { + "epoch": 0.36, + "learning_rate": 7.440186700523006e-08, + "logits/chosen": -2.1152000427246094, + "logits/rejected": -2.115950107574463, + "logps/chosen": -39.08866882324219, + "logps/rejected": -215.28982543945312, + "loss": 0.2651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5555340051651001, + "rewards/margins": 2.356562614440918, + "rewards/rejected": -1.8010284900665283, + "step": 6145 + }, + { + "epoch": 0.36, + "learning_rate": 7.439364106925484e-08, + "logits/chosen": -2.113467216491699, + "logits/rejected": -2.1052446365356445, + "logps/chosen": -53.37574005126953, + "logps/rejected": -242.47683715820312, + "loss": 0.6187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02253875695168972, + "rewards/margins": 0.2964279353618622, + "rewards/rejected": -0.31896668672561646, + "step": 6146 + }, + { + "epoch": 0.36, + "learning_rate": 7.438541426669818e-08, + "logits/chosen": -1.9825626611709595, + "logits/rejected": -1.988592267036438, + "logps/chosen": -40.7998161315918, + "logps/rejected": -315.89593505859375, + "loss": 0.2934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.326669305562973, + "rewards/margins": 2.631805419921875, + "rewards/rejected": -2.305136203765869, + "step": 6147 + }, + { + "epoch": 0.36, + "learning_rate": 7.437718659785231e-08, + "logits/chosen": -1.8217259645462036, + "logits/rejected": -1.8130388259887695, + "logps/chosen": -297.4788818359375, + "logps/rejected": -472.6898193359375, + "loss": 0.2388, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4702484607696533, + "rewards/margins": 0.7608734369277954, + "rewards/rejected": 0.7093750238418579, + "step": 6148 + }, + { + "epoch": 0.36, + "learning_rate": 7.436895806300954e-08, + "logits/chosen": -1.861668348312378, + "logits/rejected": -1.8654141426086426, + "logps/chosen": -0.26719576120376587, + "logps/rejected": -58.53221893310547, + "loss": 0.4025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03706550598144531, + "rewards/margins": 2.166903257369995, + "rewards/rejected": -2.12983775138855, + "step": 6149 + }, + { + "epoch": 0.36, + "learning_rate": 7.436072866246217e-08, + "logits/chosen": -1.9750745296478271, + "logits/rejected": -1.9535839557647705, + "logps/chosen": -263.35064697265625, + "logps/rejected": -432.3465576171875, + "loss": 0.2512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7557190656661987, + "rewards/margins": 0.5651581287384033, + "rewards/rejected": 1.1905609369277954, + "step": 6150 + }, + { + "epoch": 0.36, + "learning_rate": 7.435249839650256e-08, + "logits/chosen": -1.8822177648544312, + "logits/rejected": -1.8918949365615845, + "logps/chosen": -8.627603530883789, + "logps/rejected": -71.33338928222656, + "loss": 0.5071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.049901869148015976, + "rewards/margins": 1.1150484085083008, + "rewards/rejected": -1.1649502515792847, + "step": 6151 + }, + { + "epoch": 0.36, + "learning_rate": 7.434426726542308e-08, + "logits/chosen": -1.8924579620361328, + "logits/rejected": -1.904036045074463, + "logps/chosen": -266.977783203125, + "logps/rejected": -292.085693359375, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.046112060546875, + "rewards/margins": 0.5532379150390625, + "rewards/rejected": 0.4928741455078125, + "step": 6152 + }, + { + "epoch": 0.36, + "learning_rate": 7.433603526951615e-08, + "logits/chosen": -1.9876188039779663, + "logits/rejected": -1.970269799232483, + "logps/chosen": -217.02871704101562, + "logps/rejected": -308.6357421875, + "loss": 0.3742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9581574201583862, + "rewards/margins": 0.0075348615646362305, + "rewards/rejected": 1.95062255859375, + "step": 6153 + }, + { + "epoch": 0.36, + "learning_rate": 7.432780240907422e-08, + "logits/chosen": -2.1681454181671143, + "logits/rejected": -2.165592670440674, + "logps/chosen": -35.59641647338867, + "logps/rejected": -150.24034118652344, + "loss": 0.5574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18118897080421448, + "rewards/margins": 0.22263947129249573, + "rewards/rejected": -0.04145050048828125, + "step": 6154 + }, + { + "epoch": 0.36, + "learning_rate": 7.431956868438972e-08, + "logits/chosen": -2.0027430057525635, + "logits/rejected": -2.0064053535461426, + "logps/chosen": -98.85142517089844, + "logps/rejected": -227.39198303222656, + "loss": 0.3752, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.400247186422348, + "rewards/margins": 1.076594591140747, + "rewards/rejected": -0.6763473749160767, + "step": 6155 + }, + { + "epoch": 0.36, + "learning_rate": 7.431133409575521e-08, + "logits/chosen": -1.884460210800171, + "logits/rejected": -1.882162094116211, + "logps/chosen": -60.59116744995117, + "logps/rejected": -162.88211059570312, + "loss": 0.1888, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0201267004013062, + "rewards/margins": 2.5817692279815674, + "rewards/rejected": -1.5616425275802612, + "step": 6156 + }, + { + "epoch": 0.36, + "learning_rate": 7.430309864346317e-08, + "logits/chosen": -1.9532477855682373, + "logits/rejected": -1.920148253440857, + "logps/chosen": -274.6177673339844, + "logps/rejected": -400.669677734375, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.66070556640625, + "rewards/margins": 2.2216827869415283, + "rewards/rejected": -0.5609771609306335, + "step": 6157 + }, + { + "epoch": 0.36, + "learning_rate": 7.429486232780619e-08, + "logits/chosen": -2.098196029663086, + "logits/rejected": -2.0943710803985596, + "logps/chosen": -0.43831703066825867, + "logps/rejected": -139.53050231933594, + "loss": 0.3636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022644584998488426, + "rewards/margins": 3.1046931743621826, + "rewards/rejected": -3.127337694168091, + "step": 6158 + }, + { + "epoch": 0.36, + "learning_rate": 7.428662514907687e-08, + "logits/chosen": -1.7569339275360107, + "logits/rejected": -1.7515695095062256, + "logps/chosen": -23.28360366821289, + "logps/rejected": -99.53645324707031, + "loss": 0.551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1055791899561882, + "rewards/margins": 0.6107199192047119, + "rewards/rejected": -0.5051407217979431, + "step": 6159 + }, + { + "epoch": 0.36, + "learning_rate": 7.427838710756784e-08, + "logits/chosen": -1.9417381286621094, + "logits/rejected": -1.9133554697036743, + "logps/chosen": -240.45135498046875, + "logps/rejected": -401.6804504394531, + "loss": 0.1196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9292770624160767, + "rewards/margins": 2.4421372413635254, + "rewards/rejected": -1.5128601789474487, + "step": 6160 + }, + { + "epoch": 0.36, + "learning_rate": 7.427014820357171e-08, + "logits/chosen": -2.0660791397094727, + "logits/rejected": -2.0578248500823975, + "logps/chosen": -32.82786560058594, + "logps/rejected": -250.0171661376953, + "loss": 0.5897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.039318084716796875, + "rewards/margins": 0.37919387221336365, + "rewards/rejected": -0.4185119569301605, + "step": 6161 + }, + { + "epoch": 0.36, + "learning_rate": 7.426190843738123e-08, + "logits/chosen": -2.100712537765503, + "logits/rejected": -2.0977792739868164, + "logps/chosen": -13.63280963897705, + "logps/rejected": -284.4217529296875, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2308005392551422, + "rewards/margins": 4.881463050842285, + "rewards/rejected": -4.650662422180176, + "step": 6162 + }, + { + "epoch": 0.36, + "learning_rate": 7.425366780928906e-08, + "logits/chosen": -2.1121745109558105, + "logits/rejected": -2.12638258934021, + "logps/chosen": -175.3644561767578, + "logps/rejected": -267.2746887207031, + "loss": 0.2402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3988388776779175, + "rewards/margins": 0.8281174302101135, + "rewards/rejected": 0.570721447467804, + "step": 6163 + }, + { + "epoch": 0.36, + "learning_rate": 7.424542631958799e-08, + "logits/chosen": -2.142658233642578, + "logits/rejected": -2.1316978931427, + "logps/chosen": -196.71942138671875, + "logps/rejected": -340.49530029296875, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8354309797286987, + "rewards/margins": 2.7943177223205566, + "rewards/rejected": -0.9588867425918579, + "step": 6164 + }, + { + "epoch": 0.36, + "learning_rate": 7.423718396857076e-08, + "logits/chosen": -1.912739872932434, + "logits/rejected": -1.900947093963623, + "logps/chosen": -194.82501220703125, + "logps/rejected": -240.42401123046875, + "loss": 0.3903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7620819211006165, + "rewards/margins": 0.26050108671188354, + "rewards/rejected": 0.5015808343887329, + "step": 6165 + }, + { + "epoch": 0.36, + "learning_rate": 7.42289407565302e-08, + "logits/chosen": -1.9779118299484253, + "logits/rejected": -1.942651629447937, + "logps/chosen": -174.41688537597656, + "logps/rejected": -519.6201171875, + "loss": 0.0639, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0908005237579346, + "rewards/margins": 2.769944906234741, + "rewards/rejected": -0.6791443228721619, + "step": 6166 + }, + { + "epoch": 0.36, + "learning_rate": 7.422069668375916e-08, + "logits/chosen": -2.027907133102417, + "logits/rejected": -2.0157361030578613, + "logps/chosen": -199.2156982421875, + "logps/rejected": -447.1195068359375, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7115341424942017, + "rewards/margins": 5.455671787261963, + "rewards/rejected": -3.7441375255584717, + "step": 6167 + }, + { + "epoch": 0.36, + "learning_rate": 7.42124517505505e-08, + "logits/chosen": -1.7812440395355225, + "logits/rejected": -1.7240486145019531, + "logps/chosen": -261.3349609375, + "logps/rejected": -390.1381530761719, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.929174780845642, + "rewards/margins": 1.2842620611190796, + "rewards/rejected": 0.6449127197265625, + "step": 6168 + }, + { + "epoch": 0.36, + "learning_rate": 7.420420595719712e-08, + "logits/chosen": -1.9353394508361816, + "logits/rejected": -1.9309046268463135, + "logps/chosen": -186.52899169921875, + "logps/rejected": -300.17889404296875, + "loss": 0.112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6355926990509033, + "rewards/margins": 1.780450463294983, + "rewards/rejected": -0.14485779404640198, + "step": 6169 + }, + { + "epoch": 0.36, + "learning_rate": 7.419595930399194e-08, + "logits/chosen": -1.8634169101715088, + "logits/rejected": -1.8436126708984375, + "logps/chosen": -209.14605712890625, + "logps/rejected": -432.60455322265625, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3375946283340454, + "rewards/margins": 3.3368163108825684, + "rewards/rejected": -1.9992218017578125, + "step": 6170 + }, + { + "epoch": 0.36, + "learning_rate": 7.418771179122793e-08, + "logits/chosen": -2.1199662685394287, + "logits/rejected": -2.092832565307617, + "logps/chosen": -203.36544799804688, + "logps/rejected": -238.50894165039062, + "loss": 0.0787, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2255799770355225, + "rewards/margins": 2.87190580368042, + "rewards/rejected": -0.6463257074356079, + "step": 6171 + }, + { + "epoch": 0.36, + "learning_rate": 7.417946341919808e-08, + "logits/chosen": -2.039832592010498, + "logits/rejected": -1.9972293376922607, + "logps/chosen": -229.497314453125, + "logps/rejected": -408.4290771484375, + "loss": 0.088, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.26654052734375, + "rewards/margins": 2.354412794113159, + "rewards/rejected": -0.08787231892347336, + "step": 6172 + }, + { + "epoch": 0.36, + "learning_rate": 7.417121418819541e-08, + "logits/chosen": -2.087618827819824, + "logits/rejected": -2.0920255184173584, + "logps/chosen": -50.891761779785156, + "logps/rejected": -246.41842651367188, + "loss": 0.9935, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4776134490966797, + "rewards/margins": -0.7396648526191711, + "rewards/rejected": 0.26205140352249146, + "step": 6173 + }, + { + "epoch": 0.36, + "learning_rate": 7.416296409851299e-08, + "logits/chosen": -1.8844337463378906, + "logits/rejected": -1.8685331344604492, + "logps/chosen": -226.03114318847656, + "logps/rejected": -434.5821228027344, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.539692759513855, + "rewards/margins": 2.3040573596954346, + "rewards/rejected": -0.7643646597862244, + "step": 6174 + }, + { + "epoch": 0.36, + "learning_rate": 7.415471315044389e-08, + "logits/chosen": -1.9646074771881104, + "logits/rejected": -1.9595965147018433, + "logps/chosen": -2.7271406650543213, + "logps/rejected": -84.91039276123047, + "loss": 0.551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07110645622015, + "rewards/margins": 0.5079953670501709, + "rewards/rejected": -0.4368889033794403, + "step": 6175 + }, + { + "epoch": 0.36, + "learning_rate": 7.414646134428123e-08, + "logits/chosen": -1.8352434635162354, + "logits/rejected": -1.8362324237823486, + "logps/chosen": -6.594738960266113, + "logps/rejected": -76.91349029541016, + "loss": 0.7776, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12111754715442657, + "rewards/margins": -0.37160706520080566, + "rewards/rejected": 0.49272462725639343, + "step": 6176 + }, + { + "epoch": 0.36, + "learning_rate": 7.413820868031815e-08, + "logits/chosen": -2.0167486667633057, + "logits/rejected": -2.0047383308410645, + "logps/chosen": -7.877677917480469, + "logps/rejected": -139.16468811035156, + "loss": 0.5137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012518787756562233, + "rewards/margins": 0.8774464726448059, + "rewards/rejected": -0.8649277091026306, + "step": 6177 + }, + { + "epoch": 0.36, + "learning_rate": 7.41299551588478e-08, + "logits/chosen": -1.876611351966858, + "logits/rejected": -1.9768232107162476, + "logps/chosen": -343.2861328125, + "logps/rejected": -324.6941223144531, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8694580793380737, + "rewards/margins": 4.894448757171631, + "rewards/rejected": -3.0249907970428467, + "step": 6178 + }, + { + "epoch": 0.36, + "learning_rate": 7.412170078016343e-08, + "logits/chosen": -2.0214414596557617, + "logits/rejected": -2.0120959281921387, + "logps/chosen": -58.18913269042969, + "logps/rejected": -196.6157684326172, + "loss": 0.7963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9650676846504211, + "rewards/margins": 1.176201343536377, + "rewards/rejected": -2.1412689685821533, + "step": 6179 + }, + { + "epoch": 0.36, + "learning_rate": 7.411344554455825e-08, + "logits/chosen": -1.9892991781234741, + "logits/rejected": -1.9867202043533325, + "logps/chosen": -48.067047119140625, + "logps/rejected": -145.40469360351562, + "loss": 0.623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1810939759016037, + "rewards/margins": 0.5069965124130249, + "rewards/rejected": -0.6880905032157898, + "step": 6180 + }, + { + "epoch": 0.36, + "learning_rate": 7.410518945232554e-08, + "logits/chosen": -1.9719171524047852, + "logits/rejected": -1.9668792486190796, + "logps/chosen": -239.0944061279297, + "logps/rejected": -260.82794189453125, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.291998267173767, + "rewards/margins": 0.2940185070037842, + "rewards/rejected": 0.9979797601699829, + "step": 6181 + }, + { + "epoch": 0.36, + "learning_rate": 7.409693250375859e-08, + "logits/chosen": -2.0583322048187256, + "logits/rejected": -2.0650622844696045, + "logps/chosen": -24.9865779876709, + "logps/rejected": -124.33480834960938, + "loss": 0.3817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33613356947898865, + "rewards/margins": 1.04510498046875, + "rewards/rejected": -0.7089714407920837, + "step": 6182 + }, + { + "epoch": 0.36, + "learning_rate": 7.408867469915073e-08, + "logits/chosen": -1.9013679027557373, + "logits/rejected": -1.8866835832595825, + "logps/chosen": -223.06692504882812, + "logps/rejected": -320.5956115722656, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7708450555801392, + "rewards/margins": 0.39563441276550293, + "rewards/rejected": 1.3752106428146362, + "step": 6183 + }, + { + "epoch": 0.36, + "learning_rate": 7.40804160387953e-08, + "logits/chosen": -2.054675817489624, + "logits/rejected": -2.0535049438476562, + "logps/chosen": -24.932762145996094, + "logps/rejected": -157.22291564941406, + "loss": 0.6594, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4087192714214325, + "rewards/margins": -0.29329338669776917, + "rewards/rejected": 0.7020126581192017, + "step": 6184 + }, + { + "epoch": 0.36, + "learning_rate": 7.407215652298571e-08, + "logits/chosen": -2.067873239517212, + "logits/rejected": -2.056009531021118, + "logps/chosen": -32.02034378051758, + "logps/rejected": -95.18932342529297, + "loss": 0.4469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46666526794433594, + "rewards/margins": 0.4436206817626953, + "rewards/rejected": 0.023044586181640625, + "step": 6185 + }, + { + "epoch": 0.36, + "learning_rate": 7.406389615201538e-08, + "logits/chosen": -1.9129654169082642, + "logits/rejected": -1.9680715799331665, + "logps/chosen": -199.35862731933594, + "logps/rejected": -313.0887451171875, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0849366188049316, + "rewards/margins": 3.163626194000244, + "rewards/rejected": -1.0786895751953125, + "step": 6186 + }, + { + "epoch": 0.36, + "learning_rate": 7.405563492617774e-08, + "logits/chosen": -2.2492775917053223, + "logits/rejected": -2.257333278656006, + "logps/chosen": -90.99882507324219, + "logps/rejected": -219.34507751464844, + "loss": 0.1111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3578376770019531, + "rewards/margins": 3.4719245433807373, + "rewards/rejected": -2.114086866378784, + "step": 6187 + }, + { + "epoch": 0.36, + "learning_rate": 7.404737284576626e-08, + "logits/chosen": -1.890272855758667, + "logits/rejected": -1.8475310802459717, + "logps/chosen": -129.95281982421875, + "logps/rejected": -245.7765350341797, + "loss": 0.1855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7096328735351562, + "rewards/margins": 1.3968048095703125, + "rewards/rejected": 0.31282806396484375, + "step": 6188 + }, + { + "epoch": 0.36, + "learning_rate": 7.40391099110745e-08, + "logits/chosen": -1.929830551147461, + "logits/rejected": -1.9267702102661133, + "logps/chosen": -1.426993727684021, + "logps/rejected": -131.35447692871094, + "loss": 0.5033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0850917249917984, + "rewards/margins": 0.8876383900642395, + "rewards/rejected": -0.8025466799736023, + "step": 6189 + }, + { + "epoch": 0.36, + "learning_rate": 7.403084612239595e-08, + "logits/chosen": -2.1268815994262695, + "logits/rejected": -2.1350033283233643, + "logps/chosen": -12.542015075683594, + "logps/rejected": -207.75608825683594, + "loss": 0.6842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5496001243591309, + "rewards/margins": 0.8616776466369629, + "rewards/rejected": -1.4112777709960938, + "step": 6190 + }, + { + "epoch": 0.36, + "learning_rate": 7.40225814800242e-08, + "logits/chosen": -1.9955224990844727, + "logits/rejected": -1.9886692762374878, + "logps/chosen": -21.64783477783203, + "logps/rejected": -65.11555480957031, + "loss": 0.6353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07051658630371094, + "rewards/margins": 0.2759716212749481, + "rewards/rejected": -0.34648820757865906, + "step": 6191 + }, + { + "epoch": 0.36, + "learning_rate": 7.401431598425285e-08, + "logits/chosen": -2.0346617698669434, + "logits/rejected": -2.0367825031280518, + "logps/chosen": -21.24734878540039, + "logps/rejected": -61.05033874511719, + "loss": 0.42, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3740268647670746, + "rewards/margins": 0.9507290124893188, + "rewards/rejected": -0.5767021179199219, + "step": 6192 + }, + { + "epoch": 0.36, + "learning_rate": 7.400604963537555e-08, + "logits/chosen": -2.0034096240997314, + "logits/rejected": -2.0311264991760254, + "logps/chosen": -238.80743408203125, + "logps/rejected": -242.17306518554688, + "loss": 0.0446, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.013079881668091, + "rewards/margins": 2.651301622390747, + "rewards/rejected": 0.36177825927734375, + "step": 6193 + }, + { + "epoch": 0.36, + "learning_rate": 7.399778243368591e-08, + "logits/chosen": -2.0320684909820557, + "logits/rejected": -2.008634328842163, + "logps/chosen": -190.0843963623047, + "logps/rejected": -489.6851806640625, + "loss": 0.1221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.749049425125122, + "rewards/margins": 2.199549913406372, + "rewards/rejected": -0.45050048828125, + "step": 6194 + }, + { + "epoch": 0.36, + "learning_rate": 7.398951437947766e-08, + "logits/chosen": -1.9833095073699951, + "logits/rejected": -1.9884241819381714, + "logps/chosen": -1.439056158065796, + "logps/rejected": -88.96337890625, + "loss": 0.5443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026208674535155296, + "rewards/margins": 0.4514123797416687, + "rewards/rejected": -0.42520371079444885, + "step": 6195 + }, + { + "epoch": 0.36, + "learning_rate": 7.398124547304452e-08, + "logits/chosen": -2.03216552734375, + "logits/rejected": -2.014528274536133, + "logps/chosen": -138.15274047851562, + "logps/rejected": -334.7703857421875, + "loss": 0.2495, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0286682844161987, + "rewards/margins": 1.9155519008636475, + "rewards/rejected": -0.886883556842804, + "step": 6196 + }, + { + "epoch": 0.36, + "learning_rate": 7.397297571468024e-08, + "logits/chosen": -1.8249528408050537, + "logits/rejected": -1.8221874237060547, + "logps/chosen": -189.79348754882812, + "logps/rejected": -318.26690673828125, + "loss": 0.2384, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2594817876815796, + "rewards/margins": 1.186926245689392, + "rewards/rejected": 0.0725555419921875, + "step": 6197 + }, + { + "epoch": 0.36, + "learning_rate": 7.396470510467858e-08, + "logits/chosen": -2.0261683464050293, + "logits/rejected": -1.9921424388885498, + "logps/chosen": -49.46080017089844, + "logps/rejected": -379.15789794921875, + "loss": 0.1719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6702251434326172, + "rewards/margins": 4.4505720138549805, + "rewards/rejected": -3.780346632003784, + "step": 6198 + }, + { + "epoch": 0.36, + "learning_rate": 7.395643364333338e-08, + "logits/chosen": -1.8065685033798218, + "logits/rejected": -1.7471990585327148, + "logps/chosen": -180.593017578125, + "logps/rejected": -422.78314208984375, + "loss": 0.2563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7708053588867188, + "rewards/margins": 1.6649643182754517, + "rewards/rejected": -0.8941589593887329, + "step": 6199 + }, + { + "epoch": 0.36, + "learning_rate": 7.394816133093847e-08, + "logits/chosen": -2.125502586364746, + "logits/rejected": -2.158595323562622, + "logps/chosen": -220.60679626464844, + "logps/rejected": -243.4261474609375, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9175888299942017, + "rewards/margins": 0.25211334228515625, + "rewards/rejected": 1.6654754877090454, + "step": 6200 + }, + { + "epoch": 0.36, + "learning_rate": 7.393988816778774e-08, + "logits/chosen": -1.879016637802124, + "logits/rejected": -1.8615808486938477, + "logps/chosen": -187.7386016845703, + "logps/rejected": -301.04364013671875, + "loss": 0.1239, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0286271572113037, + "rewards/margins": 1.6333024501800537, + "rewards/rejected": 0.39532470703125, + "step": 6201 + }, + { + "epoch": 0.36, + "learning_rate": 7.393161415417508e-08, + "logits/chosen": -1.9992125034332275, + "logits/rejected": -1.9955198764801025, + "logps/chosen": -43.643402099609375, + "logps/rejected": -191.1546173095703, + "loss": 0.4094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06014060974121094, + "rewards/margins": 2.423860549926758, + "rewards/rejected": -2.4840011596679688, + "step": 6202 + }, + { + "epoch": 0.36, + "learning_rate": 7.39233392903944e-08, + "logits/chosen": -1.9185259342193604, + "logits/rejected": -1.8988386392593384, + "logps/chosen": -134.937744140625, + "logps/rejected": -219.37408447265625, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8049835562705994, + "rewards/margins": 0.7283920645713806, + "rewards/rejected": 0.07659149169921875, + "step": 6203 + }, + { + "epoch": 0.36, + "learning_rate": 7.39150635767397e-08, + "logits/chosen": -1.9759559631347656, + "logits/rejected": -1.9610822200775146, + "logps/chosen": -101.5733642578125, + "logps/rejected": -325.856689453125, + "loss": 0.2783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3975624144077301, + "rewards/margins": 2.25849986076355, + "rewards/rejected": -1.860937476158142, + "step": 6204 + }, + { + "epoch": 0.36, + "learning_rate": 7.390678701350497e-08, + "logits/chosen": -1.7864022254943848, + "logits/rejected": -1.7163678407669067, + "logps/chosen": -189.3232879638672, + "logps/rejected": -449.0411376953125, + "loss": 0.1664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4959319829940796, + "rewards/margins": 1.7248259782791138, + "rewards/rejected": -0.22889403998851776, + "step": 6205 + }, + { + "epoch": 0.36, + "learning_rate": 7.389850960098422e-08, + "logits/chosen": -1.9373477697372437, + "logits/rejected": -1.9233006238937378, + "logps/chosen": -220.90109252929688, + "logps/rejected": -442.04022216796875, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9904022216796875, + "rewards/margins": 2.8683502674102783, + "rewards/rejected": -0.877947986125946, + "step": 6206 + }, + { + "epoch": 0.36, + "learning_rate": 7.38902313394715e-08, + "logits/chosen": -2.0911784172058105, + "logits/rejected": -2.0974557399749756, + "logps/chosen": -18.18832015991211, + "logps/rejected": -210.25924682617188, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09048614650964737, + "rewards/margins": 2.920832872390747, + "rewards/rejected": -2.8303468227386475, + "step": 6207 + }, + { + "epoch": 0.36, + "learning_rate": 7.388195222926091e-08, + "logits/chosen": -1.9552574157714844, + "logits/rejected": -1.974621057510376, + "logps/chosen": -205.0232391357422, + "logps/rejected": -385.94720458984375, + "loss": 0.0513, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0366973876953125, + "rewards/margins": 3.144052028656006, + "rewards/rejected": -1.107354760169983, + "step": 6208 + }, + { + "epoch": 0.36, + "learning_rate": 7.387367227064655e-08, + "logits/chosen": -1.9689507484436035, + "logits/rejected": -1.9571305513381958, + "logps/chosen": -78.07402038574219, + "logps/rejected": -322.12542724609375, + "loss": 0.4025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4657302796840668, + "rewards/margins": 0.7265182733535767, + "rewards/rejected": -0.2607879638671875, + "step": 6209 + }, + { + "epoch": 0.36, + "learning_rate": 7.386539146392259e-08, + "logits/chosen": -2.0439045429229736, + "logits/rejected": -2.0144872665405273, + "logps/chosen": -179.4648895263672, + "logps/rejected": -407.2707214355469, + "loss": 0.1735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7910064458847046, + "rewards/margins": 1.1536468267440796, + "rewards/rejected": 0.637359619140625, + "step": 6210 + }, + { + "epoch": 0.36, + "learning_rate": 7.385710980938316e-08, + "logits/chosen": -2.0576705932617188, + "logits/rejected": -2.049640655517578, + "logps/chosen": -0.016316566616296768, + "logps/rejected": -178.7263641357422, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007410416728816926, + "rewards/margins": 1.1620458364486694, + "rewards/rejected": -1.162786841392517, + "step": 6211 + }, + { + "epoch": 0.36, + "learning_rate": 7.384882730732252e-08, + "logits/chosen": -1.8429080247879028, + "logits/rejected": -1.8426405191421509, + "logps/chosen": -213.99853515625, + "logps/rejected": -377.7109375, + "loss": 0.0969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9163696765899658, + "rewards/margins": 2.0059814453125, + "rewards/rejected": -0.08961182087659836, + "step": 6212 + }, + { + "epoch": 0.36, + "learning_rate": 7.384054395803486e-08, + "logits/chosen": -2.230605363845825, + "logits/rejected": -2.2288429737091064, + "logps/chosen": -8.312780380249023, + "logps/rejected": -156.60360717773438, + "loss": 0.5251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.062197495251894, + "rewards/margins": 0.8057529330253601, + "rewards/rejected": -0.867950439453125, + "step": 6213 + }, + { + "epoch": 0.36, + "learning_rate": 7.383225976181447e-08, + "logits/chosen": -2.1477584838867188, + "logits/rejected": -2.1300628185272217, + "logps/chosen": -18.351572036743164, + "logps/rejected": -220.69189453125, + "loss": 0.2427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5274984240531921, + "rewards/margins": 2.573436737060547, + "rewards/rejected": -2.04593825340271, + "step": 6214 + }, + { + "epoch": 0.36, + "learning_rate": 7.382397471895563e-08, + "logits/chosen": -2.018883228302002, + "logits/rejected": -2.012286424636841, + "logps/chosen": -10.507291793823242, + "logps/rejected": -185.6561279296875, + "loss": 0.3542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18878403306007385, + "rewards/margins": 2.158618927001953, + "rewards/rejected": -1.9698349237442017, + "step": 6215 + }, + { + "epoch": 0.36, + "learning_rate": 7.381568882975266e-08, + "logits/chosen": -1.756020188331604, + "logits/rejected": -1.7529041767120361, + "logps/chosen": -196.6749267578125, + "logps/rejected": -399.0865478515625, + "loss": 0.1263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2874313592910767, + "rewards/margins": 1.917585849761963, + "rewards/rejected": -0.6301544308662415, + "step": 6216 + }, + { + "epoch": 0.36, + "learning_rate": 7.380740209449995e-08, + "logits/chosen": -1.997829794883728, + "logits/rejected": -1.9928077459335327, + "logps/chosen": -13.369059562683105, + "logps/rejected": -147.76937866210938, + "loss": 0.5939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24413509666919708, + "rewards/margins": 0.25145626068115234, + "rewards/rejected": -0.007321167271584272, + "step": 6217 + }, + { + "epoch": 0.36, + "learning_rate": 7.379911451349186e-08, + "logits/chosen": -2.011927604675293, + "logits/rejected": -2.03680157661438, + "logps/chosen": -218.46971130371094, + "logps/rejected": -424.6192626953125, + "loss": 0.1889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8660629987716675, + "rewards/margins": 1.0931870937347412, + "rewards/rejected": 0.772875964641571, + "step": 6218 + }, + { + "epoch": 0.36, + "learning_rate": 7.37908260870228e-08, + "logits/chosen": -2.0641610622406006, + "logits/rejected": -2.061034917831421, + "logps/chosen": -13.906509399414062, + "logps/rejected": -252.094482421875, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06570930778980255, + "rewards/margins": 1.8958942890167236, + "rewards/rejected": -1.8301849365234375, + "step": 6219 + }, + { + "epoch": 0.36, + "learning_rate": 7.378253681538723e-08, + "logits/chosen": -2.1041274070739746, + "logits/rejected": -2.1046645641326904, + "logps/chosen": -41.099205017089844, + "logps/rejected": -119.81355285644531, + "loss": 0.9503, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.39407578110694885, + "rewards/margins": -0.3001716732978821, + "rewards/rejected": -0.09390411525964737, + "step": 6220 + }, + { + "epoch": 0.36, + "learning_rate": 7.377424669887962e-08, + "logits/chosen": -1.9137353897094727, + "logits/rejected": -1.9767552614212036, + "logps/chosen": -334.3422546386719, + "logps/rejected": -409.47113037109375, + "loss": 0.0825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6984955072402954, + "rewards/margins": 2.4954559803009033, + "rewards/rejected": -0.7969604730606079, + "step": 6221 + }, + { + "epoch": 0.36, + "learning_rate": 7.376595573779447e-08, + "logits/chosen": -2.0863940715789795, + "logits/rejected": -2.0807392597198486, + "logps/chosen": -8.642510510981083e-05, + "logps/rejected": -122.6063232421875, + "loss": 0.4947, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.329383268166566e-06, + "rewards/margins": 1.0255885124206543, + "rewards/rejected": -1.025582194328308, + "step": 6222 + }, + { + "epoch": 0.36, + "learning_rate": 7.375766393242632e-08, + "logits/chosen": -2.0391781330108643, + "logits/rejected": -2.0354039669036865, + "logps/chosen": -10.124053955078125, + "logps/rejected": -276.1883544921875, + "loss": 0.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0964130386710167, + "rewards/margins": 2.9893712997436523, + "rewards/rejected": -2.892958164215088, + "step": 6223 + }, + { + "epoch": 0.36, + "learning_rate": 7.374937128306973e-08, + "logits/chosen": -2.099795341491699, + "logits/rejected": -2.1018662452697754, + "logps/chosen": -11.04316520690918, + "logps/rejected": -48.19475173950195, + "loss": 0.6608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09017162770032883, + "rewards/margins": 0.30308055877685547, + "rewards/rejected": -0.3932521939277649, + "step": 6224 + }, + { + "epoch": 0.36, + "learning_rate": 7.374107779001932e-08, + "logits/chosen": -2.1318423748016357, + "logits/rejected": -2.1277902126312256, + "logps/chosen": -31.029375076293945, + "logps/rejected": -235.31333923339844, + "loss": 0.3792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0810735747218132, + "rewards/margins": 2.323702096939087, + "rewards/rejected": -2.242628574371338, + "step": 6225 + }, + { + "epoch": 0.36, + "learning_rate": 7.373278345356968e-08, + "logits/chosen": -1.703157901763916, + "logits/rejected": -1.6870919466018677, + "logps/chosen": -3.8715734481811523, + "logps/rejected": -206.78500366210938, + "loss": 0.3962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07514210045337677, + "rewards/margins": 1.5240038633346558, + "rewards/rejected": -1.4488617181777954, + "step": 6226 + }, + { + "epoch": 0.36, + "learning_rate": 7.372448827401549e-08, + "logits/chosen": -1.820422887802124, + "logits/rejected": -1.8214131593704224, + "logps/chosen": -83.12726593017578, + "logps/rejected": -252.88888549804688, + "loss": 0.2157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5554878115653992, + "rewards/margins": 2.7446556091308594, + "rewards/rejected": -2.1891677379608154, + "step": 6227 + }, + { + "epoch": 0.36, + "learning_rate": 7.371619225165142e-08, + "logits/chosen": -2.152693510055542, + "logits/rejected": -2.1456167697906494, + "logps/chosen": -6.577899932861328, + "logps/rejected": -246.43785095214844, + "loss": 0.3543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30464181303977966, + "rewards/margins": 1.816206455230713, + "rewards/rejected": -1.5115646123886108, + "step": 6228 + }, + { + "epoch": 0.36, + "learning_rate": 7.370789538677221e-08, + "logits/chosen": -1.8506847620010376, + "logits/rejected": -1.832351565361023, + "logps/chosen": -217.91183471679688, + "logps/rejected": -375.3119812011719, + "loss": 0.1825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7132843732833862, + "rewards/margins": 1.152108907699585, + "rewards/rejected": 0.561175525188446, + "step": 6229 + }, + { + "epoch": 0.36, + "learning_rate": 7.369959767967258e-08, + "logits/chosen": -1.751156210899353, + "logits/rejected": -1.7388721704483032, + "logps/chosen": -62.95679473876953, + "logps/rejected": -155.3599090576172, + "loss": 0.2822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8434562683105469, + "rewards/margins": 1.0998634099960327, + "rewards/rejected": -0.2564071714878082, + "step": 6230 + }, + { + "epoch": 0.36, + "learning_rate": 7.369129913064731e-08, + "logits/chosen": -1.9851675033569336, + "logits/rejected": -1.9762353897094727, + "logps/chosen": -49.78425598144531, + "logps/rejected": -394.6227722167969, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17193832993507385, + "rewards/margins": 4.6920647621154785, + "rewards/rejected": -4.5201263427734375, + "step": 6231 + }, + { + "epoch": 0.36, + "learning_rate": 7.36829997399912e-08, + "logits/chosen": -1.875849723815918, + "logits/rejected": -1.8746628761291504, + "logps/chosen": -39.02789306640625, + "logps/rejected": -250.70819091796875, + "loss": 0.1731, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0241211652755737, + "rewards/margins": 2.5635499954223633, + "rewards/rejected": -1.5394287109375, + "step": 6232 + }, + { + "epoch": 0.36, + "learning_rate": 7.36746995079991e-08, + "logits/chosen": -1.9675569534301758, + "logits/rejected": -2.0283026695251465, + "logps/chosen": -230.13900756835938, + "logps/rejected": -389.96685791015625, + "loss": 0.2554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4762085676193237, + "rewards/margins": 0.6269440054893494, + "rewards/rejected": 0.8492645621299744, + "step": 6233 + }, + { + "epoch": 0.36, + "learning_rate": 7.366639843496586e-08, + "logits/chosen": -1.9305692911148071, + "logits/rejected": -1.9199109077453613, + "logps/chosen": -65.98329162597656, + "logps/rejected": -288.7181701660156, + "loss": 0.4565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14322815835475922, + "rewards/margins": 1.5231444835662842, + "rewards/rejected": -1.6663726568222046, + "step": 6234 + }, + { + "epoch": 0.36, + "learning_rate": 7.36580965211864e-08, + "logits/chosen": -2.059887647628784, + "logits/rejected": -2.0498576164245605, + "logps/chosen": -26.179107666015625, + "logps/rejected": -99.62545013427734, + "loss": 0.4787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20049534738063812, + "rewards/margins": 0.6350992321968079, + "rewards/rejected": -0.43460389971733093, + "step": 6235 + }, + { + "epoch": 0.36, + "learning_rate": 7.364979376695562e-08, + "logits/chosen": -1.873002529144287, + "logits/rejected": -1.8116940259933472, + "logps/chosen": -225.91578674316406, + "logps/rejected": -360.53521728515625, + "loss": 0.4606, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.004876732826233, + "rewards/margins": 0.160003662109375, + "rewards/rejected": 0.8448730707168579, + "step": 6236 + }, + { + "epoch": 0.36, + "learning_rate": 7.364149017256848e-08, + "logits/chosen": -1.9862585067749023, + "logits/rejected": -1.9870009422302246, + "logps/chosen": -2.267719268798828, + "logps/rejected": -15.634676933288574, + "loss": 0.6668, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.033037614077329636, + "rewards/margins": -0.007794618606567383, + "rewards/rejected": 0.04083223268389702, + "step": 6237 + }, + { + "epoch": 0.36, + "learning_rate": 7.363318573831997e-08, + "logits/chosen": -1.8956815004348755, + "logits/rejected": -1.8944745063781738, + "logps/chosen": -7.212079071905464e-05, + "logps/rejected": -152.8150634765625, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.814326134943258e-07, + "rewards/margins": 3.0180776119232178, + "rewards/rejected": -3.0180771350860596, + "step": 6238 + }, + { + "epoch": 0.36, + "learning_rate": 7.36248804645051e-08, + "logits/chosen": -2.0632338523864746, + "logits/rejected": -2.0416576862335205, + "logps/chosen": -155.55970764160156, + "logps/rejected": -273.5117492675781, + "loss": 0.223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7516708374023438, + "rewards/margins": 2.0223283767700195, + "rewards/rejected": -1.2706574201583862, + "step": 6239 + }, + { + "epoch": 0.36, + "learning_rate": 7.36165743514189e-08, + "logits/chosen": -2.087446689605713, + "logits/rejected": -2.046722888946533, + "logps/chosen": -186.16041564941406, + "logps/rejected": -284.43701171875, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8568649291992188, + "rewards/margins": 0.8614883422851562, + "rewards/rejected": -0.0046234130859375, + "step": 6240 + }, + { + "epoch": 0.36, + "learning_rate": 7.360826739935647e-08, + "logits/chosen": -2.119156837463379, + "logits/rejected": -2.1101021766662598, + "logps/chosen": -4.176797389984131, + "logps/rejected": -126.18196105957031, + "loss": 0.7867, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.30349135398864746, + "rewards/margins": -0.1250840574502945, + "rewards/rejected": -0.17840729653835297, + "step": 6241 + }, + { + "epoch": 0.36, + "learning_rate": 7.35999596086129e-08, + "logits/chosen": -2.0941665172576904, + "logits/rejected": -2.0993075370788574, + "logps/chosen": -27.11184310913086, + "logps/rejected": -192.79356384277344, + "loss": 0.445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0509759895503521, + "rewards/margins": 1.369362473487854, + "rewards/rejected": -1.42033851146698, + "step": 6242 + }, + { + "epoch": 0.36, + "learning_rate": 7.359165097948332e-08, + "logits/chosen": -1.9661370515823364, + "logits/rejected": -1.9514271020889282, + "logps/chosen": -25.903244018554688, + "logps/rejected": -184.80947875976562, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1492786407470703, + "rewards/margins": 1.0457401275634766, + "rewards/rejected": 0.10353851318359375, + "step": 6243 + }, + { + "epoch": 0.36, + "learning_rate": 7.35833415122629e-08, + "logits/chosen": -1.9579001665115356, + "logits/rejected": -1.9564889669418335, + "logps/chosen": -71.7830581665039, + "logps/rejected": -184.11671447753906, + "loss": 0.4543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18482665717601776, + "rewards/margins": 0.8779953122138977, + "rewards/rejected": -0.6931686401367188, + "step": 6244 + }, + { + "epoch": 0.36, + "learning_rate": 7.357503120724684e-08, + "logits/chosen": -1.951848030090332, + "logits/rejected": -1.948288083076477, + "logps/chosen": -5.547569274902344, + "logps/rejected": -173.86346435546875, + "loss": 0.4043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08606719970703125, + "rewards/margins": 1.6852158308029175, + "rewards/rejected": -1.5991486310958862, + "step": 6245 + }, + { + "epoch": 0.36, + "learning_rate": 7.356672006473035e-08, + "logits/chosen": -1.8622878789901733, + "logits/rejected": -1.852757453918457, + "logps/chosen": -42.15266418457031, + "logps/rejected": -307.9967041015625, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14014244079589844, + "rewards/margins": 2.1122922897338867, + "rewards/rejected": -1.9721497297286987, + "step": 6246 + }, + { + "epoch": 0.36, + "learning_rate": 7.355840808500868e-08, + "logits/chosen": -1.7948169708251953, + "logits/rejected": -1.7815191745758057, + "logps/chosen": -199.42254638671875, + "logps/rejected": -354.6639404296875, + "loss": 0.3554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2416870594024658, + "rewards/margins": 0.2253967523574829, + "rewards/rejected": 1.016290307044983, + "step": 6247 + }, + { + "epoch": 0.36, + "learning_rate": 7.355009526837713e-08, + "logits/chosen": -1.8421157598495483, + "logits/rejected": -1.8282666206359863, + "logps/chosen": -210.49777221679688, + "logps/rejected": -295.6646423339844, + "loss": 0.2254, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.318792700767517, + "rewards/margins": 1.322973608970642, + "rewards/rejected": -0.004180908203125, + "step": 6248 + }, + { + "epoch": 0.36, + "learning_rate": 7.3541781615131e-08, + "logits/chosen": -2.165912389755249, + "logits/rejected": -2.165529251098633, + "logps/chosen": -3.05812668800354, + "logps/rejected": -0.8245086073875427, + "loss": 0.7063, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.034461069852113724, + "rewards/margins": -0.018940966576337814, + "rewards/rejected": -0.015520102344453335, + "step": 6249 + }, + { + "epoch": 0.36, + "learning_rate": 7.353346712556562e-08, + "logits/chosen": -1.9332218170166016, + "logits/rejected": -1.9204602241516113, + "logps/chosen": -171.1412353515625, + "logps/rejected": -326.10076904296875, + "loss": 0.2489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5538452863693237, + "rewards/margins": 0.8839203715324402, + "rewards/rejected": 0.6699249148368835, + "step": 6250 + }, + { + "epoch": 0.36, + "learning_rate": 7.352515179997637e-08, + "logits/chosen": -2.047091245651245, + "logits/rejected": -2.0943963527679443, + "logps/chosen": -165.05587768554688, + "logps/rejected": -304.2580261230469, + "loss": 0.0632, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.111401319503784, + "rewards/margins": 2.617410182952881, + "rewards/rejected": -0.5060089230537415, + "step": 6251 + }, + { + "epoch": 0.36, + "learning_rate": 7.351683563865867e-08, + "logits/chosen": -1.8519383668899536, + "logits/rejected": -1.8282103538513184, + "logps/chosen": -335.3442077636719, + "logps/rejected": -513.3241577148438, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8757294416427612, + "rewards/margins": 4.254891872406006, + "rewards/rejected": -2.379162549972534, + "step": 6252 + }, + { + "epoch": 0.36, + "learning_rate": 7.350851864190793e-08, + "logits/chosen": -2.0424041748046875, + "logits/rejected": -2.042515754699707, + "logps/chosen": -67.79846954345703, + "logps/rejected": -265.1841735839844, + "loss": 0.3906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16053619980812073, + "rewards/margins": 1.5523834228515625, + "rewards/rejected": -1.3918472528457642, + "step": 6253 + }, + { + "epoch": 0.36, + "learning_rate": 7.35002008100196e-08, + "logits/chosen": -1.9648860692977905, + "logits/rejected": -1.9989396333694458, + "logps/chosen": -281.6134338378906, + "logps/rejected": -437.03582763671875, + "loss": 0.113, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1712615489959717, + "rewards/margins": 1.6585357189178467, + "rewards/rejected": 0.512725830078125, + "step": 6254 + }, + { + "epoch": 0.36, + "learning_rate": 7.34918821432892e-08, + "logits/chosen": -1.9961137771606445, + "logits/rejected": -1.9966299533843994, + "logps/chosen": -0.16302689909934998, + "logps/rejected": -192.24436950683594, + "loss": 0.3743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01540179830044508, + "rewards/margins": 2.464585542678833, + "rewards/rejected": -2.449183702468872, + "step": 6255 + }, + { + "epoch": 0.36, + "learning_rate": 7.348356264201223e-08, + "logits/chosen": -2.1476471424102783, + "logits/rejected": -2.1564059257507324, + "logps/chosen": -6.343069553375244, + "logps/rejected": -207.20095825195312, + "loss": 0.3052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18391433358192444, + "rewards/margins": 4.207406520843506, + "rewards/rejected": -4.023492336273193, + "step": 6256 + }, + { + "epoch": 0.36, + "learning_rate": 7.347524230648425e-08, + "logits/chosen": -1.8825103044509888, + "logits/rejected": -1.9959046840667725, + "logps/chosen": -255.7222900390625, + "logps/rejected": -307.8009033203125, + "loss": 0.1522, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.846826195716858, + "rewards/margins": 1.175408959388733, + "rewards/rejected": 0.671417236328125, + "step": 6257 + }, + { + "epoch": 0.36, + "learning_rate": 7.346692113700083e-08, + "logits/chosen": -1.8973363637924194, + "logits/rejected": -1.8859267234802246, + "logps/chosen": -166.45712280273438, + "logps/rejected": -300.1590881347656, + "loss": 0.2215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6165664792060852, + "rewards/margins": 2.529331922531128, + "rewards/rejected": -1.9127655029296875, + "step": 6258 + }, + { + "epoch": 0.36, + "learning_rate": 7.345859913385757e-08, + "logits/chosen": -2.1655821800231934, + "logits/rejected": -2.165290594100952, + "logps/chosen": -6.871975421905518, + "logps/rejected": -77.55844116210938, + "loss": 0.6366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005828857538290322, + "rewards/margins": 0.19375306367874146, + "rewards/rejected": -0.19317017495632172, + "step": 6259 + }, + { + "epoch": 0.36, + "learning_rate": 7.345027629735014e-08, + "logits/chosen": -1.8355438709259033, + "logits/rejected": -1.8314718008041382, + "logps/chosen": -27.6026611328125, + "logps/rejected": -149.7812042236328, + "loss": 0.5845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5319433212280273, + "rewards/margins": 1.0838450193405151, + "rewards/rejected": -1.6157883405685425, + "step": 6260 + }, + { + "epoch": 0.36, + "learning_rate": 7.344195262777417e-08, + "logits/chosen": -2.0004093647003174, + "logits/rejected": -1.9573005437850952, + "logps/chosen": -226.52490234375, + "logps/rejected": -239.91571044921875, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2913696765899658, + "rewards/margins": 0.9463913440704346, + "rewards/rejected": 0.34497833251953125, + "step": 6261 + }, + { + "epoch": 0.36, + "learning_rate": 7.343362812542538e-08, + "logits/chosen": -1.8543212413787842, + "logits/rejected": -1.887332558631897, + "logps/chosen": -287.81121826171875, + "logps/rejected": -288.4133605957031, + "loss": 0.2435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.457403540611267, + "rewards/margins": 0.7892852425575256, + "rewards/rejected": 0.6681182980537415, + "step": 6262 + }, + { + "epoch": 0.36, + "learning_rate": 7.34253027905995e-08, + "logits/chosen": -1.9273449182510376, + "logits/rejected": -1.9759681224822998, + "logps/chosen": -201.5343017578125, + "logps/rejected": -268.1189880371094, + "loss": 0.2218, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6954926252365112, + "rewards/margins": 0.8472046256065369, + "rewards/rejected": 0.8482879996299744, + "step": 6263 + }, + { + "epoch": 0.36, + "learning_rate": 7.341697662359228e-08, + "logits/chosen": -1.9790716171264648, + "logits/rejected": -1.9778567552566528, + "logps/chosen": -0.0002882390981540084, + "logps/rejected": -90.5658950805664, + "loss": 0.4947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0726158734541968e-06, + "rewards/margins": 1.0985660552978516, + "rewards/rejected": -1.0985649824142456, + "step": 6264 + }, + { + "epoch": 0.36, + "learning_rate": 7.340864962469952e-08, + "logits/chosen": -1.8283171653747559, + "logits/rejected": -1.8287935256958008, + "logps/chosen": -0.012970702722668648, + "logps/rejected": -157.19943237304688, + "loss": 0.432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028704843134619296, + "rewards/margins": 1.5274137258529663, + "rewards/rejected": -1.5277007818222046, + "step": 6265 + }, + { + "epoch": 0.36, + "learning_rate": 7.340032179421698e-08, + "logits/chosen": -2.0588974952697754, + "logits/rejected": -2.0596628189086914, + "logps/chosen": -13.632467269897461, + "logps/rejected": -80.13780212402344, + "loss": 0.4594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.057140350341796875, + "rewards/margins": 1.1785019636154175, + "rewards/rejected": -1.1213616132736206, + "step": 6266 + }, + { + "epoch": 0.36, + "learning_rate": 7.339199313244059e-08, + "logits/chosen": -2.0882537364959717, + "logits/rejected": -2.083843231201172, + "logps/chosen": -49.66207504272461, + "logps/rejected": -127.48988342285156, + "loss": 0.3259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5235851407051086, + "rewards/margins": 1.19871187210083, + "rewards/rejected": -0.6751266717910767, + "step": 6267 + }, + { + "epoch": 0.36, + "learning_rate": 7.338366363966616e-08, + "logits/chosen": -1.9005111455917358, + "logits/rejected": -1.8888583183288574, + "logps/chosen": -276.0123596191406, + "logps/rejected": -433.68182373046875, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8573273420333862, + "rewards/margins": 4.369967937469482, + "rewards/rejected": -2.5126404762268066, + "step": 6268 + }, + { + "epoch": 0.36, + "learning_rate": 7.337533331618963e-08, + "logits/chosen": -2.115971088409424, + "logits/rejected": -2.1004416942596436, + "logps/chosen": -14.236440658569336, + "logps/rejected": -199.06802368164062, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17752361297607422, + "rewards/margins": 3.383039712905884, + "rewards/rejected": -3.2055160999298096, + "step": 6269 + }, + { + "epoch": 0.36, + "learning_rate": 7.33670021623069e-08, + "logits/chosen": -1.8306280374526978, + "logits/rejected": -1.7505638599395752, + "logps/chosen": -144.63314819335938, + "logps/rejected": -440.7933044433594, + "loss": 0.5585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3685241639614105, + "rewards/margins": 0.33323973417282104, + "rewards/rejected": 0.03528442606329918, + "step": 6270 + }, + { + "epoch": 0.36, + "learning_rate": 7.335867017831396e-08, + "logits/chosen": -2.0679094791412354, + "logits/rejected": -2.1005795001983643, + "logps/chosen": -158.53070068359375, + "logps/rejected": -247.84075927734375, + "loss": 0.3759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1762787103652954, + "rewards/margins": 0.45493316650390625, + "rewards/rejected": 0.7213455438613892, + "step": 6271 + }, + { + "epoch": 0.36, + "learning_rate": 7.33503373645068e-08, + "logits/chosen": -2.149409770965576, + "logits/rejected": -2.1479289531707764, + "logps/chosen": -71.36500549316406, + "logps/rejected": -234.59710693359375, + "loss": 0.2001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7270294427871704, + "rewards/margins": 2.238482713699341, + "rewards/rejected": -1.5114532709121704, + "step": 6272 + }, + { + "epoch": 0.37, + "learning_rate": 7.334200372118143e-08, + "logits/chosen": -1.9103646278381348, + "logits/rejected": -1.908787488937378, + "logps/chosen": -18.114988327026367, + "logps/rejected": -171.1912841796875, + "loss": 0.5422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09177475422620773, + "rewards/margins": 0.3479301631450653, + "rewards/rejected": -0.256155401468277, + "step": 6273 + }, + { + "epoch": 0.37, + "learning_rate": 7.333366924863391e-08, + "logits/chosen": -1.772912859916687, + "logits/rejected": -1.777159333229065, + "logps/chosen": -23.101930618286133, + "logps/rejected": -188.44061279296875, + "loss": 0.5379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5618749856948853, + "rewards/margins": 1.321245789527893, + "rewards/rejected": -1.8831207752227783, + "step": 6274 + }, + { + "epoch": 0.37, + "learning_rate": 7.332533394716033e-08, + "logits/chosen": -2.0266354084014893, + "logits/rejected": -2.029430389404297, + "logps/chosen": -15.498876571655273, + "logps/rejected": -66.45780944824219, + "loss": 0.4715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009581947699189186, + "rewards/margins": 1.232927918434143, + "rewards/rejected": -1.2233459949493408, + "step": 6275 + }, + { + "epoch": 0.37, + "learning_rate": 7.331699781705678e-08, + "logits/chosen": -2.1611955165863037, + "logits/rejected": -2.150059938430786, + "logps/chosen": -40.56190490722656, + "logps/rejected": -318.55462646484375, + "loss": 0.2332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3564155697822571, + "rewards/margins": 4.78626823425293, + "rewards/rejected": -4.429852485656738, + "step": 6276 + }, + { + "epoch": 0.37, + "learning_rate": 7.330866085861941e-08, + "logits/chosen": -2.0488405227661133, + "logits/rejected": -2.0415360927581787, + "logps/chosen": -45.47995376586914, + "logps/rejected": -415.98590087890625, + "loss": 0.2443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45255205035209656, + "rewards/margins": 3.5715768337249756, + "rewards/rejected": -3.1190247535705566, + "step": 6277 + }, + { + "epoch": 0.37, + "learning_rate": 7.330032307214441e-08, + "logits/chosen": -2.1488800048828125, + "logits/rejected": -2.197798252105713, + "logps/chosen": -208.5740509033203, + "logps/rejected": -428.218017578125, + "loss": 0.1041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9012222290039062, + "rewards/margins": 4.987251281738281, + "rewards/rejected": -4.086029052734375, + "step": 6278 + }, + { + "epoch": 0.37, + "learning_rate": 7.329198445792794e-08, + "logits/chosen": -2.0907394886016846, + "logits/rejected": -2.06843900680542, + "logps/chosen": -85.04485321044922, + "logps/rejected": -356.4438171386719, + "loss": 0.8738, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7131454944610596, + "rewards/margins": 3.1081559658050537, + "rewards/rejected": -4.821301460266113, + "step": 6279 + }, + { + "epoch": 0.37, + "learning_rate": 7.328364501626625e-08, + "logits/chosen": -1.5905654430389404, + "logits/rejected": -1.5993708372116089, + "logps/chosen": -0.06753961741924286, + "logps/rejected": -150.22689819335938, + "loss": 0.4552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012654460733756423, + "rewards/margins": 1.223865270614624, + "rewards/rejected": -1.2225998640060425, + "step": 6280 + }, + { + "epoch": 0.37, + "learning_rate": 7.327530474745563e-08, + "logits/chosen": -2.1135716438293457, + "logits/rejected": -2.11539888381958, + "logps/chosen": -0.012162578292191029, + "logps/rejected": -140.66824340820312, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005137705011293292, + "rewards/margins": 2.529381275177002, + "rewards/rejected": -2.529895067214966, + "step": 6281 + }, + { + "epoch": 0.37, + "learning_rate": 7.32669636517923e-08, + "logits/chosen": -2.1768603324890137, + "logits/rejected": -2.1708521842956543, + "logps/chosen": -0.8293335437774658, + "logps/rejected": -210.72119140625, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08742695301771164, + "rewards/margins": 3.1569247245788574, + "rewards/rejected": -3.06949782371521, + "step": 6282 + }, + { + "epoch": 0.37, + "learning_rate": 7.325862172957261e-08, + "logits/chosen": -1.9626020193099976, + "logits/rejected": -1.9193569421768188, + "logps/chosen": -206.62734985351562, + "logps/rejected": -592.4092407226562, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3899673223495483, + "rewards/margins": 6.885982036590576, + "rewards/rejected": -5.496014595031738, + "step": 6283 + }, + { + "epoch": 0.37, + "learning_rate": 7.325027898109292e-08, + "logits/chosen": -2.112074851989746, + "logits/rejected": -2.1086013317108154, + "logps/chosen": -62.28355407714844, + "logps/rejected": -224.83096313476562, + "loss": 0.4175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011218261905014515, + "rewards/margins": 1.7949203252792358, + "rewards/rejected": -1.8061386346817017, + "step": 6284 + }, + { + "epoch": 0.37, + "learning_rate": 7.324193540664958e-08, + "logits/chosen": -2.06510329246521, + "logits/rejected": -2.0709521770477295, + "logps/chosen": -19.46458625793457, + "logps/rejected": -147.86141967773438, + "loss": 0.6871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030852317810058594, + "rewards/margins": 0.005002785474061966, + "rewards/rejected": -0.03585510328412056, + "step": 6285 + }, + { + "epoch": 0.37, + "learning_rate": 7.323359100653902e-08, + "logits/chosen": -2.175997734069824, + "logits/rejected": -2.175135612487793, + "logps/chosen": -2.6968157291412354, + "logps/rejected": -39.06507873535156, + "loss": 0.5926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09560246765613556, + "rewards/margins": 0.3241013288497925, + "rewards/rejected": -0.22849884629249573, + "step": 6286 + }, + { + "epoch": 0.37, + "learning_rate": 7.322524578105764e-08, + "logits/chosen": -2.1176462173461914, + "logits/rejected": -2.1117050647735596, + "logps/chosen": -17.087146759033203, + "logps/rejected": -187.4437713623047, + "loss": 0.3511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0598270408809185, + "rewards/margins": 2.9531757831573486, + "rewards/rejected": -2.8933486938476562, + "step": 6287 + }, + { + "epoch": 0.37, + "learning_rate": 7.321689973050194e-08, + "logits/chosen": -2.0054268836975098, + "logits/rejected": -2.0119612216949463, + "logps/chosen": -128.09335327148438, + "logps/rejected": -290.8704528808594, + "loss": 0.1605, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.774163842201233, + "rewards/margins": 1.2922455072402954, + "rewards/rejected": 0.4819183349609375, + "step": 6288 + }, + { + "epoch": 0.37, + "learning_rate": 7.320855285516839e-08, + "logits/chosen": -2.0717058181762695, + "logits/rejected": -2.062720537185669, + "logps/chosen": -8.106042514555156e-05, + "logps/rejected": -152.53485107421875, + "loss": 0.3744, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1085948017353076e-06, + "rewards/margins": 2.8562350273132324, + "rewards/rejected": -2.856236219406128, + "step": 6289 + }, + { + "epoch": 0.37, + "learning_rate": 7.320020515535353e-08, + "logits/chosen": -1.9158419370651245, + "logits/rejected": -1.9165458679199219, + "logps/chosen": -192.71832275390625, + "logps/rejected": -346.3173522949219, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4928529262542725, + "rewards/margins": 4.80989408493042, + "rewards/rejected": -2.3170411586761475, + "step": 6290 + }, + { + "epoch": 0.37, + "learning_rate": 7.319185663135388e-08, + "logits/chosen": -1.8997206687927246, + "logits/rejected": -1.8904962539672852, + "logps/chosen": -160.9913330078125, + "logps/rejected": -273.7040100097656, + "loss": 0.4533, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2722686529159546, + "rewards/margins": -0.10827946662902832, + "rewards/rejected": 1.380548119544983, + "step": 6291 + }, + { + "epoch": 0.37, + "learning_rate": 7.318350728346603e-08, + "logits/chosen": -2.163482427597046, + "logits/rejected": -2.1367390155792236, + "logps/chosen": -115.5254898071289, + "logps/rejected": -342.0261535644531, + "loss": 0.2139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4130966365337372, + "rewards/margins": 4.985099792480469, + "rewards/rejected": -4.572003364562988, + "step": 6292 + }, + { + "epoch": 0.37, + "learning_rate": 7.317515711198662e-08, + "logits/chosen": -1.9692213535308838, + "logits/rejected": -2.0129435062408447, + "logps/chosen": -173.8045654296875, + "logps/rejected": -336.99267578125, + "loss": 0.1423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2000168561935425, + "rewards/margins": 1.8858017921447754, + "rewards/rejected": -0.6857849359512329, + "step": 6293 + }, + { + "epoch": 0.37, + "learning_rate": 7.316680611721227e-08, + "logits/chosen": -1.9923392534255981, + "logits/rejected": -1.9883630275726318, + "logps/chosen": -27.409334182739258, + "logps/rejected": -258.3448791503906, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25576743483543396, + "rewards/margins": 2.6241114139556885, + "rewards/rejected": -2.3683440685272217, + "step": 6294 + }, + { + "epoch": 0.37, + "learning_rate": 7.315845429943964e-08, + "logits/chosen": -1.9834131002426147, + "logits/rejected": -1.9686253070831299, + "logps/chosen": -190.79647827148438, + "logps/rejected": -322.5528869628906, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5598922967910767, + "rewards/margins": 0.03719329833984375, + "rewards/rejected": 1.522698998451233, + "step": 6295 + }, + { + "epoch": 0.37, + "learning_rate": 7.315010165896543e-08, + "logits/chosen": -2.0823476314544678, + "logits/rejected": -2.0831637382507324, + "logps/chosen": -9.461930274963379, + "logps/rejected": -95.54428100585938, + "loss": 0.6006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03116321563720703, + "rewards/margins": 0.3711366653442383, + "rewards/rejected": -0.33997344970703125, + "step": 6296 + }, + { + "epoch": 0.37, + "learning_rate": 7.314174819608635e-08, + "logits/chosen": -1.9527783393859863, + "logits/rejected": -1.9384580850601196, + "logps/chosen": -39.9445915222168, + "logps/rejected": -110.91217041015625, + "loss": 0.6684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002819061279296875, + "rewards/margins": 0.013373566791415215, + "rewards/rejected": -0.01619262807071209, + "step": 6297 + }, + { + "epoch": 0.37, + "learning_rate": 7.313339391109919e-08, + "logits/chosen": -2.01399827003479, + "logits/rejected": -2.0647389888763428, + "logps/chosen": -165.65777587890625, + "logps/rejected": -314.099609375, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.915277123451233, + "rewards/margins": 4.7938385009765625, + "rewards/rejected": -2.878561496734619, + "step": 6298 + }, + { + "epoch": 0.37, + "learning_rate": 7.312503880430072e-08, + "logits/chosen": -2.076946258544922, + "logits/rejected": -2.068800210952759, + "logps/chosen": -63.25374221801758, + "logps/rejected": -284.83929443359375, + "loss": 0.437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25213661789894104, + "rewards/margins": 0.9488651752471924, + "rewards/rejected": -0.696728527545929, + "step": 6299 + }, + { + "epoch": 0.37, + "learning_rate": 7.311668287598777e-08, + "logits/chosen": -1.9925627708435059, + "logits/rejected": -1.9920721054077148, + "logps/chosen": -224.4359130859375, + "logps/rejected": -350.0693054199219, + "loss": 0.0882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.992407202720642, + "rewards/margins": 2.242047071456909, + "rewards/rejected": -0.24963989853858948, + "step": 6300 + }, + { + "epoch": 0.37, + "learning_rate": 7.310832612645715e-08, + "logits/chosen": -2.027371644973755, + "logits/rejected": -2.021414279937744, + "logps/chosen": -43.45563507080078, + "logps/rejected": -348.4389953613281, + "loss": 0.2555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38830071687698364, + "rewards/margins": 5.264732360839844, + "rewards/rejected": -4.876431465148926, + "step": 6301 + }, + { + "epoch": 0.37, + "learning_rate": 7.309996855600577e-08, + "logits/chosen": -1.966281771659851, + "logits/rejected": -1.9710026979446411, + "logps/chosen": -0.0007271686336025596, + "logps/rejected": -165.90530395507812, + "loss": 0.3633, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2287207027839031e-05, + "rewards/margins": 3.184243679046631, + "rewards/rejected": -3.184256076812744, + "step": 6302 + }, + { + "epoch": 0.37, + "learning_rate": 7.309161016493049e-08, + "logits/chosen": -1.9812034368515015, + "logits/rejected": -1.9762388467788696, + "logps/chosen": -362.69061279296875, + "logps/rejected": -439.84893798828125, + "loss": 0.2253, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.555615186691284, + "rewards/margins": 0.6391600370407104, + "rewards/rejected": 1.9164551496505737, + "step": 6303 + }, + { + "epoch": 0.37, + "learning_rate": 7.308325095352827e-08, + "logits/chosen": -2.1135294437408447, + "logits/rejected": -2.1077799797058105, + "logps/chosen": -65.99578857421875, + "logps/rejected": -305.086669921875, + "loss": 0.2693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6405898928642273, + "rewards/margins": 1.9436936378479004, + "rewards/rejected": -1.3031036853790283, + "step": 6304 + }, + { + "epoch": 0.37, + "learning_rate": 7.307489092209608e-08, + "logits/chosen": -2.1716930866241455, + "logits/rejected": -2.171109914779663, + "logps/chosen": -3.2721595764160156, + "logps/rejected": -175.08004760742188, + "loss": 0.3987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019572902470827103, + "rewards/margins": 1.9547398090362549, + "rewards/rejected": -1.9351669549942017, + "step": 6305 + }, + { + "epoch": 0.37, + "learning_rate": 7.306653007093087e-08, + "logits/chosen": -1.8868775367736816, + "logits/rejected": -1.9566391706466675, + "logps/chosen": -315.6355285644531, + "logps/rejected": -294.6471252441406, + "loss": 0.1374, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.039080858230591, + "rewards/margins": 1.4272186756134033, + "rewards/rejected": 0.6118621826171875, + "step": 6306 + }, + { + "epoch": 0.37, + "learning_rate": 7.30581684003297e-08, + "logits/chosen": -2.0932905673980713, + "logits/rejected": -2.0845298767089844, + "logps/chosen": -35.22563552856445, + "logps/rejected": -203.2230682373047, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5108165740966797, + "rewards/margins": 0.9065456390380859, + "rewards/rejected": -0.39572906494140625, + "step": 6307 + }, + { + "epoch": 0.37, + "learning_rate": 7.304980591058959e-08, + "logits/chosen": -1.9181755781173706, + "logits/rejected": -1.9025487899780273, + "logps/chosen": -187.60443115234375, + "logps/rejected": -333.6448669433594, + "loss": 0.3247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.104009985923767, + "rewards/margins": 0.7397887706756592, + "rewards/rejected": 0.3642211854457855, + "step": 6308 + }, + { + "epoch": 0.37, + "learning_rate": 7.304144260200762e-08, + "logits/chosen": -1.8067082166671753, + "logits/rejected": -1.8073208332061768, + "logps/chosen": -247.1905517578125, + "logps/rejected": -370.59429931640625, + "loss": 0.1752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.73779296875, + "rewards/margins": 1.272039771080017, + "rewards/rejected": 0.4657531678676605, + "step": 6309 + }, + { + "epoch": 0.37, + "learning_rate": 7.303307847488092e-08, + "logits/chosen": -2.0405943393707275, + "logits/rejected": -2.0365593433380127, + "logps/chosen": -12.832942008972168, + "logps/rejected": -155.33258056640625, + "loss": 0.4235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11578922718763351, + "rewards/margins": 1.4482018947601318, + "rewards/rejected": -1.3324127197265625, + "step": 6310 + }, + { + "epoch": 0.37, + "learning_rate": 7.302471352950659e-08, + "logits/chosen": -2.02351450920105, + "logits/rejected": -2.00477933883667, + "logps/chosen": -165.6922149658203, + "logps/rejected": -270.890380859375, + "loss": 0.2455, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2692855596542358, + "rewards/margins": 0.8609848022460938, + "rewards/rejected": 0.4083007872104645, + "step": 6311 + }, + { + "epoch": 0.37, + "learning_rate": 7.301634776618182e-08, + "logits/chosen": -1.6578863859176636, + "logits/rejected": -1.6494650840759277, + "logps/chosen": -161.55267333984375, + "logps/rejected": -215.63668823242188, + "loss": 0.4474, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2224884033203125, + "rewards/margins": -0.16995549201965332, + "rewards/rejected": 1.3924438953399658, + "step": 6312 + }, + { + "epoch": 0.37, + "learning_rate": 7.300798118520378e-08, + "logits/chosen": -2.0995187759399414, + "logits/rejected": -2.095344066619873, + "logps/chosen": -184.2392578125, + "logps/rejected": -325.3983154296875, + "loss": 0.5989, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1715881824493408, + "rewards/margins": -0.64642333984375, + "rewards/rejected": 1.8180115222930908, + "step": 6313 + }, + { + "epoch": 0.37, + "learning_rate": 7.299961378686971e-08, + "logits/chosen": -1.9674112796783447, + "logits/rejected": -1.9661965370178223, + "logps/chosen": -5.8051652908325195, + "logps/rejected": -97.61433410644531, + "loss": 0.597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07025151699781418, + "rewards/margins": 0.3448029160499573, + "rewards/rejected": -0.2745513916015625, + "step": 6314 + }, + { + "epoch": 0.37, + "learning_rate": 7.299124557147686e-08, + "logits/chosen": -2.0410330295562744, + "logits/rejected": -2.0301902294158936, + "logps/chosen": -6.437214324250817e-05, + "logps/rejected": -111.22676849365234, + "loss": 0.5692, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960341127320135e-07, + "rewards/margins": 0.5783372521400452, + "rewards/rejected": -0.5783378481864929, + "step": 6315 + }, + { + "epoch": 0.37, + "learning_rate": 7.298287653932249e-08, + "logits/chosen": -1.9590744972229004, + "logits/rejected": -1.9781053066253662, + "logps/chosen": -192.32879638671875, + "logps/rejected": -461.67108154296875, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.798384189605713, + "rewards/margins": 7.249782085418701, + "rewards/rejected": -4.451397895812988, + "step": 6316 + }, + { + "epoch": 0.37, + "learning_rate": 7.297450669070394e-08, + "logits/chosen": -2.0285401344299316, + "logits/rejected": -2.025630474090576, + "logps/chosen": -185.65052795410156, + "logps/rejected": -256.4132385253906, + "loss": 0.3788, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7793197631835938, + "rewards/margins": -0.00496673583984375, + "rewards/rejected": 1.7842864990234375, + "step": 6317 + }, + { + "epoch": 0.37, + "learning_rate": 7.296613602591852e-08, + "logits/chosen": -1.7579262256622314, + "logits/rejected": -1.7582975625991821, + "logps/chosen": -216.17593383789062, + "logps/rejected": -308.6470947265625, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.335463047027588, + "rewards/margins": 0.8180009126663208, + "rewards/rejected": 1.517462134361267, + "step": 6318 + }, + { + "epoch": 0.37, + "learning_rate": 7.295776454526362e-08, + "logits/chosen": -1.8902279138565063, + "logits/rejected": -1.890445351600647, + "logps/chosen": -36.06195068359375, + "logps/rejected": -275.00164794921875, + "loss": 0.305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00787429790943861, + "rewards/margins": 3.858884572982788, + "rewards/rejected": -3.8667588233947754, + "step": 6319 + }, + { + "epoch": 0.37, + "learning_rate": 7.29493922490366e-08, + "logits/chosen": -2.082019329071045, + "logits/rejected": -2.0836384296417236, + "logps/chosen": -8.45182003104128e-05, + "logps/rejected": -88.0488510131836, + "loss": 0.5047, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2185926102101803e-07, + "rewards/margins": 0.9888774156570435, + "rewards/rejected": -0.9888771176338196, + "step": 6320 + }, + { + "epoch": 0.37, + "learning_rate": 7.294101913753494e-08, + "logits/chosen": -2.217869281768799, + "logits/rejected": -2.209475517272949, + "logps/chosen": -38.32621765136719, + "logps/rejected": -64.96694946289062, + "loss": 0.5538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3354392945766449, + "rewards/margins": 0.25503310561180115, + "rewards/rejected": 0.08040618896484375, + "step": 6321 + }, + { + "epoch": 0.37, + "learning_rate": 7.293264521105604e-08, + "logits/chosen": -1.869643211364746, + "logits/rejected": -1.857185959815979, + "logps/chosen": -25.558977127075195, + "logps/rejected": -317.67059326171875, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03842811658978462, + "rewards/margins": 3.8324437141418457, + "rewards/rejected": -3.794015645980835, + "step": 6322 + }, + { + "epoch": 0.37, + "learning_rate": 7.292427046989743e-08, + "logits/chosen": -1.9423173666000366, + "logits/rejected": -2.0740902423858643, + "logps/chosen": -296.7159729003906, + "logps/rejected": -410.8821716308594, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0641236305236816, + "rewards/margins": 1.93656325340271, + "rewards/rejected": 0.12756042182445526, + "step": 6323 + }, + { + "epoch": 0.37, + "learning_rate": 7.291589491435658e-08, + "logits/chosen": -2.074594736099243, + "logits/rejected": -2.072312831878662, + "logps/chosen": -0.00025150415604002774, + "logps/rejected": -204.74803161621094, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.081034861272201e-05, + "rewards/margins": 3.470048427581787, + "rewards/rejected": -3.4700591564178467, + "step": 6324 + }, + { + "epoch": 0.37, + "learning_rate": 7.290751854473105e-08, + "logits/chosen": -2.1285886764526367, + "logits/rejected": -2.1229867935180664, + "logps/chosen": -12.224369049072266, + "logps/rejected": -166.26995849609375, + "loss": 0.5573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0571962371468544, + "rewards/margins": 0.6904340386390686, + "rewards/rejected": -0.7476302981376648, + "step": 6325 + }, + { + "epoch": 0.37, + "learning_rate": 7.289914136131841e-08, + "logits/chosen": -1.7781355381011963, + "logits/rejected": -1.8118500709533691, + "logps/chosen": -282.24798583984375, + "logps/rejected": -316.1109619140625, + "loss": 0.1666, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.797595262527466, + "rewards/margins": 1.0593842267990112, + "rewards/rejected": 1.7382110357284546, + "step": 6326 + }, + { + "epoch": 0.37, + "learning_rate": 7.289076336441626e-08, + "logits/chosen": -1.8842570781707764, + "logits/rejected": -1.8876677751541138, + "logps/chosen": -282.4218444824219, + "logps/rejected": -352.80780029296875, + "loss": 0.1101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6321258544921875, + "rewards/margins": 3.052359104156494, + "rewards/rejected": -1.420233130455017, + "step": 6327 + }, + { + "epoch": 0.37, + "learning_rate": 7.288238455432224e-08, + "logits/chosen": -2.0450284481048584, + "logits/rejected": -2.022914409637451, + "logps/chosen": -202.16041564941406, + "logps/rejected": -466.052734375, + "loss": 0.1384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6406189203262329, + "rewards/margins": 3.2831053733825684, + "rewards/rejected": -2.642486572265625, + "step": 6328 + }, + { + "epoch": 0.37, + "learning_rate": 7.287400493133396e-08, + "logits/chosen": -1.958789348602295, + "logits/rejected": -1.9784164428710938, + "logps/chosen": -272.70159912109375, + "logps/rejected": -484.520263671875, + "loss": 0.1386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7877869009971619, + "rewards/margins": 3.7410218715667725, + "rewards/rejected": -2.953234910964966, + "step": 6329 + }, + { + "epoch": 0.37, + "learning_rate": 7.286562449574917e-08, + "logits/chosen": -2.021069288253784, + "logits/rejected": -1.993687629699707, + "logps/chosen": -257.35601806640625, + "logps/rejected": -485.6251220703125, + "loss": 0.1049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7504761219024658, + "rewards/margins": 2.4333620071411133, + "rewards/rejected": -0.6828857660293579, + "step": 6330 + }, + { + "epoch": 0.37, + "learning_rate": 7.285724324786553e-08, + "logits/chosen": -1.8405656814575195, + "logits/rejected": -1.8441559076309204, + "logps/chosen": -0.00150781380943954, + "logps/rejected": -234.55215454101562, + "loss": 0.3444, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.963314808672294e-05, + "rewards/margins": 4.778128623962402, + "rewards/rejected": -4.7781982421875, + "step": 6331 + }, + { + "epoch": 0.37, + "learning_rate": 7.284886118798081e-08, + "logits/chosen": -1.7157844305038452, + "logits/rejected": -1.7161638736724854, + "logps/chosen": -45.3711051940918, + "logps/rejected": -179.1480712890625, + "loss": 0.4768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023478318005800247, + "rewards/margins": 1.2265263795852661, + "rewards/rejected": -1.2500046491622925, + "step": 6332 + }, + { + "epoch": 0.37, + "learning_rate": 7.284047831639276e-08, + "logits/chosen": -1.889845848083496, + "logits/rejected": -1.8698080778121948, + "logps/chosen": -253.03240966796875, + "logps/rejected": -407.912353515625, + "loss": 0.2135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.643109142780304, + "rewards/margins": 1.0889374017715454, + "rewards/rejected": -0.44582825899124146, + "step": 6333 + }, + { + "epoch": 0.37, + "learning_rate": 7.28320946333992e-08, + "logits/chosen": -1.8325207233428955, + "logits/rejected": -1.8070884943008423, + "logps/chosen": -242.0272216796875, + "logps/rejected": -460.6556396484375, + "loss": 0.0922, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0533126592636108, + "rewards/margins": 5.393153667449951, + "rewards/rejected": -4.339840888977051, + "step": 6334 + }, + { + "epoch": 0.37, + "learning_rate": 7.282371013929797e-08, + "logits/chosen": -1.9980549812316895, + "logits/rejected": -2.0034523010253906, + "logps/chosen": -44.41823196411133, + "logps/rejected": -184.23867797851562, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13482895493507385, + "rewards/margins": 2.12956166267395, + "rewards/rejected": -1.9947327375411987, + "step": 6335 + }, + { + "epoch": 0.37, + "learning_rate": 7.281532483438691e-08, + "logits/chosen": -1.7805993556976318, + "logits/rejected": -1.7838436365127563, + "logps/chosen": -165.47811889648438, + "logps/rejected": -332.59393310546875, + "loss": 0.0681, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1047897338867188, + "rewards/margins": 2.8915817737579346, + "rewards/rejected": -0.786791980266571, + "step": 6336 + }, + { + "epoch": 0.37, + "learning_rate": 7.280693871896389e-08, + "logits/chosen": -1.7960294485092163, + "logits/rejected": -1.8007277250289917, + "logps/chosen": -176.16001892089844, + "logps/rejected": -243.52780151367188, + "loss": 0.5168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8265945315361023, + "rewards/margins": 0.01185297966003418, + "rewards/rejected": 0.8147415518760681, + "step": 6337 + }, + { + "epoch": 0.37, + "learning_rate": 7.279855179332685e-08, + "logits/chosen": -1.7580081224441528, + "logits/rejected": -1.7603495121002197, + "logps/chosen": -38.54077911376953, + "logps/rejected": -77.89424133300781, + "loss": 0.545, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3217926025390625, + "rewards/margins": 0.3544860780239105, + "rewards/rejected": -0.03269348293542862, + "step": 6338 + }, + { + "epoch": 0.37, + "learning_rate": 7.279016405777375e-08, + "logits/chosen": -1.8889883756637573, + "logits/rejected": -1.8218728303909302, + "logps/chosen": -238.9459686279297, + "logps/rejected": -368.9685363769531, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9035049676895142, + "rewards/margins": 0.9732894897460938, + "rewards/rejected": -0.06978454440832138, + "step": 6339 + }, + { + "epoch": 0.37, + "learning_rate": 7.278177551260254e-08, + "logits/chosen": -1.9786550998687744, + "logits/rejected": -1.9680614471435547, + "logps/chosen": -161.006591796875, + "logps/rejected": -260.5587463378906, + "loss": 0.1813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6656341552734375, + "rewards/margins": 1.4468994140625, + "rewards/rejected": 0.2187347412109375, + "step": 6340 + }, + { + "epoch": 0.37, + "learning_rate": 7.27733861581112e-08, + "logits/chosen": -1.984941840171814, + "logits/rejected": -2.0217485427856445, + "logps/chosen": -245.36233520507812, + "logps/rejected": -431.16015625, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4736785888671875, + "rewards/margins": 4.650738716125488, + "rewards/rejected": -2.1770598888397217, + "step": 6341 + }, + { + "epoch": 0.37, + "learning_rate": 7.276499599459782e-08, + "logits/chosen": -2.005981206893921, + "logits/rejected": -2.008777379989624, + "logps/chosen": -34.48466491699219, + "logps/rejected": -147.82034301757812, + "loss": 0.3835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5415012240409851, + "rewards/margins": 1.0259082317352295, + "rewards/rejected": -0.4844070374965668, + "step": 6342 + }, + { + "epoch": 0.37, + "learning_rate": 7.27566050223604e-08, + "logits/chosen": -1.9870961904525757, + "logits/rejected": -2.0399162769317627, + "logps/chosen": -238.09239196777344, + "logps/rejected": -389.4832763671875, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.992091417312622, + "rewards/margins": 4.3067731857299805, + "rewards/rejected": -2.3146820068359375, + "step": 6343 + }, + { + "epoch": 0.37, + "learning_rate": 7.274821324169708e-08, + "logits/chosen": -2.22143292427063, + "logits/rejected": -2.215885877609253, + "logps/chosen": -22.71674156188965, + "logps/rejected": -281.4527587890625, + "loss": 0.2211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6380991339683533, + "rewards/margins": 2.9060709476470947, + "rewards/rejected": -2.2679717540740967, + "step": 6344 + }, + { + "epoch": 0.37, + "learning_rate": 7.273982065290594e-08, + "logits/chosen": -1.9087592363357544, + "logits/rejected": -1.8941092491149902, + "logps/chosen": -90.44647216796875, + "logps/rejected": -243.58566284179688, + "loss": 0.1894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3961623907089233, + "rewards/margins": 1.6174637079238892, + "rewards/rejected": -0.22130127251148224, + "step": 6345 + }, + { + "epoch": 0.37, + "learning_rate": 7.273142725628514e-08, + "logits/chosen": -1.9595155715942383, + "logits/rejected": -1.9546947479248047, + "logps/chosen": -0.023019207641482353, + "logps/rejected": -87.72427368164062, + "loss": 0.493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000708910811226815, + "rewards/margins": 1.1647710800170898, + "rewards/rejected": -1.1654800176620483, + "step": 6346 + }, + { + "epoch": 0.37, + "learning_rate": 7.272303305213285e-08, + "logits/chosen": -1.906928300857544, + "logits/rejected": -1.902448296546936, + "logps/chosen": -38.546409606933594, + "logps/rejected": -139.93466186523438, + "loss": 0.6105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32992324233055115, + "rewards/margins": 0.07887038588523865, + "rewards/rejected": 0.2510528564453125, + "step": 6347 + }, + { + "epoch": 0.37, + "learning_rate": 7.271463804074727e-08, + "logits/chosen": -2.030977964401245, + "logits/rejected": -2.029127359390259, + "logps/chosen": -6.426721096038818, + "logps/rejected": -97.84684753417969, + "loss": 0.4502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1806230992078781, + "rewards/margins": 1.0751405954360962, + "rewards/rejected": -0.8945175409317017, + "step": 6348 + }, + { + "epoch": 0.37, + "learning_rate": 7.270624222242665e-08, + "logits/chosen": -2.1046411991119385, + "logits/rejected": -2.1102793216705322, + "logps/chosen": -0.9815533757209778, + "logps/rejected": -128.51344299316406, + "loss": 0.4632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014103877358138561, + "rewards/margins": 1.2519925832748413, + "rewards/rejected": -1.2660964727401733, + "step": 6349 + }, + { + "epoch": 0.37, + "learning_rate": 7.269784559746923e-08, + "logits/chosen": -2.043936014175415, + "logits/rejected": -2.0253286361694336, + "logps/chosen": -33.26475524902344, + "logps/rejected": -177.49203491210938, + "loss": 0.1651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8956581354141235, + "rewards/margins": 2.939899444580078, + "rewards/rejected": -2.044241428375244, + "step": 6350 + }, + { + "epoch": 0.37, + "learning_rate": 7.26894481661733e-08, + "logits/chosen": -1.8245691061019897, + "logits/rejected": -1.8230977058410645, + "logps/chosen": -316.2354736328125, + "logps/rejected": -359.601318359375, + "loss": 0.5023, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.514434814453125, + "rewards/margins": -0.218963623046875, + "rewards/rejected": 0.7333984375, + "step": 6351 + }, + { + "epoch": 0.37, + "learning_rate": 7.26810499288372e-08, + "logits/chosen": -1.8943836688995361, + "logits/rejected": -1.857027530670166, + "logps/chosen": -234.66824340820312, + "logps/rejected": -441.5624084472656, + "loss": 0.2588, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6973739862442017, + "rewards/margins": 0.6252853870391846, + "rewards/rejected": 1.072088599205017, + "step": 6352 + }, + { + "epoch": 0.37, + "learning_rate": 7.267265088575926e-08, + "logits/chosen": -1.926038384437561, + "logits/rejected": -1.92276132106781, + "logps/chosen": -15.586424827575684, + "logps/rejected": -160.58102416992188, + "loss": 0.4062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2114727944135666, + "rewards/margins": 1.7035092115402222, + "rewards/rejected": -1.4920364618301392, + "step": 6353 + }, + { + "epoch": 0.37, + "learning_rate": 7.266425103723785e-08, + "logits/chosen": -2.0489158630371094, + "logits/rejected": -2.0394835472106934, + "logps/chosen": -58.5443115234375, + "logps/rejected": -183.1640625, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32856789231300354, + "rewards/margins": 1.4291126728057861, + "rewards/rejected": -1.100544810295105, + "step": 6354 + }, + { + "epoch": 0.37, + "learning_rate": 7.265585038357138e-08, + "logits/chosen": -1.9805116653442383, + "logits/rejected": -1.9798924922943115, + "logps/chosen": -241.63348388671875, + "logps/rejected": -241.46812438964844, + "loss": 0.8425, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4804367125034332, + "rewards/margins": -0.8509842157363892, + "rewards/rejected": 0.37054750323295593, + "step": 6355 + }, + { + "epoch": 0.37, + "learning_rate": 7.264744892505829e-08, + "logits/chosen": -2.016178607940674, + "logits/rejected": -2.0755269527435303, + "logps/chosen": -232.416748046875, + "logps/rejected": -265.37860107421875, + "loss": 0.331, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4137115478515625, + "rewards/margins": 0.27743828296661377, + "rewards/rejected": 1.1362732648849487, + "step": 6356 + }, + { + "epoch": 0.37, + "learning_rate": 7.263904666199703e-08, + "logits/chosen": -1.9948687553405762, + "logits/rejected": -1.9927349090576172, + "logps/chosen": -0.0017301682382822037, + "logps/rejected": -146.7061767578125, + "loss": 0.3488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00029208851628936827, + "rewards/margins": 3.9832944869995117, + "rewards/rejected": -3.9830024242401123, + "step": 6357 + }, + { + "epoch": 0.37, + "learning_rate": 7.26306435946861e-08, + "logits/chosen": -1.9822195768356323, + "logits/rejected": -1.9896186590194702, + "logps/chosen": -68.64225006103516, + "logps/rejected": -255.757568359375, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.936633288860321, + "rewards/margins": 1.2876098155975342, + "rewards/rejected": -0.3509765565395355, + "step": 6358 + }, + { + "epoch": 0.37, + "learning_rate": 7.262223972342398e-08, + "logits/chosen": -2.113640546798706, + "logits/rejected": -2.114572048187256, + "logps/chosen": -63.82904815673828, + "logps/rejected": -340.79156494140625, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19054947793483734, + "rewards/margins": 3.3677406311035156, + "rewards/rejected": -3.1771912574768066, + "step": 6359 + }, + { + "epoch": 0.37, + "learning_rate": 7.261383504850927e-08, + "logits/chosen": -2.037153482437134, + "logits/rejected": -1.9859793186187744, + "logps/chosen": -266.4071044921875, + "logps/rejected": -433.32562255859375, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2153289318084717, + "rewards/margins": 4.117227077484131, + "rewards/rejected": -0.901898205280304, + "step": 6360 + }, + { + "epoch": 0.37, + "learning_rate": 7.260542957024053e-08, + "logits/chosen": -2.0368831157684326, + "logits/rejected": -2.007720947265625, + "logps/chosen": -77.2781982421875, + "logps/rejected": -394.5394592285156, + "loss": 0.4143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012344360584393144, + "rewards/margins": 6.717875957489014, + "rewards/rejected": -6.716641426086426, + "step": 6361 + }, + { + "epoch": 0.37, + "learning_rate": 7.259702328891635e-08, + "logits/chosen": -1.9326906204223633, + "logits/rejected": -1.9679827690124512, + "logps/chosen": -247.25680541992188, + "logps/rejected": -330.858642578125, + "loss": 0.0714, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2727447748184204, + "rewards/margins": 3.2035646438598633, + "rewards/rejected": -1.9308197498321533, + "step": 6362 + }, + { + "epoch": 0.37, + "learning_rate": 7.258861620483537e-08, + "logits/chosen": -2.0256967544555664, + "logits/rejected": -2.0420310497283936, + "logps/chosen": -204.6656494140625, + "logps/rejected": -264.66595458984375, + "loss": 0.3547, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5700501203536987, + "rewards/margins": 0.12841796875, + "rewards/rejected": 1.4416321516036987, + "step": 6363 + }, + { + "epoch": 0.37, + "learning_rate": 7.258020831829625e-08, + "logits/chosen": -1.7718243598937988, + "logits/rejected": -1.7162996530532837, + "logps/chosen": -277.32342529296875, + "logps/rejected": -622.1603393554688, + "loss": 0.076, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.189739942550659, + "rewards/margins": 2.70233154296875, + "rewards/rejected": -0.512591540813446, + "step": 6364 + }, + { + "epoch": 0.37, + "learning_rate": 7.257179962959767e-08, + "logits/chosen": -2.066749095916748, + "logits/rejected": -2.069969654083252, + "logps/chosen": -0.010814887471497059, + "logps/rejected": -142.7957763671875, + "loss": 0.5892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0046740989200770855, + "rewards/margins": 0.45556676387786865, + "rewards/rejected": -0.4508926570415497, + "step": 6365 + }, + { + "epoch": 0.37, + "learning_rate": 7.256339013903837e-08, + "logits/chosen": -1.968670129776001, + "logits/rejected": -1.970583200454712, + "logps/chosen": -26.888139724731445, + "logps/rejected": -285.76611328125, + "loss": 0.2579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32059288024902344, + "rewards/margins": 2.250997543334961, + "rewards/rejected": -1.9304046630859375, + "step": 6366 + }, + { + "epoch": 0.37, + "learning_rate": 7.255497984691707e-08, + "logits/chosen": -1.9571011066436768, + "logits/rejected": -1.954201579093933, + "logps/chosen": -50.3032112121582, + "logps/rejected": -399.958984375, + "loss": 0.2802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40779533982276917, + "rewards/margins": 4.057477951049805, + "rewards/rejected": -3.6496827602386475, + "step": 6367 + }, + { + "epoch": 0.37, + "learning_rate": 7.254656875353257e-08, + "logits/chosen": -1.915018916130066, + "logits/rejected": -1.907673716545105, + "logps/chosen": -19.62997055053711, + "logps/rejected": -162.228271484375, + "loss": 0.452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7879083752632141, + "rewards/margins": 0.2612069845199585, + "rewards/rejected": 0.5267013907432556, + "step": 6368 + }, + { + "epoch": 0.37, + "learning_rate": 7.253815685918366e-08, + "logits/chosen": -1.9701870679855347, + "logits/rejected": -1.9630206823349, + "logps/chosen": -0.00016295333625748754, + "logps/rejected": -153.55992126464844, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.246165190124884e-05, + "rewards/margins": 3.0027952194213867, + "rewards/rejected": -3.0027527809143066, + "step": 6369 + }, + { + "epoch": 0.37, + "learning_rate": 7.252974416416916e-08, + "logits/chosen": -2.126528263092041, + "logits/rejected": -2.1192104816436768, + "logps/chosen": -1.0736055374145508, + "logps/rejected": -84.04042053222656, + "loss": 0.7005, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01619328372180462, + "rewards/margins": -0.029757795855402946, + "rewards/rejected": 0.045951079577207565, + "step": 6370 + }, + { + "epoch": 0.37, + "learning_rate": 7.252133066878794e-08, + "logits/chosen": -2.165386199951172, + "logits/rejected": -2.1675076484680176, + "logps/chosen": -136.67835998535156, + "logps/rejected": -326.99639892578125, + "loss": 0.1878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7239761352539062, + "rewards/margins": 1.0387649536132812, + "rewards/rejected": 0.685211181640625, + "step": 6371 + }, + { + "epoch": 0.37, + "learning_rate": 7.251291637333891e-08, + "logits/chosen": -2.044454574584961, + "logits/rejected": -2.032914400100708, + "logps/chosen": -15.240113258361816, + "logps/rejected": -228.40216064453125, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01565074920654297, + "rewards/margins": 3.081059217453003, + "rewards/rejected": -3.06540846824646, + "step": 6372 + }, + { + "epoch": 0.37, + "learning_rate": 7.250450127812095e-08, + "logits/chosen": -2.024512767791748, + "logits/rejected": -2.011183738708496, + "logps/chosen": -18.06756591796875, + "logps/rejected": -298.6187438964844, + "loss": 0.1721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.534296452999115, + "rewards/margins": 5.332276344299316, + "rewards/rejected": -4.797979831695557, + "step": 6373 + }, + { + "epoch": 0.37, + "learning_rate": 7.249608538343304e-08, + "logits/chosen": -1.857940673828125, + "logits/rejected": -1.915534496307373, + "logps/chosen": -151.72927856445312, + "logps/rejected": -384.79986572265625, + "loss": 0.1183, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.516534447669983, + "rewards/margins": 1.615454077720642, + "rewards/rejected": -0.09891968220472336, + "step": 6374 + }, + { + "epoch": 0.37, + "learning_rate": 7.248766868957412e-08, + "logits/chosen": -2.0251400470733643, + "logits/rejected": -1.995705246925354, + "logps/chosen": -118.0154800415039, + "logps/rejected": -199.21963500976562, + "loss": 0.2388, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4511390924453735, + "rewards/margins": 1.1255042552947998, + "rewards/rejected": 0.32563477754592896, + "step": 6375 + }, + { + "epoch": 0.37, + "learning_rate": 7.247925119684322e-08, + "logits/chosen": -1.9347220659255981, + "logits/rejected": -1.9239596128463745, + "logps/chosen": -55.61153030395508, + "logps/rejected": -130.38833618164062, + "loss": 0.4343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4463752806186676, + "rewards/margins": 0.7192947864532471, + "rewards/rejected": -0.2729194760322571, + "step": 6376 + }, + { + "epoch": 0.37, + "learning_rate": 7.247083290553935e-08, + "logits/chosen": -1.9188438653945923, + "logits/rejected": -1.9186396598815918, + "logps/chosen": -53.03472137451172, + "logps/rejected": -243.90513610839844, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1069663763046265, + "rewards/margins": 0.5376838445663452, + "rewards/rejected": 0.5692825317382812, + "step": 6377 + }, + { + "epoch": 0.37, + "learning_rate": 7.24624138159616e-08, + "logits/chosen": -1.8997974395751953, + "logits/rejected": -1.8065723180770874, + "logps/chosen": -230.75912475585938, + "logps/rejected": -352.4749755859375, + "loss": 0.1335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9180176258087158, + "rewards/margins": 1.9224731922149658, + "rewards/rejected": -0.00445556640625, + "step": 6378 + }, + { + "epoch": 0.37, + "learning_rate": 7.245399392840901e-08, + "logits/chosen": -1.93784761428833, + "logits/rejected": -1.9273751974105835, + "logps/chosen": -163.62448120117188, + "logps/rejected": -295.4215393066406, + "loss": 0.2364, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.061029076576233, + "rewards/margins": 1.4715241193771362, + "rewards/rejected": -0.41049501299858093, + "step": 6379 + }, + { + "epoch": 0.37, + "learning_rate": 7.244557324318074e-08, + "logits/chosen": -2.1899213790893555, + "logits/rejected": -2.183560371398926, + "logps/chosen": -45.71833801269531, + "logps/rejected": -137.7813720703125, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09844856709241867, + "rewards/margins": 1.8263972997665405, + "rewards/rejected": -1.727948784828186, + "step": 6380 + }, + { + "epoch": 0.37, + "learning_rate": 7.24371517605759e-08, + "logits/chosen": -2.0137076377868652, + "logits/rejected": -2.00252628326416, + "logps/chosen": -265.96954345703125, + "logps/rejected": -480.9491271972656, + "loss": 0.1411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.88104248046875, + "rewards/margins": 4.807821750640869, + "rewards/rejected": -3.926779270172119, + "step": 6381 + }, + { + "epoch": 0.37, + "learning_rate": 7.24287294808937e-08, + "logits/chosen": -1.8446159362792969, + "logits/rejected": -1.8097198009490967, + "logps/chosen": -187.95364379882812, + "logps/rejected": -573.5939331054688, + "loss": 0.0846, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4158082008361816, + "rewards/margins": 2.0352907180786133, + "rewards/rejected": 0.3805175721645355, + "step": 6382 + }, + { + "epoch": 0.37, + "learning_rate": 7.242030640443328e-08, + "logits/chosen": -1.9876915216445923, + "logits/rejected": -1.9318206310272217, + "logps/chosen": -191.2906951904297, + "logps/rejected": -340.37298583984375, + "loss": 0.4724, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.3782455921173096, + "rewards/margins": -0.4057173728942871, + "rewards/rejected": 2.7839629650115967, + "step": 6383 + }, + { + "epoch": 0.37, + "learning_rate": 7.241188253149394e-08, + "logits/chosen": -2.0253517627716064, + "logits/rejected": -2.0093605518341064, + "logps/chosen": -90.08946228027344, + "logps/rejected": -335.59503173828125, + "loss": 0.3138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05067291483283043, + "rewards/margins": 2.0856125354766846, + "rewards/rejected": -2.0349395275115967, + "step": 6384 + }, + { + "epoch": 0.37, + "learning_rate": 7.240345786237489e-08, + "logits/chosen": -2.0812933444976807, + "logits/rejected": -2.085994243621826, + "logps/chosen": -35.30619812011719, + "logps/rejected": -134.92156982421875, + "loss": 0.897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.978127658367157, + "rewards/margins": 0.28307610750198364, + "rewards/rejected": -1.2612037658691406, + "step": 6385 + }, + { + "epoch": 0.37, + "learning_rate": 7.239503239737543e-08, + "logits/chosen": -2.1503238677978516, + "logits/rejected": -2.1511683464050293, + "logps/chosen": -0.18185336887836456, + "logps/rejected": -59.615074157714844, + "loss": 0.4445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0066836620680987835, + "rewards/margins": 1.4714974164962769, + "rewards/rejected": -1.4781811237335205, + "step": 6386 + }, + { + "epoch": 0.37, + "learning_rate": 7.238660613679488e-08, + "logits/chosen": -1.687535047531128, + "logits/rejected": -1.6964738368988037, + "logps/chosen": -35.93154525756836, + "logps/rejected": -174.8826446533203, + "loss": 0.4526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11498413234949112, + "rewards/margins": 1.0938644409179688, + "rewards/rejected": -0.9788803458213806, + "step": 6387 + }, + { + "epoch": 0.37, + "learning_rate": 7.237817908093257e-08, + "logits/chosen": -2.0929653644561768, + "logits/rejected": -2.0881834030151367, + "logps/chosen": -6.883986473083496, + "logps/rejected": -94.3440933227539, + "loss": 0.6365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08008108288049698, + "rewards/margins": 0.1445944905281067, + "rewards/rejected": -0.06451340019702911, + "step": 6388 + }, + { + "epoch": 0.37, + "learning_rate": 7.236975123008789e-08, + "logits/chosen": -2.1485354900360107, + "logits/rejected": -2.1449527740478516, + "logps/chosen": -33.317848205566406, + "logps/rejected": -143.85105895996094, + "loss": 0.245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9832252860069275, + "rewards/margins": 1.3583245277404785, + "rewards/rejected": -0.37509918212890625, + "step": 6389 + }, + { + "epoch": 0.37, + "learning_rate": 7.23613225845602e-08, + "logits/chosen": -1.895599126815796, + "logits/rejected": -1.8870373964309692, + "logps/chosen": -142.74798583984375, + "logps/rejected": -354.9616394042969, + "loss": 0.1417, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3306366205215454, + "rewards/margins": 1.6931304931640625, + "rewards/rejected": -0.3624939024448395, + "step": 6390 + }, + { + "epoch": 0.37, + "learning_rate": 7.235289314464896e-08, + "logits/chosen": -1.927185297012329, + "logits/rejected": -1.8521246910095215, + "logps/chosen": -294.22979736328125, + "logps/rejected": -410.961669921875, + "loss": 0.101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.681329369544983, + "rewards/margins": 2.342395067214966, + "rewards/rejected": -0.6610656976699829, + "step": 6391 + }, + { + "epoch": 0.37, + "learning_rate": 7.234446291065363e-08, + "logits/chosen": -1.8733588457107544, + "logits/rejected": -1.8616224527359009, + "logps/chosen": -180.6590576171875, + "logps/rejected": -353.48760986328125, + "loss": 0.209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7673279047012329, + "rewards/margins": 2.3339996337890625, + "rewards/rejected": -1.5666717290878296, + "step": 6392 + }, + { + "epoch": 0.37, + "learning_rate": 7.233603188287368e-08, + "logits/chosen": -2.151564598083496, + "logits/rejected": -2.1510987281799316, + "logps/chosen": -10.506778717041016, + "logps/rejected": -66.59580993652344, + "loss": 0.6001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18843431770801544, + "rewards/margins": 0.14694491028785706, + "rewards/rejected": 0.041489411145448685, + "step": 6393 + }, + { + "epoch": 0.37, + "learning_rate": 7.232760006160859e-08, + "logits/chosen": -1.9336843490600586, + "logits/rejected": -1.9302898645401, + "logps/chosen": -38.01221466064453, + "logps/rejected": -132.952392578125, + "loss": 0.719, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.16233444213867188, + "rewards/margins": -0.1516365110874176, + "rewards/rejected": -0.010697937570512295, + "step": 6394 + }, + { + "epoch": 0.37, + "learning_rate": 7.231916744715794e-08, + "logits/chosen": -2.075389862060547, + "logits/rejected": -2.0835251808166504, + "logps/chosen": -227.28956604003906, + "logps/rejected": -324.2331237792969, + "loss": 0.1085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.011372447013855, + "rewards/margins": 3.8380966186523438, + "rewards/rejected": -2.8267242908477783, + "step": 6395 + }, + { + "epoch": 0.37, + "learning_rate": 7.23107340398213e-08, + "logits/chosen": -2.0053889751434326, + "logits/rejected": -1.9754300117492676, + "logps/chosen": -222.4169921875, + "logps/rejected": -292.3380126953125, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.537786841392517, + "rewards/margins": 3.1728515625, + "rewards/rejected": -1.635064721107483, + "step": 6396 + }, + { + "epoch": 0.37, + "learning_rate": 7.230229983989825e-08, + "logits/chosen": -1.9038317203521729, + "logits/rejected": -1.8971935510635376, + "logps/chosen": -71.668212890625, + "logps/rejected": -270.20758056640625, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16614913940429688, + "rewards/margins": 2.2627036571502686, + "rewards/rejected": -2.0965545177459717, + "step": 6397 + }, + { + "epoch": 0.37, + "learning_rate": 7.229386484768841e-08, + "logits/chosen": -1.9088934659957886, + "logits/rejected": -1.9069163799285889, + "logps/chosen": -214.56997680664062, + "logps/rejected": -330.79443359375, + "loss": 0.1187, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6547058820724487, + "rewards/margins": 1.701470971107483, + "rewards/rejected": -0.04676513746380806, + "step": 6398 + }, + { + "epoch": 0.37, + "learning_rate": 7.228542906349146e-08, + "logits/chosen": -1.8257837295532227, + "logits/rejected": -1.8184151649475098, + "logps/chosen": -236.1197509765625, + "logps/rejected": -446.95831298828125, + "loss": 0.0623, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8940308094024658, + "rewards/margins": 3.5522584915161133, + "rewards/rejected": -1.658227562904358, + "step": 6399 + }, + { + "epoch": 0.37, + "learning_rate": 7.227699248760704e-08, + "logits/chosen": -1.8733841180801392, + "logits/rejected": -1.9302723407745361, + "logps/chosen": -207.41941833496094, + "logps/rejected": -401.4625549316406, + "loss": 0.1246, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6266555786132812, + "rewards/margins": 1.8327316045761108, + "rewards/rejected": -0.20607605576515198, + "step": 6400 + }, + { + "epoch": 0.37, + "learning_rate": 7.226855512033487e-08, + "logits/chosen": -2.006620168685913, + "logits/rejected": -2.0251567363739014, + "logps/chosen": -232.1648406982422, + "logps/rejected": -408.8541259765625, + "loss": 0.1598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7534103393554688, + "rewards/margins": 1.9642746448516846, + "rewards/rejected": -1.2108643054962158, + "step": 6401 + }, + { + "epoch": 0.37, + "learning_rate": 7.22601169619747e-08, + "logits/chosen": -2.08784818649292, + "logits/rejected": -2.0910258293151855, + "logps/chosen": -185.4979705810547, + "logps/rejected": -306.3058776855469, + "loss": 0.1585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9431915283203125, + "rewards/margins": 1.301300048828125, + "rewards/rejected": 0.6418914794921875, + "step": 6402 + }, + { + "epoch": 0.37, + "learning_rate": 7.225167801282632e-08, + "logits/chosen": -2.067718982696533, + "logits/rejected": -2.0702991485595703, + "logps/chosen": -137.25985717773438, + "logps/rejected": -231.15655517578125, + "loss": 0.2572, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5991531610488892, + "rewards/margins": 0.7089187502861023, + "rewards/rejected": 0.8902344107627869, + "step": 6403 + }, + { + "epoch": 0.37, + "learning_rate": 7.224323827318946e-08, + "logits/chosen": -1.9486585855484009, + "logits/rejected": -1.9415494203567505, + "logps/chosen": -10.802010536193848, + "logps/rejected": -206.38848876953125, + "loss": 0.5467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03176632151007652, + "rewards/margins": 0.8140737414360046, + "rewards/rejected": -0.7823074460029602, + "step": 6404 + }, + { + "epoch": 0.37, + "learning_rate": 7.223479774336398e-08, + "logits/chosen": -2.181302070617676, + "logits/rejected": -2.1671364307403564, + "logps/chosen": -41.36467742919922, + "logps/rejected": -294.6864929199219, + "loss": 0.3661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015871047973632812, + "rewards/margins": 2.390965700149536, + "rewards/rejected": -2.3750946521759033, + "step": 6405 + }, + { + "epoch": 0.37, + "learning_rate": 7.222635642364972e-08, + "logits/chosen": -1.9883846044540405, + "logits/rejected": -2.0098607540130615, + "logps/chosen": -259.6064147949219, + "logps/rejected": -347.9116516113281, + "loss": 0.0799, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7026489973068237, + "rewards/margins": 2.4389374256134033, + "rewards/rejected": -0.7362884879112244, + "step": 6406 + }, + { + "epoch": 0.37, + "learning_rate": 7.221791431434656e-08, + "logits/chosen": -2.049055814743042, + "logits/rejected": -2.0557451248168945, + "logps/chosen": -66.5886459350586, + "logps/rejected": -227.2506866455078, + "loss": 0.755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8099327087402344, + "rewards/margins": 0.755632758140564, + "rewards/rejected": -1.5655654668807983, + "step": 6407 + }, + { + "epoch": 0.37, + "learning_rate": 7.22094714157544e-08, + "logits/chosen": -2.087200164794922, + "logits/rejected": -2.0860931873321533, + "logps/chosen": -0.0006773653440177441, + "logps/rejected": -141.5605010986328, + "loss": 0.4561, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4783215192437638e-05, + "rewards/margins": 1.405971884727478, + "rewards/rejected": -1.4059571027755737, + "step": 6408 + }, + { + "epoch": 0.37, + "learning_rate": 7.220102772817319e-08, + "logits/chosen": -1.8560770750045776, + "logits/rejected": -1.8574093580245972, + "logps/chosen": -18.75057601928711, + "logps/rejected": -122.39299011230469, + "loss": 0.3948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3329136073589325, + "rewards/margins": 1.1506534814834595, + "rewards/rejected": -0.8177399039268494, + "step": 6409 + }, + { + "epoch": 0.37, + "learning_rate": 7.219258325190289e-08, + "logits/chosen": -1.962828278541565, + "logits/rejected": -1.9686393737792969, + "logps/chosen": -0.00037485500797629356, + "logps/rejected": -335.48138427734375, + "loss": 0.3406, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.950327355705667e-06, + "rewards/margins": 4.638174533843994, + "rewards/rejected": -4.63818359375, + "step": 6410 + }, + { + "epoch": 0.37, + "learning_rate": 7.218413798724345e-08, + "logits/chosen": -2.055654287338257, + "logits/rejected": -2.052680492401123, + "logps/chosen": -29.109243392944336, + "logps/rejected": -275.6750183105469, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.410386860370636, + "rewards/margins": 3.308671712875366, + "rewards/rejected": -2.898284912109375, + "step": 6411 + }, + { + "epoch": 0.37, + "learning_rate": 7.217569193449494e-08, + "logits/chosen": -2.1829967498779297, + "logits/rejected": -2.1724534034729004, + "logps/chosen": -0.00024840550031512976, + "logps/rejected": -216.01226806640625, + "loss": 0.3635, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.88788543609553e-06, + "rewards/margins": 3.3334474563598633, + "rewards/rejected": -3.333453416824341, + "step": 6412 + }, + { + "epoch": 0.37, + "learning_rate": 7.216724509395734e-08, + "logits/chosen": -1.9400668144226074, + "logits/rejected": -1.9409074783325195, + "logps/chosen": -160.20291137695312, + "logps/rejected": -462.7769470214844, + "loss": 0.0701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3368958234786987, + "rewards/margins": 5.188940525054932, + "rewards/rejected": -3.8520448207855225, + "step": 6413 + }, + { + "epoch": 0.37, + "learning_rate": 7.21587974659308e-08, + "logits/chosen": -1.969234824180603, + "logits/rejected": -1.95720636844635, + "logps/chosen": -49.00364685058594, + "logps/rejected": -331.656494140625, + "loss": 0.2585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5214504599571228, + "rewards/margins": 2.021596908569336, + "rewards/rejected": -1.500146508216858, + "step": 6414 + }, + { + "epoch": 0.37, + "learning_rate": 7.215034905071536e-08, + "logits/chosen": -1.8192036151885986, + "logits/rejected": -1.811668038368225, + "logps/chosen": -204.74282836914062, + "logps/rejected": -277.06524658203125, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.044537305831909, + "rewards/margins": 0.42520439624786377, + "rewards/rejected": 1.6193329095840454, + "step": 6415 + }, + { + "epoch": 0.37, + "learning_rate": 7.214189984861118e-08, + "logits/chosen": -1.9766106605529785, + "logits/rejected": -1.9379987716674805, + "logps/chosen": -233.16732788085938, + "logps/rejected": -450.63055419921875, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9091094732284546, + "rewards/margins": 2.988879442214966, + "rewards/rejected": -1.0797699689865112, + "step": 6416 + }, + { + "epoch": 0.37, + "learning_rate": 7.213344985991841e-08, + "logits/chosen": -1.9525620937347412, + "logits/rejected": -1.9557991027832031, + "logps/chosen": -0.6336274147033691, + "logps/rejected": -196.4511260986328, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017360001802444458, + "rewards/margins": 2.7812271118164062, + "rewards/rejected": -2.7985870838165283, + "step": 6417 + }, + { + "epoch": 0.37, + "learning_rate": 7.212499908493725e-08, + "logits/chosen": -2.0774283409118652, + "logits/rejected": -2.0773849487304688, + "logps/chosen": -8.938871383666992, + "logps/rejected": -132.02011108398438, + "loss": 0.484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016280079260468483, + "rewards/margins": 1.0284876823425293, + "rewards/rejected": -1.0447677373886108, + "step": 6418 + }, + { + "epoch": 0.37, + "learning_rate": 7.211654752396788e-08, + "logits/chosen": -1.9744601249694824, + "logits/rejected": -1.9428589344024658, + "logps/chosen": -260.70220947265625, + "logps/rejected": -451.9999694824219, + "loss": 0.1169, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.762109398841858, + "rewards/margins": 1.944403052330017, + "rewards/rejected": -0.18229369819164276, + "step": 6419 + }, + { + "epoch": 0.37, + "learning_rate": 7.210809517731057e-08, + "logits/chosen": -2.142688751220703, + "logits/rejected": -2.1362435817718506, + "logps/chosen": -24.34207534790039, + "logps/rejected": -226.16238403320312, + "loss": 0.3663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5700744986534119, + "rewards/margins": 0.8154846429824829, + "rewards/rejected": -0.24541015923023224, + "step": 6420 + }, + { + "epoch": 0.37, + "learning_rate": 7.209964204526555e-08, + "logits/chosen": -1.776772379875183, + "logits/rejected": -1.7550445795059204, + "logps/chosen": -247.9402618408203, + "logps/rejected": -357.76397705078125, + "loss": 0.1443, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5476913452148438, + "rewards/margins": 1.456974744796753, + "rewards/rejected": 0.09071655571460724, + "step": 6421 + }, + { + "epoch": 0.37, + "learning_rate": 7.209118812813318e-08, + "logits/chosen": -1.7876988649368286, + "logits/rejected": -1.7874782085418701, + "logps/chosen": -220.2481689453125, + "logps/rejected": -328.6428527832031, + "loss": 0.2124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.914117455482483, + "rewards/margins": 0.7880462408065796, + "rewards/rejected": 1.1260712146759033, + "step": 6422 + }, + { + "epoch": 0.37, + "learning_rate": 7.208273342621371e-08, + "logits/chosen": -1.9685401916503906, + "logits/rejected": -1.9771276712417603, + "logps/chosen": -9.404207229614258, + "logps/rejected": -114.59086608886719, + "loss": 0.6425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020264435559511185, + "rewards/margins": 0.15964126586914062, + "rewards/rejected": -0.13937683403491974, + "step": 6423 + }, + { + "epoch": 0.37, + "learning_rate": 7.207427793980757e-08, + "logits/chosen": -1.9801536798477173, + "logits/rejected": -1.9516679048538208, + "logps/chosen": -23.929855346679688, + "logps/rejected": -314.0130310058594, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18116112053394318, + "rewards/margins": 3.50449800491333, + "rewards/rejected": -3.3233368396759033, + "step": 6424 + }, + { + "epoch": 0.37, + "learning_rate": 7.206582166921506e-08, + "logits/chosen": -1.8769789934158325, + "logits/rejected": -1.891908884048462, + "logps/chosen": -330.5899658203125, + "logps/rejected": -574.8244018554688, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.801159620285034, + "rewards/margins": 6.904803276062012, + "rewards/rejected": -4.103643894195557, + "step": 6425 + }, + { + "epoch": 0.37, + "learning_rate": 7.205736461473665e-08, + "logits/chosen": -1.91315758228302, + "logits/rejected": -1.9070680141448975, + "logps/chosen": -2.1925997734069824, + "logps/rejected": -169.69622802734375, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06221255287528038, + "rewards/margins": 2.4627020359039307, + "rewards/rejected": -2.524914503097534, + "step": 6426 + }, + { + "epoch": 0.37, + "learning_rate": 7.204890677667277e-08, + "logits/chosen": -2.1538426876068115, + "logits/rejected": -2.154418706893921, + "logps/chosen": -56.76420974731445, + "logps/rejected": -258.78961181640625, + "loss": 0.4872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09463615715503693, + "rewards/margins": 1.0932811498641968, + "rewards/rejected": -0.998645007610321, + "step": 6427 + }, + { + "epoch": 0.37, + "learning_rate": 7.204044815532385e-08, + "logits/chosen": -1.8201032876968384, + "logits/rejected": -1.833960771560669, + "logps/chosen": -5.784961700439453, + "logps/rejected": -122.49807739257812, + "loss": 0.3502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20931163430213928, + "rewards/margins": 1.8745245933532715, + "rewards/rejected": -1.6652129888534546, + "step": 6428 + }, + { + "epoch": 0.37, + "learning_rate": 7.20319887509904e-08, + "logits/chosen": -1.9566649198532104, + "logits/rejected": -1.9616490602493286, + "logps/chosen": -33.035160064697266, + "logps/rejected": -219.57470703125, + "loss": 0.4923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3139316737651825, + "rewards/margins": 0.420645534992218, + "rewards/rejected": -0.10671386867761612, + "step": 6429 + }, + { + "epoch": 0.37, + "learning_rate": 7.202352856397295e-08, + "logits/chosen": -1.8956650495529175, + "logits/rejected": -1.8853306770324707, + "logps/chosen": -86.33534240722656, + "logps/rejected": -395.0810546875, + "loss": 0.0991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.514062523841858, + "rewards/margins": 3.7181029319763184, + "rewards/rejected": -2.20404052734375, + "step": 6430 + }, + { + "epoch": 0.37, + "learning_rate": 7.201506759457203e-08, + "logits/chosen": -1.7687093019485474, + "logits/rejected": -1.7668167352676392, + "logps/chosen": -2.353656768798828, + "logps/rejected": -116.87710571289062, + "loss": 0.5793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016368580982089043, + "rewards/margins": 0.5063609480857849, + "rewards/rejected": -0.5227295160293579, + "step": 6431 + }, + { + "epoch": 0.37, + "learning_rate": 7.200660584308822e-08, + "logits/chosen": -1.863384485244751, + "logits/rejected": -1.865660548210144, + "logps/chosen": -14.696379661560059, + "logps/rejected": -230.6306610107422, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22618284821510315, + "rewards/margins": 2.7681171894073486, + "rewards/rejected": -2.5419342517852783, + "step": 6432 + }, + { + "epoch": 0.37, + "learning_rate": 7.199814330982212e-08, + "logits/chosen": -1.8099908828735352, + "logits/rejected": -1.7996814250946045, + "logps/chosen": -198.27947998046875, + "logps/rejected": -236.20700073242188, + "loss": 0.4708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7050125002861023, + "rewards/margins": 0.00580286979675293, + "rewards/rejected": 0.6992096304893494, + "step": 6433 + }, + { + "epoch": 0.37, + "learning_rate": 7.198967999507437e-08, + "logits/chosen": -1.9649403095245361, + "logits/rejected": -1.8348979949951172, + "logps/chosen": -161.54873657226562, + "logps/rejected": -428.3041076660156, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9816192984580994, + "rewards/margins": 1.1487152576446533, + "rewards/rejected": -0.16709594428539276, + "step": 6434 + }, + { + "epoch": 0.37, + "learning_rate": 7.198121589914563e-08, + "logits/chosen": -2.1164584159851074, + "logits/rejected": -2.120171546936035, + "logps/chosen": -10.854056358337402, + "logps/rejected": -181.68983459472656, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0985964760184288, + "rewards/margins": 1.2431734800338745, + "rewards/rejected": -1.1445770263671875, + "step": 6435 + }, + { + "epoch": 0.37, + "learning_rate": 7.197275102233658e-08, + "logits/chosen": -2.223689317703247, + "logits/rejected": -2.1998467445373535, + "logps/chosen": -37.092063903808594, + "logps/rejected": -357.4547424316406, + "loss": 0.3572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0066238404251635075, + "rewards/margins": 5.385325908660889, + "rewards/rejected": -5.391949653625488, + "step": 6436 + }, + { + "epoch": 0.37, + "learning_rate": 7.196428536494791e-08, + "logits/chosen": -2.01859188079834, + "logits/rejected": -2.018975257873535, + "logps/chosen": -2.151496648788452, + "logps/rejected": -286.357177734375, + "loss": 0.5103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13785459101200104, + "rewards/margins": 1.187816858291626, + "rewards/rejected": -1.3256714344024658, + "step": 6437 + }, + { + "epoch": 0.37, + "learning_rate": 7.195581892728041e-08, + "logits/chosen": -2.0179800987243652, + "logits/rejected": -2.026000738143921, + "logps/chosen": -74.17078399658203, + "logps/rejected": -197.32980346679688, + "loss": 0.3842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061794281005859375, + "rewards/margins": 1.3346961736679077, + "rewards/rejected": -1.2729018926620483, + "step": 6438 + }, + { + "epoch": 0.37, + "learning_rate": 7.194735170963482e-08, + "logits/chosen": -1.8975963592529297, + "logits/rejected": -1.8894069194793701, + "logps/chosen": -20.96994400024414, + "logps/rejected": -195.22360229492188, + "loss": 0.3516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23016758263111115, + "rewards/margins": 1.5249079465866089, + "rewards/rejected": -1.2947403192520142, + "step": 6439 + }, + { + "epoch": 0.37, + "learning_rate": 7.193888371231194e-08, + "logits/chosen": -1.8677231073379517, + "logits/rejected": -1.9376453161239624, + "logps/chosen": -195.57244873046875, + "logps/rejected": -348.3521728515625, + "loss": 0.1447, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8685455322265625, + "rewards/margins": 1.3912384510040283, + "rewards/rejected": 0.47730714082717896, + "step": 6440 + }, + { + "epoch": 0.37, + "learning_rate": 7.193041493561258e-08, + "logits/chosen": -1.9034152030944824, + "logits/rejected": -1.8947138786315918, + "logps/chosen": -319.75537109375, + "logps/rejected": -385.678955078125, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8747131824493408, + "rewards/margins": 0.253021240234375, + "rewards/rejected": 1.6216919422149658, + "step": 6441 + }, + { + "epoch": 0.37, + "learning_rate": 7.192194537983762e-08, + "logits/chosen": -1.7774112224578857, + "logits/rejected": -1.8735097646713257, + "logps/chosen": -439.4854736328125, + "logps/rejected": -447.35784912109375, + "loss": 0.4793, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8313507437705994, + "rewards/margins": -0.05176389217376709, + "rewards/rejected": 0.8831146359443665, + "step": 6442 + }, + { + "epoch": 0.37, + "learning_rate": 7.191347504528795e-08, + "logits/chosen": -1.7956089973449707, + "logits/rejected": -1.809095025062561, + "logps/chosen": -209.03903198242188, + "logps/rejected": -370.2110290527344, + "loss": 0.0686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.483221411705017, + "rewards/margins": 2.5976805686950684, + "rewards/rejected": -1.1144592761993408, + "step": 6443 + }, + { + "epoch": 0.38, + "learning_rate": 7.190500393226443e-08, + "logits/chosen": -1.6741448640823364, + "logits/rejected": -1.661522388458252, + "logps/chosen": -154.2217254638672, + "logps/rejected": -320.10888671875, + "loss": 0.2324, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.766200304031372, + "rewards/margins": 0.9721390008926392, + "rewards/rejected": 0.7940613031387329, + "step": 6444 + }, + { + "epoch": 0.38, + "learning_rate": 7.189653204106803e-08, + "logits/chosen": -2.002063035964966, + "logits/rejected": -2.0486841201782227, + "logps/chosen": -233.5954132080078, + "logps/rejected": -521.2574462890625, + "loss": 0.0533, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5860977172851562, + "rewards/margins": 2.8928298950195312, + "rewards/rejected": -1.306732177734375, + "step": 6445 + }, + { + "epoch": 0.38, + "learning_rate": 7.188805937199968e-08, + "logits/chosen": -1.9260185956954956, + "logits/rejected": -1.911293864250183, + "logps/chosen": -158.01150512695312, + "logps/rejected": -196.84768676757812, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1272109746932983, + "rewards/margins": 0.6148315072059631, + "rewards/rejected": 0.5123794674873352, + "step": 6446 + }, + { + "epoch": 0.38, + "learning_rate": 7.187958592536044e-08, + "logits/chosen": -2.1366305351257324, + "logits/rejected": -2.135228395462036, + "logps/chosen": -0.22063420712947845, + "logps/rejected": -152.09075927734375, + "loss": 0.4977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04255613312125206, + "rewards/margins": 1.007198452949524, + "rewards/rejected": -0.964642345905304, + "step": 6447 + }, + { + "epoch": 0.38, + "learning_rate": 7.187111170145125e-08, + "logits/chosen": -2.037627935409546, + "logits/rejected": -2.007631301879883, + "logps/chosen": -231.15093994140625, + "logps/rejected": -333.8832702636719, + "loss": 0.2239, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8261200189590454, + "rewards/margins": 1.0702393054962158, + "rewards/rejected": 0.7558807730674744, + "step": 6448 + }, + { + "epoch": 0.38, + "learning_rate": 7.18626367005732e-08, + "logits/chosen": -1.8041942119598389, + "logits/rejected": -1.813927173614502, + "logps/chosen": -15.478925704956055, + "logps/rejected": -120.1556625366211, + "loss": 0.6469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10990333557128906, + "rewards/margins": 0.10817375034093857, + "rewards/rejected": 0.0017295837169513106, + "step": 6449 + }, + { + "epoch": 0.38, + "learning_rate": 7.185416092302735e-08, + "logits/chosen": -2.0585193634033203, + "logits/rejected": -2.065079689025879, + "logps/chosen": -65.51532745361328, + "logps/rejected": -107.65843200683594, + "loss": 0.53, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08266525715589523, + "rewards/margins": 0.5850853323936462, + "rewards/rejected": -0.5024200677871704, + "step": 6450 + }, + { + "epoch": 0.38, + "learning_rate": 7.18456843691148e-08, + "logits/chosen": -2.194561243057251, + "logits/rejected": -2.1908185482025146, + "logps/chosen": -0.005210021045058966, + "logps/rejected": -214.4169464111328, + "loss": 0.4124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008857209468260407, + "rewards/margins": 1.911217451095581, + "rewards/rejected": -1.9103317260742188, + "step": 6451 + }, + { + "epoch": 0.38, + "learning_rate": 7.183720703913669e-08, + "logits/chosen": -2.1391165256500244, + "logits/rejected": -2.1045427322387695, + "logps/chosen": -38.166526794433594, + "logps/rejected": -276.15667724609375, + "loss": 0.2751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36988526582717896, + "rewards/margins": 2.882460117340088, + "rewards/rejected": -2.5125749111175537, + "step": 6452 + }, + { + "epoch": 0.38, + "learning_rate": 7.182872893339417e-08, + "logits/chosen": -2.2059011459350586, + "logits/rejected": -2.1858625411987305, + "logps/chosen": -8.749747939873487e-05, + "logps/rejected": -173.71905517578125, + "loss": 0.3807, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.023285201692488e-06, + "rewards/margins": 2.381817579269409, + "rewards/rejected": -2.3818085193634033, + "step": 6453 + }, + { + "epoch": 0.38, + "learning_rate": 7.182025005218842e-08, + "logits/chosen": -2.104572296142578, + "logits/rejected": -2.0719048976898193, + "logps/chosen": -148.17376708984375, + "logps/rejected": -388.2634582519531, + "loss": 0.1483, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9063094854354858, + "rewards/margins": 1.3402481079101562, + "rewards/rejected": 0.5660614371299744, + "step": 6454 + }, + { + "epoch": 0.38, + "learning_rate": 7.181177039582065e-08, + "logits/chosen": -2.0186591148376465, + "logits/rejected": -2.006582498550415, + "logps/chosen": -1.7020655870437622, + "logps/rejected": -287.6576843261719, + "loss": 0.3781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1228313073515892, + "rewards/margins": 2.030512571334839, + "rewards/rejected": -1.9076813459396362, + "step": 6455 + }, + { + "epoch": 0.38, + "learning_rate": 7.180328996459211e-08, + "logits/chosen": -1.980864405632019, + "logits/rejected": -1.9836210012435913, + "logps/chosen": -132.3404541015625, + "logps/rejected": -299.9249267578125, + "loss": 0.2411, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1591171026229858, + "rewards/margins": 1.028202772140503, + "rewards/rejected": 0.13091431558132172, + "step": 6456 + }, + { + "epoch": 0.38, + "learning_rate": 7.179480875880404e-08, + "logits/chosen": -2.116102933883667, + "logits/rejected": -2.1109511852264404, + "logps/chosen": -40.734066009521484, + "logps/rejected": -230.28634643554688, + "loss": 0.2403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.784259021282196, + "rewards/margins": 1.7021605968475342, + "rewards/rejected": -0.9179016351699829, + "step": 6457 + }, + { + "epoch": 0.38, + "learning_rate": 7.178632677875777e-08, + "logits/chosen": -1.9664673805236816, + "logits/rejected": -1.966105580329895, + "logps/chosen": -17.38968276977539, + "logps/rejected": -81.866943359375, + "loss": 0.846, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.16876220703125, + "rewards/margins": -0.44420701265335083, + "rewards/rejected": 0.27544480562210083, + "step": 6458 + }, + { + "epoch": 0.38, + "learning_rate": 7.17778440247546e-08, + "logits/chosen": -2.03125262260437, + "logits/rejected": -2.030363082885742, + "logps/chosen": -229.90750122070312, + "logps/rejected": -403.58831787109375, + "loss": 0.1718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7192779779434204, + "rewards/margins": 2.058947801589966, + "rewards/rejected": -1.3396698236465454, + "step": 6459 + }, + { + "epoch": 0.38, + "learning_rate": 7.17693604970959e-08, + "logits/chosen": -1.9652763605117798, + "logits/rejected": -1.960057258605957, + "logps/chosen": -63.51819610595703, + "logps/rejected": -111.88064575195312, + "loss": 0.6152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1314384490251541, + "rewards/margins": 0.4351486563682556, + "rewards/rejected": -0.5665870904922485, + "step": 6460 + }, + { + "epoch": 0.38, + "learning_rate": 7.176087619608301e-08, + "logits/chosen": -1.9948192834854126, + "logits/rejected": -1.9882267713546753, + "logps/chosen": -14.681741714477539, + "logps/rejected": -135.20584106445312, + "loss": 0.4963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028206443414092064, + "rewards/margins": 0.9614496231079102, + "rewards/rejected": -0.9896560907363892, + "step": 6461 + }, + { + "epoch": 0.38, + "learning_rate": 7.175239112201735e-08, + "logits/chosen": -2.0695390701293945, + "logits/rejected": -2.068578004837036, + "logps/chosen": -23.250303268432617, + "logps/rejected": -246.3548126220703, + "loss": 0.3833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37147387862205505, + "rewards/margins": 1.1467835903167725, + "rewards/rejected": -0.7753097414970398, + "step": 6462 + }, + { + "epoch": 0.38, + "learning_rate": 7.174390527520037e-08, + "logits/chosen": -1.780731201171875, + "logits/rejected": -1.7482988834381104, + "logps/chosen": -204.01034545898438, + "logps/rejected": -400.44549560546875, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.216998338699341, + "rewards/margins": 2.961996555328369, + "rewards/rejected": -0.7449981570243835, + "step": 6463 + }, + { + "epoch": 0.38, + "learning_rate": 7.173541865593351e-08, + "logits/chosen": -2.051133155822754, + "logits/rejected": -2.0524613857269287, + "logps/chosen": -22.48224639892578, + "logps/rejected": -70.40784454345703, + "loss": 0.3954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14133338630199432, + "rewards/margins": 1.409719467163086, + "rewards/rejected": -1.2683861255645752, + "step": 6464 + }, + { + "epoch": 0.38, + "learning_rate": 7.172693126451826e-08, + "logits/chosen": -2.109050989151001, + "logits/rejected": -2.111501455307007, + "logps/chosen": -3.1994035243988037, + "logps/rejected": -211.14523315429688, + "loss": 0.3614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027965283021330833, + "rewards/margins": 3.836441993713379, + "rewards/rejected": -3.8644073009490967, + "step": 6465 + }, + { + "epoch": 0.38, + "learning_rate": 7.171844310125612e-08, + "logits/chosen": -1.9782776832580566, + "logits/rejected": -1.9755988121032715, + "logps/chosen": -24.542234420776367, + "logps/rejected": -210.48825073242188, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1271982192993164, + "rewards/margins": 1.8010263442993164, + "rewards/rejected": -1.673828125, + "step": 6466 + }, + { + "epoch": 0.38, + "learning_rate": 7.170995416644865e-08, + "logits/chosen": -2.0110485553741455, + "logits/rejected": -2.0205507278442383, + "logps/chosen": -220.4254150390625, + "logps/rejected": -415.220703125, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1861023902893066, + "rewards/margins": 2.040890693664551, + "rewards/rejected": 0.14521180093288422, + "step": 6467 + }, + { + "epoch": 0.38, + "learning_rate": 7.170146446039742e-08, + "logits/chosen": -1.9590082168579102, + "logits/rejected": -1.943593144416809, + "logps/chosen": -23.234874725341797, + "logps/rejected": -236.3369140625, + "loss": 0.5591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6960150003433228, + "rewards/margins": 4.37470817565918, + "rewards/rejected": -5.070723056793213, + "step": 6468 + }, + { + "epoch": 0.38, + "learning_rate": 7.169297398340402e-08, + "logits/chosen": -1.964837670326233, + "logits/rejected": -1.9675707817077637, + "logps/chosen": -149.25177001953125, + "logps/rejected": -190.07833862304688, + "loss": 3.3129, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.489265441894531, + "rewards/margins": -1.9391462802886963, + "rewards/rejected": -2.550119161605835, + "step": 6469 + }, + { + "epoch": 0.38, + "learning_rate": 7.168448273577007e-08, + "logits/chosen": -1.9019314050674438, + "logits/rejected": -1.8990973234176636, + "logps/chosen": -3.3249173164367676, + "logps/rejected": -170.26658630371094, + "loss": 0.6466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061020947992801666, + "rewards/margins": 0.19149675965309143, + "rewards/rejected": -0.2525177001953125, + "step": 6470 + }, + { + "epoch": 0.38, + "learning_rate": 7.167599071779723e-08, + "logits/chosen": -2.034039258956909, + "logits/rejected": -2.0305638313293457, + "logps/chosen": -12.57162857055664, + "logps/rejected": -108.89877319335938, + "loss": 0.4484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3050669729709625, + "rewards/margins": 0.8515521287918091, + "rewards/rejected": -0.5464851260185242, + "step": 6471 + }, + { + "epoch": 0.38, + "learning_rate": 7.166749792978716e-08, + "logits/chosen": -1.859924077987671, + "logits/rejected": -1.8608810901641846, + "logps/chosen": -23.66622543334961, + "logps/rejected": -144.88165283203125, + "loss": 0.471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08206844329833984, + "rewards/margins": 1.2025507688522339, + "rewards/rejected": -1.2846192121505737, + "step": 6472 + }, + { + "epoch": 0.38, + "learning_rate": 7.165900437204157e-08, + "logits/chosen": -1.9216406345367432, + "logits/rejected": -1.9181932210922241, + "logps/chosen": -0.0003358858812134713, + "logps/rejected": -164.28253173828125, + "loss": 0.3654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.864044133981224e-05, + "rewards/margins": 2.973339557647705, + "rewards/rejected": -2.973358154296875, + "step": 6473 + }, + { + "epoch": 0.38, + "learning_rate": 7.165051004486221e-08, + "logits/chosen": -1.8211545944213867, + "logits/rejected": -1.7599375247955322, + "logps/chosen": -244.13998413085938, + "logps/rejected": -599.2919921875, + "loss": 0.1917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.596368432044983, + "rewards/margins": 1.111047387123108, + "rewards/rejected": 0.485321044921875, + "step": 6474 + }, + { + "epoch": 0.38, + "learning_rate": 7.164201494855085e-08, + "logits/chosen": -1.8444017171859741, + "logits/rejected": -1.8372561931610107, + "logps/chosen": -55.04645919799805, + "logps/rejected": -265.5319519042969, + "loss": 0.4933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.437482088804245, + "rewards/margins": 2.5764338970184326, + "rewards/rejected": -3.013916015625, + "step": 6475 + }, + { + "epoch": 0.38, + "learning_rate": 7.163351908340922e-08, + "logits/chosen": -2.028237819671631, + "logits/rejected": -2.029294729232788, + "logps/chosen": -5.379767417907715, + "logps/rejected": -68.8889389038086, + "loss": 0.6499, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.670288040462765e-06, + "rewards/margins": 0.17252007126808167, + "rewards/rejected": -0.17252273857593536, + "step": 6476 + }, + { + "epoch": 0.38, + "learning_rate": 7.162502244973919e-08, + "logits/chosen": -1.9293947219848633, + "logits/rejected": -1.9081470966339111, + "logps/chosen": -195.68081665039062, + "logps/rejected": -350.0730285644531, + "loss": 0.2164, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2066971063613892, + "rewards/margins": 1.0250076055526733, + "rewards/rejected": 0.18168945610523224, + "step": 6477 + }, + { + "epoch": 0.38, + "learning_rate": 7.161652504784257e-08, + "logits/chosen": -1.9959442615509033, + "logits/rejected": -1.9663716554641724, + "logps/chosen": -315.850341796875, + "logps/rejected": -605.048828125, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5499541759490967, + "rewards/margins": 2.7635650634765625, + "rewards/rejected": -0.21361084282398224, + "step": 6478 + }, + { + "epoch": 0.38, + "learning_rate": 7.160802687802125e-08, + "logits/chosen": -1.900781512260437, + "logits/rejected": -1.8862556219100952, + "logps/chosen": -340.6729431152344, + "logps/rejected": -489.1152648925781, + "loss": 0.5705, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6780914068222046, + "rewards/margins": -0.6532348394393921, + "rewards/rejected": 2.3313262462615967, + "step": 6479 + }, + { + "epoch": 0.38, + "learning_rate": 7.159952794057713e-08, + "logits/chosen": -2.1682300567626953, + "logits/rejected": -2.1612370014190674, + "logps/chosen": -122.28096008300781, + "logps/rejected": -354.93658447265625, + "loss": 0.1245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.986602783203125, + "rewards/margins": 4.38897705078125, + "rewards/rejected": -3.402374267578125, + "step": 6480 + }, + { + "epoch": 0.38, + "learning_rate": 7.159102823581211e-08, + "logits/chosen": -1.9323469400405884, + "logits/rejected": -1.925293207168579, + "logps/chosen": -238.28671264648438, + "logps/rejected": -291.2041015625, + "loss": 0.4022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.256997674703598, + "rewards/margins": 0.46041566133499146, + "rewards/rejected": -0.20341797173023224, + "step": 6481 + }, + { + "epoch": 0.38, + "learning_rate": 7.158252776402814e-08, + "logits/chosen": -1.8963422775268555, + "logits/rejected": -1.9159799814224243, + "logps/chosen": -186.35174560546875, + "logps/rejected": -261.68817138671875, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5082367658615112, + "rewards/margins": 0.9807495474815369, + "rewards/rejected": 0.5274872183799744, + "step": 6482 + }, + { + "epoch": 0.38, + "learning_rate": 7.157402652552724e-08, + "logits/chosen": -2.063236951828003, + "logits/rejected": -2.054173707962036, + "logps/chosen": -96.94817352294922, + "logps/rejected": -390.15313720703125, + "loss": 0.3867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27543869614601135, + "rewards/margins": 4.93602991104126, + "rewards/rejected": -5.211468696594238, + "step": 6483 + }, + { + "epoch": 0.38, + "learning_rate": 7.156552452061137e-08, + "logits/chosen": -2.0153613090515137, + "logits/rejected": -2.000200033187866, + "logps/chosen": -35.64411544799805, + "logps/rejected": -139.25949096679688, + "loss": 0.5076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14898720383644104, + "rewards/margins": 1.0307095050811768, + "rewards/rejected": -1.1796966791152954, + "step": 6484 + }, + { + "epoch": 0.38, + "learning_rate": 7.155702174958261e-08, + "logits/chosen": -2.1009857654571533, + "logits/rejected": -2.1003265380859375, + "logps/chosen": -24.56463623046875, + "logps/rejected": -201.1483154296875, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37604257464408875, + "rewards/margins": 2.200514554977417, + "rewards/rejected": -1.8244720697402954, + "step": 6485 + }, + { + "epoch": 0.38, + "learning_rate": 7.154851821274295e-08, + "logits/chosen": -2.210493326187134, + "logits/rejected": -2.2094740867614746, + "logps/chosen": -0.00030085325124673545, + "logps/rejected": -93.98394775390625, + "loss": 0.6011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.861808727378957e-05, + "rewards/margins": 0.4091801643371582, + "rewards/rejected": -0.40914154052734375, + "step": 6486 + }, + { + "epoch": 0.38, + "learning_rate": 7.154001391039454e-08, + "logits/chosen": -2.221083641052246, + "logits/rejected": -2.2190186977386475, + "logps/chosen": -41.87592315673828, + "logps/rejected": -259.41412353515625, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31152573227882385, + "rewards/margins": 1.3989921808242798, + "rewards/rejected": -1.0874664783477783, + "step": 6487 + }, + { + "epoch": 0.38, + "learning_rate": 7.153150884283947e-08, + "logits/chosen": -2.0025570392608643, + "logits/rejected": -1.9974654912948608, + "logps/chosen": -9.427175521850586, + "logps/rejected": -193.46519470214844, + "loss": 0.4488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011084556579589844, + "rewards/margins": 1.4587652683258057, + "rewards/rejected": -1.4476807117462158, + "step": 6488 + }, + { + "epoch": 0.38, + "learning_rate": 7.152300301037988e-08, + "logits/chosen": -2.0325851440429688, + "logits/rejected": -2.0339035987854004, + "logps/chosen": -26.11139678955078, + "logps/rejected": -94.31556701660156, + "loss": 0.4618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09210338443517685, + "rewards/margins": 1.664689064025879, + "rewards/rejected": -1.756792426109314, + "step": 6489 + }, + { + "epoch": 0.38, + "learning_rate": 7.151449641331795e-08, + "logits/chosen": -1.8585538864135742, + "logits/rejected": -1.8442935943603516, + "logps/chosen": -161.4766845703125, + "logps/rejected": -226.08380126953125, + "loss": 0.1355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4647995233535767, + "rewards/margins": 2.1623611450195312, + "rewards/rejected": -0.6975616812705994, + "step": 6490 + }, + { + "epoch": 0.38, + "learning_rate": 7.150598905195587e-08, + "logits/chosen": -2.0635316371917725, + "logits/rejected": -2.0607898235321045, + "logps/chosen": -0.0003736634098459035, + "logps/rejected": -114.55443572998047, + "loss": 0.4893, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.297510895412415e-05, + "rewards/margins": 1.1038142442703247, + "rewards/rejected": -1.1037712097167969, + "step": 6491 + }, + { + "epoch": 0.38, + "learning_rate": 7.149748092659584e-08, + "logits/chosen": -1.9820948839187622, + "logits/rejected": -1.975501537322998, + "logps/chosen": -30.347801208496094, + "logps/rejected": -98.48788452148438, + "loss": 0.5301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.241871640086174, + "rewards/margins": 0.40576475858688354, + "rewards/rejected": -0.16389313340187073, + "step": 6492 + }, + { + "epoch": 0.38, + "learning_rate": 7.148897203754015e-08, + "logits/chosen": -1.9379829168319702, + "logits/rejected": -1.997434139251709, + "logps/chosen": -246.56521606445312, + "logps/rejected": -386.42486572265625, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3345947265625, + "rewards/margins": 4.517694473266602, + "rewards/rejected": -2.1830995082855225, + "step": 6493 + }, + { + "epoch": 0.38, + "learning_rate": 7.148046238509107e-08, + "logits/chosen": -2.249042272567749, + "logits/rejected": -2.248924732208252, + "logps/chosen": -65.31236267089844, + "logps/rejected": -251.65634155273438, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37766724824905396, + "rewards/margins": 2.725088596343994, + "rewards/rejected": -2.347421407699585, + "step": 6494 + }, + { + "epoch": 0.38, + "learning_rate": 7.147195196955086e-08, + "logits/chosen": -1.9920330047607422, + "logits/rejected": -1.99351966381073, + "logps/chosen": -29.31868553161621, + "logps/rejected": -111.12789154052734, + "loss": 0.4818, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5463300943374634, + "rewards/margins": 0.3170499801635742, + "rewards/rejected": 0.22928009927272797, + "step": 6495 + }, + { + "epoch": 0.38, + "learning_rate": 7.146344079122192e-08, + "logits/chosen": -2.005425453186035, + "logits/rejected": -2.0085408687591553, + "logps/chosen": -0.000425534148234874, + "logps/rejected": -145.99514770507812, + "loss": 0.6877, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5376060875714757e-05, + "rewards/margins": 0.024013319984078407, + "rewards/rejected": -0.02403869666159153, + "step": 6496 + }, + { + "epoch": 0.38, + "learning_rate": 7.145492885040656e-08, + "logits/chosen": -2.0701286792755127, + "logits/rejected": -2.073817014694214, + "logps/chosen": -18.358013153076172, + "logps/rejected": -42.47526168823242, + "loss": 0.6754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04974250867962837, + "rewards/margins": 0.0723140686750412, + "rewards/rejected": -0.12205658107995987, + "step": 6497 + }, + { + "epoch": 0.38, + "learning_rate": 7.144641614740719e-08, + "logits/chosen": -2.0636141300201416, + "logits/rejected": -2.071406602859497, + "logps/chosen": -213.9926300048828, + "logps/rejected": -346.19305419921875, + "loss": 0.0821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8773285150527954, + "rewards/margins": 2.1673614978790283, + "rewards/rejected": -0.2900329530239105, + "step": 6498 + }, + { + "epoch": 0.38, + "learning_rate": 7.143790268252619e-08, + "logits/chosen": -1.9598114490509033, + "logits/rejected": -1.9596480131149292, + "logps/chosen": -0.0005775379831902683, + "logps/rejected": -111.36246490478516, + "loss": 0.4433, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3520827855682e-05, + "rewards/margins": 1.5320191383361816, + "rewards/rejected": -1.5320526361465454, + "step": 6499 + }, + { + "epoch": 0.38, + "learning_rate": 7.142938845606605e-08, + "logits/chosen": -1.9582041501998901, + "logits/rejected": -1.9545834064483643, + "logps/chosen": -18.332386016845703, + "logps/rejected": -227.61895751953125, + "loss": 0.234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6157623529434204, + "rewards/margins": 1.754663109779358, + "rewards/rejected": -1.1389007568359375, + "step": 6500 + }, + { + "epoch": 0.38, + "learning_rate": 7.142087346832919e-08, + "logits/chosen": -2.1131420135498047, + "logits/rejected": -2.106386423110962, + "logps/chosen": -32.04894256591797, + "logps/rejected": -145.15524291992188, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.349029541015625, + "rewards/margins": 1.626580834388733, + "rewards/rejected": -1.277551293373108, + "step": 6501 + }, + { + "epoch": 0.38, + "learning_rate": 7.141235771961814e-08, + "logits/chosen": -1.798638105392456, + "logits/rejected": -1.8201180696487427, + "logps/chosen": -184.04336547851562, + "logps/rejected": -371.1535949707031, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1054763793945312, + "rewards/margins": 0.29548490047454834, + "rewards/rejected": 0.8099914789199829, + "step": 6502 + }, + { + "epoch": 0.38, + "learning_rate": 7.140384121023537e-08, + "logits/chosen": -2.075777530670166, + "logits/rejected": -2.073806047439575, + "logps/chosen": -40.303741455078125, + "logps/rejected": -159.5157470703125, + "loss": 0.3741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22340011596679688, + "rewards/margins": 1.788683295249939, + "rewards/rejected": -1.565283179283142, + "step": 6503 + }, + { + "epoch": 0.38, + "learning_rate": 7.139532394048349e-08, + "logits/chosen": -2.074554681777954, + "logits/rejected": -2.0749690532684326, + "logps/chosen": -70.14653778076172, + "logps/rejected": -197.42202758789062, + "loss": 0.2166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7059532403945923, + "rewards/margins": 2.2539024353027344, + "rewards/rejected": -1.547949194908142, + "step": 6504 + }, + { + "epoch": 0.38, + "learning_rate": 7.138680591066504e-08, + "logits/chosen": -2.034359931945801, + "logits/rejected": -2.032594680786133, + "logps/chosen": -9.819053649902344, + "logps/rejected": -123.77867126464844, + "loss": 0.506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036590576171875, + "rewards/margins": 0.544604480266571, + "rewards/rejected": -0.508013904094696, + "step": 6505 + }, + { + "epoch": 0.38, + "learning_rate": 7.137828712108263e-08, + "logits/chosen": -1.9862079620361328, + "logits/rejected": -1.9832547903060913, + "logps/chosen": -179.67776489257812, + "logps/rejected": -292.9507751464844, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4108597040176392, + "rewards/margins": 0.4915512204170227, + "rewards/rejected": 0.9193084836006165, + "step": 6506 + }, + { + "epoch": 0.38, + "learning_rate": 7.13697675720389e-08, + "logits/chosen": -1.8061994314193726, + "logits/rejected": -1.804835319519043, + "logps/chosen": -0.9028146266937256, + "logps/rejected": -113.14041137695312, + "loss": 0.3973, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16895747184753418, + "rewards/margins": 1.4169670343399048, + "rewards/rejected": -1.2480095624923706, + "step": 6507 + }, + { + "epoch": 0.38, + "learning_rate": 7.136124726383649e-08, + "logits/chosen": -2.081566572189331, + "logits/rejected": -2.081144332885742, + "logps/chosen": -0.0004122712998650968, + "logps/rejected": -158.33396911621094, + "loss": 0.3659, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6398207410238683e-05, + "rewards/margins": 3.0579097270965576, + "rewards/rejected": -3.057936191558838, + "step": 6508 + }, + { + "epoch": 0.38, + "learning_rate": 7.135272619677808e-08, + "logits/chosen": -2.1829802989959717, + "logits/rejected": -2.1559507846832275, + "logps/chosen": -53.448150634765625, + "logps/rejected": -304.9918212890625, + "loss": 0.2296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6085441708564758, + "rewards/margins": 2.7067863941192627, + "rewards/rejected": -2.0982422828674316, + "step": 6509 + }, + { + "epoch": 0.38, + "learning_rate": 7.134420437116639e-08, + "logits/chosen": -1.9947142601013184, + "logits/rejected": -1.9943400621414185, + "logps/chosen": -54.45167541503906, + "logps/rejected": -93.28353881835938, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6149253845214844, + "rewards/margins": 1.0690116882324219, + "rewards/rejected": -0.4540863037109375, + "step": 6510 + }, + { + "epoch": 0.38, + "learning_rate": 7.133568178730416e-08, + "logits/chosen": -1.9859663248062134, + "logits/rejected": -1.9868991374969482, + "logps/chosen": -49.6304931640625, + "logps/rejected": -417.71746826171875, + "loss": 0.1348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.978009045124054, + "rewards/margins": 4.3955230712890625, + "rewards/rejected": -3.4175140857696533, + "step": 6511 + }, + { + "epoch": 0.38, + "learning_rate": 7.132715844549414e-08, + "logits/chosen": -2.0048398971557617, + "logits/rejected": -2.0559544563293457, + "logps/chosen": -330.0097351074219, + "logps/rejected": -405.50677490234375, + "loss": 0.2445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10718689113855362, + "rewards/margins": 2.2493133544921875, + "rewards/rejected": -2.1421265602111816, + "step": 6512 + }, + { + "epoch": 0.38, + "learning_rate": 7.131863434603914e-08, + "logits/chosen": -2.071843147277832, + "logits/rejected": -2.0741379261016846, + "logps/chosen": -93.40573120117188, + "logps/rejected": -206.77554321289062, + "loss": 0.47, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033350374549627304, + "rewards/margins": 1.1025093793869019, + "rewards/rejected": -1.135859727859497, + "step": 6513 + }, + { + "epoch": 0.38, + "learning_rate": 7.131010948924196e-08, + "logits/chosen": -2.013488531112671, + "logits/rejected": -2.0088326930999756, + "logps/chosen": -50.5529899597168, + "logps/rejected": -181.2400665283203, + "loss": 0.5195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5831878781318665, + "rewards/margins": 0.0685928463935852, + "rewards/rejected": 0.5145950317382812, + "step": 6514 + }, + { + "epoch": 0.38, + "learning_rate": 7.130158387540548e-08, + "logits/chosen": -1.7956756353378296, + "logits/rejected": -1.79548180103302, + "logps/chosen": -5.662347316741943, + "logps/rejected": -103.91043853759766, + "loss": 0.578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07512059062719345, + "rewards/margins": 0.36901557445526123, + "rewards/rejected": -0.2938949763774872, + "step": 6515 + }, + { + "epoch": 0.38, + "learning_rate": 7.12930575048325e-08, + "logits/chosen": -2.099123477935791, + "logits/rejected": -2.066370725631714, + "logps/chosen": -7.2708210945129395, + "logps/rejected": -245.914306640625, + "loss": 0.4353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07441677898168564, + "rewards/margins": 1.7634494304656982, + "rewards/rejected": -1.837866187095642, + "step": 6516 + }, + { + "epoch": 0.38, + "learning_rate": 7.128453037782599e-08, + "logits/chosen": -2.024980068206787, + "logits/rejected": -2.0317881107330322, + "logps/chosen": -203.4200439453125, + "logps/rejected": -264.6203308105469, + "loss": 0.1206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6114944219589233, + "rewards/margins": 1.8633438348770142, + "rewards/rejected": -0.25184938311576843, + "step": 6517 + }, + { + "epoch": 0.38, + "learning_rate": 7.127600249468883e-08, + "logits/chosen": -1.9932466745376587, + "logits/rejected": -1.9996840953826904, + "logps/chosen": -212.11624145507812, + "logps/rejected": -309.91387939453125, + "loss": 0.0983, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6906601190567017, + "rewards/margins": 1.872822642326355, + "rewards/rejected": -0.18216247856616974, + "step": 6518 + }, + { + "epoch": 0.38, + "learning_rate": 7.1267473855724e-08, + "logits/chosen": -2.1213550567626953, + "logits/rejected": -2.116533041000366, + "logps/chosen": -0.003011378925293684, + "logps/rejected": -225.57354736328125, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020195243996568024, + "rewards/margins": 3.4698054790496826, + "rewards/rejected": -3.4700074195861816, + "step": 6519 + }, + { + "epoch": 0.38, + "learning_rate": 7.125894446123447e-08, + "logits/chosen": -1.895822525024414, + "logits/rejected": -1.9068094491958618, + "logps/chosen": -224.8714599609375, + "logps/rejected": -434.29583740234375, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3610000610351562, + "rewards/margins": 3.5040817260742188, + "rewards/rejected": -1.1430816650390625, + "step": 6520 + }, + { + "epoch": 0.38, + "learning_rate": 7.125041431152325e-08, + "logits/chosen": -1.9497686624526978, + "logits/rejected": -1.945159912109375, + "logps/chosen": -46.085750579833984, + "logps/rejected": -107.67716217041016, + "loss": 0.4347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3579147458076477, + "rewards/margins": 0.8278244137763977, + "rewards/rejected": -0.46990966796875, + "step": 6521 + }, + { + "epoch": 0.38, + "learning_rate": 7.124188340689334e-08, + "logits/chosen": -2.082097291946411, + "logits/rejected": -2.085345506668091, + "logps/chosen": -189.23175048828125, + "logps/rejected": -297.433837890625, + "loss": 0.0819, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.029650926589966, + "rewards/margins": 2.4760193824768066, + "rewards/rejected": -0.44636842608451843, + "step": 6522 + }, + { + "epoch": 0.38, + "learning_rate": 7.123335174764783e-08, + "logits/chosen": -1.8283567428588867, + "logits/rejected": -1.8141802549362183, + "logps/chosen": -150.68032836914062, + "logps/rejected": -295.3048400878906, + "loss": 0.0911, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.266409397125244, + "rewards/margins": 2.3876373767852783, + "rewards/rejected": -0.12122803181409836, + "step": 6523 + }, + { + "epoch": 0.38, + "learning_rate": 7.122481933408983e-08, + "logits/chosen": -1.88948392868042, + "logits/rejected": -1.9012328386306763, + "logps/chosen": -252.00296020507812, + "logps/rejected": -304.4451599121094, + "loss": 0.0558, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6721619367599487, + "rewards/margins": 3.4371461868286133, + "rewards/rejected": -1.764984130859375, + "step": 6524 + }, + { + "epoch": 0.38, + "learning_rate": 7.121628616652241e-08, + "logits/chosen": -2.090860366821289, + "logits/rejected": -2.0845072269439697, + "logps/chosen": -3.652045488357544, + "logps/rejected": -278.582275390625, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14796507358551025, + "rewards/margins": 4.388396263122559, + "rewards/rejected": -4.240431308746338, + "step": 6525 + }, + { + "epoch": 0.38, + "learning_rate": 7.120775224524871e-08, + "logits/chosen": -2.201003313064575, + "logits/rejected": -2.192826509475708, + "logps/chosen": -45.720211029052734, + "logps/rejected": -275.06585693359375, + "loss": 0.4401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27366524934768677, + "rewards/margins": 4.124430179595947, + "rewards/rejected": -4.398095607757568, + "step": 6526 + }, + { + "epoch": 0.38, + "learning_rate": 7.119921757057195e-08, + "logits/chosen": -1.9959423542022705, + "logits/rejected": -1.9775917530059814, + "logps/chosen": -4.018319606781006, + "logps/rejected": -219.10867309570312, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036684393882751465, + "rewards/margins": 3.9435954093933105, + "rewards/rejected": -3.9802796840667725, + "step": 6527 + }, + { + "epoch": 0.38, + "learning_rate": 7.119068214279524e-08, + "logits/chosen": -2.051736354827881, + "logits/rejected": -2.034134864807129, + "logps/chosen": -17.227563858032227, + "logps/rejected": -135.68191528320312, + "loss": 0.4521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02943859063088894, + "rewards/margins": 0.9304273724555969, + "rewards/rejected": -0.900988757610321, + "step": 6528 + }, + { + "epoch": 0.38, + "learning_rate": 7.118214596222189e-08, + "logits/chosen": -1.9300318956375122, + "logits/rejected": -1.9890762567520142, + "logps/chosen": -284.3398132324219, + "logps/rejected": -237.58966064453125, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2460358142852783, + "rewards/margins": 3.5232362747192383, + "rewards/rejected": -1.2772003412246704, + "step": 6529 + }, + { + "epoch": 0.38, + "learning_rate": 7.117360902915507e-08, + "logits/chosen": -2.006605863571167, + "logits/rejected": -1.9983872175216675, + "logps/chosen": -0.00014125791494734585, + "logps/rejected": -207.76927185058594, + "loss": 0.3407, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.554803919949336e-06, + "rewards/margins": 3.859987497329712, + "rewards/rejected": -3.8599929809570312, + "step": 6530 + }, + { + "epoch": 0.38, + "learning_rate": 7.116507134389808e-08, + "logits/chosen": -1.9086496829986572, + "logits/rejected": -1.8923728466033936, + "logps/chosen": -215.02633666992188, + "logps/rejected": -263.9664611816406, + "loss": 0.3763, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.090960741043091, + "rewards/margins": -0.01242375373840332, + "rewards/rejected": 2.103384494781494, + "step": 6531 + }, + { + "epoch": 0.38, + "learning_rate": 7.115653290675427e-08, + "logits/chosen": -1.9260027408599854, + "logits/rejected": -1.9322919845581055, + "logps/chosen": -7.11671746103093e-05, + "logps/rejected": -239.90452575683594, + "loss": 0.4195, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.267625627107918e-06, + "rewards/margins": 1.7836536169052124, + "rewards/rejected": -1.7836579084396362, + "step": 6532 + }, + { + "epoch": 0.38, + "learning_rate": 7.114799371802689e-08, + "logits/chosen": -1.9761340618133545, + "logits/rejected": -1.9739856719970703, + "logps/chosen": -24.185039520263672, + "logps/rejected": -87.7651596069336, + "loss": 0.4855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09763278812170029, + "rewards/margins": 0.6984819173812866, + "rewards/rejected": -0.6008491516113281, + "step": 6533 + }, + { + "epoch": 0.38, + "learning_rate": 7.113945377801934e-08, + "logits/chosen": -2.019277334213257, + "logits/rejected": -2.0144128799438477, + "logps/chosen": -51.34097671508789, + "logps/rejected": -95.05855560302734, + "loss": 0.7374, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.713867333019152e-05, + "rewards/margins": 0.00766830425709486, + "rewards/rejected": -0.0076011656783521175, + "step": 6534 + }, + { + "epoch": 0.38, + "learning_rate": 7.113091308703497e-08, + "logits/chosen": -1.9074667692184448, + "logits/rejected": -1.9112606048583984, + "logps/chosen": -238.22512817382812, + "logps/rejected": -195.62698364257812, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.581146240234375, + "rewards/margins": 0.4266265630722046, + "rewards/rejected": 1.1545196771621704, + "step": 6535 + }, + { + "epoch": 0.38, + "learning_rate": 7.112237164537722e-08, + "logits/chosen": -2.038743257522583, + "logits/rejected": -2.044079542160034, + "logps/chosen": -0.4741266965866089, + "logps/rejected": -59.6515998840332, + "loss": 0.4409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008720717392861843, + "rewards/margins": 1.5633724927902222, + "rewards/rejected": -1.5720932483673096, + "step": 6536 + }, + { + "epoch": 0.38, + "learning_rate": 7.111382945334949e-08, + "logits/chosen": -1.8676085472106934, + "logits/rejected": -1.8648180961608887, + "logps/chosen": -0.0015193700091913342, + "logps/rejected": -255.62777709960938, + "loss": 0.3404, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.962610986083746e-05, + "rewards/margins": 3.8545467853546143, + "rewards/rejected": -3.854527235031128, + "step": 6537 + }, + { + "epoch": 0.38, + "learning_rate": 7.110528651125529e-08, + "logits/chosen": -2.0745818614959717, + "logits/rejected": -2.0634219646453857, + "logps/chosen": -1.4762988090515137, + "logps/rejected": -188.65365600585938, + "loss": 0.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03007814846932888, + "rewards/margins": 2.2944350242614746, + "rewards/rejected": -2.3245131969451904, + "step": 6538 + }, + { + "epoch": 0.38, + "learning_rate": 7.109674281939806e-08, + "logits/chosen": -2.006220579147339, + "logits/rejected": -1.967880368232727, + "logps/chosen": -284.435546875, + "logps/rejected": -536.6441650390625, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.653424024581909, + "rewards/margins": 5.611462593078613, + "rewards/rejected": -2.958038330078125, + "step": 6539 + }, + { + "epoch": 0.38, + "learning_rate": 7.108819837808133e-08, + "logits/chosen": -2.1946537494659424, + "logits/rejected": -2.2021312713623047, + "logps/chosen": -96.41856384277344, + "logps/rejected": -131.22726440429688, + "loss": 0.6758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2701965272426605, + "rewards/margins": 0.5282615423202515, + "rewards/rejected": -0.7984580993652344, + "step": 6540 + }, + { + "epoch": 0.38, + "learning_rate": 7.107965318760864e-08, + "logits/chosen": -1.9684213399887085, + "logits/rejected": -1.950394630432129, + "logps/chosen": -1.269709825515747, + "logps/rejected": -386.7106628417969, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01790638081729412, + "rewards/margins": 3.5088727474212646, + "rewards/rejected": -3.5267791748046875, + "step": 6541 + }, + { + "epoch": 0.38, + "learning_rate": 7.107110724828356e-08, + "logits/chosen": -2.133657217025757, + "logits/rejected": -2.1329801082611084, + "logps/chosen": -26.161264419555664, + "logps/rejected": -227.68185424804688, + "loss": 0.27, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4530973434448242, + "rewards/margins": 2.682690382003784, + "rewards/rejected": -2.22959303855896, + "step": 6542 + }, + { + "epoch": 0.38, + "learning_rate": 7.106256056040966e-08, + "logits/chosen": -1.9303593635559082, + "logits/rejected": -1.9345085620880127, + "logps/chosen": -193.5906982421875, + "logps/rejected": -210.42959594726562, + "loss": 0.3907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0290650129318237, + "rewards/margins": 0.15535283088684082, + "rewards/rejected": 0.8737121820449829, + "step": 6543 + }, + { + "epoch": 0.38, + "learning_rate": 7.10540131242906e-08, + "logits/chosen": -2.0959904193878174, + "logits/rejected": -2.1001267433166504, + "logps/chosen": -164.55215454101562, + "logps/rejected": -386.24798583984375, + "loss": 0.0701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.25250244140625, + "rewards/margins": 3.0734071731567383, + "rewards/rejected": -1.8209046125411987, + "step": 6544 + }, + { + "epoch": 0.38, + "learning_rate": 7.104546494023e-08, + "logits/chosen": -1.9210609197616577, + "logits/rejected": -1.9749784469604492, + "logps/chosen": -213.32064819335938, + "logps/rejected": -305.77166748046875, + "loss": 0.1, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9122177362442017, + "rewards/margins": 1.8592239618301392, + "rewards/rejected": 0.0529937744140625, + "step": 6545 + }, + { + "epoch": 0.38, + "learning_rate": 7.103691600853154e-08, + "logits/chosen": -1.8643076419830322, + "logits/rejected": -1.852050542831421, + "logps/chosen": -229.4400634765625, + "logps/rejected": -479.5798034667969, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9927856922149658, + "rewards/margins": 3.4968597888946533, + "rewards/rejected": -1.5040740966796875, + "step": 6546 + }, + { + "epoch": 0.38, + "learning_rate": 7.102836632949894e-08, + "logits/chosen": -1.9474846124649048, + "logits/rejected": -2.008023977279663, + "logps/chosen": -194.56011962890625, + "logps/rejected": -433.871337890625, + "loss": 0.0567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8441590070724487, + "rewards/margins": 2.5958404541015625, + "rewards/rejected": -0.7516815066337585, + "step": 6547 + }, + { + "epoch": 0.38, + "learning_rate": 7.101981590343589e-08, + "logits/chosen": -2.1672940254211426, + "logits/rejected": -2.1377995014190674, + "logps/chosen": -96.82899475097656, + "logps/rejected": -542.7445678710938, + "loss": 0.4971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.681836724281311, + "rewards/margins": 11.269326210021973, + "rewards/rejected": -11.951163291931152, + "step": 6548 + }, + { + "epoch": 0.38, + "learning_rate": 7.101126473064616e-08, + "logits/chosen": -2.0441946983337402, + "logits/rejected": -2.0506751537323, + "logps/chosen": -27.764820098876953, + "logps/rejected": -66.75553894042969, + "loss": 0.5817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3002948760986328, + "rewards/margins": 0.7902927398681641, + "rewards/rejected": -1.0905876159667969, + "step": 6549 + }, + { + "epoch": 0.38, + "learning_rate": 7.100271281143355e-08, + "logits/chosen": -2.070981740951538, + "logits/rejected": -2.0671091079711914, + "logps/chosen": -13.607789039611816, + "logps/rejected": -107.58485412597656, + "loss": 0.6619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11566219478845596, + "rewards/margins": 0.20556509494781494, + "rewards/rejected": -0.3212272822856903, + "step": 6550 + }, + { + "epoch": 0.38, + "learning_rate": 7.09941601461018e-08, + "logits/chosen": -2.0631022453308105, + "logits/rejected": -2.053267002105713, + "logps/chosen": -6.153632164001465, + "logps/rejected": -259.00579833984375, + "loss": 0.2766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2246943563222885, + "rewards/margins": 5.029999732971191, + "rewards/rejected": -4.805305480957031, + "step": 6551 + }, + { + "epoch": 0.38, + "learning_rate": 7.098560673495482e-08, + "logits/chosen": -1.9339326620101929, + "logits/rejected": -1.9864580631256104, + "logps/chosen": -166.58468627929688, + "logps/rejected": -307.07110595703125, + "loss": 0.0721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7883682250976562, + "rewards/margins": 2.8679823875427246, + "rewards/rejected": -1.079614281654358, + "step": 6552 + }, + { + "epoch": 0.38, + "learning_rate": 7.097705257829645e-08, + "logits/chosen": -1.9838992357254028, + "logits/rejected": -1.9777276515960693, + "logps/chosen": -173.41873168945312, + "logps/rejected": -304.72900390625, + "loss": 0.3266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.829022228717804, + "rewards/margins": 0.7137817740440369, + "rewards/rejected": 0.11524047702550888, + "step": 6553 + }, + { + "epoch": 0.38, + "learning_rate": 7.096849767643054e-08, + "logits/chosen": -2.0967800617218018, + "logits/rejected": -2.0920727252960205, + "logps/chosen": -1.873267650604248, + "logps/rejected": -238.6923370361328, + "loss": 0.3999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07652000337839127, + "rewards/margins": 2.508553981781006, + "rewards/rejected": -2.585073947906494, + "step": 6554 + }, + { + "epoch": 0.38, + "learning_rate": 7.095994202966104e-08, + "logits/chosen": -1.9856845140457153, + "logits/rejected": -1.9789721965789795, + "logps/chosen": -247.40542602539062, + "logps/rejected": -327.904052734375, + "loss": 0.5296, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.005010962486267, + "rewards/margins": -0.327606201171875, + "rewards/rejected": 1.332617163658142, + "step": 6555 + }, + { + "epoch": 0.38, + "learning_rate": 7.095138563829185e-08, + "logits/chosen": -1.819034457206726, + "logits/rejected": -1.8176074028015137, + "logps/chosen": -73.86070251464844, + "logps/rejected": -131.00941467285156, + "loss": 0.4929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42979127168655396, + "rewards/margins": 0.4471115171909332, + "rewards/rejected": -0.01732025109231472, + "step": 6556 + }, + { + "epoch": 0.38, + "learning_rate": 7.094282850262698e-08, + "logits/chosen": -1.9829190969467163, + "logits/rejected": -1.9446943998336792, + "logps/chosen": -152.79293823242188, + "logps/rejected": -355.2633361816406, + "loss": 0.1269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.802204966545105, + "rewards/margins": 1.6024948358535767, + "rewards/rejected": 0.19971008598804474, + "step": 6557 + }, + { + "epoch": 0.38, + "learning_rate": 7.093427062297038e-08, + "logits/chosen": -1.8402953147888184, + "logits/rejected": -1.7916873693466187, + "logps/chosen": -219.08407592773438, + "logps/rejected": -393.338623046875, + "loss": 0.2479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08173217624425888, + "rewards/margins": 2.1875154972076416, + "rewards/rejected": -2.105783224105835, + "step": 6558 + }, + { + "epoch": 0.38, + "learning_rate": 7.092571199962611e-08, + "logits/chosen": -2.039722442626953, + "logits/rejected": -2.037989377975464, + "logps/chosen": -0.007033681031316519, + "logps/rejected": -269.17120361328125, + "loss": 0.3425, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.0725568548368756e-06, + "rewards/margins": 5.117417335510254, + "rewards/rejected": -5.117410182952881, + "step": 6559 + }, + { + "epoch": 0.38, + "learning_rate": 7.091715263289817e-08, + "logits/chosen": -2.150346279144287, + "logits/rejected": -2.144338607788086, + "logps/chosen": -9.064582824707031, + "logps/rejected": -132.00245666503906, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.183990478515625, + "rewards/margins": 1.6994171142578125, + "rewards/rejected": -1.5154266357421875, + "step": 6560 + }, + { + "epoch": 0.38, + "learning_rate": 7.090859252309067e-08, + "logits/chosen": -1.9445337057113647, + "logits/rejected": -1.9341223239898682, + "logps/chosen": -4.758809566497803, + "logps/rejected": -102.35115051269531, + "loss": 0.5008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11193089932203293, + "rewards/margins": 0.8266739845275879, + "rewards/rejected": -0.7147430777549744, + "step": 6561 + }, + { + "epoch": 0.38, + "learning_rate": 7.090003167050768e-08, + "logits/chosen": -1.7625623941421509, + "logits/rejected": -1.6789737939834595, + "logps/chosen": -173.47555541992188, + "logps/rejected": -373.4127502441406, + "loss": 0.1321, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3062957525253296, + "rewards/margins": 2.5287537574768066, + "rewards/rejected": -1.2224578857421875, + "step": 6562 + }, + { + "epoch": 0.38, + "learning_rate": 7.089147007545333e-08, + "logits/chosen": -2.0995123386383057, + "logits/rejected": -2.093356132507324, + "logps/chosen": -165.68089294433594, + "logps/rejected": -261.8491516113281, + "loss": 0.2307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.881805419921875, + "rewards/margins": 1.1982390880584717, + "rewards/rejected": -0.31643372774124146, + "step": 6563 + }, + { + "epoch": 0.38, + "learning_rate": 7.088290773823176e-08, + "logits/chosen": -2.121354818344116, + "logits/rejected": -2.1018736362457275, + "logps/chosen": -0.0005962843424640596, + "logps/rejected": -229.75999450683594, + "loss": 0.3448, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.783690620679408e-05, + "rewards/margins": 4.86790657043457, + "rewards/rejected": -4.867808818817139, + "step": 6564 + }, + { + "epoch": 0.38, + "learning_rate": 7.087434465914716e-08, + "logits/chosen": -1.9608138799667358, + "logits/rejected": -1.97011137008667, + "logps/chosen": -0.34751683473587036, + "logps/rejected": -129.9471893310547, + "loss": 0.3637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03313812240958214, + "rewards/margins": 2.9165494441986084, + "rewards/rejected": -2.883411407470703, + "step": 6565 + }, + { + "epoch": 0.38, + "learning_rate": 7.086578083850375e-08, + "logits/chosen": -1.935479760169983, + "logits/rejected": -1.93948495388031, + "logps/chosen": -0.3427211344242096, + "logps/rejected": -56.91461181640625, + "loss": 0.6802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012744823470711708, + "rewards/margins": 0.10353554785251617, + "rewards/rejected": -0.11628036946058273, + "step": 6566 + }, + { + "epoch": 0.38, + "learning_rate": 7.085721627660573e-08, + "logits/chosen": -2.046536445617676, + "logits/rejected": -2.0515084266662598, + "logps/chosen": -300.4798889160156, + "logps/rejected": -371.3842468261719, + "loss": 0.0665, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6188080310821533, + "rewards/margins": 2.366973876953125, + "rewards/rejected": 0.25183412432670593, + "step": 6567 + }, + { + "epoch": 0.38, + "learning_rate": 7.084865097375736e-08, + "logits/chosen": -2.2018632888793945, + "logits/rejected": -2.193035125732422, + "logps/chosen": -36.73957824707031, + "logps/rejected": -183.00389099121094, + "loss": 0.245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5227455496788025, + "rewards/margins": 2.61385440826416, + "rewards/rejected": -2.091108798980713, + "step": 6568 + }, + { + "epoch": 0.38, + "learning_rate": 7.084008493026294e-08, + "logits/chosen": -2.0496866703033447, + "logits/rejected": -2.0326497554779053, + "logps/chosen": -151.8863525390625, + "logps/rejected": -192.58563232421875, + "loss": 0.2556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5706909894943237, + "rewards/margins": 0.6037461161613464, + "rewards/rejected": 0.9669448733329773, + "step": 6569 + }, + { + "epoch": 0.38, + "learning_rate": 7.083151814642675e-08, + "logits/chosen": -2.018002986907959, + "logits/rejected": -2.017774820327759, + "logps/chosen": -160.15585327148438, + "logps/rejected": -268.1946105957031, + "loss": 0.5072, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.372395396232605, + "rewards/margins": -0.31705474853515625, + "rewards/rejected": 1.6894501447677612, + "step": 6570 + }, + { + "epoch": 0.38, + "learning_rate": 7.082295062255314e-08, + "logits/chosen": -2.004241704940796, + "logits/rejected": -2.011766195297241, + "logps/chosen": -187.96441650390625, + "logps/rejected": -325.46783447265625, + "loss": 0.1724, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.954975962638855, + "rewards/margins": 1.057478427886963, + "rewards/rejected": 0.8974975943565369, + "step": 6571 + }, + { + "epoch": 0.38, + "learning_rate": 7.081438235894646e-08, + "logits/chosen": -2.0927608013153076, + "logits/rejected": -2.0895166397094727, + "logps/chosen": -0.003715875558555126, + "logps/rejected": -107.06188201904297, + "loss": 0.4813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016867338854353875, + "rewards/margins": 1.1737799644470215, + "rewards/rejected": -1.1739486455917358, + "step": 6572 + }, + { + "epoch": 0.38, + "learning_rate": 7.080581335591111e-08, + "logits/chosen": -2.011042594909668, + "logits/rejected": -1.9847952127456665, + "logps/chosen": -302.8802490234375, + "logps/rejected": -447.1095275878906, + "loss": 0.2101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0925415754318237, + "rewards/margins": 1.047134518623352, + "rewards/rejected": 0.04540710523724556, + "step": 6573 + }, + { + "epoch": 0.38, + "learning_rate": 7.079724361375151e-08, + "logits/chosen": -2.0493552684783936, + "logits/rejected": -2.0509321689605713, + "logps/chosen": -0.010596765205264091, + "logps/rejected": -140.07119750976562, + "loss": 0.5724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007333243265748024, + "rewards/margins": 0.5543570518493652, + "rewards/rejected": -0.5550903677940369, + "step": 6574 + }, + { + "epoch": 0.38, + "learning_rate": 7.078867313277207e-08, + "logits/chosen": -2.010359525680542, + "logits/rejected": -2.0027716159820557, + "logps/chosen": -66.70500183105469, + "logps/rejected": -337.115478515625, + "loss": 0.1161, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2281204462051392, + "rewards/margins": 4.424067974090576, + "rewards/rejected": -3.1959474086761475, + "step": 6575 + }, + { + "epoch": 0.38, + "learning_rate": 7.078010191327729e-08, + "logits/chosen": -1.7248623371124268, + "logits/rejected": -1.7159883975982666, + "logps/chosen": -333.67236328125, + "logps/rejected": -422.94921875, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44536134600639343, + "rewards/margins": 0.7365875244140625, + "rewards/rejected": -0.29122620820999146, + "step": 6576 + }, + { + "epoch": 0.38, + "learning_rate": 7.077152995557164e-08, + "logits/chosen": -1.9267170429229736, + "logits/rejected": -1.9291408061981201, + "logps/chosen": -13.381845474243164, + "logps/rejected": -175.80702209472656, + "loss": 0.5292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02514028549194336, + "rewards/margins": 0.8401017189025879, + "rewards/rejected": -0.8652420043945312, + "step": 6577 + }, + { + "epoch": 0.38, + "learning_rate": 7.076295725995963e-08, + "logits/chosen": -2.0037264823913574, + "logits/rejected": -2.0064337253570557, + "logps/chosen": -0.0008903839043341577, + "logps/rejected": -173.54287719726562, + "loss": 0.3476, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5997592931380495e-05, + "rewards/margins": 4.970935344696045, + "rewards/rejected": -4.97097110748291, + "step": 6578 + }, + { + "epoch": 0.38, + "learning_rate": 7.07543838267458e-08, + "logits/chosen": -2.1607022285461426, + "logits/rejected": -2.150785207748413, + "logps/chosen": -90.84822082519531, + "logps/rejected": -362.04095458984375, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14765778183937073, + "rewards/margins": 5.042768955230713, + "rewards/rejected": -4.895111083984375, + "step": 6579 + }, + { + "epoch": 0.38, + "learning_rate": 7.074580965623476e-08, + "logits/chosen": -1.823079228401184, + "logits/rejected": -1.8399171829223633, + "logps/chosen": -272.9786071777344, + "logps/rejected": -324.69232177734375, + "loss": 0.2344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8007416129112244, + "rewards/margins": 1.0602234601974487, + "rewards/rejected": -0.259481817483902, + "step": 6580 + }, + { + "epoch": 0.38, + "learning_rate": 7.073723474873108e-08, + "logits/chosen": -2.0117175579071045, + "logits/rejected": -2.010603666305542, + "logps/chosen": -32.57172393798828, + "logps/rejected": -134.28866577148438, + "loss": 0.3855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2554771602153778, + "rewards/margins": 1.432155728340149, + "rewards/rejected": -1.1766785383224487, + "step": 6581 + }, + { + "epoch": 0.38, + "learning_rate": 7.07286591045394e-08, + "logits/chosen": -1.8565099239349365, + "logits/rejected": -1.851965069770813, + "logps/chosen": -309.325927734375, + "logps/rejected": -469.44659423828125, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9637939929962158, + "rewards/margins": 2.802175998687744, + "rewards/rejected": -0.8383819460868835, + "step": 6582 + }, + { + "epoch": 0.38, + "learning_rate": 7.072008272396432e-08, + "logits/chosen": -1.9698119163513184, + "logits/rejected": -1.957985281944275, + "logps/chosen": -208.87503051757812, + "logps/rejected": -440.8409118652344, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8335511684417725, + "rewards/margins": 2.5648224353790283, + "rewards/rejected": 0.268728643655777, + "step": 6583 + }, + { + "epoch": 0.38, + "learning_rate": 7.071150560731056e-08, + "logits/chosen": -1.8941694498062134, + "logits/rejected": -1.8787983655929565, + "logps/chosen": -62.0174560546875, + "logps/rejected": -225.3267059326172, + "loss": 0.2251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8284576535224915, + "rewards/margins": 1.7079238891601562, + "rewards/rejected": -0.8794662356376648, + "step": 6584 + }, + { + "epoch": 0.38, + "learning_rate": 7.070292775488281e-08, + "logits/chosen": -2.1097750663757324, + "logits/rejected": -2.100149154663086, + "logps/chosen": -0.00016891310224309564, + "logps/rejected": -208.4867401123047, + "loss": 0.3275, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.341496373352129e-06, + "rewards/margins": 3.3454678058624268, + "rewards/rejected": -3.3454742431640625, + "step": 6585 + }, + { + "epoch": 0.38, + "learning_rate": 7.069434916698582e-08, + "logits/chosen": -1.9920698404312134, + "logits/rejected": -2.0080511569976807, + "logps/chosen": -226.8149871826172, + "logps/rejected": -399.4012756347656, + "loss": 0.0608, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4775054454803467, + "rewards/margins": 2.3561460971832275, + "rewards/rejected": 0.12135925143957138, + "step": 6586 + }, + { + "epoch": 0.38, + "learning_rate": 7.06857698439243e-08, + "logits/chosen": -2.014523983001709, + "logits/rejected": -1.9733107089996338, + "logps/chosen": -198.89276123046875, + "logps/rejected": -436.26995849609375, + "loss": 0.3096, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.908190906047821, + "rewards/margins": 0.45757749676704407, + "rewards/rejected": 0.450613409280777, + "step": 6587 + }, + { + "epoch": 0.38, + "learning_rate": 7.067718978600305e-08, + "logits/chosen": -1.9933291673660278, + "logits/rejected": -2.008587598800659, + "logps/chosen": -186.47119140625, + "logps/rejected": -308.86029052734375, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3723466396331787, + "rewards/margins": 2.303114414215088, + "rewards/rejected": 0.06923218071460724, + "step": 6588 + }, + { + "epoch": 0.38, + "learning_rate": 7.066860899352686e-08, + "logits/chosen": -1.9025558233261108, + "logits/rejected": -1.90480375289917, + "logps/chosen": -0.0031334925442934036, + "logps/rejected": -248.282958984375, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020846408733632416, + "rewards/margins": 2.032961130142212, + "rewards/rejected": -2.0331695079803467, + "step": 6589 + }, + { + "epoch": 0.38, + "learning_rate": 7.066002746680062e-08, + "logits/chosen": -1.9591405391693115, + "logits/rejected": -1.991881251335144, + "logps/chosen": -219.16909790039062, + "logps/rejected": -276.7803955078125, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.978607177734375, + "rewards/margins": 0.699237048625946, + "rewards/rejected": 0.27937012910842896, + "step": 6590 + }, + { + "epoch": 0.38, + "learning_rate": 7.06514452061291e-08, + "logits/chosen": -2.1664140224456787, + "logits/rejected": -2.1548004150390625, + "logps/chosen": -74.26502990722656, + "logps/rejected": -225.154296875, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037300873547792435, + "rewards/margins": 3.90226674079895, + "rewards/rejected": -3.8649659156799316, + "step": 6591 + }, + { + "epoch": 0.38, + "learning_rate": 7.064286221181726e-08, + "logits/chosen": -1.9861063957214355, + "logits/rejected": -1.9897961616516113, + "logps/chosen": -25.19393539428711, + "logps/rejected": -147.02899169921875, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7190170288085938, + "rewards/margins": 1.096318006515503, + "rewards/rejected": -0.37730103731155396, + "step": 6592 + }, + { + "epoch": 0.38, + "learning_rate": 7.063427848416996e-08, + "logits/chosen": -2.1244170665740967, + "logits/rejected": -2.1094119548797607, + "logps/chosen": -6.341797416098416e-05, + "logps/rejected": -271.53857421875, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1723069443833083e-07, + "rewards/margins": 6.968639850616455, + "rewards/rejected": -6.968640327453613, + "step": 6593 + }, + { + "epoch": 0.38, + "learning_rate": 7.062569402349217e-08, + "logits/chosen": -1.9787023067474365, + "logits/rejected": -1.9823614358901978, + "logps/chosen": -13.035919189453125, + "logps/rejected": -46.218624114990234, + "loss": 0.5281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09745683521032333, + "rewards/margins": 0.6349888443946838, + "rewards/rejected": -0.5375320315361023, + "step": 6594 + }, + { + "epoch": 0.38, + "learning_rate": 7.061710883008883e-08, + "logits/chosen": -2.1234958171844482, + "logits/rejected": -2.1226468086242676, + "logps/chosen": -15.030871391296387, + "logps/rejected": -139.04605102539062, + "loss": 0.5998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09403886646032333, + "rewards/margins": 0.35731610655784607, + "rewards/rejected": -0.45135498046875, + "step": 6595 + }, + { + "epoch": 0.38, + "learning_rate": 7.060852290426493e-08, + "logits/chosen": -2.0610580444335938, + "logits/rejected": -2.0449166297912598, + "logps/chosen": -88.04273986816406, + "logps/rejected": -352.76123046875, + "loss": 0.2549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3486267030239105, + "rewards/margins": 5.608740329742432, + "rewards/rejected": -5.260113716125488, + "step": 6596 + }, + { + "epoch": 0.38, + "learning_rate": 7.059993624632551e-08, + "logits/chosen": -2.1223831176757812, + "logits/rejected": -2.10884428024292, + "logps/chosen": -31.073915481567383, + "logps/rejected": -359.7444763183594, + "loss": 0.1799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8665747046470642, + "rewards/margins": 5.40062952041626, + "rewards/rejected": -4.534054756164551, + "step": 6597 + }, + { + "epoch": 0.38, + "learning_rate": 7.059134885657556e-08, + "logits/chosen": -1.8257066011428833, + "logits/rejected": -1.8170349597930908, + "logps/chosen": -192.99282836914062, + "logps/rejected": -289.656494140625, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8251098990440369, + "rewards/margins": 1.283136010169983, + "rewards/rejected": -0.45802614092826843, + "step": 6598 + }, + { + "epoch": 0.38, + "learning_rate": 7.05827607353202e-08, + "logits/chosen": -2.0593514442443848, + "logits/rejected": -2.0503334999084473, + "logps/chosen": -2.6848068237304688, + "logps/rejected": -49.50982666015625, + "loss": 0.5726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04130208492279053, + "rewards/margins": 0.478023886680603, + "rewards/rejected": -0.4367218017578125, + "step": 6599 + }, + { + "epoch": 0.38, + "learning_rate": 7.057417188286448e-08, + "logits/chosen": -1.9503058195114136, + "logits/rejected": -1.9631526470184326, + "logps/chosen": -325.6509094238281, + "logps/rejected": -558.0046997070312, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.112286329269409, + "rewards/margins": 8.431490898132324, + "rewards/rejected": -6.319204807281494, + "step": 6600 + }, + { + "epoch": 0.38, + "learning_rate": 7.056558229951355e-08, + "logits/chosen": -1.9475773572921753, + "logits/rejected": -1.9402432441711426, + "logps/chosen": -57.803871154785156, + "logps/rejected": -222.4401397705078, + "loss": 0.3193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.206828311085701, + "rewards/margins": 2.5343873500823975, + "rewards/rejected": -2.327558994293213, + "step": 6601 + }, + { + "epoch": 0.38, + "learning_rate": 7.055699198557254e-08, + "logits/chosen": -2.079416513442993, + "logits/rejected": -2.087735652923584, + "logps/chosen": -0.6946941018104553, + "logps/rejected": -210.129638671875, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006473082583397627, + "rewards/margins": 4.552841663360596, + "rewards/rejected": -4.546368598937988, + "step": 6602 + }, + { + "epoch": 0.38, + "learning_rate": 7.054840094134661e-08, + "logits/chosen": -1.7037408351898193, + "logits/rejected": -1.726204752922058, + "logps/chosen": -215.43551635742188, + "logps/rejected": -433.1590270996094, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9624847173690796, + "rewards/margins": 4.531649589538574, + "rewards/rejected": -2.569164991378784, + "step": 6603 + }, + { + "epoch": 0.38, + "learning_rate": 7.053980916714095e-08, + "logits/chosen": -1.896494746208191, + "logits/rejected": -1.8957267999649048, + "logps/chosen": -59.45106506347656, + "logps/rejected": -145.76589965820312, + "loss": 0.5041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29879266023635864, + "rewards/margins": 0.3383312225341797, + "rewards/rejected": -0.03953857347369194, + "step": 6604 + }, + { + "epoch": 0.38, + "learning_rate": 7.053121666326083e-08, + "logits/chosen": -1.7390981912612915, + "logits/rejected": -1.7482655048370361, + "logps/chosen": -180.53787231445312, + "logps/rejected": -407.3946533203125, + "loss": 0.1093, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0079773664474487, + "rewards/margins": 3.4253664016723633, + "rewards/rejected": -2.417388916015625, + "step": 6605 + }, + { + "epoch": 0.38, + "learning_rate": 7.052262343001144e-08, + "logits/chosen": -1.8936023712158203, + "logits/rejected": -1.8820942640304565, + "logps/chosen": -46.64657974243164, + "logps/rejected": -304.94317626953125, + "loss": 0.1964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7640407681465149, + "rewards/margins": 2.7749171257019043, + "rewards/rejected": -2.010876417160034, + "step": 6606 + }, + { + "epoch": 0.38, + "learning_rate": 7.05140294676981e-08, + "logits/chosen": -2.049337863922119, + "logits/rejected": -2.0219154357910156, + "logps/chosen": -150.9043731689453, + "logps/rejected": -400.02154541015625, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.084742784500122, + "rewards/margins": 5.3332719802856445, + "rewards/rejected": -3.2485291957855225, + "step": 6607 + }, + { + "epoch": 0.38, + "learning_rate": 7.050543477662607e-08, + "logits/chosen": -1.780575156211853, + "logits/rejected": -1.7797201871871948, + "logps/chosen": -43.320068359375, + "logps/rejected": -88.18620300292969, + "loss": 0.7498, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.019817352294921875, + "rewards/margins": -0.18746109306812286, + "rewards/rejected": 0.167643740773201, + "step": 6608 + }, + { + "epoch": 0.38, + "learning_rate": 7.049683935710071e-08, + "logits/chosen": -2.036168336868286, + "logits/rejected": -2.036034107208252, + "logps/chosen": -2.7360849380493164, + "logps/rejected": -1.7063391208648682, + "loss": 0.6701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01771519146859646, + "rewards/margins": 0.05964691936969757, + "rewards/rejected": -0.04193172603845596, + "step": 6609 + }, + { + "epoch": 0.38, + "learning_rate": 7.048824320942734e-08, + "logits/chosen": -1.9915021657943726, + "logits/rejected": -1.9890024662017822, + "logps/chosen": -155.58953857421875, + "logps/rejected": -245.41468811035156, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.14434814453125, + "rewards/margins": 0.27490687370300293, + "rewards/rejected": 1.869441270828247, + "step": 6610 + }, + { + "epoch": 0.38, + "learning_rate": 7.047964633391137e-08, + "logits/chosen": -2.001641273498535, + "logits/rejected": -2.014568328857422, + "logps/chosen": -75.20671081542969, + "logps/rejected": -217.32798767089844, + "loss": 0.7866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3815200924873352, + "rewards/margins": 0.1484130620956421, + "rewards/rejected": -0.5299331545829773, + "step": 6611 + }, + { + "epoch": 0.38, + "learning_rate": 7.047104873085817e-08, + "logits/chosen": -2.080420732498169, + "logits/rejected": -2.0808818340301514, + "logps/chosen": -25.934595108032227, + "logps/rejected": -334.35284423828125, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6219190955162048, + "rewards/margins": 4.178688049316406, + "rewards/rejected": -3.5567688941955566, + "step": 6612 + }, + { + "epoch": 0.38, + "learning_rate": 7.046245040057319e-08, + "logits/chosen": -2.078984022140503, + "logits/rejected": -2.038175344467163, + "logps/chosen": -262.035888671875, + "logps/rejected": -407.3995361328125, + "loss": 0.1857, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.817840576171875, + "rewards/margins": 1.058477759361267, + "rewards/rejected": 0.7593628168106079, + "step": 6613 + }, + { + "epoch": 0.38, + "learning_rate": 7.045385134336187e-08, + "logits/chosen": -2.133802890777588, + "logits/rejected": -2.129659414291382, + "logps/chosen": -21.922618865966797, + "logps/rejected": -201.79440307617188, + "loss": 0.4083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2168022245168686, + "rewards/margins": 1.6073403358459473, + "rewards/rejected": -1.3905380964279175, + "step": 6614 + }, + { + "epoch": 0.38, + "learning_rate": 7.04452515595297e-08, + "logits/chosen": -1.845568060874939, + "logits/rejected": -1.824559211730957, + "logps/chosen": -179.44375610351562, + "logps/rejected": -447.019287109375, + "loss": 0.0781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2095459699630737, + "rewards/margins": 2.904263496398926, + "rewards/rejected": -1.6947174072265625, + "step": 6615 + }, + { + "epoch": 0.39, + "learning_rate": 7.04366510493822e-08, + "logits/chosen": -1.8107407093048096, + "logits/rejected": -1.85139799118042, + "logps/chosen": -303.7052917480469, + "logps/rejected": -491.6199951171875, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3204407691955566, + "rewards/margins": 5.576074600219727, + "rewards/rejected": -2.255633592605591, + "step": 6616 + }, + { + "epoch": 0.39, + "learning_rate": 7.042804981322488e-08, + "logits/chosen": -2.139773368835449, + "logits/rejected": -2.1229407787323, + "logps/chosen": -46.81157684326172, + "logps/rejected": -217.72305297851562, + "loss": 0.3143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8768901824951172, + "rewards/margins": 0.9607784152030945, + "rewards/rejected": -0.08388824760913849, + "step": 6617 + }, + { + "epoch": 0.39, + "learning_rate": 7.041944785136331e-08, + "logits/chosen": -2.039609432220459, + "logits/rejected": -2.0207862854003906, + "logps/chosen": -62.04543685913086, + "logps/rejected": -296.0062561035156, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09855842590332031, + "rewards/margins": 3.9219799041748047, + "rewards/rejected": -4.020538330078125, + "step": 6618 + }, + { + "epoch": 0.39, + "learning_rate": 7.041084516410307e-08, + "logits/chosen": -2.0223934650421143, + "logits/rejected": -2.0157740116119385, + "logps/chosen": -152.69149780273438, + "logps/rejected": -244.9728240966797, + "loss": 0.2773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.985729992389679, + "rewards/margins": 0.7831802368164062, + "rewards/rejected": 0.2025497406721115, + "step": 6619 + }, + { + "epoch": 0.39, + "learning_rate": 7.040224175174977e-08, + "logits/chosen": -2.0008509159088135, + "logits/rejected": -1.9655441045761108, + "logps/chosen": -134.3684844970703, + "logps/rejected": -337.6378173828125, + "loss": 0.4248, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1549453735351562, + "rewards/margins": -0.04156959056854248, + "rewards/rejected": 1.1965149641036987, + "step": 6620 + }, + { + "epoch": 0.39, + "learning_rate": 7.039363761460903e-08, + "logits/chosen": -2.1225404739379883, + "logits/rejected": -2.120699167251587, + "logps/chosen": -3.492067813873291, + "logps/rejected": -128.10440063476562, + "loss": 0.5317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34559836983680725, + "rewards/margins": 0.241057351231575, + "rewards/rejected": 0.10454101860523224, + "step": 6621 + }, + { + "epoch": 0.39, + "learning_rate": 7.038503275298655e-08, + "logits/chosen": -2.0219221115112305, + "logits/rejected": -2.0639429092407227, + "logps/chosen": -268.26361083984375, + "logps/rejected": -596.0623779296875, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.947885274887085, + "rewards/margins": 4.4346466064453125, + "rewards/rejected": -1.486761450767517, + "step": 6622 + }, + { + "epoch": 0.39, + "learning_rate": 7.037642716718798e-08, + "logits/chosen": -2.035980463027954, + "logits/rejected": -1.9706525802612305, + "logps/chosen": -212.44876098632812, + "logps/rejected": -340.96527099609375, + "loss": 0.5168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6941055655479431, + "rewards/margins": 0.07219696044921875, + "rewards/rejected": 0.6219086050987244, + "step": 6623 + }, + { + "epoch": 0.39, + "learning_rate": 7.036782085751905e-08, + "logits/chosen": -1.9606670141220093, + "logits/rejected": -1.9609284400939941, + "logps/chosen": -78.44136047363281, + "logps/rejected": -227.2659454345703, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5574424862861633, + "rewards/margins": 1.182959794998169, + "rewards/rejected": -0.6255173087120056, + "step": 6624 + }, + { + "epoch": 0.39, + "learning_rate": 7.035921382428547e-08, + "logits/chosen": -2.0441036224365234, + "logits/rejected": -2.0856902599334717, + "logps/chosen": -280.9755859375, + "logps/rejected": -383.03070068359375, + "loss": 0.1498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.852642834186554, + "rewards/margins": 1.8970794677734375, + "rewards/rejected": -1.0444366931915283, + "step": 6625 + }, + { + "epoch": 0.39, + "learning_rate": 7.035060606779306e-08, + "logits/chosen": -1.8934575319290161, + "logits/rejected": -1.910488247871399, + "logps/chosen": -275.23431396484375, + "logps/rejected": -468.7267761230469, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.828405737876892, + "rewards/margins": 4.924096584320068, + "rewards/rejected": -3.095690965652466, + "step": 6626 + }, + { + "epoch": 0.39, + "learning_rate": 7.034199758834756e-08, + "logits/chosen": -2.044114828109741, + "logits/rejected": -2.106745958328247, + "logps/chosen": -176.91256713867188, + "logps/rejected": -358.93865966796875, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.357385277748108, + "rewards/margins": 4.115896701812744, + "rewards/rejected": -2.7585113048553467, + "step": 6627 + }, + { + "epoch": 0.39, + "learning_rate": 7.033338838625481e-08, + "logits/chosen": -1.845678448677063, + "logits/rejected": -1.87042236328125, + "logps/chosen": -337.072509765625, + "logps/rejected": -419.3182373046875, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1418092250823975, + "rewards/margins": 5.004248142242432, + "rewards/rejected": -1.8624390363693237, + "step": 6628 + }, + { + "epoch": 0.39, + "learning_rate": 7.032477846182063e-08, + "logits/chosen": -2.1396498680114746, + "logits/rejected": -2.1304497718811035, + "logps/chosen": -22.792438507080078, + "logps/rejected": -297.6868591308594, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28324222564697266, + "rewards/margins": 5.411022663116455, + "rewards/rejected": -5.127780437469482, + "step": 6629 + }, + { + "epoch": 0.39, + "learning_rate": 7.031616781535093e-08, + "logits/chosen": -2.010584831237793, + "logits/rejected": -2.0192315578460693, + "logps/chosen": -34.474544525146484, + "logps/rejected": -188.09146118164062, + "loss": 0.3879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41571924090385437, + "rewards/margins": 1.0195919275283813, + "rewards/rejected": -0.6038727164268494, + "step": 6630 + }, + { + "epoch": 0.39, + "learning_rate": 7.030755644715154e-08, + "logits/chosen": -2.117121696472168, + "logits/rejected": -2.1137263774871826, + "logps/chosen": -3.070615291595459, + "logps/rejected": -94.72059631347656, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14779667556285858, + "rewards/margins": 2.2251060009002686, + "rewards/rejected": -2.0773093700408936, + "step": 6631 + }, + { + "epoch": 0.39, + "learning_rate": 7.029894435752842e-08, + "logits/chosen": -1.941834568977356, + "logits/rejected": -1.9209693670272827, + "logps/chosen": -200.43798828125, + "logps/rejected": -396.01287841796875, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.465840220451355, + "rewards/margins": 4.23007345199585, + "rewards/rejected": -2.764233350753784, + "step": 6632 + }, + { + "epoch": 0.39, + "learning_rate": 7.029033154678751e-08, + "logits/chosen": -1.8621494770050049, + "logits/rejected": -1.8657124042510986, + "logps/chosen": -130.5433349609375, + "logps/rejected": -263.1350402832031, + "loss": 0.1259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9287827014923096, + "rewards/margins": 1.6440597772598267, + "rewards/rejected": 0.2847228944301605, + "step": 6633 + }, + { + "epoch": 0.39, + "learning_rate": 7.028171801523476e-08, + "logits/chosen": -1.8184739351272583, + "logits/rejected": -1.8207918405532837, + "logps/chosen": -179.4859619140625, + "logps/rejected": -342.0960693359375, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8786651492118835, + "rewards/margins": 1.2417815923690796, + "rewards/rejected": -0.36311647295951843, + "step": 6634 + }, + { + "epoch": 0.39, + "learning_rate": 7.02731037631762e-08, + "logits/chosen": -1.8514176607131958, + "logits/rejected": -1.8394495248794556, + "logps/chosen": -14.068681716918945, + "logps/rejected": -271.5617980957031, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002485942794010043, + "rewards/margins": 5.212977886199951, + "rewards/rejected": -5.210492134094238, + "step": 6635 + }, + { + "epoch": 0.39, + "learning_rate": 7.026448879091779e-08, + "logits/chosen": -2.0474765300750732, + "logits/rejected": -2.035156011581421, + "logps/chosen": -9.751125617185608e-05, + "logps/rejected": -201.11997985839844, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5496851801799494e-06, + "rewards/margins": 2.9142305850982666, + "rewards/rejected": -2.914232015609741, + "step": 6636 + }, + { + "epoch": 0.39, + "learning_rate": 7.025587309876565e-08, + "logits/chosen": -1.9727435111999512, + "logits/rejected": -1.9777400493621826, + "logps/chosen": -117.44496154785156, + "logps/rejected": -235.00389099121094, + "loss": 0.3885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7058082818984985, + "rewards/margins": 0.4756172299385071, + "rewards/rejected": 0.23019103705883026, + "step": 6637 + }, + { + "epoch": 0.39, + "learning_rate": 7.024725668702579e-08, + "logits/chosen": -2.0595126152038574, + "logits/rejected": -2.0459461212158203, + "logps/chosen": -72.25767517089844, + "logps/rejected": -388.58428955078125, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5096214413642883, + "rewards/margins": 1.7063255310058594, + "rewards/rejected": -1.1967041492462158, + "step": 6638 + }, + { + "epoch": 0.39, + "learning_rate": 7.023863955600435e-08, + "logits/chosen": -2.0072240829467773, + "logits/rejected": -2.0073294639587402, + "logps/chosen": -0.09810244292020798, + "logps/rejected": -28.29296112060547, + "loss": 0.634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024111985694617033, + "rewards/margins": 0.22044551372528076, + "rewards/rejected": -0.22285671532154083, + "step": 6639 + }, + { + "epoch": 0.39, + "learning_rate": 7.023002170600743e-08, + "logits/chosen": -1.9987008571624756, + "logits/rejected": -1.9842791557312012, + "logps/chosen": -28.48604393005371, + "logps/rejected": -434.6132507324219, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2222389280796051, + "rewards/margins": 4.060355186462402, + "rewards/rejected": -3.838116407394409, + "step": 6640 + }, + { + "epoch": 0.39, + "learning_rate": 7.022140313734119e-08, + "logits/chosen": -2.078681707382202, + "logits/rejected": -2.066235303878784, + "logps/chosen": -36.27149963378906, + "logps/rejected": -230.6918182373047, + "loss": 0.3039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7828914523124695, + "rewards/margins": 1.2713603973388672, + "rewards/rejected": -0.4884689450263977, + "step": 6641 + }, + { + "epoch": 0.39, + "learning_rate": 7.021278385031179e-08, + "logits/chosen": -1.9607751369476318, + "logits/rejected": -1.9463752508163452, + "logps/chosen": -8.875212669372559, + "logps/rejected": -181.188720703125, + "loss": 0.3963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0999908447265625, + "rewards/margins": 1.7408965826034546, + "rewards/rejected": -1.640905737876892, + "step": 6642 + }, + { + "epoch": 0.39, + "learning_rate": 7.020416384522543e-08, + "logits/chosen": -2.091151475906372, + "logits/rejected": -2.0413968563079834, + "logps/chosen": -219.91250610351562, + "logps/rejected": -465.71038818359375, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.538722276687622, + "rewards/margins": 4.6077561378479, + "rewards/rejected": -3.0690338611602783, + "step": 6643 + }, + { + "epoch": 0.39, + "learning_rate": 7.019554312238835e-08, + "logits/chosen": -2.0075159072875977, + "logits/rejected": -1.9451795816421509, + "logps/chosen": -229.06231689453125, + "logps/rejected": -356.96307373046875, + "loss": 0.1297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2221877574920654, + "rewards/margins": 1.3672133684158325, + "rewards/rejected": 0.8549743890762329, + "step": 6644 + }, + { + "epoch": 0.39, + "learning_rate": 7.018692168210677e-08, + "logits/chosen": -2.294269561767578, + "logits/rejected": -2.295607328414917, + "logps/chosen": -0.01440946850925684, + "logps/rejected": -54.768890380859375, + "loss": 0.4306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010424434440210462, + "rewards/margins": 1.6964013576507568, + "rewards/rejected": -1.6974438428878784, + "step": 6645 + }, + { + "epoch": 0.39, + "learning_rate": 7.017829952468701e-08, + "logits/chosen": -2.1704916954040527, + "logits/rejected": -2.153337240219116, + "logps/chosen": -86.05863952636719, + "logps/rejected": -263.6488037109375, + "loss": 0.1979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.606048583984375, + "rewards/margins": 4.786952495574951, + "rewards/rejected": -4.180903911590576, + "step": 6646 + }, + { + "epoch": 0.39, + "learning_rate": 7.016967665043534e-08, + "logits/chosen": -1.9897785186767578, + "logits/rejected": -1.9889864921569824, + "logps/chosen": -26.032869338989258, + "logps/rejected": -118.45616149902344, + "loss": 0.3275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4285549223423004, + "rewards/margins": 1.5916194915771484, + "rewards/rejected": -1.1630645990371704, + "step": 6647 + }, + { + "epoch": 0.39, + "learning_rate": 7.01610530596581e-08, + "logits/chosen": -2.056976795196533, + "logits/rejected": -2.0422515869140625, + "logps/chosen": -41.322486877441406, + "logps/rejected": -312.8193054199219, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03671875223517418, + "rewards/margins": 3.4126951694488525, + "rewards/rejected": -3.449414014816284, + "step": 6648 + }, + { + "epoch": 0.39, + "learning_rate": 7.015242875266164e-08, + "logits/chosen": -1.7719919681549072, + "logits/rejected": -1.7699973583221436, + "logps/chosen": -1.971185326576233, + "logps/rejected": -140.05767822265625, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02571713924407959, + "rewards/margins": 0.6800385117530823, + "rewards/rejected": -0.7057556509971619, + "step": 6649 + }, + { + "epoch": 0.39, + "learning_rate": 7.014380372975232e-08, + "logits/chosen": -1.90902578830719, + "logits/rejected": -1.891928791999817, + "logps/chosen": -206.3140869140625, + "logps/rejected": -406.3131103515625, + "loss": 0.0817, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1152329444885254, + "rewards/margins": 2.282609701156616, + "rewards/rejected": -0.16737671196460724, + "step": 6650 + }, + { + "epoch": 0.39, + "learning_rate": 7.013517799123658e-08, + "logits/chosen": -2.010751247406006, + "logits/rejected": -1.9393185377120972, + "logps/chosen": -98.25821685791016, + "logps/rejected": -401.8686218261719, + "loss": 0.3389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.182233452796936, + "rewards/margins": 0.23345720767974854, + "rewards/rejected": 0.9487762451171875, + "step": 6651 + }, + { + "epoch": 0.39, + "learning_rate": 7.012655153742081e-08, + "logits/chosen": -1.7687501907348633, + "logits/rejected": -1.7032650709152222, + "logps/chosen": -314.1094970703125, + "logps/rejected": -762.1298828125, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3688294887542725, + "rewards/margins": 5.829473972320557, + "rewards/rejected": -3.460644483566284, + "step": 6652 + }, + { + "epoch": 0.39, + "learning_rate": 7.011792436861148e-08, + "logits/chosen": -1.8213284015655518, + "logits/rejected": -1.81746244430542, + "logps/chosen": -205.62374877929688, + "logps/rejected": -337.28228759765625, + "loss": 0.0787, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.767085313796997, + "rewards/margins": 3.7507948875427246, + "rewards/rejected": -1.983709692955017, + "step": 6653 + }, + { + "epoch": 0.39, + "learning_rate": 7.010929648511508e-08, + "logits/chosen": -2.1344685554504395, + "logits/rejected": -2.133037805557251, + "logps/chosen": -0.0003674647305160761, + "logps/rejected": -101.626220703125, + "loss": 0.6319, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.318237425759435e-05, + "rewards/margins": 0.2592454254627228, + "rewards/rejected": -0.25915223360061646, + "step": 6654 + }, + { + "epoch": 0.39, + "learning_rate": 7.010066788723809e-08, + "logits/chosen": -1.9598281383514404, + "logits/rejected": -1.9812496900558472, + "logps/chosen": -180.97781372070312, + "logps/rejected": -346.5323791503906, + "loss": 0.0514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7022368907928467, + "rewards/margins": 2.6098296642303467, + "rewards/rejected": 0.0924072265625, + "step": 6655 + }, + { + "epoch": 0.39, + "learning_rate": 7.009203857528706e-08, + "logits/chosen": -1.93911612033844, + "logits/rejected": -1.9220194816589355, + "logps/chosen": -20.384885787963867, + "logps/rejected": -264.91180419921875, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18664531409740448, + "rewards/margins": 2.5249204635620117, + "rewards/rejected": -2.338275194168091, + "step": 6656 + }, + { + "epoch": 0.39, + "learning_rate": 7.008340854956853e-08, + "logits/chosen": -2.0484330654144287, + "logits/rejected": -2.0499267578125, + "logps/chosen": -0.00041870801942422986, + "logps/rejected": -176.83779907226562, + "loss": 0.3636, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.675566272751894e-05, + "rewards/margins": 3.1513779163360596, + "rewards/rejected": -3.1513946056365967, + "step": 6657 + }, + { + "epoch": 0.39, + "learning_rate": 7.00747778103891e-08, + "logits/chosen": -1.9691979885101318, + "logits/rejected": -1.941636323928833, + "logps/chosen": -284.6334228515625, + "logps/rejected": -387.5872802734375, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3636200428009033, + "rewards/margins": 3.1853790283203125, + "rewards/rejected": 0.17824096977710724, + "step": 6658 + }, + { + "epoch": 0.39, + "learning_rate": 7.006614635805537e-08, + "logits/chosen": -1.8895854949951172, + "logits/rejected": -1.8674464225769043, + "logps/chosen": -82.60004425048828, + "logps/rejected": -292.74835205078125, + "loss": 0.2245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4786232113838196, + "rewards/margins": 3.169003963470459, + "rewards/rejected": -2.690380811691284, + "step": 6659 + }, + { + "epoch": 0.39, + "learning_rate": 7.005751419287396e-08, + "logits/chosen": -2.0478477478027344, + "logits/rejected": -2.058852195739746, + "logps/chosen": -161.57489013671875, + "logps/rejected": -249.24220275878906, + "loss": 0.2274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7335891723632812, + "rewards/margins": 0.7390472292900085, + "rewards/rejected": 0.9945419430732727, + "step": 6660 + }, + { + "epoch": 0.39, + "learning_rate": 7.004888131515154e-08, + "logits/chosen": -2.171952724456787, + "logits/rejected": -2.159278392791748, + "logps/chosen": -39.721927642822266, + "logps/rejected": -316.6510009765625, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29393503069877625, + "rewards/margins": 3.233400344848633, + "rewards/rejected": -2.939465284347534, + "step": 6661 + }, + { + "epoch": 0.39, + "learning_rate": 7.004024772519478e-08, + "logits/chosen": -2.047738552093506, + "logits/rejected": -2.0379676818847656, + "logps/chosen": -11.126762390136719, + "logps/rejected": -208.85516357421875, + "loss": 0.3956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08747940510511398, + "rewards/margins": 1.7715950012207031, + "rewards/rejected": -1.6841156482696533, + "step": 6662 + }, + { + "epoch": 0.39, + "learning_rate": 7.00316134233104e-08, + "logits/chosen": -1.9615874290466309, + "logits/rejected": -1.9561823606491089, + "logps/chosen": -176.64736938476562, + "logps/rejected": -427.61260986328125, + "loss": 0.0562, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5914642810821533, + "rewards/margins": 2.501394748687744, + "rewards/rejected": 0.09006958454847336, + "step": 6663 + }, + { + "epoch": 0.39, + "learning_rate": 7.00229784098051e-08, + "logits/chosen": -1.951167106628418, + "logits/rejected": -1.872465968132019, + "logps/chosen": -286.8242492675781, + "logps/rejected": -545.4893798828125, + "loss": 0.0822, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7008270025253296, + "rewards/margins": 2.4854583740234375, + "rewards/rejected": -0.7846313714981079, + "step": 6664 + }, + { + "epoch": 0.39, + "learning_rate": 7.001434268498569e-08, + "logits/chosen": -1.9018958806991577, + "logits/rejected": -1.9138377904891968, + "logps/chosen": -121.28683471679688, + "logps/rejected": -151.4757843017578, + "loss": 0.3926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8824737668037415, + "rewards/margins": 0.5143356323242188, + "rewards/rejected": 0.3681381344795227, + "step": 6665 + }, + { + "epoch": 0.39, + "learning_rate": 7.000570624915891e-08, + "logits/chosen": -1.9506011009216309, + "logits/rejected": -1.7754697799682617, + "logps/chosen": -123.34504699707031, + "logps/rejected": -480.3061218261719, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8210784792900085, + "rewards/margins": 1.007196068763733, + "rewards/rejected": -0.18611755967140198, + "step": 6666 + }, + { + "epoch": 0.39, + "learning_rate": 6.99970691026316e-08, + "logits/chosen": -1.9417451620101929, + "logits/rejected": -1.9110684394836426, + "logps/chosen": -287.7006530761719, + "logps/rejected": -352.1890869140625, + "loss": 0.4197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09099426120519638, + "rewards/margins": 0.9000579714775085, + "rewards/rejected": -0.809063732624054, + "step": 6667 + }, + { + "epoch": 0.39, + "learning_rate": 6.998843124571058e-08, + "logits/chosen": -2.11268949508667, + "logits/rejected": -2.1122169494628906, + "logps/chosen": -188.45574951171875, + "logps/rejected": -233.34022521972656, + "loss": 0.1537, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.318167209625244, + "rewards/margins": 1.2962723970413208, + "rewards/rejected": 1.0218948125839233, + "step": 6668 + }, + { + "epoch": 0.39, + "learning_rate": 6.99797926787027e-08, + "logits/chosen": -1.9550901651382446, + "logits/rejected": -1.9320437908172607, + "logps/chosen": -139.88360595703125, + "logps/rejected": -369.8625183105469, + "loss": 0.0728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4874130487442017, + "rewards/margins": 3.7628464698791504, + "rewards/rejected": -2.275433301925659, + "step": 6669 + }, + { + "epoch": 0.39, + "learning_rate": 6.997115340191487e-08, + "logits/chosen": -2.2297587394714355, + "logits/rejected": -2.228275775909424, + "logps/chosen": -55.12794876098633, + "logps/rejected": -138.01795959472656, + "loss": 0.5302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021464157849550247, + "rewards/margins": 0.5668407678604126, + "rewards/rejected": -0.5453765988349915, + "step": 6670 + }, + { + "epoch": 0.39, + "learning_rate": 6.996251341565395e-08, + "logits/chosen": -1.8689699172973633, + "logits/rejected": -1.858721137046814, + "logps/chosen": -127.58002471923828, + "logps/rejected": -430.39453125, + "loss": 0.1535, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0335731506347656, + "rewards/margins": 2.5846076011657715, + "rewards/rejected": -1.5510345697402954, + "step": 6671 + }, + { + "epoch": 0.39, + "learning_rate": 6.995387272022693e-08, + "logits/chosen": -2.0418074131011963, + "logits/rejected": -2.1058785915374756, + "logps/chosen": -267.8446960449219, + "logps/rejected": -409.0003356933594, + "loss": 0.0845, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7110413312911987, + "rewards/margins": 2.342190742492676, + "rewards/rejected": -0.6311492919921875, + "step": 6672 + }, + { + "epoch": 0.39, + "learning_rate": 6.994523131594075e-08, + "logits/chosen": -1.9472137689590454, + "logits/rejected": -1.946544885635376, + "logps/chosen": -16.393217086791992, + "logps/rejected": -246.8553924560547, + "loss": 0.3916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4051874279975891, + "rewards/margins": 0.8551050424575806, + "rewards/rejected": -0.44991761445999146, + "step": 6673 + }, + { + "epoch": 0.39, + "learning_rate": 6.993658920310237e-08, + "logits/chosen": -2.2282350063323975, + "logits/rejected": -2.222486734390259, + "logps/chosen": -20.215394973754883, + "logps/rejected": -170.46371459960938, + "loss": 0.6391, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.18503914773464203, + "rewards/margins": -0.017556369304656982, + "rewards/rejected": 0.202595517039299, + "step": 6674 + }, + { + "epoch": 0.39, + "learning_rate": 6.992794638201884e-08, + "logits/chosen": -1.997144103050232, + "logits/rejected": -2.003176689147949, + "logps/chosen": -174.97506713867188, + "logps/rejected": -333.86834716796875, + "loss": 0.4607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6304122805595398, + "rewards/margins": 0.01569974422454834, + "rewards/rejected": 0.6147125363349915, + "step": 6675 + }, + { + "epoch": 0.39, + "learning_rate": 6.991930285299716e-08, + "logits/chosen": -1.8364301919937134, + "logits/rejected": -1.8301548957824707, + "logps/chosen": -29.39673614501953, + "logps/rejected": -146.14541625976562, + "loss": 0.397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02522583119571209, + "rewards/margins": 2.011383056640625, + "rewards/rejected": -1.9861572980880737, + "step": 6676 + }, + { + "epoch": 0.39, + "learning_rate": 6.991065861634442e-08, + "logits/chosen": -1.884518027305603, + "logits/rejected": -1.8730453252792358, + "logps/chosen": -18.55788803100586, + "logps/rejected": -271.35748291015625, + "loss": 0.359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.033901214599609375, + "rewards/margins": 4.584430694580078, + "rewards/rejected": -4.550529479980469, + "step": 6677 + }, + { + "epoch": 0.39, + "learning_rate": 6.990201367236769e-08, + "logits/chosen": -2.124474287033081, + "logits/rejected": -2.1626434326171875, + "logps/chosen": -213.58148193359375, + "logps/rejected": -410.05194091796875, + "loss": 0.0594, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.253692626953125, + "rewards/margins": 2.5939788818359375, + "rewards/rejected": -0.3402862548828125, + "step": 6678 + }, + { + "epoch": 0.39, + "learning_rate": 6.989336802137408e-08, + "logits/chosen": -1.7869426012039185, + "logits/rejected": -1.7860910892486572, + "logps/chosen": -8.506726264953613, + "logps/rejected": -249.17095947265625, + "loss": 0.4055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21681518852710724, + "rewards/margins": 4.111145496368408, + "rewards/rejected": -4.32796049118042, + "step": 6679 + }, + { + "epoch": 0.39, + "learning_rate": 6.988472166367074e-08, + "logits/chosen": -1.9170321226119995, + "logits/rejected": -1.9098994731903076, + "logps/chosen": -131.65965270996094, + "logps/rejected": -299.60546875, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4289871156215668, + "rewards/margins": 1.7969131469726562, + "rewards/rejected": -1.367926001548767, + "step": 6680 + }, + { + "epoch": 0.39, + "learning_rate": 6.987607459956481e-08, + "logits/chosen": -1.9856542348861694, + "logits/rejected": -1.9788963794708252, + "logps/chosen": -46.564937591552734, + "logps/rejected": -196.37353515625, + "loss": 0.6216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7404544949531555, + "rewards/margins": 1.580796480178833, + "rewards/rejected": -2.3212509155273438, + "step": 6681 + }, + { + "epoch": 0.39, + "learning_rate": 6.986742682936348e-08, + "logits/chosen": -2.1540098190307617, + "logits/rejected": -2.1378462314605713, + "logps/chosen": -60.538856506347656, + "logps/rejected": -284.504150390625, + "loss": 0.2187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8367988467216492, + "rewards/margins": 2.464087724685669, + "rewards/rejected": -1.627288818359375, + "step": 6682 + }, + { + "epoch": 0.39, + "learning_rate": 6.985877835337397e-08, + "logits/chosen": -2.0992753505706787, + "logits/rejected": -2.0968775749206543, + "logps/chosen": -10.910782814025879, + "logps/rejected": -116.42292785644531, + "loss": 0.4428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41822901368141174, + "rewards/margins": 0.5159462690353394, + "rewards/rejected": -0.09771728515625, + "step": 6683 + }, + { + "epoch": 0.39, + "learning_rate": 6.985012917190352e-08, + "logits/chosen": -1.9794085025787354, + "logits/rejected": -1.9776735305786133, + "logps/chosen": -5.816329002380371, + "logps/rejected": -45.0521125793457, + "loss": 0.6047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0869833454489708, + "rewards/margins": 0.19390663504600525, + "rewards/rejected": -0.10692329704761505, + "step": 6684 + }, + { + "epoch": 0.39, + "learning_rate": 6.984147928525939e-08, + "logits/chosen": -2.004758596420288, + "logits/rejected": -1.9752370119094849, + "logps/chosen": -263.72149658203125, + "logps/rejected": -364.30450439453125, + "loss": 0.137, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6396484375, + "rewards/margins": 1.4351378679275513, + "rewards/rejected": 1.2045105695724487, + "step": 6685 + }, + { + "epoch": 0.39, + "learning_rate": 6.983282869374882e-08, + "logits/chosen": -1.9992365837097168, + "logits/rejected": -1.988128423690796, + "logps/chosen": -23.40692901611328, + "logps/rejected": -72.65159606933594, + "loss": 0.5217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06818848103284836, + "rewards/margins": 0.9388591647148132, + "rewards/rejected": -1.0070476531982422, + "step": 6686 + }, + { + "epoch": 0.39, + "learning_rate": 6.982417739767921e-08, + "logits/chosen": -1.8801789283752441, + "logits/rejected": -1.847862958908081, + "logps/chosen": -235.04470825195312, + "logps/rejected": -345.63970947265625, + "loss": 0.0858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8152436017990112, + "rewards/margins": 2.713613986968994, + "rewards/rejected": -0.8983703851699829, + "step": 6687 + }, + { + "epoch": 0.39, + "learning_rate": 6.981552539735783e-08, + "logits/chosen": -1.9792168140411377, + "logits/rejected": -1.9750871658325195, + "logps/chosen": -58.07389450073242, + "logps/rejected": -167.18707275390625, + "loss": 0.4515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14333724975585938, + "rewards/margins": 0.9092491269111633, + "rewards/rejected": -0.765911877155304, + "step": 6688 + }, + { + "epoch": 0.39, + "learning_rate": 6.980687269309205e-08, + "logits/chosen": -2.017181873321533, + "logits/rejected": -2.0235514640808105, + "logps/chosen": -271.5127258300781, + "logps/rejected": -254.0303955078125, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9047698974609375, + "rewards/margins": 0.05108332633972168, + "rewards/rejected": 1.8536865711212158, + "step": 6689 + }, + { + "epoch": 0.39, + "learning_rate": 6.979821928518927e-08, + "logits/chosen": -2.053870439529419, + "logits/rejected": -2.0546677112579346, + "logps/chosen": -13.668501853942871, + "logps/rejected": -133.39891052246094, + "loss": 0.4338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07406120747327805, + "rewards/margins": 1.4493297338485718, + "rewards/rejected": -1.375268578529358, + "step": 6690 + }, + { + "epoch": 0.39, + "learning_rate": 6.978956517395692e-08, + "logits/chosen": -2.0965089797973633, + "logits/rejected": -2.080940008163452, + "logps/chosen": -240.10726928710938, + "logps/rejected": -340.9685974121094, + "loss": 0.2357, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3500335216522217, + "rewards/margins": 0.6824949979782104, + "rewards/rejected": 1.6675385236740112, + "step": 6691 + }, + { + "epoch": 0.39, + "learning_rate": 6.97809103597024e-08, + "logits/chosen": -2.0058000087738037, + "logits/rejected": -2.0083160400390625, + "logps/chosen": -37.252803802490234, + "logps/rejected": -222.3800506591797, + "loss": 0.4101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04607544094324112, + "rewards/margins": 2.830082893371582, + "rewards/rejected": -2.8761582374572754, + "step": 6692 + }, + { + "epoch": 0.39, + "learning_rate": 6.977225484273318e-08, + "logits/chosen": -2.035550832748413, + "logits/rejected": -2.027738571166992, + "logps/chosen": -2.238044500350952, + "logps/rejected": -354.8325500488281, + "loss": 0.3429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011092639528214931, + "rewards/margins": 6.722827911376953, + "rewards/rejected": -6.733920574188232, + "step": 6693 + }, + { + "epoch": 0.39, + "learning_rate": 6.976359862335674e-08, + "logits/chosen": -2.0292251110076904, + "logits/rejected": -2.0440280437469482, + "logps/chosen": -0.8556557893753052, + "logps/rejected": -193.686767578125, + "loss": 0.3656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004760748241096735, + "rewards/margins": 3.0153777599334717, + "rewards/rejected": -3.0201385021209717, + "step": 6694 + }, + { + "epoch": 0.39, + "learning_rate": 6.975494170188062e-08, + "logits/chosen": -1.8382717370986938, + "logits/rejected": -1.8365918397903442, + "logps/chosen": -70.93699645996094, + "logps/rejected": -249.1482696533203, + "loss": 0.5048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6103981137275696, + "rewards/margins": 1.4987802505493164, + "rewards/rejected": -2.109178304672241, + "step": 6695 + }, + { + "epoch": 0.39, + "learning_rate": 6.974628407861234e-08, + "logits/chosen": -1.789379358291626, + "logits/rejected": -1.839147686958313, + "logps/chosen": -283.8427429199219, + "logps/rejected": -410.6976318359375, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6260894536972046, + "rewards/margins": 1.6482818126678467, + "rewards/rejected": -0.02219238318502903, + "step": 6696 + }, + { + "epoch": 0.39, + "learning_rate": 6.973762575385946e-08, + "logits/chosen": -2.0068318843841553, + "logits/rejected": -2.0008952617645264, + "logps/chosen": -24.341590881347656, + "logps/rejected": -149.9770050048828, + "loss": 0.4459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3463546931743622, + "rewards/margins": 0.6955963373184204, + "rewards/rejected": -0.3492416441440582, + "step": 6697 + }, + { + "epoch": 0.39, + "learning_rate": 6.972896672792956e-08, + "logits/chosen": -1.8463194370269775, + "logits/rejected": -1.8418879508972168, + "logps/chosen": -108.92025756835938, + "logps/rejected": -327.8857116699219, + "loss": 0.2607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44468003511428833, + "rewards/margins": 2.722240447998047, + "rewards/rejected": -2.2775604724884033, + "step": 6698 + }, + { + "epoch": 0.39, + "learning_rate": 6.972030700113025e-08, + "logits/chosen": -1.9709035158157349, + "logits/rejected": -1.9565223455429077, + "logps/chosen": -169.53579711914062, + "logps/rejected": -298.795166015625, + "loss": 0.2212, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2088119983673096, + "rewards/margins": 0.7415695190429688, + "rewards/rejected": 1.4672424793243408, + "step": 6699 + }, + { + "epoch": 0.39, + "learning_rate": 6.97116465737692e-08, + "logits/chosen": -2.1186068058013916, + "logits/rejected": -2.1196343898773193, + "logps/chosen": -14.111817359924316, + "logps/rejected": -49.59579086303711, + "loss": 0.6052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011595535092055798, + "rewards/margins": 0.3222900629043579, + "rewards/rejected": -0.31069451570510864, + "step": 6700 + }, + { + "epoch": 0.39, + "learning_rate": 6.970298544615402e-08, + "logits/chosen": -2.116220712661743, + "logits/rejected": -2.12113094329834, + "logps/chosen": -15.215469360351562, + "logps/rejected": -123.90414428710938, + "loss": 0.5084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10184784233570099, + "rewards/margins": 0.8191558718681335, + "rewards/rejected": -0.7173080444335938, + "step": 6701 + }, + { + "epoch": 0.39, + "learning_rate": 6.969432361859242e-08, + "logits/chosen": -2.0829484462738037, + "logits/rejected": -2.0769433975219727, + "logps/chosen": -3.8504185795318335e-05, + "logps/rejected": -92.3929214477539, + "loss": 0.608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4066544054003316e-06, + "rewards/margins": 0.36734989285469055, + "rewards/rejected": -0.36734849214553833, + "step": 6702 + }, + { + "epoch": 0.39, + "learning_rate": 6.968566109139211e-08, + "logits/chosen": -1.9593271017074585, + "logits/rejected": -2.004594564437866, + "logps/chosen": -197.70623779296875, + "logps/rejected": -273.7781982421875, + "loss": 0.2491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5889923572540283, + "rewards/margins": 0.6967804431915283, + "rewards/rejected": 0.8922119140625, + "step": 6703 + }, + { + "epoch": 0.39, + "learning_rate": 6.967699786486084e-08, + "logits/chosen": -1.9816001653671265, + "logits/rejected": -1.9876450300216675, + "logps/chosen": -5.61637020111084, + "logps/rejected": -137.0046844482422, + "loss": 0.4309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012747383676469326, + "rewards/margins": 1.5115445852279663, + "rewards/rejected": -1.5242919921875, + "step": 6704 + }, + { + "epoch": 0.39, + "learning_rate": 6.966833393930635e-08, + "logits/chosen": -1.9893162250518799, + "logits/rejected": -1.9908841848373413, + "logps/chosen": -16.53310775756836, + "logps/rejected": -145.13555908203125, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7270433306694031, + "rewards/margins": 0.7477129101753235, + "rewards/rejected": -0.02066955529153347, + "step": 6705 + }, + { + "epoch": 0.39, + "learning_rate": 6.965966931503641e-08, + "logits/chosen": -1.8810744285583496, + "logits/rejected": -1.8825197219848633, + "logps/chosen": -15.087786674499512, + "logps/rejected": -78.20889282226562, + "loss": 0.5421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09648771584033966, + "rewards/margins": 0.5964703559875488, + "rewards/rejected": -0.6929580569267273, + "step": 6706 + }, + { + "epoch": 0.39, + "learning_rate": 6.965100399235886e-08, + "logits/chosen": -1.8386653661727905, + "logits/rejected": -1.8323982954025269, + "logps/chosen": -266.48828125, + "logps/rejected": -383.04193115234375, + "loss": 0.1075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.529266357421875, + "rewards/margins": 2.0399017333984375, + "rewards/rejected": -0.5106353759765625, + "step": 6707 + }, + { + "epoch": 0.39, + "learning_rate": 6.964233797158155e-08, + "logits/chosen": -2.055732250213623, + "logits/rejected": -2.048773765563965, + "logps/chosen": -137.8174285888672, + "logps/rejected": -350.4651184082031, + "loss": 0.2163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9985763430595398, + "rewards/margins": 1.3932632207870483, + "rewards/rejected": -0.39468690752983093, + "step": 6708 + }, + { + "epoch": 0.39, + "learning_rate": 6.963367125301229e-08, + "logits/chosen": -1.9229339361190796, + "logits/rejected": -1.9215165376663208, + "logps/chosen": -170.84925842285156, + "logps/rejected": -238.65293884277344, + "loss": 0.4596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7136215567588806, + "rewards/margins": 0.2497864067554474, + "rewards/rejected": 0.4638351500034332, + "step": 6709 + }, + { + "epoch": 0.39, + "learning_rate": 6.962500383695899e-08, + "logits/chosen": -2.042750835418701, + "logits/rejected": -2.0133519172668457, + "logps/chosen": -195.3194122314453, + "logps/rejected": -474.8841857910156, + "loss": 1.2311, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1696548461914062, + "rewards/margins": 3.659285068511963, + "rewards/rejected": -5.828939914703369, + "step": 6710 + }, + { + "epoch": 0.39, + "learning_rate": 6.961633572372956e-08, + "logits/chosen": -2.1085307598114014, + "logits/rejected": -2.10779070854187, + "logps/chosen": -0.0020764695946127176, + "logps/rejected": -43.92925262451172, + "loss": 0.5929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002807820856105536, + "rewards/margins": 0.45056915283203125, + "rewards/rejected": -0.4502883851528168, + "step": 6711 + }, + { + "epoch": 0.39, + "learning_rate": 6.960766691363193e-08, + "logits/chosen": -2.099733591079712, + "logits/rejected": -2.094733238220215, + "logps/chosen": -178.4065704345703, + "logps/rejected": -246.9309844970703, + "loss": 0.0996, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7779617309570312, + "rewards/margins": 2.485119581222534, + "rewards/rejected": -0.7071579098701477, + "step": 6712 + }, + { + "epoch": 0.39, + "learning_rate": 6.959899740697405e-08, + "logits/chosen": -1.9417604207992554, + "logits/rejected": -1.9338421821594238, + "logps/chosen": -28.451770782470703, + "logps/rejected": -153.62594604492188, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44390374422073364, + "rewards/margins": 1.7652537822723389, + "rewards/rejected": -1.32135009765625, + "step": 6713 + }, + { + "epoch": 0.39, + "learning_rate": 6.959032720406393e-08, + "logits/chosen": -1.8860527276992798, + "logits/rejected": -1.9073752164840698, + "logps/chosen": -292.28875732421875, + "logps/rejected": -313.7058410644531, + "loss": 0.1098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.485241651535034, + "rewards/margins": 1.8136138916015625, + "rewards/rejected": 0.6716278195381165, + "step": 6714 + }, + { + "epoch": 0.39, + "learning_rate": 6.958165630520956e-08, + "logits/chosen": -1.896559715270996, + "logits/rejected": -1.8871146440505981, + "logps/chosen": -249.30079650878906, + "logps/rejected": -399.640625, + "loss": 0.159, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3815659284591675, + "rewards/margins": 1.4459640979766846, + "rewards/rejected": -0.06439819186925888, + "step": 6715 + }, + { + "epoch": 0.39, + "learning_rate": 6.957298471071896e-08, + "logits/chosen": -1.8953317403793335, + "logits/rejected": -1.8919906616210938, + "logps/chosen": -0.4932226538658142, + "logps/rejected": -178.90194702148438, + "loss": 0.4058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024056142196059227, + "rewards/margins": 2.1870689392089844, + "rewards/rejected": -2.211125135421753, + "step": 6716 + }, + { + "epoch": 0.39, + "learning_rate": 6.956431242090022e-08, + "logits/chosen": -2.050936222076416, + "logits/rejected": -2.052845001220703, + "logps/chosen": -7.580747127532959, + "logps/rejected": -64.63175201416016, + "loss": 0.5831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0076712132431566715, + "rewards/margins": 0.5867242217063904, + "rewards/rejected": -0.5943954586982727, + "step": 6717 + }, + { + "epoch": 0.39, + "learning_rate": 6.955563943606138e-08, + "logits/chosen": -1.9607185125350952, + "logits/rejected": -1.950273871421814, + "logps/chosen": -143.13229370117188, + "logps/rejected": -296.0159912109375, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4799988269805908, + "rewards/margins": 0.9894775748252869, + "rewards/rejected": 0.49052125215530396, + "step": 6718 + }, + { + "epoch": 0.39, + "learning_rate": 6.95469657565106e-08, + "logits/chosen": -1.975358486175537, + "logits/rejected": -2.0228211879730225, + "logps/chosen": -171.24667358398438, + "logps/rejected": -367.4148254394531, + "loss": 0.1686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5633087158203125, + "rewards/margins": 1.2605712413787842, + "rewards/rejected": 0.30273744463920593, + "step": 6719 + }, + { + "epoch": 0.39, + "learning_rate": 6.953829138255595e-08, + "logits/chosen": -2.005509614944458, + "logits/rejected": -1.9976788759231567, + "logps/chosen": -22.180992126464844, + "logps/rejected": -130.45831298828125, + "loss": 0.2247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7521843314170837, + "rewards/margins": 2.918198585510254, + "rewards/rejected": -2.1660141944885254, + "step": 6720 + }, + { + "epoch": 0.39, + "learning_rate": 6.952961631450565e-08, + "logits/chosen": -1.7988917827606201, + "logits/rejected": -1.8036173582077026, + "logps/chosen": -2.2491278648376465, + "logps/rejected": -196.76589965820312, + "loss": 0.3981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024430274963378906, + "rewards/margins": 2.1257669925689697, + "rewards/rejected": -2.101336717605591, + "step": 6721 + }, + { + "epoch": 0.39, + "learning_rate": 6.952094055266782e-08, + "logits/chosen": -1.9807250499725342, + "logits/rejected": -1.9577727317810059, + "logps/chosen": -146.38397216796875, + "logps/rejected": -340.9526672363281, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3050994873046875, + "rewards/margins": 3.4385805130004883, + "rewards/rejected": -1.1334809064865112, + "step": 6722 + }, + { + "epoch": 0.39, + "learning_rate": 6.951226409735071e-08, + "logits/chosen": -2.127105951309204, + "logits/rejected": -2.0949578285217285, + "logps/chosen": -55.79386901855469, + "logps/rejected": -337.53082275390625, + "loss": 0.2569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48952218890190125, + "rewards/margins": 2.1305317878723145, + "rewards/rejected": -1.6410095691680908, + "step": 6723 + }, + { + "epoch": 0.39, + "learning_rate": 6.950358694886254e-08, + "logits/chosen": -1.950516700744629, + "logits/rejected": -1.9646395444869995, + "logps/chosen": -199.81668090820312, + "logps/rejected": -273.5840759277344, + "loss": 0.4984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37673646211624146, + "rewards/margins": 0.05693361163139343, + "rewards/rejected": 0.319802850484848, + "step": 6724 + }, + { + "epoch": 0.39, + "learning_rate": 6.949490910751155e-08, + "logits/chosen": -2.1703219413757324, + "logits/rejected": -2.175067186355591, + "logps/chosen": -11.94127368927002, + "logps/rejected": -149.421630859375, + "loss": 0.5448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02491149865090847, + "rewards/margins": 0.698535144329071, + "rewards/rejected": -0.7234466671943665, + "step": 6725 + }, + { + "epoch": 0.39, + "learning_rate": 6.9486230573606e-08, + "logits/chosen": -2.0287647247314453, + "logits/rejected": -2.010873317718506, + "logps/chosen": -199.80963134765625, + "logps/rejected": -242.09600830078125, + "loss": 0.9237, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1537017822265625, + "rewards/margins": -0.6674484610557556, + "rewards/rejected": 0.5137466788291931, + "step": 6726 + }, + { + "epoch": 0.39, + "learning_rate": 6.947755134745427e-08, + "logits/chosen": -1.8829609155654907, + "logits/rejected": -1.8832932710647583, + "logps/chosen": -1.7162060737609863, + "logps/rejected": -152.29417419433594, + "loss": 0.4584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055590011179447174, + "rewards/margins": 1.252545952796936, + "rewards/rejected": -1.196955919265747, + "step": 6727 + }, + { + "epoch": 0.39, + "learning_rate": 6.946887142936462e-08, + "logits/chosen": -1.9563446044921875, + "logits/rejected": -1.9625670909881592, + "logps/chosen": -10.182279586791992, + "logps/rejected": -225.06954956054688, + "loss": 0.3683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11500978469848633, + "rewards/margins": 2.036926031112671, + "rewards/rejected": -1.9219162464141846, + "step": 6728 + }, + { + "epoch": 0.39, + "learning_rate": 6.946019081964542e-08, + "logits/chosen": -2.031747341156006, + "logits/rejected": -2.0550100803375244, + "logps/chosen": -229.36875915527344, + "logps/rejected": -400.42828369140625, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0693910121917725, + "rewards/margins": 4.712124824523926, + "rewards/rejected": -2.6427338123321533, + "step": 6729 + }, + { + "epoch": 0.39, + "learning_rate": 6.945150951860507e-08, + "logits/chosen": -1.8899872303009033, + "logits/rejected": -1.890300989151001, + "logps/chosen": -34.32601547241211, + "logps/rejected": -241.4046173095703, + "loss": 0.2793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34158286452293396, + "rewards/margins": 2.9161694049835205, + "rewards/rejected": -2.5745866298675537, + "step": 6730 + }, + { + "epoch": 0.39, + "learning_rate": 6.944282752655193e-08, + "logits/chosen": -2.0629377365112305, + "logits/rejected": -2.0367634296417236, + "logps/chosen": -61.540008544921875, + "logps/rejected": -451.88995361328125, + "loss": 0.1241, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2189263105392456, + "rewards/margins": 5.859685897827148, + "rewards/rejected": -4.640759468078613, + "step": 6731 + }, + { + "epoch": 0.39, + "learning_rate": 6.943414484379447e-08, + "logits/chosen": -1.9901012182235718, + "logits/rejected": -1.9809479713439941, + "logps/chosen": -32.56998062133789, + "logps/rejected": -167.89219665527344, + "loss": 0.4333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09640121459960938, + "rewards/margins": 1.2437782287597656, + "rewards/rejected": -1.1473770141601562, + "step": 6732 + }, + { + "epoch": 0.39, + "learning_rate": 6.942546147064112e-08, + "logits/chosen": -1.9161159992218018, + "logits/rejected": -1.9175022840499878, + "logps/chosen": -73.29199981689453, + "logps/rejected": -228.05935668945312, + "loss": 0.4934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3396904170513153, + "rewards/margins": 2.2720069885253906, + "rewards/rejected": -2.6116974353790283, + "step": 6733 + }, + { + "epoch": 0.39, + "learning_rate": 6.941677740740035e-08, + "logits/chosen": -1.9651455879211426, + "logits/rejected": -1.9583609104156494, + "logps/chosen": -45.186283111572266, + "logps/rejected": -259.3011779785156, + "loss": 0.3891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29599496722221375, + "rewards/margins": 1.0927737951278687, + "rewards/rejected": -0.7967788577079773, + "step": 6734 + }, + { + "epoch": 0.39, + "learning_rate": 6.940809265438067e-08, + "logits/chosen": -1.8970627784729004, + "logits/rejected": -1.8693313598632812, + "logps/chosen": -252.6580810546875, + "logps/rejected": -447.1663818359375, + "loss": 0.1599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.405603051185608, + "rewards/margins": 1.452996850013733, + "rewards/rejected": -0.047393798828125, + "step": 6735 + }, + { + "epoch": 0.39, + "learning_rate": 6.939940721189062e-08, + "logits/chosen": -1.980188250541687, + "logits/rejected": -1.9760310649871826, + "logps/chosen": -0.0007136096828617156, + "logps/rejected": -114.34405517578125, + "loss": 0.5647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001595708599779755, + "rewards/margins": 0.5932847261428833, + "rewards/rejected": -0.5931251645088196, + "step": 6736 + }, + { + "epoch": 0.39, + "learning_rate": 6.939072108023871e-08, + "logits/chosen": -2.096522092819214, + "logits/rejected": -2.0904908180236816, + "logps/chosen": -21.053184509277344, + "logps/rejected": -156.962646484375, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0034885406494140625, + "rewards/margins": 3.0048604011535645, + "rewards/rejected": -3.0013718605041504, + "step": 6737 + }, + { + "epoch": 0.39, + "learning_rate": 6.938203425973356e-08, + "logits/chosen": -2.1042284965515137, + "logits/rejected": -2.1004951000213623, + "logps/chosen": -0.9175791144371033, + "logps/rejected": -223.0658721923828, + "loss": 0.3708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034907858818769455, + "rewards/margins": 3.362356424331665, + "rewards/rejected": -3.397264242172241, + "step": 6738 + }, + { + "epoch": 0.39, + "learning_rate": 6.937334675068372e-08, + "logits/chosen": -1.8630346059799194, + "logits/rejected": -1.8659160137176514, + "logps/chosen": -197.55816650390625, + "logps/rejected": -320.70745849609375, + "loss": 0.1095, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.086107015609741, + "rewards/margins": 1.8952836990356445, + "rewards/rejected": 0.19082336127758026, + "step": 6739 + }, + { + "epoch": 0.39, + "learning_rate": 6.936465855339786e-08, + "logits/chosen": -1.8227626085281372, + "logits/rejected": -1.821897029876709, + "logps/chosen": -0.00013279597624205053, + "logps/rejected": -145.4563751220703, + "loss": 0.4139, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6344278012402356e-06, + "rewards/margins": 1.9351428747177124, + "rewards/rejected": -1.9351402521133423, + "step": 6740 + }, + { + "epoch": 0.39, + "learning_rate": 6.93559696681846e-08, + "logits/chosen": -2.0790278911590576, + "logits/rejected": -2.056299924850464, + "logps/chosen": -198.6663360595703, + "logps/rejected": -469.3709411621094, + "loss": 0.0445, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0882797241210938, + "rewards/margins": 3.3425307273864746, + "rewards/rejected": -1.2542511224746704, + "step": 6741 + }, + { + "epoch": 0.39, + "learning_rate": 6.93472800953526e-08, + "logits/chosen": -2.021686315536499, + "logits/rejected": -2.0217480659484863, + "logps/chosen": -0.0723714828491211, + "logps/rejected": -20.26051139831543, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0064002396538853645, + "rewards/margins": -0.006368124857544899, + "rewards/rejected": 0.012768364511430264, + "step": 6742 + }, + { + "epoch": 0.39, + "learning_rate": 6.933858983521059e-08, + "logits/chosen": -2.0060856342315674, + "logits/rejected": -1.9934546947479248, + "logps/chosen": -70.64384460449219, + "logps/rejected": -318.96954345703125, + "loss": 0.2427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3502082824707031, + "rewards/margins": 4.224091529846191, + "rewards/rejected": -3.873883008956909, + "step": 6743 + }, + { + "epoch": 0.39, + "learning_rate": 6.932989888806729e-08, + "logits/chosen": -1.8782811164855957, + "logits/rejected": -1.8784517049789429, + "logps/chosen": -3.409350028960034e-05, + "logps/rejected": -177.91867065429688, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.364310595723509e-07, + "rewards/margins": 3.4527080059051514, + "rewards/rejected": -3.4527084827423096, + "step": 6744 + }, + { + "epoch": 0.39, + "learning_rate": 6.932120725423139e-08, + "logits/chosen": -1.8039741516113281, + "logits/rejected": -1.6986736059188843, + "logps/chosen": -154.7828369140625, + "logps/rejected": -471.65081787109375, + "loss": 0.1007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7450774908065796, + "rewards/margins": 2.650103807449341, + "rewards/rejected": -0.9050262570381165, + "step": 6745 + }, + { + "epoch": 0.39, + "learning_rate": 6.931251493401173e-08, + "logits/chosen": -1.9410346746444702, + "logits/rejected": -1.9502825736999512, + "logps/chosen": -27.130905151367188, + "logps/rejected": -108.81044006347656, + "loss": 0.4279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10284119099378586, + "rewards/margins": 2.169950246810913, + "rewards/rejected": -2.2727913856506348, + "step": 6746 + }, + { + "epoch": 0.39, + "learning_rate": 6.930382192771704e-08, + "logits/chosen": -1.8106038570404053, + "logits/rejected": -1.807132363319397, + "logps/chosen": -0.2989746928215027, + "logps/rejected": -80.62187194824219, + "loss": 0.6558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012272894382476807, + "rewards/margins": 0.1187085211277008, + "rewards/rejected": -0.11748123168945312, + "step": 6747 + }, + { + "epoch": 0.39, + "learning_rate": 6.92951282356562e-08, + "logits/chosen": -1.9096026420593262, + "logits/rejected": -1.9171338081359863, + "logps/chosen": -0.20613397657871246, + "logps/rejected": -328.305419921875, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012363703921437263, + "rewards/margins": 6.022829055786133, + "rewards/rejected": -6.035192966461182, + "step": 6748 + }, + { + "epoch": 0.39, + "learning_rate": 6.928643385813801e-08, + "logits/chosen": -2.083289861679077, + "logits/rejected": -2.0832619667053223, + "logps/chosen": -0.010630412958562374, + "logps/rejected": -109.55584716796875, + "loss": 0.4529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003565643564797938, + "rewards/margins": 1.4044179916381836, + "rewards/rejected": -1.40477454662323, + "step": 6749 + }, + { + "epoch": 0.39, + "learning_rate": 6.927773879547135e-08, + "logits/chosen": -1.8894948959350586, + "logits/rejected": -1.9014310836791992, + "logps/chosen": -29.504592895507812, + "logps/rejected": -230.85047912597656, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5295112729072571, + "rewards/margins": 1.2312400341033936, + "rewards/rejected": -0.7017288208007812, + "step": 6750 + }, + { + "epoch": 0.39, + "learning_rate": 6.926904304796511e-08, + "logits/chosen": -2.145359516143799, + "logits/rejected": -2.137643575668335, + "logps/chosen": -109.37835693359375, + "logps/rejected": -272.2576904296875, + "loss": 0.2262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3604774475097656, + "rewards/margins": 2.340407609939575, + "rewards/rejected": -1.9799301624298096, + "step": 6751 + }, + { + "epoch": 0.39, + "learning_rate": 6.926034661592821e-08, + "logits/chosen": -2.0552220344543457, + "logits/rejected": -2.052978992462158, + "logps/chosen": -168.02590942382812, + "logps/rejected": -309.288330078125, + "loss": 0.1781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4983551502227783, + "rewards/margins": 1.4195221662521362, + "rewards/rejected": 0.07883300632238388, + "step": 6752 + }, + { + "epoch": 0.39, + "learning_rate": 6.925164949966958e-08, + "logits/chosen": -1.7826859951019287, + "logits/rejected": -1.8067033290863037, + "logps/chosen": -261.5492858886719, + "logps/rejected": -607.1341552734375, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9365265369415283, + "rewards/margins": 5.617190361022949, + "rewards/rejected": -3.6806640625, + "step": 6753 + }, + { + "epoch": 0.39, + "learning_rate": 6.924295169949819e-08, + "logits/chosen": -1.9831734895706177, + "logits/rejected": -1.9697415828704834, + "logps/chosen": -44.167938232421875, + "logps/rejected": -153.78335571289062, + "loss": 0.5106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3860744535923004, + "rewards/margins": 0.32814598083496094, + "rewards/rejected": 0.05792846903204918, + "step": 6754 + }, + { + "epoch": 0.39, + "learning_rate": 6.923425321572304e-08, + "logits/chosen": -1.943577527999878, + "logits/rejected": -1.9218062162399292, + "logps/chosen": -219.2623291015625, + "logps/rejected": -358.33734130859375, + "loss": 0.1856, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.527121067047119, + "rewards/margins": 0.9723236560821533, + "rewards/rejected": 1.5547974109649658, + "step": 6755 + }, + { + "epoch": 0.39, + "learning_rate": 6.922555404865312e-08, + "logits/chosen": -1.9521832466125488, + "logits/rejected": -1.9510048627853394, + "logps/chosen": -0.2100885808467865, + "logps/rejected": -16.47013282775879, + "loss": 0.6995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005666905548423529, + "rewards/margins": 0.00493242172524333, + "rewards/rejected": -0.010599327273666859, + "step": 6756 + }, + { + "epoch": 0.39, + "learning_rate": 6.921685419859748e-08, + "logits/chosen": -1.8132706880569458, + "logits/rejected": -1.7967009544372559, + "logps/chosen": -169.9876251220703, + "logps/rejected": -322.3388366699219, + "loss": 0.1964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1136521100997925, + "rewards/margins": 1.32732093334198, + "rewards/rejected": -0.2136688232421875, + "step": 6757 + }, + { + "epoch": 0.39, + "learning_rate": 6.920815366586517e-08, + "logits/chosen": -1.8081185817718506, + "logits/rejected": -1.8042329549789429, + "logps/chosen": -109.86846923828125, + "logps/rejected": -297.2906188964844, + "loss": 0.4363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.417611688375473, + "rewards/margins": 0.6259247064590454, + "rewards/rejected": -0.20831298828125, + "step": 6758 + }, + { + "epoch": 0.39, + "learning_rate": 6.919945245076531e-08, + "logits/chosen": -1.9384173154830933, + "logits/rejected": -1.8379377126693726, + "logps/chosen": -212.18487548828125, + "logps/rejected": -499.53759765625, + "loss": 0.1564, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8402740955352783, + "rewards/margins": 1.6438355445861816, + "rewards/rejected": 0.19643859565258026, + "step": 6759 + }, + { + "epoch": 0.39, + "learning_rate": 6.919075055360695e-08, + "logits/chosen": -1.937125563621521, + "logits/rejected": -1.8901516199111938, + "logps/chosen": -336.687744140625, + "logps/rejected": -723.62646484375, + "loss": 0.0719, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5466736555099487, + "rewards/margins": 4.328991889953613, + "rewards/rejected": -2.782318115234375, + "step": 6760 + }, + { + "epoch": 0.39, + "learning_rate": 6.918204797469929e-08, + "logits/chosen": -2.0828776359558105, + "logits/rejected": -2.0734198093414307, + "logps/chosen": -0.18182145059108734, + "logps/rejected": -144.40882873535156, + "loss": 0.4993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026629019528627396, + "rewards/margins": 0.8271982073783875, + "rewards/rejected": -0.8005691766738892, + "step": 6761 + }, + { + "epoch": 0.39, + "learning_rate": 6.917334471435143e-08, + "logits/chosen": -1.757667064666748, + "logits/rejected": -1.7567094564437866, + "logps/chosen": -0.31116780638694763, + "logps/rejected": -78.22477722167969, + "loss": 0.4831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02680651843547821, + "rewards/margins": 1.1680078506469727, + "rewards/rejected": -1.1948143243789673, + "step": 6762 + }, + { + "epoch": 0.39, + "learning_rate": 6.91646407728726e-08, + "logits/chosen": -1.9997814893722534, + "logits/rejected": -1.9860316514968872, + "logps/chosen": -97.41344451904297, + "logps/rejected": -220.84310913085938, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2604988217353821, + "rewards/margins": 2.0594489574432373, + "rewards/rejected": -1.7989501953125, + "step": 6763 + }, + { + "epoch": 0.39, + "learning_rate": 6.915593615057197e-08, + "logits/chosen": -1.887967586517334, + "logits/rejected": -1.881304144859314, + "logps/chosen": -155.8593292236328, + "logps/rejected": -423.76641845703125, + "loss": 0.0603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.832006812095642, + "rewards/margins": 6.377410888671875, + "rewards/rejected": -4.545403957366943, + "step": 6764 + }, + { + "epoch": 0.39, + "learning_rate": 6.914723084775879e-08, + "logits/chosen": -1.9446072578430176, + "logits/rejected": -1.9411879777908325, + "logps/chosen": -208.41761779785156, + "logps/rejected": -289.92901611328125, + "loss": 0.1679, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5034637451171875, + "rewards/margins": 1.3753845691680908, + "rewards/rejected": 0.12807922065258026, + "step": 6765 + }, + { + "epoch": 0.39, + "learning_rate": 6.913852486474232e-08, + "logits/chosen": -1.670293927192688, + "logits/rejected": -1.712780237197876, + "logps/chosen": -268.41046142578125, + "logps/rejected": -490.8047790527344, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7799928188323975, + "rewards/margins": 4.173624038696289, + "rewards/rejected": -1.3936309814453125, + "step": 6766 + }, + { + "epoch": 0.39, + "learning_rate": 6.912981820183182e-08, + "logits/chosen": -1.859091877937317, + "logits/rejected": -1.842719316482544, + "logps/chosen": -142.12283325195312, + "logps/rejected": -210.32681274414062, + "loss": 0.2012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6984115839004517, + "rewards/margins": 1.0711426734924316, + "rewards/rejected": 0.6272689700126648, + "step": 6767 + }, + { + "epoch": 0.39, + "learning_rate": 6.912111085933659e-08, + "logits/chosen": -1.6364132165908813, + "logits/rejected": -1.6380600929260254, + "logps/chosen": -16.14042091369629, + "logps/rejected": -154.978515625, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27265891432762146, + "rewards/margins": 2.764132261276245, + "rewards/rejected": -2.491473436355591, + "step": 6768 + }, + { + "epoch": 0.39, + "learning_rate": 6.9112402837566e-08, + "logits/chosen": -1.9731777906417847, + "logits/rejected": -1.9725329875946045, + "logps/chosen": -10.750118255615234, + "logps/rejected": -142.8056182861328, + "loss": 0.5319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08749532699584961, + "rewards/margins": 0.6580641865730286, + "rewards/rejected": -0.570568859577179, + "step": 6769 + }, + { + "epoch": 0.39, + "learning_rate": 6.910369413682935e-08, + "logits/chosen": -1.7737743854522705, + "logits/rejected": -1.7638583183288574, + "logps/chosen": -0.0035438300110399723, + "logps/rejected": -349.5856018066406, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018353182531427592, + "rewards/margins": 6.878182411193848, + "rewards/rejected": -6.878365993499756, + "step": 6770 + }, + { + "epoch": 0.39, + "learning_rate": 6.909498475743606e-08, + "logits/chosen": -1.9860817193984985, + "logits/rejected": -1.9810798168182373, + "logps/chosen": -81.88337707519531, + "logps/rejected": -204.236328125, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9829826354980469, + "rewards/margins": 0.7538642883300781, + "rewards/rejected": 0.22911834716796875, + "step": 6771 + }, + { + "epoch": 0.39, + "learning_rate": 6.908627469969549e-08, + "logits/chosen": -1.8976831436157227, + "logits/rejected": -1.895892858505249, + "logps/chosen": -3.660004138946533, + "logps/rejected": -91.06398010253906, + "loss": 0.5603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045020557940006256, + "rewards/margins": 0.6282917857170105, + "rewards/rejected": -0.6733123660087585, + "step": 6772 + }, + { + "epoch": 0.39, + "learning_rate": 6.90775639639171e-08, + "logits/chosen": -1.9719935655593872, + "logits/rejected": -1.9567910432815552, + "logps/chosen": -158.484375, + "logps/rejected": -204.54989624023438, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2434784173965454, + "rewards/margins": 0.3195434808731079, + "rewards/rejected": 0.9239349365234375, + "step": 6773 + }, + { + "epoch": 0.39, + "learning_rate": 6.90688525504103e-08, + "logits/chosen": -2.0306153297424316, + "logits/rejected": -2.027702808380127, + "logps/chosen": -11.678043365478516, + "logps/rejected": -185.06439208984375, + "loss": 0.3896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009337139315903187, + "rewards/margins": 2.104402542114258, + "rewards/rejected": -2.0950653553009033, + "step": 6774 + }, + { + "epoch": 0.39, + "learning_rate": 6.906014045948458e-08, + "logits/chosen": -2.019087553024292, + "logits/rejected": -2.0222198963165283, + "logps/chosen": -38.288429260253906, + "logps/rejected": -139.0076446533203, + "loss": 0.426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.055562593042850494, + "rewards/margins": 1.3225349187850952, + "rewards/rejected": -1.3780975341796875, + "step": 6775 + }, + { + "epoch": 0.39, + "learning_rate": 6.905142769144945e-08, + "logits/chosen": -2.0673344135284424, + "logits/rejected": -2.0596256256103516, + "logps/chosen": -192.114990234375, + "logps/rejected": -282.42803955078125, + "loss": 0.3884, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9630264043807983, + "rewards/margins": -0.03633272647857666, + "rewards/rejected": 1.999359130859375, + "step": 6776 + }, + { + "epoch": 0.39, + "learning_rate": 6.904271424661441e-08, + "logits/chosen": -1.9844162464141846, + "logits/rejected": -1.9867441654205322, + "logps/chosen": -16.843931198120117, + "logps/rejected": -181.90037536621094, + "loss": 0.1644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9512037634849548, + "rewards/margins": 4.0182342529296875, + "rewards/rejected": -3.067030429840088, + "step": 6777 + }, + { + "epoch": 0.39, + "learning_rate": 6.903400012528903e-08, + "logits/chosen": -1.9303854703903198, + "logits/rejected": -1.9304280281066895, + "logps/chosen": -30.262340545654297, + "logps/rejected": -109.64408111572266, + "loss": 0.5695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045030783861875534, + "rewards/margins": 0.5037874579429626, + "rewards/rejected": -0.5488182306289673, + "step": 6778 + }, + { + "epoch": 0.39, + "learning_rate": 6.902528532778284e-08, + "logits/chosen": -2.0741991996765137, + "logits/rejected": -2.073758125305176, + "logps/chosen": -49.22004699707031, + "logps/rejected": -233.16915893554688, + "loss": 0.3482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19319306313991547, + "rewards/margins": 1.9640107154846191, + "rewards/rejected": -1.7708176374435425, + "step": 6779 + }, + { + "epoch": 0.39, + "learning_rate": 6.901656985440546e-08, + "logits/chosen": -2.0398237705230713, + "logits/rejected": -2.046476125717163, + "logps/chosen": -46.31631851196289, + "logps/rejected": -259.48004150390625, + "loss": 0.2471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6205295920372009, + "rewards/margins": 3.6636998653411865, + "rewards/rejected": -3.043170213699341, + "step": 6780 + }, + { + "epoch": 0.39, + "learning_rate": 6.900785370546649e-08, + "logits/chosen": -1.9888607263565063, + "logits/rejected": -1.9827967882156372, + "logps/chosen": -328.470703125, + "logps/rejected": -439.8228454589844, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2379668951034546, + "rewards/margins": 0.7622039318084717, + "rewards/rejected": 0.4757629334926605, + "step": 6781 + }, + { + "epoch": 0.39, + "learning_rate": 6.899913688127559e-08, + "logits/chosen": -2.113961935043335, + "logits/rejected": -2.116314172744751, + "logps/chosen": -253.01553344726562, + "logps/rejected": -433.0007629394531, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.12748122215271, + "rewards/margins": 3.6094911098480225, + "rewards/rejected": -1.4820098876953125, + "step": 6782 + }, + { + "epoch": 0.39, + "learning_rate": 6.89904193821424e-08, + "logits/chosen": -2.0281789302825928, + "logits/rejected": -2.02748966217041, + "logps/chosen": -10.349544525146484, + "logps/rejected": -162.38519287109375, + "loss": 0.5618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4972791373729706, + "rewards/margins": 1.9392611980438232, + "rewards/rejected": -2.436540365219116, + "step": 6783 + }, + { + "epoch": 0.39, + "learning_rate": 6.898170120837665e-08, + "logits/chosen": -2.038597822189331, + "logits/rejected": -2.0784764289855957, + "logps/chosen": -268.03125, + "logps/rejected": -389.77593994140625, + "loss": 0.1858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.228753685951233, + "rewards/margins": 1.3455597162246704, + "rewards/rejected": -0.1168060302734375, + "step": 6784 + }, + { + "epoch": 0.39, + "learning_rate": 6.897298236028798e-08, + "logits/chosen": -1.9658808708190918, + "logits/rejected": -1.9504384994506836, + "logps/chosen": -20.451675415039062, + "logps/rejected": -206.86849975585938, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25973090529441833, + "rewards/margins": 2.440797805786133, + "rewards/rejected": -2.1810669898986816, + "step": 6785 + }, + { + "epoch": 0.39, + "learning_rate": 6.896426283818621e-08, + "logits/chosen": -1.7418428659439087, + "logits/rejected": -1.726902961730957, + "logps/chosen": -387.056396484375, + "logps/rejected": -525.0147094726562, + "loss": 0.1437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.472991943359375, + "rewards/margins": 1.7307617664337158, + "rewards/rejected": -1.2577698230743408, + "step": 6786 + }, + { + "epoch": 0.39, + "learning_rate": 6.895554264238103e-08, + "logits/chosen": -1.9497349262237549, + "logits/rejected": -1.940156102180481, + "logps/chosen": -111.58723449707031, + "logps/rejected": -223.3121337890625, + "loss": 0.3857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.347677618265152, + "rewards/margins": 1.0973541736602783, + "rewards/rejected": -0.749676525592804, + "step": 6787 + }, + { + "epoch": 0.4, + "learning_rate": 6.894682177318228e-08, + "logits/chosen": -1.9942724704742432, + "logits/rejected": -1.969078540802002, + "logps/chosen": -250.5541229248047, + "logps/rejected": -431.16693115234375, + "loss": 0.1553, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.480320692062378, + "rewards/margins": 1.115513563156128, + "rewards/rejected": 1.36480712890625, + "step": 6788 + }, + { + "epoch": 0.4, + "learning_rate": 6.893810023089972e-08, + "logits/chosen": -1.958211898803711, + "logits/rejected": -1.9536322355270386, + "logps/chosen": -73.96165466308594, + "logps/rejected": -129.49252319335938, + "loss": 0.3918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3939712643623352, + "rewards/margins": 1.1316734552383423, + "rewards/rejected": -0.7377021908760071, + "step": 6789 + }, + { + "epoch": 0.4, + "learning_rate": 6.892937801584322e-08, + "logits/chosen": -1.9459213018417358, + "logits/rejected": -1.9494878053665161, + "logps/chosen": -2.981137275695801, + "logps/rejected": -109.76309967041016, + "loss": 0.6789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05524339899420738, + "rewards/margins": 0.030457785353064537, + "rewards/rejected": 0.024785613641142845, + "step": 6790 + }, + { + "epoch": 0.4, + "learning_rate": 6.89206551283226e-08, + "logits/chosen": -1.9987117052078247, + "logits/rejected": -1.866402506828308, + "logps/chosen": -223.561279296875, + "logps/rejected": -556.1201171875, + "loss": 0.0844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8881012201309204, + "rewards/margins": 3.4841156005859375, + "rewards/rejected": -1.596014380455017, + "step": 6791 + }, + { + "epoch": 0.4, + "learning_rate": 6.891193156864779e-08, + "logits/chosen": -2.042980909347534, + "logits/rejected": -2.0396368503570557, + "logps/chosen": -32.366432189941406, + "logps/rejected": -194.53662109375, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.084864042699337, + "rewards/margins": 1.531568169593811, + "rewards/rejected": -1.4467041492462158, + "step": 6792 + }, + { + "epoch": 0.4, + "learning_rate": 6.890320733712863e-08, + "logits/chosen": -1.835963487625122, + "logits/rejected": -1.8329411745071411, + "logps/chosen": -7.912559509277344, + "logps/rejected": -259.45184326171875, + "loss": 0.3687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02800283394753933, + "rewards/margins": 2.054359197616577, + "rewards/rejected": -2.0263564586639404, + "step": 6793 + }, + { + "epoch": 0.4, + "learning_rate": 6.889448243407509e-08, + "logits/chosen": -1.7754853963851929, + "logits/rejected": -1.779807686805725, + "logps/chosen": -43.99454879760742, + "logps/rejected": -234.89671325683594, + "loss": 0.2517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6531131863594055, + "rewards/margins": 2.2984561920166016, + "rewards/rejected": -1.6453430652618408, + "step": 6794 + }, + { + "epoch": 0.4, + "learning_rate": 6.888575685979711e-08, + "logits/chosen": -2.067335844039917, + "logits/rejected": -2.0531978607177734, + "logps/chosen": -66.02716064453125, + "logps/rejected": -278.72882080078125, + "loss": 0.3637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.540509045124054, + "rewards/margins": 0.7107300162315369, + "rewards/rejected": -0.17022095620632172, + "step": 6795 + }, + { + "epoch": 0.4, + "learning_rate": 6.887703061460468e-08, + "logits/chosen": -1.7788615226745605, + "logits/rejected": -1.7327276468276978, + "logps/chosen": -220.07223510742188, + "logps/rejected": -299.4971618652344, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5306747555732727, + "rewards/margins": 1.0204696655273438, + "rewards/rejected": -0.48979493975639343, + "step": 6796 + }, + { + "epoch": 0.4, + "learning_rate": 6.886830369880777e-08, + "logits/chosen": -1.7911803722381592, + "logits/rejected": -1.7871671915054321, + "logps/chosen": -2.697518825531006, + "logps/rejected": -89.556640625, + "loss": 0.5397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17206747829914093, + "rewards/margins": 1.2111493349075317, + "rewards/rejected": -1.3832168579101562, + "step": 6797 + }, + { + "epoch": 0.4, + "learning_rate": 6.885957611271643e-08, + "logits/chosen": -1.8594731092453003, + "logits/rejected": -1.8602651357650757, + "logps/chosen": -34.47113800048828, + "logps/rejected": -102.09378814697266, + "loss": 1.0164, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7804750800132751, + "rewards/margins": -0.295043408870697, + "rewards/rejected": -0.4854316711425781, + "step": 6798 + }, + { + "epoch": 0.4, + "learning_rate": 6.885084785664068e-08, + "logits/chosen": -1.9524767398834229, + "logits/rejected": -1.9303489923477173, + "logps/chosen": -149.39183044433594, + "logps/rejected": -373.7150573730469, + "loss": 0.1961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5702377557754517, + "rewards/margins": 4.421089172363281, + "rewards/rejected": -3.850851535797119, + "step": 6799 + }, + { + "epoch": 0.4, + "learning_rate": 6.884211893089062e-08, + "logits/chosen": -2.0811896324157715, + "logits/rejected": -2.0705223083496094, + "logps/chosen": -22.756772994995117, + "logps/rejected": -119.67720031738281, + "loss": 0.9715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9501269459724426, + "rewards/margins": 0.16442996263504028, + "rewards/rejected": -1.114556908607483, + "step": 6800 + }, + { + "epoch": 0.4, + "learning_rate": 6.883338933577631e-08, + "logits/chosen": -1.9247039556503296, + "logits/rejected": -1.9126057624816895, + "logps/chosen": -59.289371490478516, + "logps/rejected": -161.86993408203125, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0842922925949097, + "rewards/margins": 2.428417682647705, + "rewards/rejected": -1.3441253900527954, + "step": 6801 + }, + { + "epoch": 0.4, + "learning_rate": 6.88246590716079e-08, + "logits/chosen": -1.8885152339935303, + "logits/rejected": -1.8995121717453003, + "logps/chosen": -224.92507934570312, + "logps/rejected": -407.4389953613281, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0186493396759033, + "rewards/margins": 3.9874024391174316, + "rewards/rejected": -1.9687530994415283, + "step": 6802 + }, + { + "epoch": 0.4, + "learning_rate": 6.881592813869552e-08, + "logits/chosen": -1.990384578704834, + "logits/rejected": -1.9703094959259033, + "logps/chosen": -0.0001391147670801729, + "logps/rejected": -227.399169921875, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.602574901786284e-07, + "rewards/margins": 3.148879051208496, + "rewards/rejected": -3.148878574371338, + "step": 6803 + }, + { + "epoch": 0.4, + "learning_rate": 6.880719653734934e-08, + "logits/chosen": -1.8622386455535889, + "logits/rejected": -1.8830243349075317, + "logps/chosen": -160.1197509765625, + "logps/rejected": -208.3476104736328, + "loss": 0.402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5698150396347046, + "rewards/margins": 0.01646876335144043, + "rewards/rejected": 1.5533462762832642, + "step": 6804 + }, + { + "epoch": 0.4, + "learning_rate": 6.879846426787952e-08, + "logits/chosen": -2.1117289066314697, + "logits/rejected": -2.06402850151062, + "logps/chosen": -171.988037109375, + "logps/rejected": -392.4463806152344, + "loss": 0.1314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6160736083984375, + "rewards/margins": 1.68798828125, + "rewards/rejected": -0.0719146728515625, + "step": 6805 + }, + { + "epoch": 0.4, + "learning_rate": 6.878973133059631e-08, + "logits/chosen": -2.0841100215911865, + "logits/rejected": -2.074352264404297, + "logps/chosen": -108.52188873291016, + "logps/rejected": -405.5812072753906, + "loss": 0.1885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9996498227119446, + "rewards/margins": 2.4293465614318848, + "rewards/rejected": -1.4296966791152954, + "step": 6806 + }, + { + "epoch": 0.4, + "learning_rate": 6.878099772580995e-08, + "logits/chosen": -1.8048758506774902, + "logits/rejected": -1.8034021854400635, + "logps/chosen": -176.2786102294922, + "logps/rejected": -423.380126953125, + "loss": 0.1182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7114471793174744, + "rewards/margins": 4.330404758453369, + "rewards/rejected": -3.61895751953125, + "step": 6807 + }, + { + "epoch": 0.4, + "learning_rate": 6.877226345383063e-08, + "logits/chosen": -1.896323800086975, + "logits/rejected": -1.897068977355957, + "logps/chosen": -0.94553142786026, + "logps/rejected": -150.6215362548828, + "loss": 0.3965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.038623254746198654, + "rewards/margins": 2.475983142852783, + "rewards/rejected": -2.514606475830078, + "step": 6808 + }, + { + "epoch": 0.4, + "learning_rate": 6.876352851496874e-08, + "logits/chosen": -1.9959300756454468, + "logits/rejected": -1.9962016344070435, + "logps/chosen": -5.409482479095459, + "logps/rejected": -130.18992614746094, + "loss": 0.5682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05343785509467125, + "rewards/margins": 0.6905807256698608, + "rewards/rejected": -0.7440185546875, + "step": 6809 + }, + { + "epoch": 0.4, + "learning_rate": 6.875479290953451e-08, + "logits/chosen": -2.029515027999878, + "logits/rejected": -2.016859292984009, + "logps/chosen": -0.02111217752099037, + "logps/rejected": -226.83749389648438, + "loss": 0.3907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003841207129880786, + "rewards/margins": 2.5639073848724365, + "rewards/rejected": -2.5600662231445312, + "step": 6810 + }, + { + "epoch": 0.4, + "learning_rate": 6.87460566378383e-08, + "logits/chosen": -1.8579047918319702, + "logits/rejected": -1.8156981468200684, + "logps/chosen": -134.8405303955078, + "logps/rejected": -451.900390625, + "loss": 0.146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1355942487716675, + "rewards/margins": 1.6943345069885254, + "rewards/rejected": -0.5587402582168579, + "step": 6811 + }, + { + "epoch": 0.4, + "learning_rate": 6.873731970019046e-08, + "logits/chosen": -1.7539517879486084, + "logits/rejected": -1.7446693181991577, + "logps/chosen": -235.1640167236328, + "logps/rejected": -315.4455261230469, + "loss": 0.0654, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8633193969726562, + "rewards/margins": 2.735072374343872, + "rewards/rejected": -0.871752917766571, + "step": 6812 + }, + { + "epoch": 0.4, + "learning_rate": 6.872858209690138e-08, + "logits/chosen": -1.9320228099822998, + "logits/rejected": -1.926993489265442, + "logps/chosen": -0.0018224224913865328, + "logps/rejected": -236.1138153076172, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003719887463375926, + "rewards/margins": 6.419703483581543, + "rewards/rejected": -6.4193315505981445, + "step": 6813 + }, + { + "epoch": 0.4, + "learning_rate": 6.871984382828145e-08, + "logits/chosen": -1.9795669317245483, + "logits/rejected": -1.9762760400772095, + "logps/chosen": -53.78324508666992, + "logps/rejected": -220.24151611328125, + "loss": 0.3345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2373119443655014, + "rewards/margins": 1.7479259967803955, + "rewards/rejected": -1.510614037513733, + "step": 6814 + }, + { + "epoch": 0.4, + "learning_rate": 6.871110489464109e-08, + "logits/chosen": -1.9344747066497803, + "logits/rejected": -1.9359776973724365, + "logps/chosen": -145.28118896484375, + "logps/rejected": -321.970947265625, + "loss": 0.3713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8701446652412415, + "rewards/margins": 0.30764466524124146, + "rewards/rejected": 0.5625, + "step": 6815 + }, + { + "epoch": 0.4, + "learning_rate": 6.870236529629076e-08, + "logits/chosen": -1.8969515562057495, + "logits/rejected": -1.8935359716415405, + "logps/chosen": -59.39577865600586, + "logps/rejected": -194.31036376953125, + "loss": 0.2571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5006038546562195, + "rewards/margins": 3.1769986152648926, + "rewards/rejected": -2.6763947010040283, + "step": 6816 + }, + { + "epoch": 0.4, + "learning_rate": 6.869362503354095e-08, + "logits/chosen": -1.7644404172897339, + "logits/rejected": -1.767025351524353, + "logps/chosen": -12.375492095947266, + "logps/rejected": -60.47159957885742, + "loss": 0.4232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0374944694340229, + "rewards/margins": 1.6665525436401367, + "rewards/rejected": -1.6290581226348877, + "step": 6817 + }, + { + "epoch": 0.4, + "learning_rate": 6.868488410670212e-08, + "logits/chosen": -1.9047855138778687, + "logits/rejected": -1.89475417137146, + "logps/chosen": -1.2989566326141357, + "logps/rejected": -133.19903564453125, + "loss": 0.372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029212964698672295, + "rewards/margins": 1.9455841779708862, + "rewards/rejected": -1.91637122631073, + "step": 6818 + }, + { + "epoch": 0.4, + "learning_rate": 6.86761425160848e-08, + "logits/chosen": -1.8997043371200562, + "logits/rejected": -1.8989782333374023, + "logps/chosen": -63.841617584228516, + "logps/rejected": -256.47845458984375, + "loss": 0.2174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9019161462783813, + "rewards/margins": 2.089048385620117, + "rewards/rejected": -1.1871322393417358, + "step": 6819 + }, + { + "epoch": 0.4, + "learning_rate": 6.866740026199957e-08, + "logits/chosen": -1.807054877281189, + "logits/rejected": -1.8073186874389648, + "logps/chosen": -58.41405487060547, + "logps/rejected": -173.2365264892578, + "loss": 0.4856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.099156953394413, + "rewards/margins": 0.8009666800498962, + "rewards/rejected": -0.7018097043037415, + "step": 6820 + }, + { + "epoch": 0.4, + "learning_rate": 6.865865734475695e-08, + "logits/chosen": -2.084810733795166, + "logits/rejected": -2.083808183670044, + "logps/chosen": -0.0005665700882673264, + "logps/rejected": -62.278255462646484, + "loss": 0.4162, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.638949657324702e-05, + "rewards/margins": 1.6829867362976074, + "rewards/rejected": -1.682900309562683, + "step": 6821 + }, + { + "epoch": 0.4, + "learning_rate": 6.864991376466755e-08, + "logits/chosen": -2.028860330581665, + "logits/rejected": -2.0287368297576904, + "logps/chosen": -12.544617652893066, + "logps/rejected": -107.65774536132812, + "loss": 0.644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6088202595710754, + "rewards/margins": 0.926490843296051, + "rewards/rejected": -1.5353111028671265, + "step": 6822 + }, + { + "epoch": 0.4, + "learning_rate": 6.8641169522042e-08, + "logits/chosen": -1.9005168676376343, + "logits/rejected": -1.893179178237915, + "logps/chosen": -156.35842895507812, + "logps/rejected": -379.92022705078125, + "loss": 0.0599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6115998029708862, + "rewards/margins": 5.197201728820801, + "rewards/rejected": -3.585601806640625, + "step": 6823 + }, + { + "epoch": 0.4, + "learning_rate": 6.863242461719092e-08, + "logits/chosen": -2.1619362831115723, + "logits/rejected": -2.1590869426727295, + "logps/chosen": -3.587480068206787, + "logps/rejected": -156.28311157226562, + "loss": 0.4667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07389507442712784, + "rewards/margins": 1.1369978189468384, + "rewards/rejected": -1.0631027221679688, + "step": 6824 + }, + { + "epoch": 0.4, + "learning_rate": 6.862367905042496e-08, + "logits/chosen": -2.0712122917175293, + "logits/rejected": -2.070547580718994, + "logps/chosen": -195.2035369873047, + "logps/rejected": -427.6893310546875, + "loss": 0.1037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.02324378490448, + "rewards/margins": 2.1852617263793945, + "rewards/rejected": -1.162017822265625, + "step": 6825 + }, + { + "epoch": 0.4, + "learning_rate": 6.861493282205485e-08, + "logits/chosen": -1.8637839555740356, + "logits/rejected": -1.8444839715957642, + "logps/chosen": -118.26306915283203, + "logps/rejected": -304.3989562988281, + "loss": 0.1675, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1337196826934814, + "rewards/margins": 2.4673409461975098, + "rewards/rejected": -1.3336212635040283, + "step": 6826 + }, + { + "epoch": 0.4, + "learning_rate": 6.860618593239126e-08, + "logits/chosen": -1.9304704666137695, + "logits/rejected": -1.9282928705215454, + "logps/chosen": -35.562408447265625, + "logps/rejected": -233.08770751953125, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05844001844525337, + "rewards/margins": 2.773952007293701, + "rewards/rejected": -2.7155120372772217, + "step": 6827 + }, + { + "epoch": 0.4, + "learning_rate": 6.859743838174493e-08, + "logits/chosen": -1.8725614547729492, + "logits/rejected": -1.848235845565796, + "logps/chosen": -337.63409423828125, + "logps/rejected": -463.906005859375, + "loss": 0.3615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03212585672736168, + "rewards/margins": 0.38093873858451843, + "rewards/rejected": -0.34881287813186646, + "step": 6828 + }, + { + "epoch": 0.4, + "learning_rate": 6.858869017042661e-08, + "logits/chosen": -1.9865883588790894, + "logits/rejected": -1.9817391633987427, + "logps/chosen": -22.955463409423828, + "logps/rejected": -157.70855712890625, + "loss": 0.6689, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1105876937508583, + "rewards/margins": -0.04375038295984268, + "rewards/rejected": 0.154338076710701, + "step": 6829 + }, + { + "epoch": 0.4, + "learning_rate": 6.85799412987471e-08, + "logits/chosen": -1.824428677558899, + "logits/rejected": -1.8135343790054321, + "logps/chosen": -341.3402404785156, + "logps/rejected": -776.0669555664062, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3446013927459717, + "rewards/margins": 5.4626922607421875, + "rewards/rejected": -3.118090867996216, + "step": 6830 + }, + { + "epoch": 0.4, + "learning_rate": 6.857119176701718e-08, + "logits/chosen": -2.1118366718292236, + "logits/rejected": -2.101917028427124, + "logps/chosen": -5.606117248535156, + "logps/rejected": -165.7807159423828, + "loss": 0.3554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09100880473852158, + "rewards/margins": 2.7338311672210693, + "rewards/rejected": -2.642822265625, + "step": 6831 + }, + { + "epoch": 0.4, + "learning_rate": 6.85624415755477e-08, + "logits/chosen": -1.782785177230835, + "logits/rejected": -1.7744520902633667, + "logps/chosen": -328.6144104003906, + "logps/rejected": -425.3909912109375, + "loss": 0.0917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3565094470977783, + "rewards/margins": 2.3150055408477783, + "rewards/rejected": -0.95849609375, + "step": 6832 + }, + { + "epoch": 0.4, + "learning_rate": 6.855369072464949e-08, + "logits/chosen": -1.8098540306091309, + "logits/rejected": -1.8105846643447876, + "logps/chosen": -29.193313598632812, + "logps/rejected": -153.23573303222656, + "loss": 0.2359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46095773577690125, + "rewards/margins": 3.1120617389678955, + "rewards/rejected": -2.651103973388672, + "step": 6833 + }, + { + "epoch": 0.4, + "learning_rate": 6.854493921463345e-08, + "logits/chosen": -1.957785725593567, + "logits/rejected": -1.9457416534423828, + "logps/chosen": -3.7073743442306295e-05, + "logps/rejected": -235.35647583007812, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.629197966707579e-07, + "rewards/margins": 3.8829338550567627, + "rewards/rejected": -3.8829345703125, + "step": 6834 + }, + { + "epoch": 0.4, + "learning_rate": 6.853618704581043e-08, + "logits/chosen": -1.7537723779678345, + "logits/rejected": -1.7565007209777832, + "logps/chosen": -120.58003234863281, + "logps/rejected": -372.2279357910156, + "loss": 0.1395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8133590817451477, + "rewards/margins": 2.431727647781372, + "rewards/rejected": -1.6183685064315796, + "step": 6835 + }, + { + "epoch": 0.4, + "learning_rate": 6.852743421849138e-08, + "logits/chosen": -1.8832014799118042, + "logits/rejected": -1.8917564153671265, + "logps/chosen": -207.99867248535156, + "logps/rejected": -241.1806640625, + "loss": 0.2437, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1791763305664062, + "rewards/margins": 0.5502395629882812, + "rewards/rejected": 1.628936767578125, + "step": 6836 + }, + { + "epoch": 0.4, + "learning_rate": 6.851868073298724e-08, + "logits/chosen": -1.9936919212341309, + "logits/rejected": -2.0132744312286377, + "logps/chosen": -268.4520568847656, + "logps/rejected": -452.7892150878906, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.86334228515625, + "rewards/margins": 5.277639865875244, + "rewards/rejected": -2.414297580718994, + "step": 6837 + }, + { + "epoch": 0.4, + "learning_rate": 6.850992658960899e-08, + "logits/chosen": -2.0742321014404297, + "logits/rejected": -2.076380491256714, + "logps/chosen": -74.732666015625, + "logps/rejected": -149.73147583007812, + "loss": 0.9068, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.19369354844093323, + "rewards/margins": -0.5361602902412415, + "rewards/rejected": 0.3424667418003082, + "step": 6838 + }, + { + "epoch": 0.4, + "learning_rate": 6.850117178866758e-08, + "logits/chosen": -1.9182195663452148, + "logits/rejected": -1.8939809799194336, + "logps/chosen": -219.75502014160156, + "logps/rejected": -363.21807861328125, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.510640025138855, + "rewards/margins": 2.018904209136963, + "rewards/rejected": -0.5082641839981079, + "step": 6839 + }, + { + "epoch": 0.4, + "learning_rate": 6.849241633047406e-08, + "logits/chosen": -1.986106514930725, + "logits/rejected": -2.000871181488037, + "logps/chosen": -254.2957763671875, + "logps/rejected": -292.98834228515625, + "loss": 0.3448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.615490734577179, + "rewards/margins": 0.26983338594436646, + "rewards/rejected": 0.3456573486328125, + "step": 6840 + }, + { + "epoch": 0.4, + "learning_rate": 6.848366021533947e-08, + "logits/chosen": -2.056694507598877, + "logits/rejected": -2.0480973720550537, + "logps/chosen": -51.50754928588867, + "logps/rejected": -215.94302368164062, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4321449398994446, + "rewards/margins": 0.8872474431991577, + "rewards/rejected": -0.4551025331020355, + "step": 6841 + }, + { + "epoch": 0.4, + "learning_rate": 6.847490344357482e-08, + "logits/chosen": -2.015150308609009, + "logits/rejected": -2.017076253890991, + "logps/chosen": -315.52972412109375, + "logps/rejected": -510.2127685546875, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.179309129714966, + "rewards/margins": 3.6381044387817383, + "rewards/rejected": -1.458795189857483, + "step": 6842 + }, + { + "epoch": 0.4, + "learning_rate": 6.846614601549128e-08, + "logits/chosen": -2.0258729457855225, + "logits/rejected": -2.013582944869995, + "logps/chosen": -137.46775817871094, + "logps/rejected": -305.17889404296875, + "loss": 0.1513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9975082278251648, + "rewards/margins": 3.0195724964141846, + "rewards/rejected": -2.022064208984375, + "step": 6843 + }, + { + "epoch": 0.4, + "learning_rate": 6.845738793139987e-08, + "logits/chosen": -1.879320740699768, + "logits/rejected": -1.886525273323059, + "logps/chosen": -75.90096282958984, + "logps/rejected": -189.8827362060547, + "loss": 0.4113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10216369479894638, + "rewards/margins": 1.3688338994979858, + "rewards/rejected": -1.2666702270507812, + "step": 6844 + }, + { + "epoch": 0.4, + "learning_rate": 6.844862919161177e-08, + "logits/chosen": -1.8758459091186523, + "logits/rejected": -1.8256081342697144, + "logps/chosen": -219.59568786621094, + "logps/rejected": -369.5044860839844, + "loss": 0.2491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.37505042552948, + "rewards/margins": 0.9930588006973267, + "rewards/rejected": 0.38199159502983093, + "step": 6845 + }, + { + "epoch": 0.4, + "learning_rate": 6.84398697964381e-08, + "logits/chosen": -2.000594139099121, + "logits/rejected": -1.9980088472366333, + "logps/chosen": -0.00010442409256938845, + "logps/rejected": -155.90077209472656, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6329970549122663e-06, + "rewards/margins": 0.881062388420105, + "rewards/rejected": -0.881060779094696, + "step": 6846 + }, + { + "epoch": 0.4, + "learning_rate": 6.843110974619009e-08, + "logits/chosen": -1.7819690704345703, + "logits/rejected": -1.768438458442688, + "logps/chosen": -10.375367164611816, + "logps/rejected": -124.73745727539062, + "loss": 0.4923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037531375885009766, + "rewards/margins": 1.0120453834533691, + "rewards/rejected": -0.9745140075683594, + "step": 6847 + }, + { + "epoch": 0.4, + "learning_rate": 6.842234904117886e-08, + "logits/chosen": -2.0488343238830566, + "logits/rejected": -2.012906551361084, + "logps/chosen": -234.3835906982422, + "logps/rejected": -319.07098388671875, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.09674072265625, + "rewards/margins": 0.6017700433731079, + "rewards/rejected": 0.4949707090854645, + "step": 6848 + }, + { + "epoch": 0.4, + "learning_rate": 6.841358768171572e-08, + "logits/chosen": -1.870136022567749, + "logits/rejected": -1.8673951625823975, + "logps/chosen": -21.75337791442871, + "logps/rejected": -112.60047912597656, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3322286605834961, + "rewards/margins": 1.1434739828109741, + "rewards/rejected": -1.4757026433944702, + "step": 6849 + }, + { + "epoch": 0.4, + "learning_rate": 6.840482566811185e-08, + "logits/chosen": -1.798768401145935, + "logits/rejected": -1.7908300161361694, + "logps/chosen": -22.43423080444336, + "logps/rejected": -77.59135437011719, + "loss": 0.6649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18817253410816193, + "rewards/margins": 0.14435692131519318, + "rewards/rejected": -0.3325294554233551, + "step": 6850 + }, + { + "epoch": 0.4, + "learning_rate": 6.839606300067856e-08, + "logits/chosen": -1.9119237661361694, + "logits/rejected": -1.922458529472351, + "logps/chosen": -202.0374298095703, + "logps/rejected": -357.68560791015625, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5601699352264404, + "rewards/margins": 2.9327895641326904, + "rewards/rejected": -0.37261962890625, + "step": 6851 + }, + { + "epoch": 0.4, + "learning_rate": 6.838729967972712e-08, + "logits/chosen": -1.9925974607467651, + "logits/rejected": -1.9646245241165161, + "logps/chosen": -138.13076782226562, + "logps/rejected": -395.2101135253906, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7264190912246704, + "rewards/margins": 3.2625794410705566, + "rewards/rejected": -1.5361603498458862, + "step": 6852 + }, + { + "epoch": 0.4, + "learning_rate": 6.837853570556885e-08, + "logits/chosen": -2.0761189460754395, + "logits/rejected": -2.0729355812072754, + "logps/chosen": -0.11378287523984909, + "logps/rejected": -156.60435485839844, + "loss": 0.4192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014002419775351882, + "rewards/margins": 1.7835383415222168, + "rewards/rejected": -1.7821381092071533, + "step": 6853 + }, + { + "epoch": 0.4, + "learning_rate": 6.83697710785151e-08, + "logits/chosen": -1.8301630020141602, + "logits/rejected": -1.8138233423233032, + "logps/chosen": -0.0001878665352705866, + "logps/rejected": -165.32339477539062, + "loss": 0.3904, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.8073728976305574e-05, + "rewards/margins": 2.2343316078186035, + "rewards/rejected": -2.234283447265625, + "step": 6854 + }, + { + "epoch": 0.4, + "learning_rate": 6.836100579887722e-08, + "logits/chosen": -2.1302201747894287, + "logits/rejected": -2.175676107406616, + "logps/chosen": -284.9583740234375, + "logps/rejected": -422.79803466796875, + "loss": 0.0507, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.12384033203125, + "rewards/margins": 2.868572950363159, + "rewards/rejected": -0.744732677936554, + "step": 6855 + }, + { + "epoch": 0.4, + "learning_rate": 6.835223986696659e-08, + "logits/chosen": -1.9231722354888916, + "logits/rejected": -1.9260752201080322, + "logps/chosen": -60.89263153076172, + "logps/rejected": -144.05621337890625, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021033858880400658, + "rewards/margins": 0.6204906702041626, + "rewards/rejected": -0.599456787109375, + "step": 6856 + }, + { + "epoch": 0.4, + "learning_rate": 6.834347328309465e-08, + "logits/chosen": -1.941347360610962, + "logits/rejected": -1.9156666994094849, + "logps/chosen": -207.4560546875, + "logps/rejected": -429.1092224121094, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8412567377090454, + "rewards/margins": 4.503848552703857, + "rewards/rejected": -2.6625916957855225, + "step": 6857 + }, + { + "epoch": 0.4, + "learning_rate": 6.833470604757281e-08, + "logits/chosen": -1.9597591161727905, + "logits/rejected": -1.945639967918396, + "logps/chosen": -62.622718811035156, + "logps/rejected": -315.3818359375, + "loss": 0.2082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7507263422012329, + "rewards/margins": 2.6144471168518066, + "rewards/rejected": -1.8637207746505737, + "step": 6858 + }, + { + "epoch": 0.4, + "learning_rate": 6.832593816071252e-08, + "logits/chosen": -2.0192956924438477, + "logits/rejected": -2.019432783126831, + "logps/chosen": -211.06033325195312, + "logps/rejected": -259.9369201660156, + "loss": 0.1003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6819885969161987, + "rewards/margins": 2.521785020828247, + "rewards/rejected": -0.8397964835166931, + "step": 6859 + }, + { + "epoch": 0.4, + "learning_rate": 6.831716962282525e-08, + "logits/chosen": -2.081200122833252, + "logits/rejected": -2.074357271194458, + "logps/chosen": -64.21022033691406, + "logps/rejected": -224.3857421875, + "loss": 0.3425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0054450989700853825, + "rewards/margins": 1.9537819623947144, + "rewards/rejected": -1.9483368396759033, + "step": 6860 + }, + { + "epoch": 0.4, + "learning_rate": 6.830840043422253e-08, + "logits/chosen": -1.9000152349472046, + "logits/rejected": -1.895313024520874, + "logps/chosen": -18.970787048339844, + "logps/rejected": -112.52198791503906, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024215316399931908, + "rewards/margins": 1.8553485870361328, + "rewards/rejected": -1.8795639276504517, + "step": 6861 + }, + { + "epoch": 0.4, + "learning_rate": 6.829963059521588e-08, + "logits/chosen": -2.0561203956604004, + "logits/rejected": -2.059786081314087, + "logps/chosen": -147.96554565429688, + "logps/rejected": -291.88037109375, + "loss": 0.323, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.65240478515625, + "rewards/margins": 0.23093867301940918, + "rewards/rejected": 1.4214661121368408, + "step": 6862 + }, + { + "epoch": 0.4, + "learning_rate": 6.829086010611682e-08, + "logits/chosen": -1.8712151050567627, + "logits/rejected": -1.8669764995574951, + "logps/chosen": -0.0006019874126650393, + "logps/rejected": -92.30452728271484, + "loss": 0.5481, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.395923944386595e-07, + "rewards/margins": 0.6891533732414246, + "rewards/rejected": -0.6891540884971619, + "step": 6863 + }, + { + "epoch": 0.4, + "learning_rate": 6.828208896723696e-08, + "logits/chosen": -2.001234531402588, + "logits/rejected": -2.0006330013275146, + "logps/chosen": -6.722049713134766, + "logps/rejected": -60.7176628112793, + "loss": 0.5986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030621672049164772, + "rewards/margins": 0.3760840892791748, + "rewards/rejected": -0.3454624116420746, + "step": 6864 + }, + { + "epoch": 0.4, + "learning_rate": 6.827331717888786e-08, + "logits/chosen": -2.022700309753418, + "logits/rejected": -2.018490791320801, + "logps/chosen": -73.37749481201172, + "logps/rejected": -159.90296936035156, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7605026364326477, + "rewards/margins": 0.2611892819404602, + "rewards/rejected": 0.4993133544921875, + "step": 6865 + }, + { + "epoch": 0.4, + "learning_rate": 6.826454474138115e-08, + "logits/chosen": -1.878890872001648, + "logits/rejected": -1.8679664134979248, + "logps/chosen": -20.671995162963867, + "logps/rejected": -402.05157470703125, + "loss": 0.3881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009844399057328701, + "rewards/margins": 2.021993398666382, + "rewards/rejected": -2.0121490955352783, + "step": 6866 + }, + { + "epoch": 0.4, + "learning_rate": 6.825577165502847e-08, + "logits/chosen": -2.0441784858703613, + "logits/rejected": -2.043613910675049, + "logps/chosen": -15.002020835876465, + "logps/rejected": -115.4197998046875, + "loss": 0.3387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2704049050807953, + "rewards/margins": 1.6997483968734741, + "rewards/rejected": -1.4293434619903564, + "step": 6867 + }, + { + "epoch": 0.4, + "learning_rate": 6.824699792014148e-08, + "logits/chosen": -1.9882762432098389, + "logits/rejected": -1.9635448455810547, + "logps/chosen": -71.7181625366211, + "logps/rejected": -311.5055847167969, + "loss": 0.4239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027114867698401213, + "rewards/margins": 1.782841444015503, + "rewards/rejected": -1.785552978515625, + "step": 6868 + }, + { + "epoch": 0.4, + "learning_rate": 6.823822353703185e-08, + "logits/chosen": -2.065575122833252, + "logits/rejected": -2.055530548095703, + "logps/chosen": -32.818458557128906, + "logps/rejected": -103.02421569824219, + "loss": 0.5628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07504577934741974, + "rewards/margins": 0.18864746391773224, + "rewards/rejected": -0.1136016845703125, + "step": 6869 + }, + { + "epoch": 0.4, + "learning_rate": 6.822944850601133e-08, + "logits/chosen": -2.0686116218566895, + "logits/rejected": -2.046933889389038, + "logps/chosen": -141.92388916015625, + "logps/rejected": -218.0543670654297, + "loss": 0.2966, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6227401494979858, + "rewards/margins": 0.5770111083984375, + "rewards/rejected": 1.0457290410995483, + "step": 6870 + }, + { + "epoch": 0.4, + "learning_rate": 6.822067282739163e-08, + "logits/chosen": -2.1186885833740234, + "logits/rejected": -2.1290128231048584, + "logps/chosen": -67.49360656738281, + "logps/rejected": -178.19305419921875, + "loss": 0.2347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2491363286972046, + "rewards/margins": 1.020623803138733, + "rewards/rejected": 0.22851257026195526, + "step": 6871 + }, + { + "epoch": 0.4, + "learning_rate": 6.82118965014845e-08, + "logits/chosen": -2.083472490310669, + "logits/rejected": -2.078348398208618, + "logps/chosen": -33.56461715698242, + "logps/rejected": -270.56866455078125, + "loss": 0.259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5183216333389282, + "rewards/margins": 3.246613025665283, + "rewards/rejected": -2.7282912731170654, + "step": 6872 + }, + { + "epoch": 0.4, + "learning_rate": 6.820311952860171e-08, + "logits/chosen": -1.6659414768218994, + "logits/rejected": -1.6842360496520996, + "logps/chosen": -190.92630004882812, + "logps/rejected": -295.20294189453125, + "loss": 0.4409, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.178979516029358, + "rewards/margins": 0.065155029296875, + "rewards/rejected": 1.113824486732483, + "step": 6873 + }, + { + "epoch": 0.4, + "learning_rate": 6.819434190905511e-08, + "logits/chosen": -2.009028196334839, + "logits/rejected": -1.9988880157470703, + "logps/chosen": -32.65165710449219, + "logps/rejected": -264.3861389160156, + "loss": 0.2001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5447273254394531, + "rewards/margins": 3.3277428150177, + "rewards/rejected": -2.783015489578247, + "step": 6874 + }, + { + "epoch": 0.4, + "learning_rate": 6.818556364315645e-08, + "logits/chosen": -1.883724570274353, + "logits/rejected": -1.8515492677688599, + "logps/chosen": -0.7571986317634583, + "logps/rejected": -166.33477783203125, + "loss": 0.4102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017627079039812088, + "rewards/margins": 1.8559904098510742, + "rewards/rejected": -1.8736175298690796, + "step": 6875 + }, + { + "epoch": 0.4, + "learning_rate": 6.817678473121763e-08, + "logits/chosen": -2.1293280124664307, + "logits/rejected": -2.116645336151123, + "logps/chosen": -0.00011586640175664797, + "logps/rejected": -252.60235595703125, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5734767657704651e-06, + "rewards/margins": 4.520575046539307, + "rewards/rejected": -4.520576477050781, + "step": 6876 + }, + { + "epoch": 0.4, + "learning_rate": 6.81680051735505e-08, + "logits/chosen": -1.9460381269454956, + "logits/rejected": -1.9685795307159424, + "logps/chosen": -228.2266845703125, + "logps/rejected": -249.06195068359375, + "loss": 0.3432, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1766327619552612, + "rewards/margins": 0.2602021098136902, + "rewards/rejected": 0.916430652141571, + "step": 6877 + }, + { + "epoch": 0.4, + "learning_rate": 6.815922497046697e-08, + "logits/chosen": -2.143828868865967, + "logits/rejected": -2.1447291374206543, + "logps/chosen": -21.040912628173828, + "logps/rejected": -87.59375, + "loss": 0.4433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6688482165336609, + "rewards/margins": 0.4536176323890686, + "rewards/rejected": 0.2152305692434311, + "step": 6878 + }, + { + "epoch": 0.4, + "learning_rate": 6.815044412227891e-08, + "logits/chosen": -1.9373801946640015, + "logits/rejected": -1.9294323921203613, + "logps/chosen": -19.160594940185547, + "logps/rejected": -99.89982604980469, + "loss": 0.4529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015843583270907402, + "rewards/margins": 1.061919093132019, + "rewards/rejected": -1.046075463294983, + "step": 6879 + }, + { + "epoch": 0.4, + "learning_rate": 6.814166262929832e-08, + "logits/chosen": -1.4980394840240479, + "logits/rejected": -1.4964016675949097, + "logps/chosen": -27.177196502685547, + "logps/rejected": -118.50006103515625, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003509521484375, + "rewards/margins": 0.7091331481933594, + "rewards/rejected": -0.7056236267089844, + "step": 6880 + }, + { + "epoch": 0.4, + "learning_rate": 6.813288049183713e-08, + "logits/chosen": -1.7390718460083008, + "logits/rejected": -1.7382599115371704, + "logps/chosen": -0.0007631377666257322, + "logps/rejected": -140.97686767578125, + "loss": 0.4417, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.268012005719356e-05, + "rewards/margins": 1.7327356338500977, + "rewards/rejected": -1.7327682971954346, + "step": 6881 + }, + { + "epoch": 0.4, + "learning_rate": 6.812409771020731e-08, + "logits/chosen": -2.0203559398651123, + "logits/rejected": -2.004094362258911, + "logps/chosen": -7.627161502838135, + "logps/rejected": -143.0350799560547, + "loss": 0.4775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1248573288321495, + "rewards/margins": 1.479596734046936, + "rewards/rejected": -1.6044540405273438, + "step": 6882 + }, + { + "epoch": 0.4, + "learning_rate": 6.81153142847209e-08, + "logits/chosen": -1.883618950843811, + "logits/rejected": -1.8807458877563477, + "logps/chosen": -204.28244018554688, + "logps/rejected": -311.3878479003906, + "loss": 0.1036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.990386962890625, + "rewards/margins": 1.8093963861465454, + "rewards/rejected": 0.18099060654640198, + "step": 6883 + }, + { + "epoch": 0.4, + "learning_rate": 6.810653021568992e-08, + "logits/chosen": -1.8043091297149658, + "logits/rejected": -1.7886631488800049, + "logps/chosen": -42.51801300048828, + "logps/rejected": -309.0179748535156, + "loss": 0.2376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3879661560058594, + "rewards/margins": 4.450481414794922, + "rewards/rejected": -4.0625152587890625, + "step": 6884 + }, + { + "epoch": 0.4, + "learning_rate": 6.80977455034264e-08, + "logits/chosen": -2.116212844848633, + "logits/rejected": -2.117485761642456, + "logps/chosen": -9.5001859664917, + "logps/rejected": -281.4620361328125, + "loss": 0.3085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09999342262744904, + "rewards/margins": 5.939773082733154, + "rewards/rejected": -5.839779853820801, + "step": 6885 + }, + { + "epoch": 0.4, + "learning_rate": 6.808896014824245e-08, + "logits/chosen": -1.8265669345855713, + "logits/rejected": -1.7924193143844604, + "logps/chosen": -231.93235778808594, + "logps/rejected": -303.177734375, + "loss": 0.2557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.920050024986267, + "rewards/margins": 0.7106566429138184, + "rewards/rejected": 1.2093933820724487, + "step": 6886 + }, + { + "epoch": 0.4, + "learning_rate": 6.808017415045016e-08, + "logits/chosen": -1.9596394300460815, + "logits/rejected": -1.9764565229415894, + "logps/chosen": -231.44338989257812, + "logps/rejected": -356.5202331542969, + "loss": 0.1298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4869552850723267, + "rewards/margins": 2.401658535003662, + "rewards/rejected": -0.914703369140625, + "step": 6887 + }, + { + "epoch": 0.4, + "learning_rate": 6.807138751036163e-08, + "logits/chosen": -2.008810520172119, + "logits/rejected": -2.0101242065429688, + "logps/chosen": -123.88993072509766, + "logps/rejected": -283.1814880371094, + "loss": 1.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.655413866043091, + "rewards/margins": 1.9779174327850342, + "rewards/rejected": -4.633331298828125, + "step": 6888 + }, + { + "epoch": 0.4, + "learning_rate": 6.806260022828901e-08, + "logits/chosen": -1.988651156425476, + "logits/rejected": -1.984017252922058, + "logps/chosen": -12.973409652709961, + "logps/rejected": -279.8107604980469, + "loss": 0.3271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.143968865275383, + "rewards/margins": 4.372194766998291, + "rewards/rejected": -4.2282257080078125, + "step": 6889 + }, + { + "epoch": 0.4, + "learning_rate": 6.805381230454448e-08, + "logits/chosen": -1.9350665807724, + "logits/rejected": -1.9403151273727417, + "logps/chosen": -235.23770141601562, + "logps/rejected": -476.5429382324219, + "loss": 0.0455, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.14691162109375, + "rewards/margins": 3.003213405609131, + "rewards/rejected": -0.8563019037246704, + "step": 6890 + }, + { + "epoch": 0.4, + "learning_rate": 6.804502373944023e-08, + "logits/chosen": -1.9474772214889526, + "logits/rejected": -1.9434583187103271, + "logps/chosen": -72.13240051269531, + "logps/rejected": -194.84454345703125, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33803635835647583, + "rewards/margins": 1.3984229564666748, + "rewards/rejected": -1.0603866577148438, + "step": 6891 + }, + { + "epoch": 0.4, + "learning_rate": 6.803623453328847e-08, + "logits/chosen": -2.1090292930603027, + "logits/rejected": -2.0947318077087402, + "logps/chosen": -213.05557250976562, + "logps/rejected": -360.9022216796875, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.507415771484375, + "rewards/margins": 3.157705783843994, + "rewards/rejected": -0.6502899527549744, + "step": 6892 + }, + { + "epoch": 0.4, + "learning_rate": 6.802744468640145e-08, + "logits/chosen": -1.6421301364898682, + "logits/rejected": -1.6782174110412598, + "logps/chosen": -179.21206665039062, + "logps/rejected": -396.0663146972656, + "loss": 0.0916, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4646484851837158, + "rewards/margins": 2.679892063140869, + "rewards/rejected": -1.2152435779571533, + "step": 6893 + }, + { + "epoch": 0.4, + "learning_rate": 6.801865419909139e-08, + "logits/chosen": -2.001765251159668, + "logits/rejected": -2.0022056102752686, + "logps/chosen": -67.16806030273438, + "logps/rejected": -225.2103271484375, + "loss": 0.1601, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3227523565292358, + "rewards/margins": 2.1785247325897217, + "rewards/rejected": -0.8557724356651306, + "step": 6894 + }, + { + "epoch": 0.4, + "learning_rate": 6.80098630716706e-08, + "logits/chosen": -1.868425965309143, + "logits/rejected": -1.8391634225845337, + "logps/chosen": -204.66717529296875, + "logps/rejected": -531.1987915039062, + "loss": 0.2023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5998321771621704, + "rewards/margins": 3.1496310234069824, + "rewards/rejected": -2.5497987270355225, + "step": 6895 + }, + { + "epoch": 0.4, + "learning_rate": 6.800107130445139e-08, + "logits/chosen": -2.1221959590911865, + "logits/rejected": -2.121838331222534, + "logps/chosen": -19.74195098876953, + "logps/rejected": -51.16685485839844, + "loss": 0.6732, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08195514976978302, + "rewards/margins": -0.03828258067369461, + "rewards/rejected": 0.12023773044347763, + "step": 6896 + }, + { + "epoch": 0.4, + "learning_rate": 6.799227889774607e-08, + "logits/chosen": -2.004936695098877, + "logits/rejected": -2.0174591541290283, + "logps/chosen": -178.84103393554688, + "logps/rejected": -464.3499755859375, + "loss": 0.0557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4283630847930908, + "rewards/margins": 3.9219512939453125, + "rewards/rejected": -2.4935882091522217, + "step": 6897 + }, + { + "epoch": 0.4, + "learning_rate": 6.798348585186698e-08, + "logits/chosen": -1.8674834966659546, + "logits/rejected": -1.873228907585144, + "logps/chosen": -35.1619873046875, + "logps/rejected": -144.4414520263672, + "loss": 0.4396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4298561215400696, + "rewards/margins": 0.7984565496444702, + "rewards/rejected": -0.368600457906723, + "step": 6898 + }, + { + "epoch": 0.4, + "learning_rate": 6.797469216712653e-08, + "logits/chosen": -2.1529712677001953, + "logits/rejected": -2.1479592323303223, + "logps/chosen": -51.21669006347656, + "logps/rejected": -278.1805419921875, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34177932143211365, + "rewards/margins": 3.890078067779541, + "rewards/rejected": -4.2318572998046875, + "step": 6899 + }, + { + "epoch": 0.4, + "learning_rate": 6.796589784383706e-08, + "logits/chosen": -2.00296688079834, + "logits/rejected": -1.9985274076461792, + "logps/chosen": -23.109481811523438, + "logps/rejected": -177.59742736816406, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2805057466030121, + "rewards/margins": 1.766397476196289, + "rewards/rejected": -1.4858916997909546, + "step": 6900 + }, + { + "epoch": 0.4, + "learning_rate": 6.795710288231104e-08, + "logits/chosen": -1.9844398498535156, + "logits/rejected": -1.9660322666168213, + "logps/chosen": -223.30531311035156, + "logps/rejected": -617.6358032226562, + "loss": 0.1056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5843979120254517, + "rewards/margins": 4.417954921722412, + "rewards/rejected": -3.83355712890625, + "step": 6901 + }, + { + "epoch": 0.4, + "learning_rate": 6.794830728286088e-08, + "logits/chosen": -1.7465906143188477, + "logits/rejected": -1.7463059425354004, + "logps/chosen": -0.005638888105750084, + "logps/rejected": -122.455322265625, + "loss": 0.4465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022518052719533443, + "rewards/margins": 1.5083296298980713, + "rewards/rejected": -1.5085548162460327, + "step": 6902 + }, + { + "epoch": 0.4, + "learning_rate": 6.793951104579904e-08, + "logits/chosen": -1.9439903497695923, + "logits/rejected": -1.9353554248809814, + "logps/chosen": -61.887718200683594, + "logps/rejected": -161.524169921875, + "loss": 0.6971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6937476992607117, + "rewards/margins": 0.8010292649269104, + "rewards/rejected": -1.494776964187622, + "step": 6903 + }, + { + "epoch": 0.4, + "learning_rate": 6.793071417143804e-08, + "logits/chosen": -1.923701286315918, + "logits/rejected": -1.8977199792861938, + "logps/chosen": -162.65451049804688, + "logps/rejected": -370.04229736328125, + "loss": 0.1675, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.58111572265625, + "rewards/margins": 1.677575707435608, + "rewards/rejected": -0.09645996242761612, + "step": 6904 + }, + { + "epoch": 0.4, + "learning_rate": 6.792191666009034e-08, + "logits/chosen": -2.0565083026885986, + "logits/rejected": -2.052396059036255, + "logps/chosen": -15.305671691894531, + "logps/rejected": -185.33888244628906, + "loss": 0.3319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07482299953699112, + "rewards/margins": 4.005462646484375, + "rewards/rejected": -3.9306397438049316, + "step": 6905 + }, + { + "epoch": 0.4, + "learning_rate": 6.791311851206853e-08, + "logits/chosen": -2.017252206802368, + "logits/rejected": -1.9908524751663208, + "logps/chosen": -4.787342548370361, + "logps/rejected": -242.03585815429688, + "loss": 0.3506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036237385123968124, + "rewards/margins": 2.8470301628112793, + "rewards/rejected": -2.8107926845550537, + "step": 6906 + }, + { + "epoch": 0.4, + "learning_rate": 6.79043197276851e-08, + "logits/chosen": -2.078650951385498, + "logits/rejected": -2.057089328765869, + "logps/chosen": -231.8370361328125, + "logps/rejected": -306.89739990234375, + "loss": 0.2568, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8880538940429688, + "rewards/margins": 0.7120224237442017, + "rewards/rejected": 1.176031470298767, + "step": 6907 + }, + { + "epoch": 0.4, + "learning_rate": 6.789552030725265e-08, + "logits/chosen": -2.243342876434326, + "logits/rejected": -2.249558210372925, + "logps/chosen": -5.880192756652832, + "logps/rejected": -122.91371154785156, + "loss": 0.4205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04993791505694389, + "rewards/margins": 1.7016782760620117, + "rewards/rejected": -1.651740312576294, + "step": 6908 + }, + { + "epoch": 0.4, + "learning_rate": 6.788672025108379e-08, + "logits/chosen": -2.1357638835906982, + "logits/rejected": -2.0989978313446045, + "logps/chosen": -282.5018310546875, + "logps/rejected": -383.27117919921875, + "loss": 0.183, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.500634789466858, + "rewards/margins": 1.204919457435608, + "rewards/rejected": 0.29571533203125, + "step": 6909 + }, + { + "epoch": 0.4, + "learning_rate": 6.787791955949114e-08, + "logits/chosen": -2.0148603916168213, + "logits/rejected": -2.0184435844421387, + "logps/chosen": -83.99325561523438, + "logps/rejected": -201.16232299804688, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6747116446495056, + "rewards/margins": 1.637678623199463, + "rewards/rejected": -0.9629669189453125, + "step": 6910 + }, + { + "epoch": 0.4, + "learning_rate": 6.786911823278731e-08, + "logits/chosen": -1.9272024631500244, + "logits/rejected": -1.9296702146530151, + "logps/chosen": -26.21678924560547, + "logps/rejected": -147.44534301757812, + "loss": 0.4488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09990539401769638, + "rewards/margins": 1.3662536144256592, + "rewards/rejected": -1.2663482427597046, + "step": 6911 + }, + { + "epoch": 0.4, + "learning_rate": 6.786031627128503e-08, + "logits/chosen": -1.985918402671814, + "logits/rejected": -1.937740683555603, + "logps/chosen": -185.3694610595703, + "logps/rejected": -356.2658996582031, + "loss": 0.0551, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4307174682617188, + "rewards/margins": 3.0277023315429688, + "rewards/rejected": -0.59698486328125, + "step": 6912 + }, + { + "epoch": 0.4, + "learning_rate": 6.785151367529694e-08, + "logits/chosen": -1.9606058597564697, + "logits/rejected": -1.9819135665893555, + "logps/chosen": -147.79710388183594, + "logps/rejected": -478.4030456542969, + "loss": 0.0495, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4060715436935425, + "rewards/margins": 4.527076721191406, + "rewards/rejected": -3.1210052967071533, + "step": 6913 + }, + { + "epoch": 0.4, + "learning_rate": 6.784271044513576e-08, + "logits/chosen": -2.0633676052093506, + "logits/rejected": -2.059494733810425, + "logps/chosen": -15.371803283691406, + "logps/rejected": -258.0927734375, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1952344924211502, + "rewards/margins": 5.430119037628174, + "rewards/rejected": -5.234884738922119, + "step": 6914 + }, + { + "epoch": 0.4, + "learning_rate": 6.783390658111422e-08, + "logits/chosen": -2.0382487773895264, + "logits/rejected": -2.0552914142608643, + "logps/chosen": -233.41827392578125, + "logps/rejected": -311.51806640625, + "loss": 0.1667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8385452628135681, + "rewards/margins": 2.3608109951019287, + "rewards/rejected": -1.5222656726837158, + "step": 6915 + }, + { + "epoch": 0.4, + "learning_rate": 6.78251020835451e-08, + "logits/chosen": -1.9477198123931885, + "logits/rejected": -1.9542139768600464, + "logps/chosen": -19.655681610107422, + "logps/rejected": -219.37139892578125, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5577062964439392, + "rewards/margins": 0.45915892720222473, + "rewards/rejected": 0.09854736179113388, + "step": 6916 + }, + { + "epoch": 0.4, + "learning_rate": 6.781629695274114e-08, + "logits/chosen": -1.8728349208831787, + "logits/rejected": -1.8797221183776855, + "logps/chosen": -0.003161494852975011, + "logps/rejected": -272.9317626953125, + "loss": 0.3341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00044994830386713147, + "rewards/margins": 4.903480529785156, + "rewards/rejected": -4.9030303955078125, + "step": 6917 + }, + { + "epoch": 0.4, + "learning_rate": 6.780749118901519e-08, + "logits/chosen": -1.9787932634353638, + "logits/rejected": -1.9751121997833252, + "logps/chosen": -226.65261840820312, + "logps/rejected": -303.4830322265625, + "loss": 0.0866, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5993560552597046, + "rewards/margins": 2.87225341796875, + "rewards/rejected": -1.2728973627090454, + "step": 6918 + }, + { + "epoch": 0.4, + "learning_rate": 6.779868479268001e-08, + "logits/chosen": -2.0057806968688965, + "logits/rejected": -1.9698095321655273, + "logps/chosen": -171.0929718017578, + "logps/rejected": -331.33441162109375, + "loss": 0.0748, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2295761108398438, + "rewards/margins": 2.5022597312927246, + "rewards/rejected": -0.272683709859848, + "step": 6919 + }, + { + "epoch": 0.4, + "learning_rate": 6.778987776404852e-08, + "logits/chosen": -1.9157419204711914, + "logits/rejected": -1.9046369791030884, + "logps/chosen": -70.10767364501953, + "logps/rejected": -221.1251220703125, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4416816830635071, + "rewards/margins": 2.001140594482422, + "rewards/rejected": -1.5594589710235596, + "step": 6920 + }, + { + "epoch": 0.4, + "learning_rate": 6.778107010343353e-08, + "logits/chosen": -2.075690269470215, + "logits/rejected": -2.0736494064331055, + "logps/chosen": -120.25666809082031, + "logps/rejected": -220.5477294921875, + "loss": 0.1883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7425445318222046, + "rewards/margins": 1.2816588878631592, + "rewards/rejected": 0.460885614156723, + "step": 6921 + }, + { + "epoch": 0.4, + "learning_rate": 6.777226181114796e-08, + "logits/chosen": -2.115912437438965, + "logits/rejected": -2.11411190032959, + "logps/chosen": -11.377110481262207, + "logps/rejected": -134.35751342773438, + "loss": 0.4114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23766155540943146, + "rewards/margins": 1.2640310525894165, + "rewards/rejected": -1.0263694524765015, + "step": 6922 + }, + { + "epoch": 0.4, + "learning_rate": 6.776345288750472e-08, + "logits/chosen": -2.0396840572357178, + "logits/rejected": -1.883649230003357, + "logps/chosen": -189.46630859375, + "logps/rejected": -466.50750732421875, + "loss": 0.1011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.853108286857605, + "rewards/margins": 2.3765580654144287, + "rewards/rejected": -0.523449718952179, + "step": 6923 + }, + { + "epoch": 0.4, + "learning_rate": 6.775464333281673e-08, + "logits/chosen": -2.086789131164551, + "logits/rejected": -2.0591912269592285, + "logps/chosen": -274.5591735839844, + "logps/rejected": -404.9884033203125, + "loss": 0.0817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8556549549102783, + "rewards/margins": 2.252127170562744, + "rewards/rejected": -0.39647218585014343, + "step": 6924 + }, + { + "epoch": 0.4, + "learning_rate": 6.774583314739697e-08, + "logits/chosen": -1.9205975532531738, + "logits/rejected": -1.906048059463501, + "logps/chosen": -0.09804052859544754, + "logps/rejected": -152.34115600585938, + "loss": 0.4176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007912670262157917, + "rewards/margins": 1.5320348739624023, + "rewards/rejected": -1.539947509765625, + "step": 6925 + }, + { + "epoch": 0.4, + "learning_rate": 6.773702233155843e-08, + "logits/chosen": -2.002392530441284, + "logits/rejected": -1.9986534118652344, + "logps/chosen": -79.4372329711914, + "logps/rejected": -304.67034912109375, + "loss": 0.2598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5551612973213196, + "rewards/margins": 1.4962745904922485, + "rewards/rejected": -0.941113293170929, + "step": 6926 + }, + { + "epoch": 0.4, + "learning_rate": 6.772821088561408e-08, + "logits/chosen": -1.8505433797836304, + "logits/rejected": -1.8204081058502197, + "logps/chosen": -291.2388916015625, + "logps/rejected": -519.95263671875, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.977270483970642, + "rewards/margins": 2.1651854515075684, + "rewards/rejected": -0.18791504204273224, + "step": 6927 + }, + { + "epoch": 0.4, + "learning_rate": 6.771939880987696e-08, + "logits/chosen": -1.827212929725647, + "logits/rejected": -1.824139952659607, + "logps/chosen": -62.83345413208008, + "logps/rejected": -235.67019653320312, + "loss": 0.6151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9080562591552734, + "rewards/margins": 3.1748509407043457, + "rewards/rejected": -4.082907199859619, + "step": 6928 + }, + { + "epoch": 0.4, + "learning_rate": 6.771058610466012e-08, + "logits/chosen": -2.0560526847839355, + "logits/rejected": -2.045149803161621, + "logps/chosen": -0.0003623511584009975, + "logps/rejected": -86.01150512695312, + "loss": 0.5318, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.499405491107609e-05, + "rewards/margins": 0.7878351211547852, + "rewards/rejected": -0.787860095500946, + "step": 6929 + }, + { + "epoch": 0.4, + "learning_rate": 6.770177277027662e-08, + "logits/chosen": -2.1620612144470215, + "logits/rejected": -2.1392581462860107, + "logps/chosen": -121.34252166748047, + "logps/rejected": -366.01336669921875, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.820386528968811, + "rewards/margins": 5.293128490447998, + "rewards/rejected": -3.4727418422698975, + "step": 6930 + }, + { + "epoch": 0.4, + "learning_rate": 6.769295880703957e-08, + "logits/chosen": -1.9515323638916016, + "logits/rejected": -1.9486826658248901, + "logps/chosen": -0.001597035676240921, + "logps/rejected": -129.01271057128906, + "loss": 0.465, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.418926487735007e-05, + "rewards/margins": 1.2548444271087646, + "rewards/rejected": -1.2548202276229858, + "step": 6931 + }, + { + "epoch": 0.4, + "learning_rate": 6.768414421526207e-08, + "logits/chosen": -1.8609099388122559, + "logits/rejected": -1.8579617738723755, + "logps/chosen": -83.4069595336914, + "logps/rejected": -262.4576416015625, + "loss": 0.4207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15083007514476776, + "rewards/margins": 1.200714111328125, + "rewards/rejected": -1.0498840808868408, + "step": 6932 + }, + { + "epoch": 0.4, + "learning_rate": 6.76753289952573e-08, + "logits/chosen": -1.9642548561096191, + "logits/rejected": -1.9639619588851929, + "logps/chosen": -185.01589965820312, + "logps/rejected": -230.2283935546875, + "loss": 0.4148, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.332785129547119, + "rewards/margins": -0.1984267234802246, + "rewards/rejected": 2.5312118530273438, + "step": 6933 + }, + { + "epoch": 0.4, + "learning_rate": 6.766651314733835e-08, + "logits/chosen": -2.0477864742279053, + "logits/rejected": -2.0444223880767822, + "logps/chosen": -44.855918884277344, + "logps/rejected": -269.9745788574219, + "loss": 0.1897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7240753173828125, + "rewards/margins": 3.6762008666992188, + "rewards/rejected": -2.9521255493164062, + "step": 6934 + }, + { + "epoch": 0.4, + "learning_rate": 6.765769667181844e-08, + "logits/chosen": -2.0047075748443604, + "logits/rejected": -2.0073463916778564, + "logps/chosen": -8.077898025512695, + "logps/rejected": -218.38450622558594, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2185528725385666, + "rewards/margins": 2.4322292804718018, + "rewards/rejected": -2.2136764526367188, + "step": 6935 + }, + { + "epoch": 0.4, + "learning_rate": 6.764887956901078e-08, + "logits/chosen": -1.7871936559677124, + "logits/rejected": -1.7790418863296509, + "logps/chosen": -197.40342712402344, + "logps/rejected": -282.8665771484375, + "loss": 0.3085, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3908021450042725, + "rewards/margins": 0.2706880569458008, + "rewards/rejected": 2.1201140880584717, + "step": 6936 + }, + { + "epoch": 0.4, + "learning_rate": 6.76400618392286e-08, + "logits/chosen": -1.9758477210998535, + "logits/rejected": -1.9768344163894653, + "logps/chosen": -16.455869674682617, + "logps/rejected": -143.79501342773438, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13545207679271698, + "rewards/margins": 3.1253163814544678, + "rewards/rejected": -2.9898643493652344, + "step": 6937 + }, + { + "epoch": 0.4, + "learning_rate": 6.76312434827851e-08, + "logits/chosen": -1.6275033950805664, + "logits/rejected": -1.6252411603927612, + "logps/chosen": -58.02678298950195, + "logps/rejected": -308.1742858886719, + "loss": 0.1872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8403202295303345, + "rewards/margins": 3.020678997039795, + "rewards/rejected": -2.18035888671875, + "step": 6938 + }, + { + "epoch": 0.4, + "learning_rate": 6.762242449999362e-08, + "logits/chosen": -2.0678205490112305, + "logits/rejected": -2.0574939250946045, + "logps/chosen": -235.61764526367188, + "logps/rejected": -334.87420654296875, + "loss": 0.1117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.036975145339966, + "rewards/margins": 1.6001038551330566, + "rewards/rejected": 0.43687134981155396, + "step": 6939 + }, + { + "epoch": 0.4, + "learning_rate": 6.76136048911674e-08, + "logits/chosen": -2.006131172180176, + "logits/rejected": -2.000765085220337, + "logps/chosen": -0.18747620284557343, + "logps/rejected": -84.32839965820312, + "loss": 0.7429, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.005671246442943811, + "rewards/margins": -0.2187541425228119, + "rewards/rejected": 0.21308289468288422, + "step": 6940 + }, + { + "epoch": 0.4, + "learning_rate": 6.760478465661981e-08, + "logits/chosen": -2.051377296447754, + "logits/rejected": -2.0592308044433594, + "logps/chosen": -20.824970245361328, + "logps/rejected": -63.24518585205078, + "loss": 0.7443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19095535576343536, + "rewards/margins": 0.10670815408229828, + "rewards/rejected": -0.29766350984573364, + "step": 6941 + }, + { + "epoch": 0.4, + "learning_rate": 6.759596379666412e-08, + "logits/chosen": -2.139618396759033, + "logits/rejected": -2.137913703918457, + "logps/chosen": -17.22766876220703, + "logps/rejected": -92.25372314453125, + "loss": 0.4891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04542522504925728, + "rewards/margins": 0.9228017926216125, + "rewards/rejected": -0.8773765563964844, + "step": 6942 + }, + { + "epoch": 0.4, + "learning_rate": 6.758714231161373e-08, + "logits/chosen": -1.9385087490081787, + "logits/rejected": -1.9391261339187622, + "logps/chosen": -166.40182495117188, + "logps/rejected": -340.14947509765625, + "loss": 0.1611, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2453277111053467, + "rewards/margins": 1.2100188732147217, + "rewards/rejected": 1.035308837890625, + "step": 6943 + }, + { + "epoch": 0.4, + "learning_rate": 6.757832020178204e-08, + "logits/chosen": -1.9701937437057495, + "logits/rejected": -1.9681986570358276, + "logps/chosen": -4.30641508102417, + "logps/rejected": -38.551815032958984, + "loss": 0.6383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07567896693944931, + "rewards/margins": 0.060991618782281876, + "rewards/rejected": 0.01468734722584486, + "step": 6944 + }, + { + "epoch": 0.4, + "learning_rate": 6.756949746748241e-08, + "logits/chosen": -2.08050537109375, + "logits/rejected": -2.079562187194824, + "logps/chosen": -33.44758224487305, + "logps/rejected": -287.6531982421875, + "loss": 0.5505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6307949423789978, + "rewards/margins": 1.4706027507781982, + "rewards/rejected": -2.101397752761841, + "step": 6945 + }, + { + "epoch": 0.4, + "learning_rate": 6.756067410902831e-08, + "logits/chosen": -1.9109400510787964, + "logits/rejected": -1.9138303995132446, + "logps/chosen": -19.228221893310547, + "logps/rejected": -180.97589111328125, + "loss": 0.6032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005691909696906805, + "rewards/margins": 0.30855751037597656, + "rewards/rejected": -0.302865594625473, + "step": 6946 + }, + { + "epoch": 0.4, + "learning_rate": 6.755185012673313e-08, + "logits/chosen": -2.0122759342193604, + "logits/rejected": -2.0075080394744873, + "logps/chosen": -22.37355613708496, + "logps/rejected": -137.9599609375, + "loss": 0.2332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5589632391929626, + "rewards/margins": 2.224961996078491, + "rewards/rejected": -1.6659988164901733, + "step": 6947 + }, + { + "epoch": 0.4, + "learning_rate": 6.754302552091041e-08, + "logits/chosen": -2.027855634689331, + "logits/rejected": -2.02121639251709, + "logps/chosen": -16.425758361816406, + "logps/rejected": -171.57943725585938, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2305370420217514, + "rewards/margins": 2.424769163131714, + "rewards/rejected": -2.194232225418091, + "step": 6948 + }, + { + "epoch": 0.4, + "learning_rate": 6.75342002918736e-08, + "logits/chosen": -2.1606783866882324, + "logits/rejected": -2.160884141921997, + "logps/chosen": -5.7423553466796875, + "logps/rejected": -150.70492553710938, + "loss": 0.463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12850336730480194, + "rewards/margins": 1.5424379110336304, + "rewards/rejected": -1.6709412336349487, + "step": 6949 + }, + { + "epoch": 0.4, + "learning_rate": 6.752537443993622e-08, + "logits/chosen": -2.0026113986968994, + "logits/rejected": -2.006601572036743, + "logps/chosen": -0.00011622595047811046, + "logps/rejected": -167.57432556152344, + "loss": 0.4385, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.933768311981112e-06, + "rewards/margins": 1.5762794017791748, + "rewards/rejected": -1.57628333568573, + "step": 6950 + }, + { + "epoch": 0.4, + "learning_rate": 6.751654796541182e-08, + "logits/chosen": -1.9369107484817505, + "logits/rejected": -1.9287530183792114, + "logps/chosen": -332.31463623046875, + "logps/rejected": -413.5957336425781, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.37485671043396, + "rewards/margins": 0.31731271743774414, + "rewards/rejected": 2.057543992996216, + "step": 6951 + }, + { + "epoch": 0.4, + "learning_rate": 6.750772086861396e-08, + "logits/chosen": -1.9095886945724487, + "logits/rejected": -1.8931729793548584, + "logps/chosen": -98.91122436523438, + "logps/rejected": -407.785888671875, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04022064432501793, + "rewards/margins": 2.2094438076019287, + "rewards/rejected": -2.1692230701446533, + "step": 6952 + }, + { + "epoch": 0.4, + "learning_rate": 6.749889314985619e-08, + "logits/chosen": -2.0756590366363525, + "logits/rejected": -2.033262252807617, + "logps/chosen": -248.48355102539062, + "logps/rejected": -379.858642578125, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4762115478515625, + "rewards/margins": 4.2663116455078125, + "rewards/rejected": -1.79010009765625, + "step": 6953 + }, + { + "epoch": 0.4, + "learning_rate": 6.749006480945217e-08, + "logits/chosen": -1.8853384256362915, + "logits/rejected": -1.8796391487121582, + "logps/chosen": -201.44174194335938, + "logps/rejected": -450.25384521484375, + "loss": 0.0523, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9361908435821533, + "rewards/margins": 3.081234931945801, + "rewards/rejected": -1.145043969154358, + "step": 6954 + }, + { + "epoch": 0.4, + "learning_rate": 6.748123584771548e-08, + "logits/chosen": -2.010861396789551, + "logits/rejected": -1.9568111896514893, + "logps/chosen": -235.17303466796875, + "logps/rejected": -427.07232666015625, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3310182094573975, + "rewards/margins": 6.313595771789551, + "rewards/rejected": -3.9825775623321533, + "step": 6955 + }, + { + "epoch": 0.4, + "learning_rate": 6.747240626495981e-08, + "logits/chosen": -1.95135498046875, + "logits/rejected": -1.9916770458221436, + "logps/chosen": -172.53085327148438, + "logps/rejected": -252.19012451171875, + "loss": 0.2195, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6079422235488892, + "rewards/margins": 0.8681289553642273, + "rewards/rejected": 0.7398132681846619, + "step": 6956 + }, + { + "epoch": 0.4, + "learning_rate": 6.746357606149876e-08, + "logits/chosen": -2.105978012084961, + "logits/rejected": -2.0917916297912598, + "logps/chosen": -48.94886016845703, + "logps/rejected": -184.74290466308594, + "loss": 0.3088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3756507933139801, + "rewards/margins": 2.0696237087249756, + "rewards/rejected": -1.6939728260040283, + "step": 6957 + }, + { + "epoch": 0.4, + "learning_rate": 6.74547452376461e-08, + "logits/chosen": -2.085742712020874, + "logits/rejected": -2.0832653045654297, + "logps/chosen": -0.0029673571698367596, + "logps/rejected": -114.54469299316406, + "loss": 0.7676, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00023248813522513956, + "rewards/margins": -0.28630656003952026, + "rewards/rejected": 0.28607407212257385, + "step": 6958 + }, + { + "epoch": 0.4, + "learning_rate": 6.744591379371548e-08, + "logits/chosen": -1.8547934293746948, + "logits/rejected": -1.7755317687988281, + "logps/chosen": -353.46588134765625, + "logps/rejected": -585.5860595703125, + "loss": 0.0498, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.011615037918091, + "rewards/margins": 4.6859130859375, + "rewards/rejected": -2.674298048019409, + "step": 6959 + }, + { + "epoch": 0.41, + "learning_rate": 6.743708173002068e-08, + "logits/chosen": -2.147400379180908, + "logits/rejected": -2.135406970977783, + "logps/chosen": -9.099308013916016, + "logps/rejected": -208.38241577148438, + "loss": 0.279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7434589266777039, + "rewards/margins": 1.9302661418914795, + "rewards/rejected": -1.1868072748184204, + "step": 6960 + }, + { + "epoch": 0.41, + "learning_rate": 6.742824904687544e-08, + "logits/chosen": -2.0982770919799805, + "logits/rejected": -2.0941591262817383, + "logps/chosen": -8.404046093346551e-05, + "logps/rejected": -179.65480041503906, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9695009945717175e-06, + "rewards/margins": 3.804039478302002, + "rewards/rejected": -3.8040435314178467, + "step": 6961 + }, + { + "epoch": 0.41, + "learning_rate": 6.741941574459355e-08, + "logits/chosen": -1.8863121271133423, + "logits/rejected": -1.938909649848938, + "logps/chosen": -215.34426879882812, + "logps/rejected": -298.0008239746094, + "loss": 0.1527, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7067703008651733, + "rewards/margins": 1.5005446672439575, + "rewards/rejected": 0.20622558891773224, + "step": 6962 + }, + { + "epoch": 0.41, + "learning_rate": 6.741058182348879e-08, + "logits/chosen": -1.9413660764694214, + "logits/rejected": -1.908998727798462, + "logps/chosen": -293.9210205078125, + "logps/rejected": -560.3779907226562, + "loss": 0.1599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21101684868335724, + "rewards/margins": 5.347521781921387, + "rewards/rejected": -5.136505126953125, + "step": 6963 + }, + { + "epoch": 0.41, + "learning_rate": 6.7401747283875e-08, + "logits/chosen": -2.061384916305542, + "logits/rejected": -2.0560431480407715, + "logps/chosen": -0.0020672432146966457, + "logps/rejected": -78.44625091552734, + "loss": 0.6456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000870408141054213, + "rewards/margins": 0.19891422986984253, + "rewards/rejected": -0.1980438232421875, + "step": 6964 + }, + { + "epoch": 0.41, + "learning_rate": 6.739291212606604e-08, + "logits/chosen": -2.0081517696380615, + "logits/rejected": -2.0057737827301025, + "logps/chosen": -130.74742126464844, + "logps/rejected": -269.1622314453125, + "loss": 0.4131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38695070147514343, + "rewards/margins": 0.4890899956226349, + "rewards/rejected": -0.10213928669691086, + "step": 6965 + }, + { + "epoch": 0.41, + "learning_rate": 6.738407635037575e-08, + "logits/chosen": -1.7679083347320557, + "logits/rejected": -1.7629958391189575, + "logps/chosen": -50.045379638671875, + "logps/rejected": -196.37875366210938, + "loss": 0.3012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6659805178642273, + "rewards/margins": 1.3369674682617188, + "rewards/rejected": -0.6709869503974915, + "step": 6966 + }, + { + "epoch": 0.41, + "learning_rate": 6.737523995711805e-08, + "logits/chosen": -1.9247764348983765, + "logits/rejected": -1.914463996887207, + "logps/chosen": -95.78912353515625, + "logps/rejected": -277.864501953125, + "loss": 0.1162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3765732049942017, + "rewards/margins": 3.179121494293213, + "rewards/rejected": -1.8025482892990112, + "step": 6967 + }, + { + "epoch": 0.41, + "learning_rate": 6.736640294660683e-08, + "logits/chosen": -1.9907723665237427, + "logits/rejected": -1.9725579023361206, + "logps/chosen": -30.85618782043457, + "logps/rejected": -225.07232666015625, + "loss": 0.2564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41987094283103943, + "rewards/margins": 2.0444560050964355, + "rewards/rejected": -1.6245850324630737, + "step": 6968 + }, + { + "epoch": 0.41, + "learning_rate": 6.735756531915602e-08, + "logits/chosen": -1.9410042762756348, + "logits/rejected": -1.9413765668869019, + "logps/chosen": -33.53666305541992, + "logps/rejected": -179.98065185546875, + "loss": 0.4781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1686626523733139, + "rewards/margins": 1.390394926071167, + "rewards/rejected": -1.559057593345642, + "step": 6969 + }, + { + "epoch": 0.41, + "learning_rate": 6.734872707507958e-08, + "logits/chosen": -1.8181877136230469, + "logits/rejected": -1.8031094074249268, + "logps/chosen": -183.85107421875, + "logps/rejected": -330.5145263671875, + "loss": 0.1817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0819672346115112, + "rewards/margins": 2.092376708984375, + "rewards/rejected": -1.0104095935821533, + "step": 6970 + }, + { + "epoch": 0.41, + "learning_rate": 6.73398882146915e-08, + "logits/chosen": -1.920706033706665, + "logits/rejected": -1.8981170654296875, + "logps/chosen": -177.49295043945312, + "logps/rejected": -405.19354248046875, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.084703207015991, + "rewards/margins": 2.729649543762207, + "rewards/rejected": -0.644946277141571, + "step": 6971 + }, + { + "epoch": 0.41, + "learning_rate": 6.733104873830577e-08, + "logits/chosen": -2.0357422828674316, + "logits/rejected": -2.038093328475952, + "logps/chosen": -0.006916610524058342, + "logps/rejected": -213.94424438476562, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026308870292268693, + "rewards/margins": 4.174236297607422, + "rewards/rejected": -4.17449951171875, + "step": 6972 + }, + { + "epoch": 0.41, + "learning_rate": 6.732220864623643e-08, + "logits/chosen": -2.1213490962982178, + "logits/rejected": -2.113502264022827, + "logps/chosen": -44.141212463378906, + "logps/rejected": -163.43739318847656, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14675560593605042, + "rewards/margins": 0.4174884855747223, + "rewards/rejected": -0.5642440915107727, + "step": 6973 + }, + { + "epoch": 0.41, + "learning_rate": 6.731336793879749e-08, + "logits/chosen": -1.8487904071807861, + "logits/rejected": -1.8502472639083862, + "logps/chosen": -6.609060764312744, + "logps/rejected": -233.14710998535156, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24147048592567444, + "rewards/margins": 3.0040652751922607, + "rewards/rejected": -2.762594699859619, + "step": 6974 + }, + { + "epoch": 0.41, + "learning_rate": 6.730452661630303e-08, + "logits/chosen": -2.077242612838745, + "logits/rejected": -2.052197217941284, + "logps/chosen": -81.63890075683594, + "logps/rejected": -308.322509765625, + "loss": 0.1491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1726921796798706, + "rewards/margins": 2.043581485748291, + "rewards/rejected": -0.8708893060684204, + "step": 6975 + }, + { + "epoch": 0.41, + "learning_rate": 6.729568467906715e-08, + "logits/chosen": -2.0540034770965576, + "logits/rejected": -2.0605692863464355, + "logps/chosen": -34.2565803527832, + "logps/rejected": -246.93466186523438, + "loss": 0.3764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01923828199505806, + "rewards/margins": 4.679559230804443, + "rewards/rejected": -4.698797702789307, + "step": 6976 + }, + { + "epoch": 0.41, + "learning_rate": 6.728684212740394e-08, + "logits/chosen": -1.9246796369552612, + "logits/rejected": -1.9250576496124268, + "logps/chosen": -13.670832633972168, + "logps/rejected": -27.17086410522461, + "loss": 0.626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17605991661548615, + "rewards/margins": 0.0812503844499588, + "rewards/rejected": 0.09480953216552734, + "step": 6977 + }, + { + "epoch": 0.41, + "learning_rate": 6.727799896162755e-08, + "logits/chosen": -1.816293716430664, + "logits/rejected": -1.8011488914489746, + "logps/chosen": -197.25045776367188, + "logps/rejected": -260.4110107421875, + "loss": 0.3748, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.609582543373108, + "rewards/margins": 0.08663332462310791, + "rewards/rejected": 1.52294921875, + "step": 6978 + }, + { + "epoch": 0.41, + "learning_rate": 6.726915518205211e-08, + "logits/chosen": -1.9651923179626465, + "logits/rejected": -1.9784529209136963, + "logps/chosen": -296.70391845703125, + "logps/rejected": -422.0035400390625, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4328430891036987, + "rewards/margins": 4.226736545562744, + "rewards/rejected": -2.793893575668335, + "step": 6979 + }, + { + "epoch": 0.41, + "learning_rate": 6.726031078899181e-08, + "logits/chosen": -2.0659704208374023, + "logits/rejected": -2.045074939727783, + "logps/chosen": -200.24423217773438, + "logps/rejected": -289.01495361328125, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7632766962051392, + "rewards/margins": 0.5015640258789062, + "rewards/rejected": 1.261712670326233, + "step": 6980 + }, + { + "epoch": 0.41, + "learning_rate": 6.725146578276085e-08, + "logits/chosen": -1.9141831398010254, + "logits/rejected": -1.9494677782058716, + "logps/chosen": -157.01071166992188, + "logps/rejected": -401.5353088378906, + "loss": 0.0918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.415747046470642, + "rewards/margins": 2.6368682384490967, + "rewards/rejected": -1.2211211919784546, + "step": 6981 + }, + { + "epoch": 0.41, + "learning_rate": 6.724262016367341e-08, + "logits/chosen": -1.8809573650360107, + "logits/rejected": -1.8745659589767456, + "logps/chosen": -5.659966945648193, + "logps/rejected": -98.95668029785156, + "loss": 0.6129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008059501997195184, + "rewards/margins": 0.06243925169110298, + "rewards/rejected": -0.06163330003619194, + "step": 6982 + }, + { + "epoch": 0.41, + "learning_rate": 6.723377393204378e-08, + "logits/chosen": -2.05474853515625, + "logits/rejected": -2.0447914600372314, + "logps/chosen": -42.1361083984375, + "logps/rejected": -280.3208923339844, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31761056184768677, + "rewards/margins": 3.520833969116211, + "rewards/rejected": -3.838444471359253, + "step": 6983 + }, + { + "epoch": 0.41, + "learning_rate": 6.72249270881862e-08, + "logits/chosen": -2.1394295692443848, + "logits/rejected": -2.1244165897369385, + "logps/chosen": -54.981807708740234, + "logps/rejected": -226.94874572753906, + "loss": 0.3021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6048523187637329, + "rewards/margins": 1.267307996749878, + "rewards/rejected": -0.6624557375907898, + "step": 6984 + }, + { + "epoch": 0.41, + "learning_rate": 6.721607963241495e-08, + "logits/chosen": -2.0618395805358887, + "logits/rejected": -2.051177978515625, + "logps/chosen": -191.71044921875, + "logps/rejected": -272.3189697265625, + "loss": 0.3791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5044845938682556, + "rewards/margins": 0.4953964650630951, + "rewards/rejected": 0.009088135324418545, + "step": 6985 + }, + { + "epoch": 0.41, + "learning_rate": 6.720723156504434e-08, + "logits/chosen": -1.7476063966751099, + "logits/rejected": -1.72660231590271, + "logps/chosen": -306.44219970703125, + "logps/rejected": -421.649169921875, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.083160400390625, + "rewards/margins": 0.0221710205078125, + "rewards/rejected": 2.0609893798828125, + "step": 6986 + }, + { + "epoch": 0.41, + "learning_rate": 6.719838288638869e-08, + "logits/chosen": -2.067436933517456, + "logits/rejected": -2.071613311767578, + "logps/chosen": -23.464122772216797, + "logps/rejected": -168.16943359375, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39822158217430115, + "rewards/margins": 2.3781471252441406, + "rewards/rejected": -1.979925513267517, + "step": 6987 + }, + { + "epoch": 0.41, + "learning_rate": 6.718953359676237e-08, + "logits/chosen": -1.9389526844024658, + "logits/rejected": -1.8971532583236694, + "logps/chosen": -233.31973266601562, + "logps/rejected": -412.6546325683594, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5050506591796875, + "rewards/margins": 2.8004212379455566, + "rewards/rejected": -0.295370489358902, + "step": 6988 + }, + { + "epoch": 0.41, + "learning_rate": 6.718068369647971e-08, + "logits/chosen": -1.8916655778884888, + "logits/rejected": -1.8940367698669434, + "logps/chosen": -61.6952018737793, + "logps/rejected": -142.12217712402344, + "loss": 0.6523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9349861145019531, + "rewards/margins": 2.0919625759124756, + "rewards/rejected": -3.0269486904144287, + "step": 6989 + }, + { + "epoch": 0.41, + "learning_rate": 6.717183318585515e-08, + "logits/chosen": -1.9955812692642212, + "logits/rejected": -1.9908567667007446, + "logps/chosen": -0.3813093602657318, + "logps/rejected": -165.66604614257812, + "loss": 0.3815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012308076024055481, + "rewards/margins": 2.1036717891693115, + "rewards/rejected": -2.0913636684417725, + "step": 6990 + }, + { + "epoch": 0.41, + "learning_rate": 6.716298206520305e-08, + "logits/chosen": -2.0851168632507324, + "logits/rejected": -2.082317590713501, + "logps/chosen": -7.491678237915039, + "logps/rejected": -143.87289428710938, + "loss": 0.6388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11110448837280273, + "rewards/margins": 0.2668190896511078, + "rewards/rejected": -0.3779235780239105, + "step": 6991 + }, + { + "epoch": 0.41, + "learning_rate": 6.71541303348379e-08, + "logits/chosen": -2.096679449081421, + "logits/rejected": -2.090536117553711, + "logps/chosen": -18.424829483032227, + "logps/rejected": -97.91097259521484, + "loss": 0.5482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12556229531764984, + "rewards/margins": 0.5075165033340454, + "rewards/rejected": -0.3819541931152344, + "step": 6992 + }, + { + "epoch": 0.41, + "learning_rate": 6.71452779950741e-08, + "logits/chosen": -1.8143434524536133, + "logits/rejected": -1.7494019269943237, + "logps/chosen": -148.33982849121094, + "logps/rejected": -258.95550537109375, + "loss": 0.3801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5376877188682556, + "rewards/margins": 0.7656875848770142, + "rewards/rejected": -0.22799988090991974, + "step": 6993 + }, + { + "epoch": 0.41, + "learning_rate": 6.71364250462262e-08, + "logits/chosen": -1.9901372194290161, + "logits/rejected": -2.039832592010498, + "logps/chosen": -210.846435546875, + "logps/rejected": -338.9509582519531, + "loss": 0.1672, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6475937366485596, + "rewards/margins": 1.0202804803848267, + "rewards/rejected": 1.627313256263733, + "step": 6994 + }, + { + "epoch": 0.41, + "learning_rate": 6.712757148860864e-08, + "logits/chosen": -1.8222624063491821, + "logits/rejected": -1.8421374559402466, + "logps/chosen": -177.1253662109375, + "logps/rejected": -483.9127197265625, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3184144496917725, + "rewards/margins": 6.685205459594727, + "rewards/rejected": -4.366790771484375, + "step": 6995 + }, + { + "epoch": 0.41, + "learning_rate": 6.711871732253596e-08, + "logits/chosen": -2.065443754196167, + "logits/rejected": -2.0685031414031982, + "logps/chosen": -0.001032235217280686, + "logps/rejected": -181.58438110351562, + "loss": 0.3967, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077240868762601e-06, + "rewards/margins": 2.073779582977295, + "rewards/rejected": -2.0737855434417725, + "step": 6996 + }, + { + "epoch": 0.41, + "learning_rate": 6.710986254832269e-08, + "logits/chosen": -2.0133821964263916, + "logits/rejected": -2.0111541748046875, + "logps/chosen": -4.988068103790283, + "logps/rejected": -60.83448028564453, + "loss": 0.5682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03222827985882759, + "rewards/margins": 0.3180185556411743, + "rewards/rejected": -0.28579026460647583, + "step": 6997 + }, + { + "epoch": 0.41, + "learning_rate": 6.710100716628345e-08, + "logits/chosen": -2.08081316947937, + "logits/rejected": -2.0771377086639404, + "logps/chosen": -45.230838775634766, + "logps/rejected": -132.71739196777344, + "loss": 0.4659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11348724365234375, + "rewards/margins": 0.9625999927520752, + "rewards/rejected": -1.076087236404419, + "step": 6998 + }, + { + "epoch": 0.41, + "learning_rate": 6.709215117673274e-08, + "logits/chosen": -2.0877020359039307, + "logits/rejected": -2.0858025550842285, + "logps/chosen": -0.0008108449401333928, + "logps/rejected": -97.75585174560547, + "loss": 0.375, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9631342840730213e-05, + "rewards/margins": 2.40946364402771, + "rewards/rejected": -2.4094932079315186, + "step": 6999 + }, + { + "epoch": 0.41, + "learning_rate": 6.708329457998524e-08, + "logits/chosen": -2.0933070182800293, + "logits/rejected": -2.0777924060821533, + "logps/chosen": -8.115023612976074, + "logps/rejected": -163.74337768554688, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07025527954101562, + "rewards/margins": 2.0735862255096436, + "rewards/rejected": -2.003330945968628, + "step": 7000 + }, + { + "epoch": 0.41, + "learning_rate": 6.707443737635554e-08, + "logits/chosen": -2.0351245403289795, + "logits/rejected": -2.0174202919006348, + "logps/chosen": -231.09164428710938, + "logps/rejected": -330.32196044921875, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4354828596115112, + "rewards/margins": 0.8126831650733948, + "rewards/rejected": 0.6227996945381165, + "step": 7001 + }, + { + "epoch": 0.41, + "learning_rate": 6.706557956615831e-08, + "logits/chosen": -2.2241978645324707, + "logits/rejected": -2.220132827758789, + "logps/chosen": -9.238588972948492e-05, + "logps/rejected": -94.59463500976562, + "loss": 0.6001, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.613280452758772e-06, + "rewards/margins": 0.3207443654537201, + "rewards/rejected": -0.32073974609375, + "step": 7002 + }, + { + "epoch": 0.41, + "learning_rate": 6.705672114970821e-08, + "logits/chosen": -1.9628124237060547, + "logits/rejected": -1.9329394102096558, + "logps/chosen": -192.18563842773438, + "logps/rejected": -302.9195556640625, + "loss": 0.1881, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2694519758224487, + "rewards/margins": 1.3682068586349487, + "rewards/rejected": -0.0987548828125, + "step": 7003 + }, + { + "epoch": 0.41, + "learning_rate": 6.704786212731994e-08, + "logits/chosen": -2.016319990158081, + "logits/rejected": -2.016463279724121, + "logps/chosen": -61.800926208496094, + "logps/rejected": -274.87298583984375, + "loss": 0.6211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8195610046386719, + "rewards/margins": 4.346842288970947, + "rewards/rejected": -5.166403293609619, + "step": 7004 + }, + { + "epoch": 0.41, + "learning_rate": 6.703900249930824e-08, + "logits/chosen": -1.9496253728866577, + "logits/rejected": -1.956313133239746, + "logps/chosen": -170.24188232421875, + "logps/rejected": -234.50503540039062, + "loss": 0.1994, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5934418439865112, + "rewards/margins": 1.203260898590088, + "rewards/rejected": 0.3901809751987457, + "step": 7005 + }, + { + "epoch": 0.41, + "learning_rate": 6.70301422659878e-08, + "logits/chosen": -1.9796693325042725, + "logits/rejected": -1.9537581205368042, + "logps/chosen": -192.24252319335938, + "logps/rejected": -291.6747131347656, + "loss": 0.2282, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7913148403167725, + "rewards/margins": 0.6232147216796875, + "rewards/rejected": 2.168100118637085, + "step": 7006 + }, + { + "epoch": 0.41, + "learning_rate": 6.702128142767341e-08, + "logits/chosen": -2.005023956298828, + "logits/rejected": -2.004807233810425, + "logps/chosen": -0.03847167268395424, + "logps/rejected": -120.73336791992188, + "loss": 0.4475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008886179886758327, + "rewards/margins": 1.4592242240905762, + "rewards/rejected": -1.4503380060195923, + "step": 7007 + }, + { + "epoch": 0.41, + "learning_rate": 6.701241998467984e-08, + "logits/chosen": -1.8261687755584717, + "logits/rejected": -1.7705694437026978, + "logps/chosen": -236.4248809814453, + "logps/rejected": -407.73956298828125, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1335678100585938, + "rewards/margins": 3.853001594543457, + "rewards/rejected": -1.7194336652755737, + "step": 7008 + }, + { + "epoch": 0.41, + "learning_rate": 6.700355793732192e-08, + "logits/chosen": -2.0978403091430664, + "logits/rejected": -2.0808093547821045, + "logps/chosen": -10.435935020446777, + "logps/rejected": -328.8280334472656, + "loss": 0.745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9920102953910828, + "rewards/margins": 1.5511231422424316, + "rewards/rejected": -2.543133497238159, + "step": 7009 + }, + { + "epoch": 0.41, + "learning_rate": 6.699469528591441e-08, + "logits/chosen": -1.9271608591079712, + "logits/rejected": -1.9073430299758911, + "logps/chosen": -282.6192321777344, + "logps/rejected": -468.05560302734375, + "loss": 0.1719, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3911163806915283, + "rewards/margins": 1.7884339094161987, + "rewards/rejected": -0.397317498922348, + "step": 7010 + }, + { + "epoch": 0.41, + "learning_rate": 6.698583203077222e-08, + "logits/chosen": -1.902939796447754, + "logits/rejected": -1.8960065841674805, + "logps/chosen": -7.065310001373291, + "logps/rejected": -84.49818420410156, + "loss": 0.5293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24641823768615723, + "rewards/margins": 0.3619394898414612, + "rewards/rejected": -0.11552124470472336, + "step": 7011 + }, + { + "epoch": 0.41, + "learning_rate": 6.697696817221018e-08, + "logits/chosen": -1.8997862339019775, + "logits/rejected": -1.8876296281814575, + "logps/chosen": -313.89599609375, + "logps/rejected": -529.9664306640625, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.04325270652771, + "rewards/margins": 5.862875461578369, + "rewards/rejected": -2.819622755050659, + "step": 7012 + }, + { + "epoch": 0.41, + "learning_rate": 6.69681037105432e-08, + "logits/chosen": -1.8913934230804443, + "logits/rejected": -1.841178059577942, + "logps/chosen": -223.8743896484375, + "logps/rejected": -320.4608459472656, + "loss": 0.1512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5877960920333862, + "rewards/margins": 1.8931519985198975, + "rewards/rejected": -0.30535584688186646, + "step": 7013 + }, + { + "epoch": 0.41, + "learning_rate": 6.695923864608616e-08, + "logits/chosen": -2.0582644939422607, + "logits/rejected": -2.0555002689361572, + "logps/chosen": -235.3972625732422, + "logps/rejected": -349.44024658203125, + "loss": 0.0608, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.069096326828003, + "rewards/margins": 2.317579507827759, + "rewards/rejected": 0.7515167593955994, + "step": 7014 + }, + { + "epoch": 0.41, + "learning_rate": 6.695037297915401e-08, + "logits/chosen": -2.1541244983673096, + "logits/rejected": -2.1531522274017334, + "logps/chosen": -9.256071090698242, + "logps/rejected": -69.57460021972656, + "loss": 0.6349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012623977847397327, + "rewards/margins": 0.22511252760887146, + "rewards/rejected": -0.2377365082502365, + "step": 7015 + }, + { + "epoch": 0.41, + "learning_rate": 6.69415067100617e-08, + "logits/chosen": -1.7325727939605713, + "logits/rejected": -1.715403437614441, + "logps/chosen": -0.09560171514749527, + "logps/rejected": -138.48443603515625, + "loss": 0.4429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0345667339861393, + "rewards/margins": 1.4531810283660889, + "rewards/rejected": -1.4186142683029175, + "step": 7016 + }, + { + "epoch": 0.41, + "learning_rate": 6.693263983912421e-08, + "logits/chosen": -1.8309454917907715, + "logits/rejected": -1.8096286058425903, + "logps/chosen": -168.08377075195312, + "logps/rejected": -473.3643798828125, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4341964721679688, + "rewards/margins": 6.085853576660156, + "rewards/rejected": -4.6516571044921875, + "step": 7017 + }, + { + "epoch": 0.41, + "learning_rate": 6.692377236665652e-08, + "logits/chosen": -1.8593283891677856, + "logits/rejected": -1.8811252117156982, + "logps/chosen": -156.506591796875, + "logps/rejected": -212.73223876953125, + "loss": 0.3784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.052760362625122, + "rewards/margins": 0.46947330236434937, + "rewards/rejected": 0.5832870602607727, + "step": 7018 + }, + { + "epoch": 0.41, + "learning_rate": 6.691490429297364e-08, + "logits/chosen": -1.5733929872512817, + "logits/rejected": -1.5714821815490723, + "logps/chosen": -263.812744140625, + "logps/rejected": -528.3966064453125, + "loss": 0.4045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08497314900159836, + "rewards/margins": 2.0405335426330566, + "rewards/rejected": -2.125506639480591, + "step": 7019 + }, + { + "epoch": 0.41, + "learning_rate": 6.690603561839063e-08, + "logits/chosen": -2.1143641471862793, + "logits/rejected": -2.1018388271331787, + "logps/chosen": -50.69463348388672, + "logps/rejected": -192.56887817382812, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9269798398017883, + "rewards/margins": 0.8368812799453735, + "rewards/rejected": 0.09009857475757599, + "step": 7020 + }, + { + "epoch": 0.41, + "learning_rate": 6.689716634322255e-08, + "logits/chosen": -1.7379393577575684, + "logits/rejected": -1.6995155811309814, + "logps/chosen": -182.17367553710938, + "logps/rejected": -290.6543273925781, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0841691493988037, + "rewards/margins": 0.5642136335372925, + "rewards/rejected": 1.5199555158615112, + "step": 7021 + }, + { + "epoch": 0.41, + "learning_rate": 6.688829646778445e-08, + "logits/chosen": -1.984511375427246, + "logits/rejected": -1.9606821537017822, + "logps/chosen": -165.69276428222656, + "logps/rejected": -277.3341979980469, + "loss": 0.1798, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8552261590957642, + "rewards/margins": 1.303074598312378, + "rewards/rejected": 0.5521515011787415, + "step": 7022 + }, + { + "epoch": 0.41, + "learning_rate": 6.687942599239147e-08, + "logits/chosen": -2.190962314605713, + "logits/rejected": -2.1869633197784424, + "logps/chosen": -0.00048465136205777526, + "logps/rejected": -180.2421875, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5827656170586124e-05, + "rewards/margins": 3.4365017414093018, + "rewards/rejected": -3.4365174770355225, + "step": 7023 + }, + { + "epoch": 0.41, + "learning_rate": 6.68705549173587e-08, + "logits/chosen": -1.8930327892303467, + "logits/rejected": -1.9737763404846191, + "logps/chosen": -224.66867065429688, + "logps/rejected": -227.18209838867188, + "loss": 0.1262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4486191272735596, + "rewards/margins": 1.9729050397872925, + "rewards/rejected": -0.5242859125137329, + "step": 7024 + }, + { + "epoch": 0.41, + "learning_rate": 6.68616832430013e-08, + "logits/chosen": -1.8593686819076538, + "logits/rejected": -1.8438057899475098, + "logps/chosen": -134.47406005859375, + "logps/rejected": -303.68829345703125, + "loss": 0.4302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.898693859577179, + "rewards/margins": 0.34709471464157104, + "rewards/rejected": 0.5515991449356079, + "step": 7025 + }, + { + "epoch": 0.41, + "learning_rate": 6.685281096963442e-08, + "logits/chosen": -1.8025038242340088, + "logits/rejected": -1.8113535642623901, + "logps/chosen": -140.26806640625, + "logps/rejected": -258.7403869628906, + "loss": 0.153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3381973505020142, + "rewards/margins": 1.8273941278457642, + "rewards/rejected": -0.48919677734375, + "step": 7026 + }, + { + "epoch": 0.41, + "learning_rate": 6.684393809757327e-08, + "logits/chosen": -2.0594115257263184, + "logits/rejected": -2.025940418243408, + "logps/chosen": -158.81069946289062, + "logps/rejected": -252.90037536621094, + "loss": 0.1698, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.854009985923767, + "rewards/margins": 1.406590223312378, + "rewards/rejected": 0.4474197328090668, + "step": 7027 + }, + { + "epoch": 0.41, + "learning_rate": 6.683506462713305e-08, + "logits/chosen": -2.0642354488372803, + "logits/rejected": -2.050544023513794, + "logps/chosen": -191.65951538085938, + "logps/rejected": -355.55511474609375, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.40757155418396, + "rewards/margins": 4.057931900024414, + "rewards/rejected": -1.650360107421875, + "step": 7028 + }, + { + "epoch": 0.41, + "learning_rate": 6.682619055862897e-08, + "logits/chosen": -1.9976222515106201, + "logits/rejected": -1.9997278451919556, + "logps/chosen": -39.58580780029297, + "logps/rejected": -184.99057006835938, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15265236794948578, + "rewards/margins": 1.858391284942627, + "rewards/rejected": -1.70573890209198, + "step": 7029 + }, + { + "epoch": 0.41, + "learning_rate": 6.681731589237631e-08, + "logits/chosen": -1.9929746389389038, + "logits/rejected": -1.9764877557754517, + "logps/chosen": -2.8217198848724365, + "logps/rejected": -113.73948669433594, + "loss": 0.719, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.021653462201356888, + "rewards/margins": -0.23862259089946747, + "rewards/rejected": 0.26027604937553406, + "step": 7030 + }, + { + "epoch": 0.41, + "learning_rate": 6.68084406286903e-08, + "logits/chosen": -1.871187448501587, + "logits/rejected": -1.8682783842086792, + "logps/chosen": -0.057262614369392395, + "logps/rejected": -252.4470672607422, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017199832946062088, + "rewards/margins": 3.8063595294952393, + "rewards/rejected": -3.808079481124878, + "step": 7031 + }, + { + "epoch": 0.41, + "learning_rate": 6.679956476788629e-08, + "logits/chosen": -1.8237605094909668, + "logits/rejected": -1.821668267250061, + "logps/chosen": -151.61260986328125, + "logps/rejected": -235.7962188720703, + "loss": 0.1567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9156280755996704, + "rewards/margins": 1.386906385421753, + "rewards/rejected": 0.5287216305732727, + "step": 7032 + }, + { + "epoch": 0.41, + "learning_rate": 6.679068831027956e-08, + "logits/chosen": -1.856481671333313, + "logits/rejected": -1.855446219444275, + "logps/chosen": -0.0008037848165258765, + "logps/rejected": -111.79039001464844, + "loss": 0.5729, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.106710159452632e-05, + "rewards/margins": 0.5575417280197144, + "rewards/rejected": -0.5575928092002869, + "step": 7033 + }, + { + "epoch": 0.41, + "learning_rate": 6.678181125618544e-08, + "logits/chosen": -2.028028726577759, + "logits/rejected": -2.0221593379974365, + "logps/chosen": -4.64909098809585e-05, + "logps/rejected": -117.8578872680664, + "loss": 0.4128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6225754368169873e-07, + "rewards/margins": 1.890114426612854, + "rewards/rejected": -1.890114665031433, + "step": 7034 + }, + { + "epoch": 0.41, + "learning_rate": 6.677293360591931e-08, + "logits/chosen": -1.9991521835327148, + "logits/rejected": -1.9975095987319946, + "logps/chosen": -87.18966674804688, + "logps/rejected": -238.12298583984375, + "loss": 0.2303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0411102771759033, + "rewards/margins": 1.733102560043335, + "rewards/rejected": -0.6919922232627869, + "step": 7035 + }, + { + "epoch": 0.41, + "learning_rate": 6.676405535979652e-08, + "logits/chosen": -1.9423707723617554, + "logits/rejected": -1.9418851137161255, + "logps/chosen": -50.526329040527344, + "logps/rejected": -373.76605224609375, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21010474860668182, + "rewards/margins": 4.573671817779541, + "rewards/rejected": -4.783776760101318, + "step": 7036 + }, + { + "epoch": 0.41, + "learning_rate": 6.675517651813248e-08, + "logits/chosen": -1.9801697731018066, + "logits/rejected": -1.9812452793121338, + "logps/chosen": -57.39649200439453, + "logps/rejected": -179.84805297851562, + "loss": 0.2566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9040932059288025, + "rewards/margins": 1.424353837966919, + "rewards/rejected": -0.5202606320381165, + "step": 7037 + }, + { + "epoch": 0.41, + "learning_rate": 6.674629708124264e-08, + "logits/chosen": -2.1383230686187744, + "logits/rejected": -2.135432004928589, + "logps/chosen": -267.7958679199219, + "logps/rejected": -344.9180908203125, + "loss": 0.3925, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.5754029750823975, + "rewards/margins": -0.13153362274169922, + "rewards/rejected": 2.7069365978240967, + "step": 7038 + }, + { + "epoch": 0.41, + "learning_rate": 6.673741704944237e-08, + "logits/chosen": -2.1050071716308594, + "logits/rejected": -2.1011927127838135, + "logps/chosen": -3.5461106300354004, + "logps/rejected": -73.80611419677734, + "loss": 0.5502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08628380298614502, + "rewards/margins": 0.7283333539962769, + "rewards/rejected": -0.8146171569824219, + "step": 7039 + }, + { + "epoch": 0.41, + "learning_rate": 6.672853642304722e-08, + "logits/chosen": -1.987494945526123, + "logits/rejected": -1.9873547554016113, + "logps/chosen": -32.98572540283203, + "logps/rejected": -229.19654846191406, + "loss": 0.4435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2278621643781662, + "rewards/margins": 2.594170093536377, + "rewards/rejected": -2.8220322132110596, + "step": 7040 + }, + { + "epoch": 0.41, + "learning_rate": 6.671965520237258e-08, + "logits/chosen": -1.7506353855133057, + "logits/rejected": -1.7596137523651123, + "logps/chosen": -157.38162231445312, + "logps/rejected": -237.65811157226562, + "loss": 0.2487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7753952741622925, + "rewards/margins": 0.8201767802238464, + "rewards/rejected": 0.955218493938446, + "step": 7041 + }, + { + "epoch": 0.41, + "learning_rate": 6.671077338773403e-08, + "logits/chosen": -1.9752503633499146, + "logits/rejected": -1.9867790937423706, + "logps/chosen": -158.46322631835938, + "logps/rejected": -223.68157958984375, + "loss": 0.1514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0771331787109375, + "rewards/margins": 1.3672211170196533, + "rewards/rejected": 0.709912121295929, + "step": 7042 + }, + { + "epoch": 0.41, + "learning_rate": 6.670189097944706e-08, + "logits/chosen": -1.8285192251205444, + "logits/rejected": -1.8342015743255615, + "logps/chosen": -37.740478515625, + "logps/rejected": -255.39712524414062, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3608207702636719, + "rewards/margins": 1.3527030944824219, + "rewards/rejected": -0.99188232421875, + "step": 7043 + }, + { + "epoch": 0.41, + "learning_rate": 6.669300797782723e-08, + "logits/chosen": -2.0344316959381104, + "logits/rejected": -2.032132387161255, + "logps/chosen": -0.16061252355575562, + "logps/rejected": -103.16059875488281, + "loss": 0.6537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009162843227386475, + "rewards/margins": 0.15869669616222382, + "rewards/rejected": -0.14953385293483734, + "step": 7044 + }, + { + "epoch": 0.41, + "learning_rate": 6.66841243831901e-08, + "logits/chosen": -1.923511028289795, + "logits/rejected": -1.8865553140640259, + "logps/chosen": -322.34661865234375, + "logps/rejected": -443.32061767578125, + "loss": 0.2077, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.467755079269409, + "rewards/margins": 0.7282195091247559, + "rewards/rejected": 1.7395355701446533, + "step": 7045 + }, + { + "epoch": 0.41, + "learning_rate": 6.667524019585125e-08, + "logits/chosen": -1.8136529922485352, + "logits/rejected": -1.7519508600234985, + "logps/chosen": -180.48709106445312, + "logps/rejected": -309.584716796875, + "loss": 0.213, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485528588294983, + "rewards/margins": 1.2933380603790283, + "rewards/rejected": 0.19219055771827698, + "step": 7046 + }, + { + "epoch": 0.41, + "learning_rate": 6.666635541612632e-08, + "logits/chosen": -2.0710248947143555, + "logits/rejected": -2.060086727142334, + "logps/chosen": -0.4712246060371399, + "logps/rejected": -232.5276641845703, + "loss": 0.4587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008530274033546448, + "rewards/margins": 1.440079689025879, + "rewards/rejected": -1.4486099481582642, + "step": 7047 + }, + { + "epoch": 0.41, + "learning_rate": 6.665747004433091e-08, + "logits/chosen": -1.958354115486145, + "logits/rejected": -1.9558541774749756, + "logps/chosen": -26.90530776977539, + "logps/rejected": -69.56000518798828, + "loss": 0.6041, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.18503914773464203, + "rewards/margins": -0.04168662428855896, + "rewards/rejected": 0.226725772023201, + "step": 7048 + }, + { + "epoch": 0.41, + "learning_rate": 6.664858408078069e-08, + "logits/chosen": -2.025974988937378, + "logits/rejected": -2.0179147720336914, + "logps/chosen": -23.17364501953125, + "logps/rejected": -182.09893798828125, + "loss": 0.4835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3302116394042969, + "rewards/margins": 0.5406150817871094, + "rewards/rejected": -0.2104034423828125, + "step": 7049 + }, + { + "epoch": 0.41, + "learning_rate": 6.663969752579133e-08, + "logits/chosen": -2.001354932785034, + "logits/rejected": -1.888731598854065, + "logps/chosen": -240.9044189453125, + "logps/rejected": -461.54791259765625, + "loss": 0.0628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8910369873046875, + "rewards/margins": 3.581692695617676, + "rewards/rejected": -1.6906555891036987, + "step": 7050 + }, + { + "epoch": 0.41, + "learning_rate": 6.663081037967852e-08, + "logits/chosen": -1.9597347974777222, + "logits/rejected": -2.0168042182922363, + "logps/chosen": -300.3667907714844, + "logps/rejected": -468.87274169921875, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4253082275390625, + "rewards/margins": 3.8682403564453125, + "rewards/rejected": -1.44293212890625, + "step": 7051 + }, + { + "epoch": 0.41, + "learning_rate": 6.662192264275795e-08, + "logits/chosen": -1.913794755935669, + "logits/rejected": -1.9105708599090576, + "logps/chosen": -44.157554626464844, + "logps/rejected": -178.14678955078125, + "loss": 0.642, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12686538696289062, + "rewards/margins": -0.1314232051372528, + "rewards/rejected": 0.25828859210014343, + "step": 7052 + }, + { + "epoch": 0.41, + "learning_rate": 6.66130343153454e-08, + "logits/chosen": -1.9166083335876465, + "logits/rejected": -1.9215033054351807, + "logps/chosen": -183.9296875, + "logps/rejected": -324.99774169921875, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3668441772460938, + "rewards/margins": 1.7378677129745483, + "rewards/rejected": 0.6289764642715454, + "step": 7053 + }, + { + "epoch": 0.41, + "learning_rate": 6.66041453977566e-08, + "logits/chosen": -1.850770354270935, + "logits/rejected": -1.8920364379882812, + "logps/chosen": -235.64642333984375, + "logps/rejected": -311.8848571777344, + "loss": 0.2663, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2974166870117188, + "rewards/margins": 0.5245620608329773, + "rewards/rejected": 0.7728546261787415, + "step": 7054 + }, + { + "epoch": 0.41, + "learning_rate": 6.659525589030735e-08, + "logits/chosen": -1.76658296585083, + "logits/rejected": -1.7709765434265137, + "logps/chosen": -0.004646983928978443, + "logps/rejected": -142.43563842773438, + "loss": 0.4717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004420979821588844, + "rewards/margins": 1.258310317993164, + "rewards/rejected": -1.258752465248108, + "step": 7055 + }, + { + "epoch": 0.41, + "learning_rate": 6.658636579331343e-08, + "logits/chosen": -2.000033378601074, + "logits/rejected": -1.9979817867279053, + "logps/chosen": -6.4019951820373535, + "logps/rejected": -164.65435791015625, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19146589934825897, + "rewards/margins": 4.442744731903076, + "rewards/rejected": -4.251278877258301, + "step": 7056 + }, + { + "epoch": 0.41, + "learning_rate": 6.657747510709066e-08, + "logits/chosen": -2.077665090560913, + "logits/rejected": -2.077998399734497, + "logps/chosen": -6.020307540893555, + "logps/rejected": -154.81982421875, + "loss": 0.4403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15679244697093964, + "rewards/margins": 1.3147422075271606, + "rewards/rejected": -1.1579498052597046, + "step": 7057 + }, + { + "epoch": 0.41, + "learning_rate": 6.656858383195489e-08, + "logits/chosen": -2.136655330657959, + "logits/rejected": -2.1377625465393066, + "logps/chosen": -9.79107666015625, + "logps/rejected": -180.74569702148438, + "loss": 0.4865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035956382751464844, + "rewards/margins": 0.9754614233970642, + "rewards/rejected": -0.9395050406455994, + "step": 7058 + }, + { + "epoch": 0.41, + "learning_rate": 6.655969196822197e-08, + "logits/chosen": -2.109133005142212, + "logits/rejected": -2.1018102169036865, + "logps/chosen": -7.125766754150391, + "logps/rejected": -227.4961395263672, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4319441020488739, + "rewards/margins": 1.5529173612594604, + "rewards/rejected": -1.1209732294082642, + "step": 7059 + }, + { + "epoch": 0.41, + "learning_rate": 6.65507995162078e-08, + "logits/chosen": -2.0255134105682373, + "logits/rejected": -2.0123133659362793, + "logps/chosen": -64.7358627319336, + "logps/rejected": -241.24685668945312, + "loss": 0.6271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27329787611961365, + "rewards/margins": 1.3103950023651123, + "rewards/rejected": -1.5836929082870483, + "step": 7060 + }, + { + "epoch": 0.41, + "learning_rate": 6.654190647622826e-08, + "logits/chosen": -1.8783947229385376, + "logits/rejected": -1.880188226699829, + "logps/chosen": -14.375164031982422, + "logps/rejected": -265.52203369140625, + "loss": 0.3554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08750972896814346, + "rewards/margins": 4.658507823944092, + "rewards/rejected": -4.7460174560546875, + "step": 7061 + }, + { + "epoch": 0.41, + "learning_rate": 6.65330128485993e-08, + "logits/chosen": -1.9868513345718384, + "logits/rejected": -1.9880725145339966, + "logps/chosen": -23.685672760009766, + "logps/rejected": -183.01922607421875, + "loss": 0.3956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18208961188793182, + "rewards/margins": 1.1557468175888062, + "rewards/rejected": -0.9736572504043579, + "step": 7062 + }, + { + "epoch": 0.41, + "learning_rate": 6.652411863363685e-08, + "logits/chosen": -1.9140087366104126, + "logits/rejected": -1.861221194267273, + "logps/chosen": -258.7286071777344, + "logps/rejected": -428.66302490234375, + "loss": 0.1127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.48191237449646, + "rewards/margins": 1.4901399612426758, + "rewards/rejected": 0.991772472858429, + "step": 7063 + }, + { + "epoch": 0.41, + "learning_rate": 6.651522383165686e-08, + "logits/chosen": -1.9016668796539307, + "logits/rejected": -1.9054927825927734, + "logps/chosen": -331.78192138671875, + "logps/rejected": -414.83135986328125, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.518725633621216, + "rewards/margins": 0.3501098155975342, + "rewards/rejected": 2.1686158180236816, + "step": 7064 + }, + { + "epoch": 0.41, + "learning_rate": 6.650632844297536e-08, + "logits/chosen": -2.1330301761627197, + "logits/rejected": -2.1247377395629883, + "logps/chosen": -1.9131481647491455, + "logps/rejected": -199.488037109375, + "loss": 0.3983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09920548647642136, + "rewards/margins": 2.5568721294403076, + "rewards/rejected": -2.6560776233673096, + "step": 7065 + }, + { + "epoch": 0.41, + "learning_rate": 6.649743246790831e-08, + "logits/chosen": -1.9980262517929077, + "logits/rejected": -2.0111939907073975, + "logps/chosen": -176.5257568359375, + "logps/rejected": -438.7889709472656, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1954025030136108, + "rewards/margins": 3.8334550857543945, + "rewards/rejected": -2.638052463531494, + "step": 7066 + }, + { + "epoch": 0.41, + "learning_rate": 6.648853590677177e-08, + "logits/chosen": -1.7334765195846558, + "logits/rejected": -1.7404943704605103, + "logps/chosen": -263.11749267578125, + "logps/rejected": -371.77142333984375, + "loss": 0.2318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4700348377227783, + "rewards/margins": 1.2993621826171875, + "rewards/rejected": 0.17067261040210724, + "step": 7067 + }, + { + "epoch": 0.41, + "learning_rate": 6.647963875988179e-08, + "logits/chosen": -1.9908920526504517, + "logits/rejected": -1.9874281883239746, + "logps/chosen": -81.790283203125, + "logps/rejected": -312.11456298828125, + "loss": 0.1694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7816879153251648, + "rewards/margins": 4.82473611831665, + "rewards/rejected": -4.04304838180542, + "step": 7068 + }, + { + "epoch": 0.41, + "learning_rate": 6.64707410275544e-08, + "logits/chosen": -1.8844618797302246, + "logits/rejected": -1.8820968866348267, + "logps/chosen": -1.9107004404067993, + "logps/rejected": -144.70529174804688, + "loss": 0.3539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055369723588228226, + "rewards/margins": 2.981370687484741, + "rewards/rejected": -2.9260010719299316, + "step": 7069 + }, + { + "epoch": 0.41, + "learning_rate": 6.646184271010577e-08, + "logits/chosen": -2.0161077976226807, + "logits/rejected": -2.035353660583496, + "logps/chosen": -208.20187377929688, + "logps/rejected": -393.08038330078125, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5396363735198975, + "rewards/margins": 4.20399808883667, + "rewards/rejected": -1.664361596107483, + "step": 7070 + }, + { + "epoch": 0.41, + "learning_rate": 6.645294380785193e-08, + "logits/chosen": -1.9001965522766113, + "logits/rejected": -1.9771524667739868, + "logps/chosen": -212.66087341308594, + "logps/rejected": -485.52032470703125, + "loss": 0.1262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4834060668945312, + "rewards/margins": 1.5362259149551392, + "rewards/rejected": -0.05281982570886612, + "step": 7071 + }, + { + "epoch": 0.41, + "learning_rate": 6.644404432110905e-08, + "logits/chosen": -2.075834035873413, + "logits/rejected": -2.0718071460723877, + "logps/chosen": -0.06072098761796951, + "logps/rejected": -90.5048599243164, + "loss": 0.591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004521100781857967, + "rewards/margins": 0.5383874177932739, + "rewards/rejected": -0.5429084897041321, + "step": 7072 + }, + { + "epoch": 0.41, + "learning_rate": 6.643514425019328e-08, + "logits/chosen": -1.9604774713516235, + "logits/rejected": -1.9615514278411865, + "logps/chosen": -1.839682698249817, + "logps/rejected": -156.48350524902344, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014921784168109298, + "rewards/margins": 4.090559005737305, + "rewards/rejected": -4.089066982269287, + "step": 7073 + }, + { + "epoch": 0.41, + "learning_rate": 6.642624359542079e-08, + "logits/chosen": -1.9219489097595215, + "logits/rejected": -1.9226582050323486, + "logps/chosen": -145.1165313720703, + "logps/rejected": -208.53134155273438, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8920624256134033, + "rewards/margins": 2.110546827316284, + "rewards/rejected": -0.21848450601100922, + "step": 7074 + }, + { + "epoch": 0.41, + "learning_rate": 6.641734235710778e-08, + "logits/chosen": -2.0111491680145264, + "logits/rejected": -2.059654474258423, + "logps/chosen": -307.3094787597656, + "logps/rejected": -361.8525390625, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8918213844299316, + "rewards/margins": 4.972186088562012, + "rewards/rejected": -2.080364942550659, + "step": 7075 + }, + { + "epoch": 0.41, + "learning_rate": 6.640844053557048e-08, + "logits/chosen": -1.8552675247192383, + "logits/rejected": -1.8042933940887451, + "logps/chosen": -137.19393920898438, + "logps/rejected": -263.6858825683594, + "loss": 0.1518, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6536422967910767, + "rewards/margins": 2.0862929821014404, + "rewards/rejected": -0.43265077471733093, + "step": 7076 + }, + { + "epoch": 0.41, + "learning_rate": 6.639953813112508e-08, + "logits/chosen": -1.9719408750534058, + "logits/rejected": -1.9635288715362549, + "logps/chosen": -1.3171253204345703, + "logps/rejected": -134.7574005126953, + "loss": 0.4499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00947729405015707, + "rewards/margins": 1.4682998657226562, + "rewards/rejected": -1.477777123451233, + "step": 7077 + }, + { + "epoch": 0.41, + "learning_rate": 6.639063514408789e-08, + "logits/chosen": -2.090035915374756, + "logits/rejected": -2.092602252960205, + "logps/chosen": -21.529985427856445, + "logps/rejected": -80.43248748779297, + "loss": 0.4215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2762453258037567, + "rewards/margins": 0.9083375930786133, + "rewards/rejected": -0.632092297077179, + "step": 7078 + }, + { + "epoch": 0.41, + "learning_rate": 6.638173157477515e-08, + "logits/chosen": -2.0975122451782227, + "logits/rejected": -2.089599609375, + "logps/chosen": -165.68157958984375, + "logps/rejected": -379.3414001464844, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.545428514480591, + "rewards/margins": 3.1420717239379883, + "rewards/rejected": -0.5966430902481079, + "step": 7079 + }, + { + "epoch": 0.41, + "learning_rate": 6.63728274235032e-08, + "logits/chosen": -1.9256967306137085, + "logits/rejected": -1.9060312509536743, + "logps/chosen": -75.99650573730469, + "logps/rejected": -287.06475830078125, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7472748160362244, + "rewards/margins": 1.1512451171875, + "rewards/rejected": -0.403970330953598, + "step": 7080 + }, + { + "epoch": 0.41, + "learning_rate": 6.63639226905883e-08, + "logits/chosen": -2.1024105548858643, + "logits/rejected": -2.1057395935058594, + "logps/chosen": -11.74966049194336, + "logps/rejected": -80.62352752685547, + "loss": 0.4643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2202337235212326, + "rewards/margins": 0.8892561197280884, + "rewards/rejected": -0.6690223813056946, + "step": 7081 + }, + { + "epoch": 0.41, + "learning_rate": 6.635501737634685e-08, + "logits/chosen": -1.9777605533599854, + "logits/rejected": -2.0203609466552734, + "logps/chosen": -287.52752685546875, + "logps/rejected": -494.958740234375, + "loss": 0.0371, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.365438938140869, + "rewards/margins": 3.6393189430236816, + "rewards/rejected": -1.2738800048828125, + "step": 7082 + }, + { + "epoch": 0.41, + "learning_rate": 6.634611148109516e-08, + "logits/chosen": -2.052048683166504, + "logits/rejected": -2.1166744232177734, + "logps/chosen": -299.008056640625, + "logps/rejected": -448.5074768066406, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.730712890625, + "rewards/margins": 3.9400298595428467, + "rewards/rejected": -3.2093169689178467, + "step": 7083 + }, + { + "epoch": 0.41, + "learning_rate": 6.633720500514964e-08, + "logits/chosen": -2.019383192062378, + "logits/rejected": -2.024653196334839, + "logps/chosen": -1.140676498413086, + "logps/rejected": -50.78040313720703, + "loss": 0.6, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030992239713668823, + "rewards/margins": 0.45497310161590576, + "rewards/rejected": -0.4859653413295746, + "step": 7084 + }, + { + "epoch": 0.41, + "learning_rate": 6.632829794882667e-08, + "logits/chosen": -1.9894888401031494, + "logits/rejected": -1.9645978212356567, + "logps/chosen": -138.57154846191406, + "logps/rejected": -296.82757568359375, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.905574083328247, + "rewards/margins": 4.009861946105957, + "rewards/rejected": -1.1042877435684204, + "step": 7085 + }, + { + "epoch": 0.41, + "learning_rate": 6.63193903124427e-08, + "logits/chosen": -2.1334738731384277, + "logits/rejected": -2.126291513442993, + "logps/chosen": -105.68296813964844, + "logps/rejected": -179.5509033203125, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5997062921524048, + "rewards/margins": 0.9545158743858337, + "rewards/rejected": 0.645190417766571, + "step": 7086 + }, + { + "epoch": 0.41, + "learning_rate": 6.631048209631418e-08, + "logits/chosen": -1.8555216789245605, + "logits/rejected": -1.8516143560409546, + "logps/chosen": -90.21723175048828, + "logps/rejected": -365.72210693359375, + "loss": 0.1786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.156520128250122, + "rewards/margins": 2.112767219543457, + "rewards/rejected": -0.9562469720840454, + "step": 7087 + }, + { + "epoch": 0.41, + "learning_rate": 6.630157330075752e-08, + "logits/chosen": -1.9678773880004883, + "logits/rejected": -1.9630825519561768, + "logps/chosen": -2.644690990447998, + "logps/rejected": -119.01365661621094, + "loss": 0.4329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05256519466638565, + "rewards/margins": 1.6700563430786133, + "rewards/rejected": -1.6174911260604858, + "step": 7088 + }, + { + "epoch": 0.41, + "learning_rate": 6.629266392608924e-08, + "logits/chosen": -1.9940767288208008, + "logits/rejected": -1.9930192232131958, + "logps/chosen": -72.41592407226562, + "logps/rejected": -325.0408935546875, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4847702085971832, + "rewards/margins": 4.000907897949219, + "rewards/rejected": -3.5161378383636475, + "step": 7089 + }, + { + "epoch": 0.41, + "learning_rate": 6.628375397262585e-08, + "logits/chosen": -1.8525645732879639, + "logits/rejected": -1.7842025756835938, + "logps/chosen": -321.6343688964844, + "logps/rejected": -594.964599609375, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9209930896759033, + "rewards/margins": 5.006253242492676, + "rewards/rejected": -3.0852601528167725, + "step": 7090 + }, + { + "epoch": 0.41, + "learning_rate": 6.627484344068388e-08, + "logits/chosen": -1.9925601482391357, + "logits/rejected": -2.0022287368774414, + "logps/chosen": -199.70272827148438, + "logps/rejected": -295.51416015625, + "loss": 0.0695, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4416961669921875, + "rewards/margins": 2.1113007068634033, + "rewards/rejected": 0.33039551973342896, + "step": 7091 + }, + { + "epoch": 0.41, + "learning_rate": 6.626593233057983e-08, + "logits/chosen": -1.9894102811813354, + "logits/rejected": -1.9141414165496826, + "logps/chosen": -208.905029296875, + "logps/rejected": -488.53717041015625, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.962109386920929, + "rewards/margins": 1.206048607826233, + "rewards/rejected": -0.24393920600414276, + "step": 7092 + }, + { + "epoch": 0.41, + "learning_rate": 6.625702064263031e-08, + "logits/chosen": -1.9937533140182495, + "logits/rejected": -1.9842002391815186, + "logps/chosen": -222.97410583496094, + "logps/rejected": -459.7106018066406, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8961044549942017, + "rewards/margins": 3.2344346046447754, + "rewards/rejected": -1.3383301496505737, + "step": 7093 + }, + { + "epoch": 0.41, + "learning_rate": 6.62481083771519e-08, + "logits/chosen": -2.0720953941345215, + "logits/rejected": -2.0676283836364746, + "logps/chosen": -20.223562240600586, + "logps/rejected": -118.14337158203125, + "loss": 0.4273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5360057950019836, + "rewards/margins": 0.7229869961738586, + "rewards/rejected": -0.186981201171875, + "step": 7094 + }, + { + "epoch": 0.41, + "learning_rate": 6.623919553446121e-08, + "logits/chosen": -1.977439045906067, + "logits/rejected": -1.9793177843093872, + "logps/chosen": -6.008059062878601e-05, + "logps/rejected": -159.43373107910156, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.510192858717346e-07, + "rewards/margins": 3.1748604774475098, + "rewards/rejected": -3.174861192703247, + "step": 7095 + }, + { + "epoch": 0.41, + "learning_rate": 6.623028211487484e-08, + "logits/chosen": -2.131619691848755, + "logits/rejected": -2.136861801147461, + "logps/chosen": -49.744667053222656, + "logps/rejected": -155.96035766601562, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7585697174072266, + "rewards/margins": 1.344455361366272, + "rewards/rejected": -0.5858856439590454, + "step": 7096 + }, + { + "epoch": 0.41, + "learning_rate": 6.622136811870949e-08, + "logits/chosen": -2.0486819744110107, + "logits/rejected": -2.0489683151245117, + "logps/chosen": -20.840530395507812, + "logps/rejected": -105.3291015625, + "loss": 0.4911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14453621208667755, + "rewards/margins": 0.8150027990341187, + "rewards/rejected": -0.6704666018486023, + "step": 7097 + }, + { + "epoch": 0.41, + "learning_rate": 6.621245354628178e-08, + "logits/chosen": -1.971701979637146, + "logits/rejected": -1.9769277572631836, + "logps/chosen": -19.326513290405273, + "logps/rejected": -210.21458435058594, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21325455605983734, + "rewards/margins": 3.2210533618927, + "rewards/rejected": -3.007798910140991, + "step": 7098 + }, + { + "epoch": 0.41, + "learning_rate": 6.620353839790842e-08, + "logits/chosen": -1.932926058769226, + "logits/rejected": -1.9236319065093994, + "logps/chosen": -203.19200134277344, + "logps/rejected": -219.51255798339844, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7624252438545227, + "rewards/margins": 1.0830078125, + "rewards/rejected": -0.3205825984477997, + "step": 7099 + }, + { + "epoch": 0.41, + "learning_rate": 6.619462267390609e-08, + "logits/chosen": -2.1878936290740967, + "logits/rejected": -2.182936906814575, + "logps/chosen": -0.00013052913709543645, + "logps/rejected": -149.08770751953125, + "loss": 0.4182, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.747381687295274e-07, + "rewards/margins": 1.8642768859863281, + "rewards/rejected": -1.8642761707305908, + "step": 7100 + }, + { + "epoch": 0.41, + "learning_rate": 6.618570637459159e-08, + "logits/chosen": -1.9564636945724487, + "logits/rejected": -1.9556453227996826, + "logps/chosen": -228.3610076904297, + "logps/rejected": -269.90350341796875, + "loss": 0.3643, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.8438217639923096, + "rewards/margins": -0.0015060901641845703, + "rewards/rejected": 2.845327854156494, + "step": 7101 + }, + { + "epoch": 0.41, + "learning_rate": 6.617678950028159e-08, + "logits/chosen": -2.0053977966308594, + "logits/rejected": -1.9499592781066895, + "logps/chosen": -276.302978515625, + "logps/rejected": -454.0469970703125, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.843304455280304, + "rewards/margins": 0.9950928092002869, + "rewards/rejected": -0.15178833901882172, + "step": 7102 + }, + { + "epoch": 0.41, + "learning_rate": 6.616787205129293e-08, + "logits/chosen": -1.7742047309875488, + "logits/rejected": -1.7503581047058105, + "logps/chosen": -128.16659545898438, + "logps/rejected": -278.1277770996094, + "loss": 0.447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5234527587890625, + "rewards/margins": 0.5763946771621704, + "rewards/rejected": -0.05294189602136612, + "step": 7103 + }, + { + "epoch": 0.41, + "learning_rate": 6.615895402794234e-08, + "logits/chosen": -1.7707849740982056, + "logits/rejected": -1.7754524946212769, + "logps/chosen": -12.016600608825684, + "logps/rejected": -122.80899047851562, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28230592608451843, + "rewards/margins": 1.8967362642288208, + "rewards/rejected": -1.61443030834198, + "step": 7104 + }, + { + "epoch": 0.41, + "learning_rate": 6.615003543054668e-08, + "logits/chosen": -2.021752119064331, + "logits/rejected": -2.025801181793213, + "logps/chosen": -173.1535186767578, + "logps/rejected": -205.24603271484375, + "loss": 0.511, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4858718812465668, + "rewards/margins": -0.16773530840873718, + "rewards/rejected": 0.653607189655304, + "step": 7105 + }, + { + "epoch": 0.41, + "learning_rate": 6.614111625942275e-08, + "logits/chosen": -1.8008822202682495, + "logits/rejected": -1.7947567701339722, + "logps/chosen": -0.2914741337299347, + "logps/rejected": -77.51392364501953, + "loss": 0.4195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005748446565121412, + "rewards/margins": 1.4066569805145264, + "rewards/rejected": -1.4124053716659546, + "step": 7106 + }, + { + "epoch": 0.41, + "learning_rate": 6.613219651488742e-08, + "logits/chosen": -2.0668156147003174, + "logits/rejected": -2.058986186981201, + "logps/chosen": -7.9553961753845215, + "logps/rejected": -107.6520767211914, + "loss": 0.5413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018088484182953835, + "rewards/margins": 0.6959853768348694, + "rewards/rejected": -0.6778969168663025, + "step": 7107 + }, + { + "epoch": 0.41, + "learning_rate": 6.612327619725756e-08, + "logits/chosen": -1.9633150100708008, + "logits/rejected": -1.9445433616638184, + "logps/chosen": -239.22738647460938, + "logps/rejected": -336.56121826171875, + "loss": 0.1877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5863662958145142, + "rewards/margins": 1.4286545515060425, + "rewards/rejected": 0.15771178901195526, + "step": 7108 + }, + { + "epoch": 0.41, + "learning_rate": 6.611435530685006e-08, + "logits/chosen": -1.8494300842285156, + "logits/rejected": -1.8293296098709106, + "logps/chosen": -146.78378295898438, + "logps/rejected": -190.19393920898438, + "loss": 0.5375, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1137069463729858, + "rewards/margins": -0.2192978858947754, + "rewards/rejected": 1.3330048322677612, + "step": 7109 + }, + { + "epoch": 0.41, + "learning_rate": 6.610543384398183e-08, + "logits/chosen": -1.850147008895874, + "logits/rejected": -1.8622777462005615, + "logps/chosen": -197.62228393554688, + "logps/rejected": -214.26722717285156, + "loss": 0.1138, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4834046363830566, + "rewards/margins": 1.7403687238693237, + "rewards/rejected": 0.7430359125137329, + "step": 7110 + }, + { + "epoch": 0.41, + "learning_rate": 6.609651180896983e-08, + "logits/chosen": -1.9513440132141113, + "logits/rejected": -1.9334229230880737, + "logps/chosen": -44.94767379760742, + "logps/rejected": -117.39832305908203, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.760105550289154, + "rewards/margins": 1.7373440265655518, + "rewards/rejected": -0.9772384762763977, + "step": 7111 + }, + { + "epoch": 0.41, + "learning_rate": 6.608758920213097e-08, + "logits/chosen": -2.0487568378448486, + "logits/rejected": -2.0416259765625, + "logps/chosen": -68.5621566772461, + "logps/rejected": -229.96160888671875, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2974502742290497, + "rewards/margins": 3.027510166168213, + "rewards/rejected": -2.730059862136841, + "step": 7112 + }, + { + "epoch": 0.41, + "learning_rate": 6.607866602378226e-08, + "logits/chosen": -2.128068447113037, + "logits/rejected": -2.132337808609009, + "logps/chosen": -4.103471755981445, + "logps/rejected": -336.13128662109375, + "loss": 0.3373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03646507486701012, + "rewards/margins": 4.608447551727295, + "rewards/rejected": -4.6449127197265625, + "step": 7113 + }, + { + "epoch": 0.41, + "learning_rate": 6.606974227424069e-08, + "logits/chosen": -1.9887034893035889, + "logits/rejected": -1.986646294593811, + "logps/chosen": -0.06718342751264572, + "logps/rejected": -134.88877868652344, + "loss": 0.5336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009603396989405155, + "rewards/margins": 0.7806696891784668, + "rewards/rejected": -0.7710663080215454, + "step": 7114 + }, + { + "epoch": 0.41, + "learning_rate": 6.606081795382326e-08, + "logits/chosen": -2.0431766510009766, + "logits/rejected": -2.037522315979004, + "logps/chosen": -35.918365478515625, + "logps/rejected": -267.98382568359375, + "loss": 0.5692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5358390808105469, + "rewards/margins": 2.120041608810425, + "rewards/rejected": -2.6558806896209717, + "step": 7115 + }, + { + "epoch": 0.41, + "learning_rate": 6.605189306284702e-08, + "logits/chosen": -1.8273615837097168, + "logits/rejected": -1.8116803169250488, + "logps/chosen": -230.6621551513672, + "logps/rejected": -465.55206298828125, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6079148054122925, + "rewards/margins": 3.296983480453491, + "rewards/rejected": -1.6890686750411987, + "step": 7116 + }, + { + "epoch": 0.41, + "learning_rate": 6.6042967601629e-08, + "logits/chosen": -2.0323996543884277, + "logits/rejected": -2.0373284816741943, + "logps/chosen": -14.710321426391602, + "logps/rejected": -60.271507263183594, + "loss": 0.8229, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5310907363891602, + "rewards/margins": -0.03011453151702881, + "rewards/rejected": -0.5009762048721313, + "step": 7117 + }, + { + "epoch": 0.41, + "learning_rate": 6.603404157048634e-08, + "logits/chosen": -1.8305171728134155, + "logits/rejected": -1.8261348009109497, + "logps/chosen": -198.65948486328125, + "logps/rejected": -431.7501220703125, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.580291748046875, + "rewards/margins": 4.019970893859863, + "rewards/rejected": -1.4396790266036987, + "step": 7118 + }, + { + "epoch": 0.41, + "learning_rate": 6.602511496973605e-08, + "logits/chosen": -2.0474014282226562, + "logits/rejected": -2.0233194828033447, + "logps/chosen": -163.56521606445312, + "logps/rejected": -295.62200927734375, + "loss": 0.1949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5744370222091675, + "rewards/margins": 0.9415757060050964, + "rewards/rejected": 0.632861316204071, + "step": 7119 + }, + { + "epoch": 0.41, + "learning_rate": 6.601618779969531e-08, + "logits/chosen": -2.0052030086517334, + "logits/rejected": -2.0080668926239014, + "logps/chosen": -0.00015234359307214618, + "logps/rejected": -157.8323974609375, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.566045441199094e-05, + "rewards/margins": 2.726588249206543, + "rewards/rejected": -2.7265625, + "step": 7120 + }, + { + "epoch": 0.41, + "learning_rate": 6.600726006068122e-08, + "logits/chosen": -2.15299916267395, + "logits/rejected": -2.144153356552124, + "logps/chosen": -27.45189094543457, + "logps/rejected": -217.93606567382812, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1769697219133377, + "rewards/margins": 2.4924559593200684, + "rewards/rejected": -2.315486192703247, + "step": 7121 + }, + { + "epoch": 0.41, + "learning_rate": 6.599833175301097e-08, + "logits/chosen": -2.004910469055176, + "logits/rejected": -2.0087060928344727, + "logps/chosen": -0.00024353532353416085, + "logps/rejected": -188.52867126464844, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.482442818698473e-05, + "rewards/margins": 3.5361075401306152, + "rewards/rejected": -3.536062717437744, + "step": 7122 + }, + { + "epoch": 0.41, + "learning_rate": 6.598940287700171e-08, + "logits/chosen": -1.8681403398513794, + "logits/rejected": -1.8386718034744263, + "logps/chosen": -246.26141357421875, + "logps/rejected": -363.3586730957031, + "loss": 0.4672, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1385926008224487, + "rewards/margins": -0.18843376636505127, + "rewards/rejected": 1.3270263671875, + "step": 7123 + }, + { + "epoch": 0.41, + "learning_rate": 6.598047343297065e-08, + "logits/chosen": -1.9812825918197632, + "logits/rejected": -1.9795820713043213, + "logps/chosen": -48.10565185546875, + "logps/rejected": -237.10853576660156, + "loss": 0.4217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19828414916992188, + "rewards/margins": 3.651094913482666, + "rewards/rejected": -3.849379062652588, + "step": 7124 + }, + { + "epoch": 0.41, + "learning_rate": 6.5971543421235e-08, + "logits/chosen": -1.9579964876174927, + "logits/rejected": -1.9567744731903076, + "logps/chosen": -37.23247528076172, + "logps/rejected": -232.73233032226562, + "loss": 0.5037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45792314410209656, + "rewards/margins": 3.43754506111145, + "rewards/rejected": -3.895468235015869, + "step": 7125 + }, + { + "epoch": 0.41, + "learning_rate": 6.596261284211202e-08, + "logits/chosen": -2.047645330429077, + "logits/rejected": -2.044445514678955, + "logps/chosen": -0.0021699543576687574, + "logps/rejected": -165.78489685058594, + "loss": 0.3764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011633346002781764, + "rewards/margins": 3.0634562969207764, + "rewards/rejected": -3.063572645187378, + "step": 7126 + }, + { + "epoch": 0.41, + "learning_rate": 6.595368169591892e-08, + "logits/chosen": -1.9600577354431152, + "logits/rejected": -1.9598103761672974, + "logps/chosen": -0.18441781401634216, + "logps/rejected": -261.017822265625, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018067030236124992, + "rewards/margins": 4.398473739624023, + "rewards/rejected": -4.416540622711182, + "step": 7127 + }, + { + "epoch": 0.41, + "learning_rate": 6.594474998297304e-08, + "logits/chosen": -1.9322855472564697, + "logits/rejected": -1.9277795553207397, + "logps/chosen": -53.602561950683594, + "logps/rejected": -227.63137817382812, + "loss": 0.2433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6359836459159851, + "rewards/margins": 1.7375445365905762, + "rewards/rejected": -1.1015609502792358, + "step": 7128 + }, + { + "epoch": 0.41, + "learning_rate": 6.593581770359162e-08, + "logits/chosen": -1.9082167148590088, + "logits/rejected": -1.8585302829742432, + "logps/chosen": -213.99017333984375, + "logps/rejected": -611.6842041015625, + "loss": 0.1073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9153091311454773, + "rewards/margins": 4.813972473144531, + "rewards/rejected": -3.898663282394409, + "step": 7129 + }, + { + "epoch": 0.41, + "learning_rate": 6.592688485809202e-08, + "logits/chosen": -1.9663139581680298, + "logits/rejected": -1.9472328424453735, + "logps/chosen": -86.02581024169922, + "logps/rejected": -302.6663513183594, + "loss": 0.1243, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.549780249595642, + "rewards/margins": 2.2992310523986816, + "rewards/rejected": -0.74945068359375, + "step": 7130 + }, + { + "epoch": 0.41, + "learning_rate": 6.591795144679157e-08, + "logits/chosen": -1.836925983428955, + "logits/rejected": -1.7785180807113647, + "logps/chosen": -273.88201904296875, + "logps/rejected": -545.917236328125, + "loss": 0.186, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.060314893722534, + "rewards/margins": 1.06219482421875, + "rewards/rejected": 0.998120129108429, + "step": 7131 + }, + { + "epoch": 0.42, + "learning_rate": 6.59090174700076e-08, + "logits/chosen": -1.9665818214416504, + "logits/rejected": -1.967318058013916, + "logps/chosen": -258.4209899902344, + "logps/rejected": -421.70587158203125, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5750763416290283, + "rewards/margins": 6.520925998687744, + "rewards/rejected": -3.945849657058716, + "step": 7132 + }, + { + "epoch": 0.42, + "learning_rate": 6.590008292805752e-08, + "logits/chosen": -2.1226935386657715, + "logits/rejected": -2.121539831161499, + "logps/chosen": -54.70953369140625, + "logps/rejected": -167.97293090820312, + "loss": 0.346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3758133053779602, + "rewards/margins": 1.5217132568359375, + "rewards/rejected": -1.145900011062622, + "step": 7133 + }, + { + "epoch": 0.42, + "learning_rate": 6.589114782125871e-08, + "logits/chosen": -1.846643090248108, + "logits/rejected": -1.8059051036834717, + "logps/chosen": -198.86911010742188, + "logps/rejected": -486.865966796875, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0504242181777954, + "rewards/margins": 2.4626190662384033, + "rewards/rejected": -1.412194848060608, + "step": 7134 + }, + { + "epoch": 0.42, + "learning_rate": 6.588221214992861e-08, + "logits/chosen": -1.7833119630813599, + "logits/rejected": -1.8029619455337524, + "logps/chosen": -106.69091796875, + "logps/rejected": -198.62156677246094, + "loss": 0.5578, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.148890733718872, + "rewards/margins": -0.45045924186706543, + "rewards/rejected": 1.5993499755859375, + "step": 7135 + }, + { + "epoch": 0.42, + "learning_rate": 6.587327591438464e-08, + "logits/chosen": -1.9122895002365112, + "logits/rejected": -1.9143640995025635, + "logps/chosen": -22.616729736328125, + "logps/rejected": -71.3524169921875, + "loss": 0.7207, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.06511630862951279, + "rewards/margins": -0.15856952965259552, + "rewards/rejected": 0.09345322102308273, + "step": 7136 + }, + { + "epoch": 0.42, + "learning_rate": 6.586433911494428e-08, + "logits/chosen": -1.9393035173416138, + "logits/rejected": -1.9511815309524536, + "logps/chosen": -240.57293701171875, + "logps/rejected": -337.3754577636719, + "loss": 0.483, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.217431664466858, + "rewards/margins": -0.3127533197402954, + "rewards/rejected": 1.5301849842071533, + "step": 7137 + }, + { + "epoch": 0.42, + "learning_rate": 6.585540175192498e-08, + "logits/chosen": -1.9482078552246094, + "logits/rejected": -1.9428452253341675, + "logps/chosen": -0.1633397787809372, + "logps/rejected": -130.44296264648438, + "loss": 0.4077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012130777351558208, + "rewards/margins": 1.9939758777618408, + "rewards/rejected": -1.9818451404571533, + "step": 7138 + }, + { + "epoch": 0.42, + "learning_rate": 6.584646382564425e-08, + "logits/chosen": -1.9522948265075684, + "logits/rejected": -1.9419792890548706, + "logps/chosen": -2.8729124096571468e-05, + "logps/rejected": -121.93365478515625, + "loss": 0.632, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079532113290043e-07, + "rewards/margins": 0.21846863627433777, + "rewards/rejected": -0.21846924722194672, + "step": 7139 + }, + { + "epoch": 0.42, + "learning_rate": 6.583752533641962e-08, + "logits/chosen": -1.6818654537200928, + "logits/rejected": -1.6877151727676392, + "logps/chosen": -20.881994247436523, + "logps/rejected": -158.7531280517578, + "loss": 0.586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005821609403938055, + "rewards/margins": 0.5065250396728516, + "rewards/rejected": -0.5007034540176392, + "step": 7140 + }, + { + "epoch": 0.42, + "learning_rate": 6.582858628456863e-08, + "logits/chosen": -1.8627488613128662, + "logits/rejected": -1.867646336555481, + "logps/chosen": -38.50117492675781, + "logps/rejected": -222.26412963867188, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5496814846992493, + "rewards/margins": 1.4979603290557861, + "rewards/rejected": -0.9482788443565369, + "step": 7141 + }, + { + "epoch": 0.42, + "learning_rate": 6.58196466704088e-08, + "logits/chosen": -2.0686190128326416, + "logits/rejected": -2.0684733390808105, + "logps/chosen": -78.10384368896484, + "logps/rejected": -260.443603515625, + "loss": 0.4647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3348281979560852, + "rewards/margins": 0.6060593128204346, + "rewards/rejected": -0.271231085062027, + "step": 7142 + }, + { + "epoch": 0.42, + "learning_rate": 6.581070649425777e-08, + "logits/chosen": -1.9160898923873901, + "logits/rejected": -1.931644320487976, + "logps/chosen": -177.6787109375, + "logps/rejected": -321.0667724609375, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.61175537109375, + "rewards/margins": 2.862347364425659, + "rewards/rejected": -0.25059205293655396, + "step": 7143 + }, + { + "epoch": 0.42, + "learning_rate": 6.580176575643308e-08, + "logits/chosen": -2.0275890827178955, + "logits/rejected": -2.018427848815918, + "logps/chosen": -1.407204508781433, + "logps/rejected": -126.39772033691406, + "loss": 0.4503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10575398057699203, + "rewards/margins": 1.2175941467285156, + "rewards/rejected": -1.1118401288986206, + "step": 7144 + }, + { + "epoch": 0.42, + "learning_rate": 6.579282445725239e-08, + "logits/chosen": -2.032233953475952, + "logits/rejected": -2.0287604331970215, + "logps/chosen": -162.9696807861328, + "logps/rejected": -221.34921264648438, + "loss": 0.2347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.894622802734375, + "rewards/margins": 0.8264541625976562, + "rewards/rejected": 1.0681686401367188, + "step": 7145 + }, + { + "epoch": 0.42, + "learning_rate": 6.578388259703331e-08, + "logits/chosen": -1.95914888381958, + "logits/rejected": -1.9351911544799805, + "logps/chosen": -186.27175903320312, + "logps/rejected": -302.5029296875, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.269418478012085, + "rewards/margins": 0.08418893814086914, + "rewards/rejected": 2.185229539871216, + "step": 7146 + }, + { + "epoch": 0.42, + "learning_rate": 6.577494017609353e-08, + "logits/chosen": -1.8792895078659058, + "logits/rejected": -1.8850027322769165, + "logps/chosen": -14.055377960205078, + "logps/rejected": -122.72685241699219, + "loss": 0.5641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03962364420294762, + "rewards/margins": 0.5898189544677734, + "rewards/rejected": -0.5501953363418579, + "step": 7147 + }, + { + "epoch": 0.42, + "learning_rate": 6.57659971947507e-08, + "logits/chosen": -1.9212303161621094, + "logits/rejected": -1.857762098312378, + "logps/chosen": -183.43606567382812, + "logps/rejected": -506.9895324707031, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2859160900115967, + "rewards/margins": 3.5480408668518066, + "rewards/rejected": -1.2621246576309204, + "step": 7148 + }, + { + "epoch": 0.42, + "learning_rate": 6.575705365332253e-08, + "logits/chosen": -2.0154149532318115, + "logits/rejected": -1.9913705587387085, + "logps/chosen": -177.56683349609375, + "logps/rejected": -285.0513000488281, + "loss": 0.203, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.111621141433716, + "rewards/margins": 0.8889862298965454, + "rewards/rejected": 1.2226349115371704, + "step": 7149 + }, + { + "epoch": 0.42, + "learning_rate": 6.574810955212675e-08, + "logits/chosen": -1.9245649576187134, + "logits/rejected": -1.9161683320999146, + "logps/chosen": -6.162993907928467, + "logps/rejected": -181.21609497070312, + "loss": 0.2659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.543572187423706, + "rewards/margins": 1.8684852123260498, + "rewards/rejected": -1.3249130249023438, + "step": 7150 + }, + { + "epoch": 0.42, + "learning_rate": 6.573916489148107e-08, + "logits/chosen": -1.9990988969802856, + "logits/rejected": -2.005350351333618, + "logps/chosen": -21.995153427124023, + "logps/rejected": -154.5858154296875, + "loss": 0.4695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6484491229057312, + "rewards/margins": 0.30042970180511475, + "rewards/rejected": 0.34801942110061646, + "step": 7151 + }, + { + "epoch": 0.42, + "learning_rate": 6.573021967170328e-08, + "logits/chosen": -1.939372181892395, + "logits/rejected": -1.9279357194900513, + "logps/chosen": -5.224797248840332, + "logps/rejected": -399.78021240234375, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32886648178100586, + "rewards/margins": 7.570669651031494, + "rewards/rejected": -7.241803169250488, + "step": 7152 + }, + { + "epoch": 0.42, + "learning_rate": 6.572127389311112e-08, + "logits/chosen": -2.041511297225952, + "logits/rejected": -2.044365167617798, + "logps/chosen": -25.30034828186035, + "logps/rejected": -170.4373321533203, + "loss": 0.7429, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08183727413415909, + "rewards/margins": -0.200840562582016, + "rewards/rejected": 0.1190032958984375, + "step": 7153 + }, + { + "epoch": 0.42, + "learning_rate": 6.571232755602242e-08, + "logits/chosen": -1.859926700592041, + "logits/rejected": -1.8373820781707764, + "logps/chosen": -189.43365478515625, + "logps/rejected": -240.0035400390625, + "loss": 0.1037, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5983245372772217, + "rewards/margins": 1.71405029296875, + "rewards/rejected": 0.8842743039131165, + "step": 7154 + }, + { + "epoch": 0.42, + "learning_rate": 6.5703380660755e-08, + "logits/chosen": -2.0775363445281982, + "logits/rejected": -2.0671091079711914, + "logps/chosen": -8.220664978027344, + "logps/rejected": -259.9161682128906, + "loss": 0.1891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8214157223701477, + "rewards/margins": 3.8919663429260254, + "rewards/rejected": -3.0705506801605225, + "step": 7155 + }, + { + "epoch": 0.42, + "learning_rate": 6.569443320762669e-08, + "logits/chosen": -2.1095731258392334, + "logits/rejected": -2.1028249263763428, + "logps/chosen": -23.868738174438477, + "logps/rejected": -171.44174194335938, + "loss": 0.3812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33749714493751526, + "rewards/margins": 1.4962313175201416, + "rewards/rejected": -1.1587342023849487, + "step": 7156 + }, + { + "epoch": 0.42, + "learning_rate": 6.568548519695532e-08, + "logits/chosen": -1.9382951259613037, + "logits/rejected": -1.9122289419174194, + "logps/chosen": -64.01612091064453, + "logps/rejected": -448.7022399902344, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19749145209789276, + "rewards/margins": 8.412851333618164, + "rewards/rejected": -8.215359687805176, + "step": 7157 + }, + { + "epoch": 0.42, + "learning_rate": 6.567653662905881e-08, + "logits/chosen": -2.0498485565185547, + "logits/rejected": -2.0375070571899414, + "logps/chosen": -203.39669799804688, + "logps/rejected": -363.4383239746094, + "loss": 0.1482, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2605011463165283, + "rewards/margins": 1.560205101966858, + "rewards/rejected": -0.299703985452652, + "step": 7158 + }, + { + "epoch": 0.42, + "learning_rate": 6.566758750425503e-08, + "logits/chosen": -1.9476851224899292, + "logits/rejected": -1.936826467514038, + "logps/chosen": -132.70993041992188, + "logps/rejected": -230.81057739257812, + "loss": 0.5887, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.720416247844696, + "rewards/margins": -0.596234142780304, + "rewards/rejected": 1.316650390625, + "step": 7159 + }, + { + "epoch": 0.42, + "learning_rate": 6.56586378228619e-08, + "logits/chosen": -1.976861834526062, + "logits/rejected": -1.951484203338623, + "logps/chosen": -4.389797687530518, + "logps/rejected": -271.6220703125, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04950284957885742, + "rewards/margins": 2.9405672550201416, + "rewards/rejected": -2.891064405441284, + "step": 7160 + }, + { + "epoch": 0.42, + "learning_rate": 6.564968758519735e-08, + "logits/chosen": -2.0345351696014404, + "logits/rejected": -2.0150156021118164, + "logps/chosen": -140.1689453125, + "logps/rejected": -202.58233642578125, + "loss": 0.214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1470626592636108, + "rewards/margins": 1.0299667119979858, + "rewards/rejected": 0.117095947265625, + "step": 7161 + }, + { + "epoch": 0.42, + "learning_rate": 6.564073679157937e-08, + "logits/chosen": -1.7297691106796265, + "logits/rejected": -1.7252674102783203, + "logps/chosen": -5.122422695159912, + "logps/rejected": -114.85009765625, + "loss": 0.5325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2826414406299591, + "rewards/margins": 1.1385210752487183, + "rewards/rejected": -1.421162486076355, + "step": 7162 + }, + { + "epoch": 0.42, + "learning_rate": 6.563178544232589e-08, + "logits/chosen": -2.0226898193359375, + "logits/rejected": -2.026394844055176, + "logps/chosen": -175.6546173095703, + "logps/rejected": -357.79156494140625, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7891892194747925, + "rewards/margins": 2.2607650756835938, + "rewards/rejected": -0.47157594561576843, + "step": 7163 + }, + { + "epoch": 0.42, + "learning_rate": 6.562283353775495e-08, + "logits/chosen": -1.9793018102645874, + "logits/rejected": -1.959649920463562, + "logps/chosen": -124.26119995117188, + "logps/rejected": -199.40594482421875, + "loss": 0.2355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.200537085533142, + "rewards/margins": 1.388067603111267, + "rewards/rejected": -0.187530517578125, + "step": 7164 + }, + { + "epoch": 0.42, + "learning_rate": 6.561388107818453e-08, + "logits/chosen": -1.9344462156295776, + "logits/rejected": -1.8801299333572388, + "logps/chosen": -214.8892822265625, + "logps/rejected": -412.6585693359375, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2556779384613037, + "rewards/margins": 3.1228013038635254, + "rewards/rejected": -0.8671234250068665, + "step": 7165 + }, + { + "epoch": 0.42, + "learning_rate": 6.560492806393268e-08, + "logits/chosen": -2.0907416343688965, + "logits/rejected": -2.1001577377319336, + "logps/chosen": -165.98529052734375, + "logps/rejected": -233.4810333251953, + "loss": 0.0815, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.478646993637085, + "rewards/margins": 2.1517274379730225, + "rewards/rejected": 0.3269195556640625, + "step": 7166 + }, + { + "epoch": 0.42, + "learning_rate": 6.559597449531746e-08, + "logits/chosen": -1.9258238077163696, + "logits/rejected": -1.9235023260116577, + "logps/chosen": -0.007774652913212776, + "logps/rejected": -129.763671875, + "loss": 0.4008, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.481003507971764e-05, + "rewards/margins": 1.9065217971801758, + "rewards/rejected": -1.906427025794983, + "step": 7167 + }, + { + "epoch": 0.42, + "learning_rate": 6.558702037265692e-08, + "logits/chosen": -2.0509612560272217, + "logits/rejected": -2.0382871627807617, + "logps/chosen": -15.508946418762207, + "logps/rejected": -403.42510986328125, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2888216972351074, + "rewards/margins": 6.422085762023926, + "rewards/rejected": -6.133264064788818, + "step": 7168 + }, + { + "epoch": 0.42, + "learning_rate": 6.557806569626919e-08, + "logits/chosen": -1.9615459442138672, + "logits/rejected": -1.9571231603622437, + "logps/chosen": -39.76978302001953, + "logps/rejected": -282.68701171875, + "loss": 0.2536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45770034193992615, + "rewards/margins": 2.5232489109039307, + "rewards/rejected": -2.0655486583709717, + "step": 7169 + }, + { + "epoch": 0.42, + "learning_rate": 6.556911046647235e-08, + "logits/chosen": -2.2531087398529053, + "logits/rejected": -2.245847463607788, + "logps/chosen": -36.4741096496582, + "logps/rejected": -237.307861328125, + "loss": 0.3096, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6815304160118103, + "rewards/margins": 1.3473644256591797, + "rewards/rejected": -0.6658340692520142, + "step": 7170 + }, + { + "epoch": 0.42, + "learning_rate": 6.556015468358456e-08, + "logits/chosen": -1.9957501888275146, + "logits/rejected": -1.9620094299316406, + "logps/chosen": -48.59019088745117, + "logps/rejected": -435.220458984375, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3582870662212372, + "rewards/margins": 6.436439514160156, + "rewards/rejected": -6.078152656555176, + "step": 7171 + }, + { + "epoch": 0.42, + "learning_rate": 6.555119834792395e-08, + "logits/chosen": -2.1166067123413086, + "logits/rejected": -2.115204334259033, + "logps/chosen": -37.15948486328125, + "logps/rejected": -83.06796264648438, + "loss": 0.7657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5813871622085571, + "rewards/margins": 0.4173951745033264, + "rewards/rejected": -0.9987823367118835, + "step": 7172 + }, + { + "epoch": 0.42, + "learning_rate": 6.554224145980872e-08, + "logits/chosen": -2.027639150619507, + "logits/rejected": -2.034808397293091, + "logps/chosen": -236.40286254882812, + "logps/rejected": -308.28118896484375, + "loss": 0.1093, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3757996559143066, + "rewards/margins": 1.85595703125, + "rewards/rejected": 0.5198425650596619, + "step": 7173 + }, + { + "epoch": 0.42, + "learning_rate": 6.553328401955704e-08, + "logits/chosen": -1.6865097284317017, + "logits/rejected": -1.6909914016723633, + "logps/chosen": -0.034242305904626846, + "logps/rejected": -111.87252807617188, + "loss": 0.3933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027549825608730316, + "rewards/margins": 2.306225538253784, + "rewards/rejected": -2.3065011501312256, + "step": 7174 + }, + { + "epoch": 0.42, + "learning_rate": 6.552432602748714e-08, + "logits/chosen": -1.7555701732635498, + "logits/rejected": -1.7114111185073853, + "logps/chosen": -166.0098876953125, + "logps/rejected": -460.6416015625, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7829468250274658, + "rewards/margins": 2.7800416946411133, + "rewards/rejected": -0.9970947504043579, + "step": 7175 + }, + { + "epoch": 0.42, + "learning_rate": 6.551536748391722e-08, + "logits/chosen": -1.8979698419570923, + "logits/rejected": -1.883725881576538, + "logps/chosen": -228.14288330078125, + "logps/rejected": -307.03802490234375, + "loss": 0.2157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.821820080280304, + "rewards/margins": 1.4294464588165283, + "rewards/rejected": -0.6076263785362244, + "step": 7176 + }, + { + "epoch": 0.42, + "learning_rate": 6.550640838916558e-08, + "logits/chosen": -1.9329335689544678, + "logits/rejected": -1.8777015209197998, + "logps/chosen": -161.76193237304688, + "logps/rejected": -241.85165405273438, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0584686994552612, + "rewards/margins": 0.5897766351699829, + "rewards/rejected": 0.46869203448295593, + "step": 7177 + }, + { + "epoch": 0.42, + "learning_rate": 6.549744874355045e-08, + "logits/chosen": -1.9052090644836426, + "logits/rejected": -1.9017486572265625, + "logps/chosen": -19.285722732543945, + "logps/rejected": -163.97787475585938, + "loss": 0.3931, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05891075357794762, + "rewards/margins": 1.976792573928833, + "rewards/rejected": -1.9178818464279175, + "step": 7178 + }, + { + "epoch": 0.42, + "learning_rate": 6.548848854739014e-08, + "logits/chosen": -1.9676704406738281, + "logits/rejected": -1.9680628776550293, + "logps/chosen": -284.445556640625, + "logps/rejected": -360.2744140625, + "loss": 0.0523, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7143921852111816, + "rewards/margins": 2.6071197986602783, + "rewards/rejected": 0.10727234184741974, + "step": 7179 + }, + { + "epoch": 0.42, + "learning_rate": 6.547952780100295e-08, + "logits/chosen": -1.8760697841644287, + "logits/rejected": -1.876328706741333, + "logps/chosen": -12.436970710754395, + "logps/rejected": -45.35994338989258, + "loss": 0.5516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4040965139865875, + "rewards/margins": 0.24709539115428925, + "rewards/rejected": 0.15700112283229828, + "step": 7180 + }, + { + "epoch": 0.42, + "learning_rate": 6.547056650470722e-08, + "logits/chosen": -2.058434247970581, + "logits/rejected": -2.038304567337036, + "logps/chosen": -32.99532699584961, + "logps/rejected": -145.0395965576172, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4647888243198395, + "rewards/margins": 2.1160964965820312, + "rewards/rejected": -1.6513077020645142, + "step": 7181 + }, + { + "epoch": 0.42, + "learning_rate": 6.546160465882127e-08, + "logits/chosen": -2.080230474472046, + "logits/rejected": -2.0820653438568115, + "logps/chosen": -45.69867706298828, + "logps/rejected": -211.53863525390625, + "loss": 0.5039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2283405363559723, + "rewards/margins": 0.5242176055908203, + "rewards/rejected": -0.295877069234848, + "step": 7182 + }, + { + "epoch": 0.42, + "learning_rate": 6.545264226366352e-08, + "logits/chosen": -1.881874680519104, + "logits/rejected": -1.8868227005004883, + "logps/chosen": -132.81689453125, + "logps/rejected": -364.7427978515625, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1902923583984375, + "rewards/margins": 2.633828639984131, + "rewards/rejected": -1.443536400794983, + "step": 7183 + }, + { + "epoch": 0.42, + "learning_rate": 6.544367931955233e-08, + "logits/chosen": -1.826284408569336, + "logits/rejected": -1.8179563283920288, + "logps/chosen": -303.26470947265625, + "logps/rejected": -478.5552062988281, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7838196754455566, + "rewards/margins": 4.844812393188477, + "rewards/rejected": -2.060992479324341, + "step": 7184 + }, + { + "epoch": 0.42, + "learning_rate": 6.54347158268061e-08, + "logits/chosen": -2.099595546722412, + "logits/rejected": -2.105597496032715, + "logps/chosen": -78.15130615234375, + "logps/rejected": -209.895751953125, + "loss": 0.2447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9633552432060242, + "rewards/margins": 1.4460608959197998, + "rewards/rejected": -0.482705682516098, + "step": 7185 + }, + { + "epoch": 0.42, + "learning_rate": 6.542575178574326e-08, + "logits/chosen": -2.0459165573120117, + "logits/rejected": -2.0532846450805664, + "logps/chosen": -74.79641723632812, + "logps/rejected": -160.20753479003906, + "loss": 0.4913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1204017624258995, + "rewards/margins": 1.192670464515686, + "rewards/rejected": -1.3130722045898438, + "step": 7186 + }, + { + "epoch": 0.42, + "learning_rate": 6.541678719668227e-08, + "logits/chosen": -2.0360095500946045, + "logits/rejected": -2.0423336029052734, + "logps/chosen": -7.675876617431641, + "logps/rejected": -238.5857391357422, + "loss": 0.4024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03297147899866104, + "rewards/margins": 0.996077299118042, + "rewards/rejected": -0.9631057977676392, + "step": 7187 + }, + { + "epoch": 0.42, + "learning_rate": 6.540782205994156e-08, + "logits/chosen": -1.8960533142089844, + "logits/rejected": -1.8824454545974731, + "logps/chosen": -99.45359802246094, + "logps/rejected": -342.4298095703125, + "loss": 0.2901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9420463442802429, + "rewards/margins": 0.662310004234314, + "rewards/rejected": 0.27973634004592896, + "step": 7188 + }, + { + "epoch": 0.42, + "learning_rate": 6.539885637583966e-08, + "logits/chosen": -2.0711395740509033, + "logits/rejected": -2.0743842124938965, + "logps/chosen": -3.5692214965820312, + "logps/rejected": -204.69479370117188, + "loss": 0.3841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026477623730897903, + "rewards/margins": 2.5314815044403076, + "rewards/rejected": -2.5579590797424316, + "step": 7189 + }, + { + "epoch": 0.42, + "learning_rate": 6.538989014469507e-08, + "logits/chosen": -1.7640217542648315, + "logits/rejected": -1.7375527620315552, + "logps/chosen": -284.3118591308594, + "logps/rejected": -516.0394287109375, + "loss": 0.0632, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9517853260040283, + "rewards/margins": 2.145376682281494, + "rewards/rejected": 0.806408703327179, + "step": 7190 + }, + { + "epoch": 0.42, + "learning_rate": 6.538092336682629e-08, + "logits/chosen": -1.846333384513855, + "logits/rejected": -1.844802737236023, + "logps/chosen": -5.096051216125488, + "logps/rejected": -68.5153579711914, + "loss": 0.5838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06936030834913254, + "rewards/margins": 0.3991401493549347, + "rewards/rejected": -0.32977983355522156, + "step": 7191 + }, + { + "epoch": 0.42, + "learning_rate": 6.537195604255188e-08, + "logits/chosen": -1.727372407913208, + "logits/rejected": -1.705460786819458, + "logps/chosen": -246.9438018798828, + "logps/rejected": -418.78912353515625, + "loss": 0.0975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7491165399551392, + "rewards/margins": 2.842423915863037, + "rewards/rejected": -2.0933074951171875, + "step": 7192 + }, + { + "epoch": 0.42, + "learning_rate": 6.536298817219039e-08, + "logits/chosen": -1.869748592376709, + "logits/rejected": -1.8551708459854126, + "logps/chosen": -4.1665472984313965, + "logps/rejected": -354.28753662109375, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0033097267150878906, + "rewards/margins": 2.734941244125366, + "rewards/rejected": -2.7316315174102783, + "step": 7193 + }, + { + "epoch": 0.42, + "learning_rate": 6.535401975606042e-08, + "logits/chosen": -2.094907283782959, + "logits/rejected": -2.0949578285217285, + "logps/chosen": -334.1773681640625, + "logps/rejected": -442.7911071777344, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3233673572540283, + "rewards/margins": 6.036297798156738, + "rewards/rejected": -3.71293044090271, + "step": 7194 + }, + { + "epoch": 0.42, + "learning_rate": 6.534505079448056e-08, + "logits/chosen": -1.8663192987442017, + "logits/rejected": -1.8648988008499146, + "logps/chosen": -15.879746437072754, + "logps/rejected": -85.52432250976562, + "loss": 0.5908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1370081901550293, + "rewards/margins": 0.2081698477268219, + "rewards/rejected": -0.071161650121212, + "step": 7195 + }, + { + "epoch": 0.42, + "learning_rate": 6.533608128776944e-08, + "logits/chosen": -1.8451359272003174, + "logits/rejected": -1.8457094430923462, + "logps/chosen": -22.862224578857422, + "logps/rejected": -196.1829376220703, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1484518051147461, + "rewards/margins": 1.708022117614746, + "rewards/rejected": -1.5595703125, + "step": 7196 + }, + { + "epoch": 0.42, + "learning_rate": 6.532711123624567e-08, + "logits/chosen": -1.767808198928833, + "logits/rejected": -1.7625277042388916, + "logps/chosen": -45.84847640991211, + "logps/rejected": -167.95526123046875, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17344971001148224, + "rewards/margins": 1.3755112886428833, + "rewards/rejected": -1.2020615339279175, + "step": 7197 + }, + { + "epoch": 0.42, + "learning_rate": 6.531814064022798e-08, + "logits/chosen": -2.1038200855255127, + "logits/rejected": -2.1068949699401855, + "logps/chosen": -1.0832388401031494, + "logps/rejected": -140.15969848632812, + "loss": 0.4044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17440025508403778, + "rewards/margins": 1.3130476474761963, + "rewards/rejected": -1.138647437095642, + "step": 7198 + }, + { + "epoch": 0.42, + "learning_rate": 6.530916950003498e-08, + "logits/chosen": -2.0261929035186768, + "logits/rejected": -2.023695468902588, + "logps/chosen": -82.5631103515625, + "logps/rejected": -274.5261535644531, + "loss": 0.2092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46923524141311646, + "rewards/margins": 4.2507476806640625, + "rewards/rejected": -3.781512498855591, + "step": 7199 + }, + { + "epoch": 0.42, + "learning_rate": 6.53001978159854e-08, + "logits/chosen": -1.9711579084396362, + "logits/rejected": -1.950323224067688, + "logps/chosen": -256.7022705078125, + "logps/rejected": -485.2921142578125, + "loss": 0.0476, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7678070068359375, + "rewards/margins": 3.8453521728515625, + "rewards/rejected": -2.077545166015625, + "step": 7200 + }, + { + "epoch": 0.42, + "learning_rate": 6.529122558839794e-08, + "logits/chosen": -1.8301496505737305, + "logits/rejected": -1.8020694255828857, + "logps/chosen": -389.29376220703125, + "logps/rejected": -458.94354248046875, + "loss": 0.1999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5510742664337158, + "rewards/margins": 1.1591095924377441, + "rewards/rejected": 0.39196473360061646, + "step": 7201 + }, + { + "epoch": 0.42, + "learning_rate": 6.528225281759138e-08, + "logits/chosen": -1.9675908088684082, + "logits/rejected": -1.9255146980285645, + "logps/chosen": -177.7100830078125, + "logps/rejected": -345.6221923828125, + "loss": 0.0309, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.263314962387085, + "rewards/margins": 3.830105781555176, + "rewards/rejected": -1.5667908191680908, + "step": 7202 + }, + { + "epoch": 0.42, + "learning_rate": 6.527327950388441e-08, + "logits/chosen": -1.9267346858978271, + "logits/rejected": -1.926156997680664, + "logps/chosen": -0.0007722225855104625, + "logps/rejected": -24.98061180114746, + "loss": 0.6979, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.707717718905769e-05, + "rewards/margins": -0.06036343798041344, + "rewards/rejected": 0.0603063590824604, + "step": 7203 + }, + { + "epoch": 0.42, + "learning_rate": 6.526430564759588e-08, + "logits/chosen": -1.9222887754440308, + "logits/rejected": -1.898168921470642, + "logps/chosen": -182.4187774658203, + "logps/rejected": -326.8700256347656, + "loss": 0.1113, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2640914916992188, + "rewards/margins": 1.5094252824783325, + "rewards/rejected": 1.7546662092208862, + "step": 7204 + }, + { + "epoch": 0.42, + "learning_rate": 6.525533124904452e-08, + "logits/chosen": -2.012510299682617, + "logits/rejected": -2.0133187770843506, + "logps/chosen": -50.741085052490234, + "logps/rejected": -211.9789581298828, + "loss": 0.2328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1619789600372314, + "rewards/margins": 1.3716301918029785, + "rewards/rejected": -0.2096511870622635, + "step": 7205 + }, + { + "epoch": 0.42, + "learning_rate": 6.524635630854918e-08, + "logits/chosen": -2.1031010150909424, + "logits/rejected": -2.097503900527954, + "logps/chosen": -15.37509822845459, + "logps/rejected": -165.8052978515625, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0500003807246685, + "rewards/margins": 4.163983345031738, + "rewards/rejected": -4.113983154296875, + "step": 7206 + }, + { + "epoch": 0.42, + "learning_rate": 6.523738082642868e-08, + "logits/chosen": -1.9401609897613525, + "logits/rejected": -1.9324829578399658, + "logps/chosen": -5.090171180199832e-05, + "logps/rejected": -153.80490112304688, + "loss": 0.3857, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4107582652941346e-07, + "rewards/margins": 2.1460201740264893, + "rewards/rejected": -2.1460206508636475, + "step": 7207 + }, + { + "epoch": 0.42, + "learning_rate": 6.522840480300189e-08, + "logits/chosen": -2.105240821838379, + "logits/rejected": -2.1075339317321777, + "logps/chosen": -66.74771881103516, + "logps/rejected": -172.40023803710938, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38726577162742615, + "rewards/margins": 2.537940263748169, + "rewards/rejected": -2.15067458152771, + "step": 7208 + }, + { + "epoch": 0.42, + "learning_rate": 6.521942823858766e-08, + "logits/chosen": -1.9762331247329712, + "logits/rejected": -1.9429261684417725, + "logps/chosen": -160.56964111328125, + "logps/rejected": -479.2032470703125, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7390717267990112, + "rewards/margins": 3.596829414367676, + "rewards/rejected": -1.857757568359375, + "step": 7209 + }, + { + "epoch": 0.42, + "learning_rate": 6.52104511335049e-08, + "logits/chosen": -2.068237543106079, + "logits/rejected": -2.0626800060272217, + "logps/chosen": -193.52137756347656, + "logps/rejected": -399.939697265625, + "loss": 0.2857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39693909883499146, + "rewards/margins": 2.118112325668335, + "rewards/rejected": -1.7211731672286987, + "step": 7210 + }, + { + "epoch": 0.42, + "learning_rate": 6.52014734880725e-08, + "logits/chosen": -1.9877244234085083, + "logits/rejected": -1.9379419088363647, + "logps/chosen": -330.2554016113281, + "logps/rejected": -467.91912841796875, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1899476051330566, + "rewards/margins": 2.9114747047424316, + "rewards/rejected": 0.278472900390625, + "step": 7211 + }, + { + "epoch": 0.42, + "learning_rate": 6.519249530260942e-08, + "logits/chosen": -2.1657087802886963, + "logits/rejected": -2.1468114852905273, + "logps/chosen": -30.054126739501953, + "logps/rejected": -276.71392822265625, + "loss": 0.3223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11153297871351242, + "rewards/margins": 5.449193477630615, + "rewards/rejected": -5.337660312652588, + "step": 7212 + }, + { + "epoch": 0.42, + "learning_rate": 6.518351657743459e-08, + "logits/chosen": -2.2170701026916504, + "logits/rejected": -2.202157735824585, + "logps/chosen": -165.2718505859375, + "logps/rejected": -275.89324951171875, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3707122802734375, + "rewards/margins": 2.134448289871216, + "rewards/rejected": -0.7637359499931335, + "step": 7213 + }, + { + "epoch": 0.42, + "learning_rate": 6.517453731286697e-08, + "logits/chosen": -1.999586582183838, + "logits/rejected": -2.0010569095611572, + "logps/chosen": -25.22186279296875, + "logps/rejected": -129.0390167236328, + "loss": 0.4893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027006912976503372, + "rewards/margins": 0.8520000576972961, + "rewards/rejected": -0.8790069818496704, + "step": 7214 + }, + { + "epoch": 0.42, + "learning_rate": 6.516555750922556e-08, + "logits/chosen": -1.9444330930709839, + "logits/rejected": -1.9508236646652222, + "logps/chosen": -36.66516876220703, + "logps/rejected": -184.02230834960938, + "loss": 0.6774, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19933739304542542, + "rewards/margins": -0.08986091613769531, + "rewards/rejected": 0.2891983091831207, + "step": 7215 + }, + { + "epoch": 0.42, + "learning_rate": 6.515657716682937e-08, + "logits/chosen": -1.9122951030731201, + "logits/rejected": -1.9068769216537476, + "logps/chosen": -305.9227294921875, + "logps/rejected": -430.4566955566406, + "loss": 0.1367, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0964601039886475, + "rewards/margins": 1.6144136190414429, + "rewards/rejected": 0.482046514749527, + "step": 7216 + }, + { + "epoch": 0.42, + "learning_rate": 6.514759628599742e-08, + "logits/chosen": -2.081181049346924, + "logits/rejected": -2.073282480239868, + "logps/chosen": -19.075042724609375, + "logps/rejected": -152.09747314453125, + "loss": 0.6, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.40741997957229614, + "rewards/margins": -0.04059028625488281, + "rewards/rejected": 0.44801026582717896, + "step": 7217 + }, + { + "epoch": 0.42, + "learning_rate": 6.513861486704875e-08, + "logits/chosen": -1.9105029106140137, + "logits/rejected": -1.903579831123352, + "logps/chosen": -27.885765075683594, + "logps/rejected": -217.87583923339844, + "loss": 0.257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4165458679199219, + "rewards/margins": 3.6794686317443848, + "rewards/rejected": -3.262922763824463, + "step": 7218 + }, + { + "epoch": 0.42, + "learning_rate": 6.512963291030245e-08, + "logits/chosen": -1.7135096788406372, + "logits/rejected": -1.7074834108352661, + "logps/chosen": -133.15533447265625, + "logps/rejected": -212.2154998779297, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.820660412311554, + "rewards/margins": 1.5214431285858154, + "rewards/rejected": -0.7007827758789062, + "step": 7219 + }, + { + "epoch": 0.42, + "learning_rate": 6.512065041607754e-08, + "logits/chosen": -1.9536106586456299, + "logits/rejected": -1.9510715007781982, + "logps/chosen": -16.827505111694336, + "logps/rejected": -110.76576232910156, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4580903947353363, + "rewards/margins": 1.4987276792526245, + "rewards/rejected": -1.0406372547149658, + "step": 7220 + }, + { + "epoch": 0.42, + "learning_rate": 6.51116673846932e-08, + "logits/chosen": -1.7809348106384277, + "logits/rejected": -1.7726802825927734, + "logps/chosen": -51.67219161987305, + "logps/rejected": -166.08352661132812, + "loss": 0.4929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08135376125574112, + "rewards/margins": 0.563311755657196, + "rewards/rejected": -0.4819580018520355, + "step": 7221 + }, + { + "epoch": 0.42, + "learning_rate": 6.51026838164685e-08, + "logits/chosen": -2.0417938232421875, + "logits/rejected": -2.0343515872955322, + "logps/chosen": -26.632383346557617, + "logps/rejected": -163.25083923339844, + "loss": 0.3895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16989727318286896, + "rewards/margins": 1.6831008195877075, + "rewards/rejected": -1.513203501701355, + "step": 7222 + }, + { + "epoch": 0.42, + "learning_rate": 6.50936997117226e-08, + "logits/chosen": -2.041239023208618, + "logits/rejected": -2.010673999786377, + "logps/chosen": -95.0264663696289, + "logps/rejected": -236.81886291503906, + "loss": 0.1242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6621109247207642, + "rewards/margins": 2.3809266090393066, + "rewards/rejected": -0.7188156247138977, + "step": 7223 + }, + { + "epoch": 0.42, + "learning_rate": 6.508471507077466e-08, + "logits/chosen": -2.06251859664917, + "logits/rejected": -2.0595738887786865, + "logps/chosen": -15.515558242797852, + "logps/rejected": -101.64678192138672, + "loss": 0.5251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23930206894874573, + "rewards/margins": 0.6416343450546265, + "rewards/rejected": -0.4023323059082031, + "step": 7224 + }, + { + "epoch": 0.42, + "learning_rate": 6.507572989394386e-08, + "logits/chosen": -1.7899962663650513, + "logits/rejected": -1.7708464860916138, + "logps/chosen": -93.42107391357422, + "logps/rejected": -338.907958984375, + "loss": 0.1289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8437957763671875, + "rewards/margins": 5.856072902679443, + "rewards/rejected": -5.012277126312256, + "step": 7225 + }, + { + "epoch": 0.42, + "learning_rate": 6.506674418154937e-08, + "logits/chosen": -1.8873564004898071, + "logits/rejected": -1.8983567953109741, + "logps/chosen": -210.36932373046875, + "logps/rejected": -408.86212158203125, + "loss": 0.0621, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0996062755584717, + "rewards/margins": 3.1255645751953125, + "rewards/rejected": -1.0259582996368408, + "step": 7226 + }, + { + "epoch": 0.42, + "learning_rate": 6.505775793391043e-08, + "logits/chosen": -1.9451441764831543, + "logits/rejected": -1.9496511220932007, + "logps/chosen": -76.24639892578125, + "logps/rejected": -245.23292541503906, + "loss": 0.5686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41201096773147583, + "rewards/margins": 0.6252586245536804, + "rewards/rejected": -1.0372695922851562, + "step": 7227 + }, + { + "epoch": 0.42, + "learning_rate": 6.504877115134629e-08, + "logits/chosen": -1.9010342359542847, + "logits/rejected": -1.8979265689849854, + "logps/chosen": -177.158935546875, + "logps/rejected": -376.4094543457031, + "loss": 0.0735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.843664526939392, + "rewards/margins": 2.24652099609375, + "rewards/rejected": -0.4028564393520355, + "step": 7228 + }, + { + "epoch": 0.42, + "learning_rate": 6.503978383417619e-08, + "logits/chosen": -2.06282114982605, + "logits/rejected": -2.033033609390259, + "logps/chosen": -35.623207092285156, + "logps/rejected": -226.15707397460938, + "loss": 0.1159, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2574265003204346, + "rewards/margins": 3.9452226161956787, + "rewards/rejected": -2.687796115875244, + "step": 7229 + }, + { + "epoch": 0.42, + "learning_rate": 6.503079598271937e-08, + "logits/chosen": -1.9664102792739868, + "logits/rejected": -1.9650743007659912, + "logps/chosen": -0.0002895296784117818, + "logps/rejected": -188.597900390625, + "loss": 0.3425, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.624269877240295e-06, + "rewards/margins": 4.604490756988525, + "rewards/rejected": -4.604495525360107, + "step": 7230 + }, + { + "epoch": 0.42, + "learning_rate": 6.502180759729517e-08, + "logits/chosen": -1.94879150390625, + "logits/rejected": -1.9450974464416504, + "logps/chosen": -40.082496643066406, + "logps/rejected": -256.59771728515625, + "loss": 0.2162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4978214204311371, + "rewards/margins": 2.4426746368408203, + "rewards/rejected": -1.9448531866073608, + "step": 7231 + }, + { + "epoch": 0.42, + "learning_rate": 6.50128186782229e-08, + "logits/chosen": -2.0086562633514404, + "logits/rejected": -2.0007436275482178, + "logps/chosen": -198.69989013671875, + "logps/rejected": -331.9530029296875, + "loss": 0.0704, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9055191278457642, + "rewards/margins": 2.7054429054260254, + "rewards/rejected": -0.7999237179756165, + "step": 7232 + }, + { + "epoch": 0.42, + "learning_rate": 6.500382922582185e-08, + "logits/chosen": -1.9123616218566895, + "logits/rejected": -1.8690499067306519, + "logps/chosen": -287.09637451171875, + "logps/rejected": -486.30859375, + "loss": 0.0916, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3361053466796875, + "rewards/margins": 2.291455030441284, + "rewards/rejected": -0.9553497433662415, + "step": 7233 + }, + { + "epoch": 0.42, + "learning_rate": 6.499483924041141e-08, + "logits/chosen": -2.218254327774048, + "logits/rejected": -2.20857310295105, + "logps/chosen": -7.676980749238282e-05, + "logps/rejected": -183.3291015625, + "loss": 0.376, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.549686544422002e-06, + "rewards/margins": 2.762568712234497, + "rewards/rejected": -2.7625701427459717, + "step": 7234 + }, + { + "epoch": 0.42, + "learning_rate": 6.498584872231092e-08, + "logits/chosen": -1.918493390083313, + "logits/rejected": -1.8896377086639404, + "logps/chosen": -212.99993896484375, + "logps/rejected": -666.0299072265625, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.179464817047119, + "rewards/margins": 9.242300033569336, + "rewards/rejected": -7.062835693359375, + "step": 7235 + }, + { + "epoch": 0.42, + "learning_rate": 6.497685767183978e-08, + "logits/chosen": -1.8877068758010864, + "logits/rejected": -1.9093043804168701, + "logps/chosen": -208.82420349121094, + "logps/rejected": -346.080322265625, + "loss": 0.2086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9042877554893494, + "rewards/margins": 1.8483428955078125, + "rewards/rejected": -0.9440551996231079, + "step": 7236 + }, + { + "epoch": 0.42, + "learning_rate": 6.496786608931741e-08, + "logits/chosen": -2.088108777999878, + "logits/rejected": -2.082707405090332, + "logps/chosen": -38.5639762878418, + "logps/rejected": -193.94204711914062, + "loss": 0.2319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5268363952636719, + "rewards/margins": 3.0320611000061035, + "rewards/rejected": -2.5052247047424316, + "step": 7237 + }, + { + "epoch": 0.42, + "learning_rate": 6.495887397506322e-08, + "logits/chosen": -2.029270648956299, + "logits/rejected": -2.0306129455566406, + "logps/chosen": -98.19525146484375, + "logps/rejected": -260.59161376953125, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6193344593048096, + "rewards/margins": 0.9482315182685852, + "rewards/rejected": 0.6711029410362244, + "step": 7238 + }, + { + "epoch": 0.42, + "learning_rate": 6.494988132939662e-08, + "logits/chosen": -1.9422955513000488, + "logits/rejected": -1.9395500421524048, + "logps/chosen": -2.5025675296783447, + "logps/rejected": -54.279537200927734, + "loss": 0.6386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07175975292921066, + "rewards/margins": 0.34410595893859863, + "rewards/rejected": -0.4158657193183899, + "step": 7239 + }, + { + "epoch": 0.42, + "learning_rate": 6.494088815263715e-08, + "logits/chosen": -1.651463508605957, + "logits/rejected": -1.6396205425262451, + "logps/chosen": -285.9848327636719, + "logps/rejected": -405.2292785644531, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7046997547149658, + "rewards/margins": 0.40877997875213623, + "rewards/rejected": 1.2959197759628296, + "step": 7240 + }, + { + "epoch": 0.42, + "learning_rate": 6.493189444510423e-08, + "logits/chosen": -2.1013669967651367, + "logits/rejected": -2.0972084999084473, + "logps/chosen": -28.814613342285156, + "logps/rejected": -181.0276641845703, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20635394752025604, + "rewards/margins": 2.935215950012207, + "rewards/rejected": -2.7288620471954346, + "step": 7241 + }, + { + "epoch": 0.42, + "learning_rate": 6.49229002071174e-08, + "logits/chosen": -2.034019708633423, + "logits/rejected": -2.033552408218384, + "logps/chosen": -8.063870429992676, + "logps/rejected": -159.59158325195312, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20122328400611877, + "rewards/margins": 2.0435023307800293, + "rewards/rejected": -1.842279076576233, + "step": 7242 + }, + { + "epoch": 0.42, + "learning_rate": 6.491390543899613e-08, + "logits/chosen": -1.9106718301773071, + "logits/rejected": -1.9657068252563477, + "logps/chosen": -216.86192321777344, + "logps/rejected": -594.7525024414062, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0747361183166504, + "rewards/margins": 7.274553298950195, + "rewards/rejected": -5.199817180633545, + "step": 7243 + }, + { + "epoch": 0.42, + "learning_rate": 6.490491014106001e-08, + "logits/chosen": -1.9252091646194458, + "logits/rejected": -1.8543801307678223, + "logps/chosen": -176.09092712402344, + "logps/rejected": -331.8381042480469, + "loss": 0.127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.077009677886963, + "rewards/margins": 1.7511032819747925, + "rewards/rejected": 0.325906366109848, + "step": 7244 + }, + { + "epoch": 0.42, + "learning_rate": 6.489591431362855e-08, + "logits/chosen": -1.9331594705581665, + "logits/rejected": -1.9428538084030151, + "logps/chosen": -260.5120849609375, + "logps/rejected": -555.4102172851562, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4185608625411987, + "rewards/margins": 5.188086032867432, + "rewards/rejected": -3.7695252895355225, + "step": 7245 + }, + { + "epoch": 0.42, + "learning_rate": 6.488691795702137e-08, + "logits/chosen": -1.934423804283142, + "logits/rejected": -1.927721381187439, + "logps/chosen": -39.136680603027344, + "logps/rejected": -127.13009643554688, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3765106201171875, + "rewards/margins": 1.765679955482483, + "rewards/rejected": -1.3891693353652954, + "step": 7246 + }, + { + "epoch": 0.42, + "learning_rate": 6.487792107155804e-08, + "logits/chosen": -2.155707597732544, + "logits/rejected": -2.151358127593994, + "logps/chosen": -30.24768829345703, + "logps/rejected": -79.20269775390625, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3987869322299957, + "rewards/margins": 1.3639534711837769, + "rewards/rejected": -0.9651665091514587, + "step": 7247 + }, + { + "epoch": 0.42, + "learning_rate": 6.486892365755818e-08, + "logits/chosen": -1.978668212890625, + "logits/rejected": -1.968021273612976, + "logps/chosen": -7.083505153656006, + "logps/rejected": -220.862060546875, + "loss": 0.4185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1659063845872879, + "rewards/margins": 2.4439661502838135, + "rewards/rejected": -2.609872579574585, + "step": 7248 + }, + { + "epoch": 0.42, + "learning_rate": 6.485992571534142e-08, + "logits/chosen": -1.9793565273284912, + "logits/rejected": -1.977966547012329, + "logps/chosen": -15.723161697387695, + "logps/rejected": -90.29845428466797, + "loss": 0.6666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004650116025004536, + "rewards/margins": 0.2151821255683899, + "rewards/rejected": -0.2156471312046051, + "step": 7249 + }, + { + "epoch": 0.42, + "learning_rate": 6.485092724522741e-08, + "logits/chosen": -1.8868016004562378, + "logits/rejected": -1.8951016664505005, + "logps/chosen": -228.81930541992188, + "logps/rejected": -421.20111083984375, + "loss": 0.0722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8762969970703125, + "rewards/margins": 2.576651096343994, + "rewards/rejected": -0.7003540396690369, + "step": 7250 + }, + { + "epoch": 0.42, + "learning_rate": 6.48419282475358e-08, + "logits/chosen": -1.901273488998413, + "logits/rejected": -1.904473066329956, + "logps/chosen": -0.00024412530183326453, + "logps/rejected": -234.71975708007812, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3838699689804344e-06, + "rewards/margins": 5.39951229095459, + "rewards/rejected": -5.399514675140381, + "step": 7251 + }, + { + "epoch": 0.42, + "learning_rate": 6.483292872258631e-08, + "logits/chosen": -2.005098819732666, + "logits/rejected": -2.0000698566436768, + "logps/chosen": -29.056215286254883, + "logps/rejected": -181.73104858398438, + "loss": 0.4131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5094431042671204, + "rewards/margins": 0.7965524792671204, + "rewards/rejected": -0.287109375, + "step": 7252 + }, + { + "epoch": 0.42, + "learning_rate": 6.482392867069866e-08, + "logits/chosen": -2.0600457191467285, + "logits/rejected": -2.0440731048583984, + "logps/chosen": -207.77928161621094, + "logps/rejected": -335.9324645996094, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.086979627609253, + "rewards/margins": 0.3163924217224121, + "rewards/rejected": 1.7705872058868408, + "step": 7253 + }, + { + "epoch": 0.42, + "learning_rate": 6.481492809219253e-08, + "logits/chosen": -1.874759554862976, + "logits/rejected": -1.9161624908447266, + "logps/chosen": -196.27957153320312, + "logps/rejected": -399.2254638671875, + "loss": 0.1263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5377167463302612, + "rewards/margins": 1.591088891029358, + "rewards/rejected": -0.05337219312787056, + "step": 7254 + }, + { + "epoch": 0.42, + "learning_rate": 6.480592698738767e-08, + "logits/chosen": -1.989848256111145, + "logits/rejected": -1.9957283735275269, + "logps/chosen": -2.9843950271606445, + "logps/rejected": -73.04466247558594, + "loss": 0.4651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03887362405657768, + "rewards/margins": 1.1177631616592407, + "rewards/rejected": -1.0788894891738892, + "step": 7255 + }, + { + "epoch": 0.42, + "learning_rate": 6.479692535660389e-08, + "logits/chosen": -1.9073251485824585, + "logits/rejected": -1.9083575010299683, + "logps/chosen": -24.612491607666016, + "logps/rejected": -243.28640747070312, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0783502608537674, + "rewards/margins": 2.5742456912994385, + "rewards/rejected": -2.4958953857421875, + "step": 7256 + }, + { + "epoch": 0.42, + "learning_rate": 6.478792320016094e-08, + "logits/chosen": -1.9780118465423584, + "logits/rejected": -1.985450267791748, + "logps/chosen": -202.30715942382812, + "logps/rejected": -407.1263122558594, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1892030239105225, + "rewards/margins": 3.721322774887085, + "rewards/rejected": -1.5321197509765625, + "step": 7257 + }, + { + "epoch": 0.42, + "learning_rate": 6.477892051837859e-08, + "logits/chosen": -2.095001220703125, + "logits/rejected": -2.0935041904449463, + "logps/chosen": -5.811976909637451, + "logps/rejected": -44.65288162231445, + "loss": 0.5937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15229205787181854, + "rewards/margins": 0.06829509139060974, + "rewards/rejected": 0.0839969664812088, + "step": 7258 + }, + { + "epoch": 0.42, + "learning_rate": 6.476991731157672e-08, + "logits/chosen": -1.8578392267227173, + "logits/rejected": -1.8747230768203735, + "logps/chosen": -241.23028564453125, + "logps/rejected": -251.61753845214844, + "loss": 0.2413, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.586187720298767, + "rewards/margins": 0.7117324471473694, + "rewards/rejected": 0.8744552731513977, + "step": 7259 + }, + { + "epoch": 0.42, + "learning_rate": 6.476091358007514e-08, + "logits/chosen": -1.8416310548782349, + "logits/rejected": -1.8439010381698608, + "logps/chosen": -64.94208526611328, + "logps/rejected": -348.5577392578125, + "loss": 0.186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6221718192100525, + "rewards/margins": 5.83759880065918, + "rewards/rejected": -5.215426921844482, + "step": 7260 + }, + { + "epoch": 0.42, + "learning_rate": 6.47519093241937e-08, + "logits/chosen": -1.9514163732528687, + "logits/rejected": -1.9297648668289185, + "logps/chosen": -209.42491149902344, + "logps/rejected": -330.0671691894531, + "loss": 0.3878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3280304670333862, + "rewards/margins": 0.09064340591430664, + "rewards/rejected": 1.2373870611190796, + "step": 7261 + }, + { + "epoch": 0.42, + "learning_rate": 6.474290454425228e-08, + "logits/chosen": -1.880981683731079, + "logits/rejected": -1.8731859922409058, + "logps/chosen": -44.450416564941406, + "logps/rejected": -187.03514099121094, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3961647152900696, + "rewards/margins": 1.4905221462249756, + "rewards/rejected": -1.0943573713302612, + "step": 7262 + }, + { + "epoch": 0.42, + "learning_rate": 6.473389924057078e-08, + "logits/chosen": -1.9476205110549927, + "logits/rejected": -1.8649911880493164, + "logps/chosen": -285.30438232421875, + "logps/rejected": -695.4267578125, + "loss": 0.0764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.895544409751892, + "rewards/margins": 2.950347900390625, + "rewards/rejected": -1.054803490638733, + "step": 7263 + }, + { + "epoch": 0.42, + "learning_rate": 6.472489341346912e-08, + "logits/chosen": -2.0180208683013916, + "logits/rejected": -2.018156051635742, + "logps/chosen": -147.7566375732422, + "logps/rejected": -247.67459106445312, + "loss": 0.1631, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7855408191680908, + "rewards/margins": 1.6907868385314941, + "rewards/rejected": 0.09475403279066086, + "step": 7264 + }, + { + "epoch": 0.42, + "learning_rate": 6.47158870632672e-08, + "logits/chosen": -2.2168288230895996, + "logits/rejected": -2.2128548622131348, + "logps/chosen": -20.77699851989746, + "logps/rejected": -128.90403747558594, + "loss": 0.8202, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.20834007859230042, + "rewards/margins": -0.4648464322090149, + "rewards/rejected": 0.2565063536167145, + "step": 7265 + }, + { + "epoch": 0.42, + "learning_rate": 6.470688019028499e-08, + "logits/chosen": -1.7803798913955688, + "logits/rejected": -1.769068956375122, + "logps/chosen": -60.668006896972656, + "logps/rejected": -144.57272338867188, + "loss": 0.4695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11786041408777237, + "rewards/margins": 1.010380506515503, + "rewards/rejected": -0.8925201296806335, + "step": 7266 + }, + { + "epoch": 0.42, + "learning_rate": 6.469787279484247e-08, + "logits/chosen": -1.912811040878296, + "logits/rejected": -1.9088441133499146, + "logps/chosen": -159.8788604736328, + "logps/rejected": -229.12612915039062, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1997528076171875, + "rewards/margins": 0.05733644962310791, + "rewards/rejected": 1.1424163579940796, + "step": 7267 + }, + { + "epoch": 0.42, + "learning_rate": 6.46888648772596e-08, + "logits/chosen": -1.762399673461914, + "logits/rejected": -1.7429150342941284, + "logps/chosen": -195.38449096679688, + "logps/rejected": -389.21478271484375, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3311126232147217, + "rewards/margins": 2.6525909900665283, + "rewards/rejected": -0.3214782774448395, + "step": 7268 + }, + { + "epoch": 0.42, + "learning_rate": 6.467985643785641e-08, + "logits/chosen": -1.943147897720337, + "logits/rejected": -2.026928186416626, + "logps/chosen": -184.25091552734375, + "logps/rejected": -331.5268859863281, + "loss": 0.0573, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5791809558868408, + "rewards/margins": 3.725033760070801, + "rewards/rejected": -2.14585280418396, + "step": 7269 + }, + { + "epoch": 0.42, + "learning_rate": 6.46708474769529e-08, + "logits/chosen": -1.8120791912078857, + "logits/rejected": -1.8065087795257568, + "logps/chosen": -0.01094058807939291, + "logps/rejected": -253.66741943359375, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028301961719989777, + "rewards/margins": 3.736417531967163, + "rewards/rejected": -3.7367005348205566, + "step": 7270 + }, + { + "epoch": 0.42, + "learning_rate": 6.466183799486913e-08, + "logits/chosen": -2.11478590965271, + "logits/rejected": -2.113856792449951, + "logps/chosen": -15.460702896118164, + "logps/rejected": -58.971736907958984, + "loss": 0.6373, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.465829461812973, + "rewards/margins": -0.18454095721244812, + "rewards/rejected": 0.6503704190254211, + "step": 7271 + }, + { + "epoch": 0.42, + "learning_rate": 6.465282799192514e-08, + "logits/chosen": -1.8720088005065918, + "logits/rejected": -1.8627076148986816, + "logps/chosen": -176.16632080078125, + "logps/rejected": -313.274169921875, + "loss": 0.2137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0562851428985596, + "rewards/margins": 1.0096176862716675, + "rewards/rejected": 0.04666748270392418, + "step": 7272 + }, + { + "epoch": 0.42, + "learning_rate": 6.464381746844104e-08, + "logits/chosen": -2.031710624694824, + "logits/rejected": -2.048765182495117, + "logps/chosen": -212.99656677246094, + "logps/rejected": -323.96563720703125, + "loss": 0.0954, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.415120005607605, + "rewards/margins": 2.126631259918213, + "rewards/rejected": -0.7115112543106079, + "step": 7273 + }, + { + "epoch": 0.42, + "learning_rate": 6.463480642473692e-08, + "logits/chosen": -1.841012716293335, + "logits/rejected": -1.9262406826019287, + "logps/chosen": -361.71417236328125, + "logps/rejected": -413.68170166015625, + "loss": 0.0506, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.653680443763733, + "rewards/margins": 3.225576877593994, + "rewards/rejected": -1.5718964338302612, + "step": 7274 + }, + { + "epoch": 0.42, + "learning_rate": 6.462579486113288e-08, + "logits/chosen": -2.05273699760437, + "logits/rejected": -2.0475966930389404, + "logps/chosen": -29.067346572875977, + "logps/rejected": -216.6446533203125, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1961805373430252, + "rewards/margins": 2.241001844406128, + "rewards/rejected": -2.044821262359619, + "step": 7275 + }, + { + "epoch": 0.42, + "learning_rate": 6.461678277794905e-08, + "logits/chosen": -1.943654179573059, + "logits/rejected": -1.9908596277236938, + "logps/chosen": -232.2325439453125, + "logps/rejected": -299.59930419921875, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.741955518722534, + "rewards/margins": 3.6533875465393066, + "rewards/rejected": -0.9114319086074829, + "step": 7276 + }, + { + "epoch": 0.42, + "learning_rate": 6.460777017550563e-08, + "logits/chosen": -1.960971713066101, + "logits/rejected": -1.9634881019592285, + "logps/chosen": -18.64740753173828, + "logps/rejected": -211.97412109375, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2646711468696594, + "rewards/margins": 1.8709180355072021, + "rewards/rejected": -1.6062469482421875, + "step": 7277 + }, + { + "epoch": 0.42, + "learning_rate": 6.459875705412273e-08, + "logits/chosen": -2.217146873474121, + "logits/rejected": -2.127943277359009, + "logps/chosen": -207.11465454101562, + "logps/rejected": -497.3942565917969, + "loss": 0.0799, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7264922857284546, + "rewards/margins": 4.280117988586426, + "rewards/rejected": -2.5536255836486816, + "step": 7278 + }, + { + "epoch": 0.42, + "learning_rate": 6.458974341412058e-08, + "logits/chosen": -1.8328791856765747, + "logits/rejected": -1.828133225440979, + "logps/chosen": -66.61924743652344, + "logps/rejected": -347.1524353027344, + "loss": 0.3915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2871261537075043, + "rewards/margins": 6.019187927246094, + "rewards/rejected": -6.306313991546631, + "step": 7279 + }, + { + "epoch": 0.42, + "learning_rate": 6.458072925581938e-08, + "logits/chosen": -1.6977829933166504, + "logits/rejected": -1.6616487503051758, + "logps/chosen": -206.64788818359375, + "logps/rejected": -484.548828125, + "loss": 0.1385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8679932355880737, + "rewards/margins": 1.7002075910568237, + "rewards/rejected": 0.16778564453125, + "step": 7280 + }, + { + "epoch": 0.42, + "learning_rate": 6.457171457953935e-08, + "logits/chosen": -2.1300251483917236, + "logits/rejected": -2.134154796600342, + "logps/chosen": -11.058680534362793, + "logps/rejected": -152.35931396484375, + "loss": 0.6891, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6659154295921326, + "rewards/margins": 0.653187096118927, + "rewards/rejected": -1.3191025257110596, + "step": 7281 + }, + { + "epoch": 0.42, + "learning_rate": 6.456269938560075e-08, + "logits/chosen": -1.924645185470581, + "logits/rejected": -1.923911690711975, + "logps/chosen": -38.38941192626953, + "logps/rejected": -232.15696716308594, + "loss": 0.2812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3709060847759247, + "rewards/margins": 2.9212708473205566, + "rewards/rejected": -2.5503647327423096, + "step": 7282 + }, + { + "epoch": 0.42, + "learning_rate": 6.455368367432383e-08, + "logits/chosen": -1.6520270109176636, + "logits/rejected": -1.646178126335144, + "logps/chosen": -107.56230163574219, + "logps/rejected": -244.4886932373047, + "loss": 0.3969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1182113885879517, + "rewards/margins": 0.1457061767578125, + "rewards/rejected": 0.9725052118301392, + "step": 7283 + }, + { + "epoch": 0.42, + "learning_rate": 6.454466744602887e-08, + "logits/chosen": -2.0307586193084717, + "logits/rejected": -2.0265815258026123, + "logps/chosen": -75.13099670410156, + "logps/rejected": -173.18992614746094, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6397278308868408, + "rewards/margins": 0.6175049543380737, + "rewards/rejected": 1.022222876548767, + "step": 7284 + }, + { + "epoch": 0.42, + "learning_rate": 6.453565070103617e-08, + "logits/chosen": -1.922423243522644, + "logits/rejected": -1.9250117540359497, + "logps/chosen": -4.021641731262207, + "logps/rejected": -115.14149475097656, + "loss": 0.3891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07455692440271378, + "rewards/margins": 1.7776468992233276, + "rewards/rejected": -1.703089952468872, + "step": 7285 + }, + { + "epoch": 0.42, + "learning_rate": 6.452663343966608e-08, + "logits/chosen": -1.9251147508621216, + "logits/rejected": -1.9888733625411987, + "logps/chosen": -223.12603759765625, + "logps/rejected": -369.44927978515625, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0632431507110596, + "rewards/margins": 2.526087999343872, + "rewards/rejected": -1.4628448486328125, + "step": 7286 + }, + { + "epoch": 0.42, + "learning_rate": 6.45176156622389e-08, + "logits/chosen": -1.9949545860290527, + "logits/rejected": -1.9982186555862427, + "logps/chosen": -8.881952285766602, + "logps/rejected": -232.56161499023438, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02311267890036106, + "rewards/margins": 4.900874614715576, + "rewards/rejected": -4.8777618408203125, + "step": 7287 + }, + { + "epoch": 0.42, + "learning_rate": 6.450859736907499e-08, + "logits/chosen": -2.038344144821167, + "logits/rejected": -2.0372512340545654, + "logps/chosen": -35.74553298950195, + "logps/rejected": -160.7222900390625, + "loss": 0.5376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17051926255226135, + "rewards/margins": 0.5476692318916321, + "rewards/rejected": -0.3771499693393707, + "step": 7288 + }, + { + "epoch": 0.42, + "learning_rate": 6.449957856049474e-08, + "logits/chosen": -1.9998152256011963, + "logits/rejected": -2.038625478744507, + "logps/chosen": -182.4570770263672, + "logps/rejected": -532.0648193359375, + "loss": 0.0453, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5258651971817017, + "rewards/margins": 3.621565341949463, + "rewards/rejected": -2.0957000255584717, + "step": 7289 + }, + { + "epoch": 0.42, + "learning_rate": 6.449055923681855e-08, + "logits/chosen": -1.9724693298339844, + "logits/rejected": -1.9730855226516724, + "logps/chosen": -36.967185974121094, + "logps/rejected": -199.39385986328125, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7268368005752563, + "rewards/margins": 0.32444268465042114, + "rewards/rejected": 0.4023941159248352, + "step": 7290 + }, + { + "epoch": 0.42, + "learning_rate": 6.448153939836678e-08, + "logits/chosen": -1.950722336769104, + "logits/rejected": -1.9312798976898193, + "logps/chosen": -152.90451049804688, + "logps/rejected": -308.4765930175781, + "loss": 0.2646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4156356751918793, + "rewards/margins": 1.5750534534454346, + "rewards/rejected": -1.159417748451233, + "step": 7291 + }, + { + "epoch": 0.42, + "learning_rate": 6.447251904545991e-08, + "logits/chosen": -2.003706455230713, + "logits/rejected": -2.0092697143554688, + "logps/chosen": -0.0007939980714581907, + "logps/rejected": -93.29893493652344, + "loss": 0.4659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.754912045726087e-05, + "rewards/margins": 1.3060821294784546, + "rewards/rejected": -1.3060646057128906, + "step": 7292 + }, + { + "epoch": 0.42, + "learning_rate": 6.446349817841838e-08, + "logits/chosen": -1.8636735677719116, + "logits/rejected": -1.8617439270019531, + "logps/chosen": -3.885444164276123, + "logps/rejected": -196.06605529785156, + "loss": 0.3908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06505272537469864, + "rewards/margins": 2.8625078201293945, + "rewards/rejected": -2.927560567855835, + "step": 7293 + }, + { + "epoch": 0.42, + "learning_rate": 6.445447679756263e-08, + "logits/chosen": -1.979325771331787, + "logits/rejected": -1.9769035577774048, + "logps/chosen": -20.6978702545166, + "logps/rejected": -143.90737915039062, + "loss": 0.3558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23032799363136292, + "rewards/margins": 2.009244203567505, + "rewards/rejected": -1.7789162397384644, + "step": 7294 + }, + { + "epoch": 0.42, + "learning_rate": 6.444545490321316e-08, + "logits/chosen": -2.025728225708008, + "logits/rejected": -2.0321767330169678, + "logps/chosen": -26.427093505859375, + "logps/rejected": -181.6932373046875, + "loss": 0.3649, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13073311746120453, + "rewards/margins": 1.7624149322509766, + "rewards/rejected": -1.6316817998886108, + "step": 7295 + }, + { + "epoch": 0.42, + "learning_rate": 6.44364324956905e-08, + "logits/chosen": -1.8955233097076416, + "logits/rejected": -1.8981257677078247, + "logps/chosen": -13.958076477050781, + "logps/rejected": -190.6619873046875, + "loss": 0.3501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09275245666503906, + "rewards/margins": 2.830402135848999, + "rewards/rejected": -2.73764967918396, + "step": 7296 + }, + { + "epoch": 0.42, + "learning_rate": 6.44274095753151e-08, + "logits/chosen": -1.8818405866622925, + "logits/rejected": -1.8655996322631836, + "logps/chosen": -127.62391662597656, + "logps/rejected": -212.656982421875, + "loss": 0.2675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9977768063545227, + "rewards/margins": 0.7165100574493408, + "rewards/rejected": 0.2812667787075043, + "step": 7297 + }, + { + "epoch": 0.42, + "learning_rate": 6.441838614240757e-08, + "logits/chosen": -1.983152985572815, + "logits/rejected": -1.9745875597000122, + "logps/chosen": -92.2691650390625, + "logps/rejected": -208.88233947753906, + "loss": 0.3217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5395790338516235, + "rewards/margins": 1.3273322582244873, + "rewards/rejected": -0.7877532839775085, + "step": 7298 + }, + { + "epoch": 0.42, + "learning_rate": 6.440936219728843e-08, + "logits/chosen": -1.9591951370239258, + "logits/rejected": -1.9619215726852417, + "logps/chosen": -17.273099899291992, + "logps/rejected": -118.71095275878906, + "loss": 0.2221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0137313604354858, + "rewards/margins": 1.7825348377227783, + "rewards/rejected": -0.7688034176826477, + "step": 7299 + }, + { + "epoch": 0.42, + "learning_rate": 6.440033774027826e-08, + "logits/chosen": -1.7308268547058105, + "logits/rejected": -1.6365618705749512, + "logps/chosen": -331.3236083984375, + "logps/rejected": -460.2708740234375, + "loss": 0.1024, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.967047095298767, + "rewards/margins": 2.162884473800659, + "rewards/rejected": -0.19583740830421448, + "step": 7300 + }, + { + "epoch": 0.42, + "learning_rate": 6.439131277169764e-08, + "logits/chosen": -1.8719881772994995, + "logits/rejected": -1.8449757099151611, + "logps/chosen": -177.02146911621094, + "logps/rejected": -334.7259521484375, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4319260120391846, + "rewards/margins": 3.181779623031616, + "rewards/rejected": -0.7498535513877869, + "step": 7301 + }, + { + "epoch": 0.42, + "learning_rate": 6.438228729186721e-08, + "logits/chosen": -1.9294393062591553, + "logits/rejected": -1.9080756902694702, + "logps/chosen": -94.13212585449219, + "logps/rejected": -238.48426818847656, + "loss": 0.1542, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1218719482421875, + "rewards/margins": 2.068974256515503, + "rewards/rejected": -0.9471023678779602, + "step": 7302 + }, + { + "epoch": 0.42, + "learning_rate": 6.43732613011076e-08, + "logits/chosen": -1.9844293594360352, + "logits/rejected": -2.097395896911621, + "logps/chosen": -244.1141357421875, + "logps/rejected": -286.21527099609375, + "loss": 0.0687, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.506384253501892, + "rewards/margins": 2.7329742908477783, + "rewards/rejected": -1.2265900373458862, + "step": 7303 + }, + { + "epoch": 0.43, + "learning_rate": 6.43642347997394e-08, + "logits/chosen": -2.006687879562378, + "logits/rejected": -2.0113298892974854, + "logps/chosen": -224.05587768554688, + "logps/rejected": -269.4183044433594, + "loss": 0.3806, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2536667585372925, + "rewards/margins": 0.349934458732605, + "rewards/rejected": 0.9037322998046875, + "step": 7304 + }, + { + "epoch": 0.43, + "learning_rate": 6.435520778808335e-08, + "logits/chosen": -1.9413609504699707, + "logits/rejected": -1.9368724822998047, + "logps/chosen": -24.658552169799805, + "logps/rejected": -154.64413452148438, + "loss": 0.5969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7098806500434875, + "rewards/margins": 1.9180598258972168, + "rewards/rejected": -2.6279404163360596, + "step": 7305 + }, + { + "epoch": 0.43, + "learning_rate": 6.434618026646009e-08, + "logits/chosen": -2.0466835498809814, + "logits/rejected": -2.0326602458953857, + "logps/chosen": -71.1946029663086, + "logps/rejected": -305.77191162109375, + "loss": 0.1574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7826766967773438, + "rewards/margins": 4.31346321105957, + "rewards/rejected": -3.5307862758636475, + "step": 7306 + }, + { + "epoch": 0.43, + "learning_rate": 6.433715223519034e-08, + "logits/chosen": -2.0643932819366455, + "logits/rejected": -2.054678201675415, + "logps/chosen": -172.53152465820312, + "logps/rejected": -269.5896301269531, + "loss": 0.4306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30084991455078125, + "rewards/margins": 0.2918960452079773, + "rewards/rejected": 0.008953857235610485, + "step": 7307 + }, + { + "epoch": 0.43, + "learning_rate": 6.43281236945948e-08, + "logits/chosen": -1.7916580438613892, + "logits/rejected": -1.7802740335464478, + "logps/chosen": -155.14300537109375, + "logps/rejected": -581.8973388671875, + "loss": 0.1999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5328445434570312, + "rewards/margins": 8.189069747924805, + "rewards/rejected": -7.656225681304932, + "step": 7308 + }, + { + "epoch": 0.43, + "learning_rate": 6.431909464499423e-08, + "logits/chosen": -1.9847644567489624, + "logits/rejected": -1.987558364868164, + "logps/chosen": -64.21574401855469, + "logps/rejected": -110.10549926757812, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7001709342002869, + "rewards/margins": 0.8770927786827087, + "rewards/rejected": -0.17692184448242188, + "step": 7309 + }, + { + "epoch": 0.43, + "learning_rate": 6.431006508670937e-08, + "logits/chosen": -1.951609492301941, + "logits/rejected": -1.955108404159546, + "logps/chosen": -6.352302551269531, + "logps/rejected": -97.16961669921875, + "loss": 0.4452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11098051071166992, + "rewards/margins": 1.1684573888778687, + "rewards/rejected": -1.0574768781661987, + "step": 7310 + }, + { + "epoch": 0.43, + "learning_rate": 6.4301035020061e-08, + "logits/chosen": -1.7103275060653687, + "logits/rejected": -1.7082639932632446, + "logps/chosen": -0.20926521718502045, + "logps/rejected": -239.04678344726562, + "loss": 0.3516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018785955384373665, + "rewards/margins": 3.930152177810669, + "rewards/rejected": -3.9113662242889404, + "step": 7311 + }, + { + "epoch": 0.43, + "learning_rate": 6.42920044453699e-08, + "logits/chosen": -2.112414598464966, + "logits/rejected": -2.108780860900879, + "logps/chosen": -31.299488067626953, + "logps/rejected": -110.92607116699219, + "loss": 0.379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30176734924316406, + "rewards/margins": 1.266238808631897, + "rewards/rejected": -0.9644714593887329, + "step": 7312 + }, + { + "epoch": 0.43, + "learning_rate": 6.42829733629569e-08, + "logits/chosen": -1.8432221412658691, + "logits/rejected": -1.8440630435943604, + "logps/chosen": -54.06510925292969, + "logps/rejected": -180.3579559326172, + "loss": 0.5358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28000566363334656, + "rewards/margins": 1.2068275213241577, + "rewards/rejected": -1.4868332147598267, + "step": 7313 + }, + { + "epoch": 0.43, + "learning_rate": 6.427394177314284e-08, + "logits/chosen": -2.1940672397613525, + "logits/rejected": -2.1909937858581543, + "logps/chosen": -21.952978134155273, + "logps/rejected": -129.3678436279297, + "loss": 0.2552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42408791184425354, + "rewards/margins": 2.9095394611358643, + "rewards/rejected": -2.4854514598846436, + "step": 7314 + }, + { + "epoch": 0.43, + "learning_rate": 6.426490967624853e-08, + "logits/chosen": -2.0521061420440674, + "logits/rejected": -2.0481534004211426, + "logps/chosen": -20.39841079711914, + "logps/rejected": -157.4114990234375, + "loss": 0.2143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7398712038993835, + "rewards/margins": 2.529052734375, + "rewards/rejected": -1.7891815900802612, + "step": 7315 + }, + { + "epoch": 0.43, + "learning_rate": 6.425587707259485e-08, + "logits/chosen": -1.9791489839553833, + "logits/rejected": -1.978090763092041, + "logps/chosen": -5.269782543182373, + "logps/rejected": -120.71920776367188, + "loss": 0.4546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17807216942310333, + "rewards/margins": 1.0355268716812134, + "rewards/rejected": -0.8574547171592712, + "step": 7316 + }, + { + "epoch": 0.43, + "learning_rate": 6.424684396250268e-08, + "logits/chosen": -1.874464511871338, + "logits/rejected": -1.8473650217056274, + "logps/chosen": -256.11944580078125, + "logps/rejected": -498.00091552734375, + "loss": 0.1116, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.137521505355835, + "rewards/margins": 1.6111786365509033, + "rewards/rejected": 0.5263428092002869, + "step": 7317 + }, + { + "epoch": 0.43, + "learning_rate": 6.423781034629292e-08, + "logits/chosen": -2.070568799972534, + "logits/rejected": -2.062732219696045, + "logps/chosen": -1.3239946365356445, + "logps/rejected": -174.90916442871094, + "loss": 0.3835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026685213670134544, + "rewards/margins": 1.861807942390442, + "rewards/rejected": -1.8351227045059204, + "step": 7318 + }, + { + "epoch": 0.43, + "learning_rate": 6.422877622428651e-08, + "logits/chosen": -1.99769127368927, + "logits/rejected": -1.9991835355758667, + "logps/chosen": -154.81979370117188, + "logps/rejected": -310.0810852050781, + "loss": 0.1446, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2874176502227783, + "rewards/margins": 2.7190184593200684, + "rewards/rejected": -1.4316009283065796, + "step": 7319 + }, + { + "epoch": 0.43, + "learning_rate": 6.421974159680437e-08, + "logits/chosen": -2.0193593502044678, + "logits/rejected": -2.0116519927978516, + "logps/chosen": -91.55264282226562, + "logps/rejected": -237.08001708984375, + "loss": 0.2123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9634918570518494, + "rewards/margins": 2.41721510887146, + "rewards/rejected": -1.4537231922149658, + "step": 7320 + }, + { + "epoch": 0.43, + "learning_rate": 6.421070646416743e-08, + "logits/chosen": -1.9472020864486694, + "logits/rejected": -1.9403835535049438, + "logps/chosen": -196.41973876953125, + "logps/rejected": -469.14166259765625, + "loss": 0.0655, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4531738758087158, + "rewards/margins": 2.7568116188049316, + "rewards/rejected": -1.3036377429962158, + "step": 7321 + }, + { + "epoch": 0.43, + "learning_rate": 6.420167082669671e-08, + "logits/chosen": -2.185030221939087, + "logits/rejected": -2.1814615726470947, + "logps/chosen": -0.0014064725255593657, + "logps/rejected": -44.33702087402344, + "loss": 0.7682, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.837962766643614e-05, + "rewards/margins": -0.28105029463768005, + "rewards/rejected": 0.2809719145298004, + "step": 7322 + }, + { + "epoch": 0.43, + "learning_rate": 6.419263468471316e-08, + "logits/chosen": -1.767187237739563, + "logits/rejected": -1.7763333320617676, + "logps/chosen": -11.375041961669922, + "logps/rejected": -234.43035888671875, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27499905228614807, + "rewards/margins": 5.103804588317871, + "rewards/rejected": -4.828805446624756, + "step": 7323 + }, + { + "epoch": 0.43, + "learning_rate": 6.41835980385378e-08, + "logits/chosen": -2.0240862369537354, + "logits/rejected": -2.011619806289673, + "logps/chosen": -44.466346740722656, + "logps/rejected": -183.55776977539062, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1630886048078537, + "rewards/margins": 2.311997652053833, + "rewards/rejected": -2.148909091949463, + "step": 7324 + }, + { + "epoch": 0.43, + "learning_rate": 6.417456088849166e-08, + "logits/chosen": -1.996180772781372, + "logits/rejected": -1.9725664854049683, + "logps/chosen": -144.24620056152344, + "logps/rejected": -236.529296875, + "loss": 0.2722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.33868408203125, + "rewards/margins": 0.5963607430458069, + "rewards/rejected": 0.7423233389854431, + "step": 7325 + }, + { + "epoch": 0.43, + "learning_rate": 6.416552323489579e-08, + "logits/chosen": -2.0802009105682373, + "logits/rejected": -2.076683759689331, + "logps/chosen": -0.00012528483057394624, + "logps/rejected": -193.9376678466797, + "loss": 0.4423, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7416535886004567e-07, + "rewards/margins": 1.7162829637527466, + "rewards/rejected": -1.7162827253341675, + "step": 7326 + }, + { + "epoch": 0.43, + "learning_rate": 6.415648507807123e-08, + "logits/chosen": -2.2273435592651367, + "logits/rejected": -2.2128777503967285, + "logps/chosen": -42.3718376159668, + "logps/rejected": -195.537841796875, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07944907993078232, + "rewards/margins": 3.1016829013824463, + "rewards/rejected": -3.022233724594116, + "step": 7327 + }, + { + "epoch": 0.43, + "learning_rate": 6.414744641833909e-08, + "logits/chosen": -1.9257460832595825, + "logits/rejected": -1.9041980504989624, + "logps/chosen": -169.37991333007812, + "logps/rejected": -331.0826416015625, + "loss": 0.1233, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8279160261154175, + "rewards/margins": 2.2718918323516846, + "rewards/rejected": -0.4439758360385895, + "step": 7328 + }, + { + "epoch": 0.43, + "learning_rate": 6.413840725602043e-08, + "logits/chosen": -1.856677532196045, + "logits/rejected": -1.7819628715515137, + "logps/chosen": -219.3409881591797, + "logps/rejected": -412.42608642578125, + "loss": 0.3157, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0744202136993408, + "rewards/margins": 0.7423462271690369, + "rewards/rejected": 0.33207398653030396, + "step": 7329 + }, + { + "epoch": 0.43, + "learning_rate": 6.41293675914364e-08, + "logits/chosen": -2.176811933517456, + "logits/rejected": -2.1504409313201904, + "logps/chosen": -38.65184020996094, + "logps/rejected": -385.2904052734375, + "loss": 0.2085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.490509033203125, + "rewards/margins": 5.011926174163818, + "rewards/rejected": -4.521417140960693, + "step": 7330 + }, + { + "epoch": 0.43, + "learning_rate": 6.412032742490811e-08, + "logits/chosen": -1.7788236141204834, + "logits/rejected": -1.8406537771224976, + "logps/chosen": -217.56881713867188, + "logps/rejected": -410.55755615234375, + "loss": 0.0745, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6767700910568237, + "rewards/margins": 2.2465882301330566, + "rewards/rejected": -0.5698181390762329, + "step": 7331 + }, + { + "epoch": 0.43, + "learning_rate": 6.411128675675673e-08, + "logits/chosen": -2.0652616024017334, + "logits/rejected": -2.0741922855377197, + "logps/chosen": -87.48515319824219, + "logps/rejected": -347.31036376953125, + "loss": 0.0972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4447174072265625, + "rewards/margins": 3.4068450927734375, + "rewards/rejected": -1.962127685546875, + "step": 7332 + }, + { + "epoch": 0.43, + "learning_rate": 6.410224558730341e-08, + "logits/chosen": -1.950938105583191, + "logits/rejected": -2.0001277923583984, + "logps/chosen": -279.2474670410156, + "logps/rejected": -424.8834228515625, + "loss": 0.0551, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5359954833984375, + "rewards/margins": 3.294869899749756, + "rewards/rejected": -1.758874535560608, + "step": 7333 + }, + { + "epoch": 0.43, + "learning_rate": 6.409320391686934e-08, + "logits/chosen": -1.86666738986969, + "logits/rejected": -1.8769904375076294, + "logps/chosen": -196.45986938476562, + "logps/rejected": -334.43817138671875, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1598984003067017, + "rewards/margins": 2.0551650524139404, + "rewards/rejected": -0.8952667117118835, + "step": 7334 + }, + { + "epoch": 0.43, + "learning_rate": 6.408416174577575e-08, + "logits/chosen": -1.7591477632522583, + "logits/rejected": -1.7648719549179077, + "logps/chosen": -6.501752853393555, + "logps/rejected": -288.3091735839844, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012512254528701305, + "rewards/margins": 3.4395768642425537, + "rewards/rejected": -3.4270646572113037, + "step": 7335 + }, + { + "epoch": 0.43, + "learning_rate": 6.407511907434383e-08, + "logits/chosen": -1.9874725341796875, + "logits/rejected": -1.9895594120025635, + "logps/chosen": -23.01422882080078, + "logps/rejected": -126.17337036132812, + "loss": 0.5989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10813522338867188, + "rewards/margins": 0.3698112666606903, + "rewards/rejected": -0.26167604327201843, + "step": 7336 + }, + { + "epoch": 0.43, + "learning_rate": 6.406607590289482e-08, + "logits/chosen": -2.0793826580047607, + "logits/rejected": -2.085879325866699, + "logps/chosen": -70.25939178466797, + "logps/rejected": -361.24798583984375, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.043471526354551315, + "rewards/margins": 3.7379631996154785, + "rewards/rejected": -3.6944916248321533, + "step": 7337 + }, + { + "epoch": 0.43, + "learning_rate": 6.405703223175001e-08, + "logits/chosen": -2.0188558101654053, + "logits/rejected": -2.0122480392456055, + "logps/chosen": -38.338199615478516, + "logps/rejected": -152.80050659179688, + "loss": 0.4009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08514022827148438, + "rewards/margins": 3.2625954151153564, + "rewards/rejected": -3.347735643386841, + "step": 7338 + }, + { + "epoch": 0.43, + "learning_rate": 6.404798806123064e-08, + "logits/chosen": -1.9163641929626465, + "logits/rejected": -1.9069616794586182, + "logps/chosen": -74.86540985107422, + "logps/rejected": -204.64199829101562, + "loss": 0.2439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9032783508300781, + "rewards/margins": 1.5881507396697998, + "rewards/rejected": -0.6848724484443665, + "step": 7339 + }, + { + "epoch": 0.43, + "learning_rate": 6.403894339165803e-08, + "logits/chosen": -2.1690306663513184, + "logits/rejected": -2.1679494380950928, + "logps/chosen": -0.022635336965322495, + "logps/rejected": -61.24065399169922, + "loss": 0.5331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004054782912135124, + "rewards/margins": 0.6800640225410461, + "rewards/rejected": -0.6804695129394531, + "step": 7340 + }, + { + "epoch": 0.43, + "learning_rate": 6.402989822335347e-08, + "logits/chosen": -1.935164451599121, + "logits/rejected": -1.8619433641433716, + "logps/chosen": -193.91848754882812, + "logps/rejected": -364.64813232421875, + "loss": 0.3687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.364785760641098, + "rewards/margins": 1.1727203130722046, + "rewards/rejected": -0.807934582233429, + "step": 7341 + }, + { + "epoch": 0.43, + "learning_rate": 6.40208525566383e-08, + "logits/chosen": -2.0354301929473877, + "logits/rejected": -2.034605026245117, + "logps/chosen": -2.149278402328491, + "logps/rejected": -94.12313079833984, + "loss": 0.4316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06303836405277252, + "rewards/margins": 1.6754359006881714, + "rewards/rejected": -1.7384742498397827, + "step": 7342 + }, + { + "epoch": 0.43, + "learning_rate": 6.401180639183387e-08, + "logits/chosen": -2.0749425888061523, + "logits/rejected": -2.080922842025757, + "logps/chosen": -121.16159057617188, + "logps/rejected": -414.54547119140625, + "loss": 0.0712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.486791968345642, + "rewards/margins": 2.498931884765625, + "rewards/rejected": -1.012139916419983, + "step": 7343 + }, + { + "epoch": 0.43, + "learning_rate": 6.400275972926152e-08, + "logits/chosen": -1.8612064123153687, + "logits/rejected": -1.84576416015625, + "logps/chosen": -67.16964721679688, + "logps/rejected": -291.7872314453125, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1827743500471115, + "rewards/margins": 4.453332424163818, + "rewards/rejected": -4.636106967926025, + "step": 7344 + }, + { + "epoch": 0.43, + "learning_rate": 6.399371256924268e-08, + "logits/chosen": -1.8581346273422241, + "logits/rejected": -1.8510662317276, + "logps/chosen": -167.9898681640625, + "logps/rejected": -415.59027099609375, + "loss": 0.1368, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8923416137695312, + "rewards/margins": 1.4383865594863892, + "rewards/rejected": 0.4539550840854645, + "step": 7345 + }, + { + "epoch": 0.43, + "learning_rate": 6.398466491209868e-08, + "logits/chosen": -1.9086344242095947, + "logits/rejected": -1.8130615949630737, + "logps/chosen": -349.01953125, + "logps/rejected": -601.255615234375, + "loss": 0.2578, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.652783155441284, + "rewards/margins": 0.4416625499725342, + "rewards/rejected": 2.21112060546875, + "step": 7346 + }, + { + "epoch": 0.43, + "learning_rate": 6.397561675815101e-08, + "logits/chosen": -2.0932531356811523, + "logits/rejected": -2.0929713249206543, + "logps/chosen": -17.228050231933594, + "logps/rejected": -137.2928009033203, + "loss": 0.6138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04243011400103569, + "rewards/margins": 0.1999053955078125, + "rewards/rejected": -0.1574752777814865, + "step": 7347 + }, + { + "epoch": 0.43, + "learning_rate": 6.396656810772106e-08, + "logits/chosen": -1.7662607431411743, + "logits/rejected": -1.7217174768447876, + "logps/chosen": -305.72467041015625, + "logps/rejected": -502.900390625, + "loss": 0.2305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.876794457435608, + "rewards/margins": 0.7488769292831421, + "rewards/rejected": 1.1279175281524658, + "step": 7348 + }, + { + "epoch": 0.43, + "learning_rate": 6.395751896113029e-08, + "logits/chosen": -2.0203680992126465, + "logits/rejected": -2.016810655593872, + "logps/chosen": -0.5261113047599792, + "logps/rejected": -146.15090942382812, + "loss": 0.3812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08900927752256393, + "rewards/margins": 2.1244752407073975, + "rewards/rejected": -2.035465955734253, + "step": 7349 + }, + { + "epoch": 0.43, + "learning_rate": 6.394846931870018e-08, + "logits/chosen": -1.69355046749115, + "logits/rejected": -1.7373628616333008, + "logps/chosen": -209.06922912597656, + "logps/rejected": -372.638671875, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.953790307044983, + "rewards/margins": 2.5264861583709717, + "rewards/rejected": -0.5726959109306335, + "step": 7350 + }, + { + "epoch": 0.43, + "learning_rate": 6.39394191807522e-08, + "logits/chosen": -1.9001071453094482, + "logits/rejected": -1.778673768043518, + "logps/chosen": -221.14920043945312, + "logps/rejected": -689.621337890625, + "loss": 0.14, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3993713855743408, + "rewards/margins": 2.869720458984375, + "rewards/rejected": -1.4703491926193237, + "step": 7351 + }, + { + "epoch": 0.43, + "learning_rate": 6.393036854760786e-08, + "logits/chosen": -1.9456021785736084, + "logits/rejected": -1.9277912378311157, + "logps/chosen": -4.859490871429443, + "logps/rejected": -191.10972595214844, + "loss": 0.2355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6053797602653503, + "rewards/margins": 2.4667932987213135, + "rewards/rejected": -1.861413598060608, + "step": 7352 + }, + { + "epoch": 0.43, + "learning_rate": 6.392131741958871e-08, + "logits/chosen": -1.9066736698150635, + "logits/rejected": -1.9054081439971924, + "logps/chosen": -161.4396514892578, + "logps/rejected": -239.09707641601562, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0766098499298096, + "rewards/margins": 1.8440918922424316, + "rewards/rejected": 0.2325180023908615, + "step": 7353 + }, + { + "epoch": 0.43, + "learning_rate": 6.391226579701625e-08, + "logits/chosen": -2.109955072402954, + "logits/rejected": -2.107316493988037, + "logps/chosen": -46.414085388183594, + "logps/rejected": -78.45484161376953, + "loss": 0.5465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26814958453178406, + "rewards/margins": 0.16494446992874146, + "rewards/rejected": 0.103205107152462, + "step": 7354 + }, + { + "epoch": 0.43, + "learning_rate": 6.390321368021205e-08, + "logits/chosen": -1.946894884109497, + "logits/rejected": -1.9296023845672607, + "logps/chosen": -20.818431854248047, + "logps/rejected": -240.58782958984375, + "loss": 0.3727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5232295989990234, + "rewards/margins": 1.2997157573699951, + "rewards/rejected": -0.7764862179756165, + "step": 7355 + }, + { + "epoch": 0.43, + "learning_rate": 6.38941610694977e-08, + "logits/chosen": -2.0263350009918213, + "logits/rejected": -2.021620035171509, + "logps/chosen": -12.004918098449707, + "logps/rejected": -198.93521118164062, + "loss": 0.4854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10756092518568039, + "rewards/margins": 1.3545851707458496, + "rewards/rejected": -1.4621460437774658, + "step": 7356 + }, + { + "epoch": 0.43, + "learning_rate": 6.38851079651948e-08, + "logits/chosen": -1.9136768579483032, + "logits/rejected": -1.9198932647705078, + "logps/chosen": -14.383400917053223, + "logps/rejected": -230.65682983398438, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12510518729686737, + "rewards/margins": 4.66171407699585, + "rewards/rejected": -4.7868194580078125, + "step": 7357 + }, + { + "epoch": 0.43, + "learning_rate": 6.387605436762493e-08, + "logits/chosen": -1.9537553787231445, + "logits/rejected": -1.9474802017211914, + "logps/chosen": -245.760498046875, + "logps/rejected": -346.76318359375, + "loss": 0.2197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4181030988693237, + "rewards/margins": 1.0649445056915283, + "rewards/rejected": 0.353158563375473, + "step": 7358 + }, + { + "epoch": 0.43, + "learning_rate": 6.386700027710974e-08, + "logits/chosen": -1.8944439888000488, + "logits/rejected": -1.8912450075149536, + "logps/chosen": -1.2732906341552734, + "logps/rejected": -160.98391723632812, + "loss": 0.3666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03987879678606987, + "rewards/margins": 2.533046007156372, + "rewards/rejected": -2.4931671619415283, + "step": 7359 + }, + { + "epoch": 0.43, + "learning_rate": 6.385794569397087e-08, + "logits/chosen": -1.7367593050003052, + "logits/rejected": -1.735135793685913, + "logps/chosen": -0.468614786863327, + "logps/rejected": -37.09496307373047, + "loss": 0.7015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01054171659052372, + "rewards/margins": 0.011763963848352432, + "rewards/rejected": -0.022305680438876152, + "step": 7360 + }, + { + "epoch": 0.43, + "learning_rate": 6.384889061852998e-08, + "logits/chosen": -2.0700621604919434, + "logits/rejected": -2.056689977645874, + "logps/chosen": -41.490718841552734, + "logps/rejected": -265.4313049316406, + "loss": 0.223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.414960116147995, + "rewards/margins": 2.984964370727539, + "rewards/rejected": -2.5700042247772217, + "step": 7361 + }, + { + "epoch": 0.43, + "learning_rate": 6.383983505110876e-08, + "logits/chosen": -2.064293146133423, + "logits/rejected": -2.06325626373291, + "logps/chosen": -69.5674819946289, + "logps/rejected": -213.65451049804688, + "loss": 0.9265, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.11372604221105576, + "rewards/margins": -0.8874778747558594, + "rewards/rejected": 1.0012038946151733, + "step": 7362 + }, + { + "epoch": 0.43, + "learning_rate": 6.38307789920289e-08, + "logits/chosen": -1.9449421167373657, + "logits/rejected": -1.9401955604553223, + "logps/chosen": -17.421485900878906, + "logps/rejected": -267.1138916015625, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005478096194565296, + "rewards/margins": 6.417755126953125, + "rewards/rejected": -6.4232330322265625, + "step": 7363 + }, + { + "epoch": 0.43, + "learning_rate": 6.38217224416121e-08, + "logits/chosen": -1.8919702768325806, + "logits/rejected": -1.8929017782211304, + "logps/chosen": -0.13952304422855377, + "logps/rejected": -176.55772399902344, + "loss": 0.4197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015808669850230217, + "rewards/margins": 1.676222801208496, + "rewards/rejected": -1.6604140996932983, + "step": 7364 + }, + { + "epoch": 0.43, + "learning_rate": 6.381266540018013e-08, + "logits/chosen": -2.053705930709839, + "logits/rejected": -2.008755922317505, + "logps/chosen": -217.73892211914062, + "logps/rejected": -547.369384765625, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9115692377090454, + "rewards/margins": 2.716940402984619, + "rewards/rejected": -0.805371105670929, + "step": 7365 + }, + { + "epoch": 0.43, + "learning_rate": 6.380360786805471e-08, + "logits/chosen": -2.094926595687866, + "logits/rejected": -2.123300552368164, + "logps/chosen": -205.06396484375, + "logps/rejected": -253.5121307373047, + "loss": 0.1757, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6008713245391846, + "rewards/margins": 0.9610321521759033, + "rewards/rejected": 1.6398391723632812, + "step": 7366 + }, + { + "epoch": 0.43, + "learning_rate": 6.379454984555762e-08, + "logits/chosen": -2.084195375442505, + "logits/rejected": -2.0814545154571533, + "logps/chosen": -14.71539306640625, + "logps/rejected": -116.6214828491211, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20469284057617188, + "rewards/margins": 1.963557481765747, + "rewards/rejected": -1.7588646411895752, + "step": 7367 + }, + { + "epoch": 0.43, + "learning_rate": 6.378549133301064e-08, + "logits/chosen": -1.8355435132980347, + "logits/rejected": -1.8200985193252563, + "logps/chosen": -207.93202209472656, + "logps/rejected": -229.9638671875, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7459579706192017, + "rewards/margins": 0.08788454532623291, + "rewards/rejected": 1.6580734252929688, + "step": 7368 + }, + { + "epoch": 0.43, + "learning_rate": 6.377643233073558e-08, + "logits/chosen": -1.9750890731811523, + "logits/rejected": -1.9665507078170776, + "logps/chosen": -41.84192657470703, + "logps/rejected": -154.857421875, + "loss": 0.538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19355927407741547, + "rewards/margins": 0.9260635375976562, + "rewards/rejected": -1.119622826576233, + "step": 7369 + }, + { + "epoch": 0.43, + "learning_rate": 6.376737283905424e-08, + "logits/chosen": -1.8131396770477295, + "logits/rejected": -1.8221555948257446, + "logps/chosen": -0.0006829837802797556, + "logps/rejected": -127.86322021484375, + "loss": 0.541, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.447140261414461e-05, + "rewards/margins": 0.6504729390144348, + "rewards/rejected": -0.650507390499115, + "step": 7370 + }, + { + "epoch": 0.43, + "learning_rate": 6.37583128582885e-08, + "logits/chosen": -1.8409579992294312, + "logits/rejected": -1.8268948793411255, + "logps/chosen": -192.61708068847656, + "logps/rejected": -405.989990234375, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27646636962890625, + "rewards/margins": 1.9819138050079346, + "rewards/rejected": -2.258380174636841, + "step": 7371 + }, + { + "epoch": 0.43, + "learning_rate": 6.374925238876018e-08, + "logits/chosen": -1.9182283878326416, + "logits/rejected": -1.919272541999817, + "logps/chosen": -146.79104614257812, + "logps/rejected": -381.1944580078125, + "loss": 0.1722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8589828610420227, + "rewards/margins": 2.9228317737579346, + "rewards/rejected": -2.0638489723205566, + "step": 7372 + }, + { + "epoch": 0.43, + "learning_rate": 6.374019143079114e-08, + "logits/chosen": -1.9476839303970337, + "logits/rejected": -1.9474906921386719, + "logps/chosen": -192.69715881347656, + "logps/rejected": -239.83822631835938, + "loss": 0.4724, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9872726202011108, + "rewards/margins": -0.37024080753326416, + "rewards/rejected": 2.357513427734375, + "step": 7373 + }, + { + "epoch": 0.43, + "learning_rate": 6.373112998470331e-08, + "logits/chosen": -1.895755648612976, + "logits/rejected": -1.906294822692871, + "logps/chosen": -136.27593994140625, + "logps/rejected": -272.81622314453125, + "loss": 1.0527, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8209381103515625, + "rewards/margins": -0.5619109869003296, + "rewards/rejected": -0.2590270936489105, + "step": 7374 + }, + { + "epoch": 0.43, + "learning_rate": 6.372206805081856e-08, + "logits/chosen": -2.0249831676483154, + "logits/rejected": -2.0077004432678223, + "logps/chosen": -57.927642822265625, + "logps/rejected": -257.86541748046875, + "loss": 0.1938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9890686273574829, + "rewards/margins": 2.4482240676879883, + "rewards/rejected": -1.4591553211212158, + "step": 7375 + }, + { + "epoch": 0.43, + "learning_rate": 6.371300562945885e-08, + "logits/chosen": -1.9605371952056885, + "logits/rejected": -1.9539484977722168, + "logps/chosen": -26.73092269897461, + "logps/rejected": -261.899658203125, + "loss": 0.1766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7897331118583679, + "rewards/margins": 2.974651336669922, + "rewards/rejected": -2.184918165206909, + "step": 7376 + }, + { + "epoch": 0.43, + "learning_rate": 6.370394272094608e-08, + "logits/chosen": -1.821009635925293, + "logits/rejected": -1.8242007493972778, + "logps/chosen": -3.3259144402109087e-05, + "logps/rejected": -109.96954345703125, + "loss": 0.437, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.006692163078696e-07, + "rewards/margins": 1.5295636653900146, + "rewards/rejected": -1.5295631885528564, + "step": 7377 + }, + { + "epoch": 0.43, + "learning_rate": 6.369487932560227e-08, + "logits/chosen": -2.1354944705963135, + "logits/rejected": -2.1390717029571533, + "logps/chosen": -0.00019918910402338952, + "logps/rejected": -162.32577514648438, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.015878746737144e-06, + "rewards/margins": 3.2180862426757812, + "rewards/rejected": -3.2180893421173096, + "step": 7378 + }, + { + "epoch": 0.43, + "learning_rate": 6.368581544374932e-08, + "logits/chosen": -1.960347294807434, + "logits/rejected": -1.9698195457458496, + "logps/chosen": -6.318056583404541, + "logps/rejected": -202.50332641601562, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2774995267391205, + "rewards/margins": 3.5709259510040283, + "rewards/rejected": -3.293426513671875, + "step": 7379 + }, + { + "epoch": 0.43, + "learning_rate": 6.367675107570928e-08, + "logits/chosen": -1.8835968971252441, + "logits/rejected": -1.8586063385009766, + "logps/chosen": -178.51174926757812, + "logps/rejected": -290.6041259765625, + "loss": 0.2006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4555572271347046, + "rewards/margins": 1.449304223060608, + "rewards/rejected": 0.006253052037209272, + "step": 7380 + }, + { + "epoch": 0.43, + "learning_rate": 6.366768622180414e-08, + "logits/chosen": -1.852076530456543, + "logits/rejected": -1.8494985103607178, + "logps/chosen": -4.875606100540608e-05, + "logps/rejected": -33.13590621948242, + "loss": 0.6898, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.337769101108279e-07, + "rewards/margins": -0.025599241256713867, + "rewards/rejected": 0.025598907843232155, + "step": 7381 + }, + { + "epoch": 0.43, + "learning_rate": 6.365862088235591e-08, + "logits/chosen": -1.9301670789718628, + "logits/rejected": -1.9184696674346924, + "logps/chosen": -159.3860626220703, + "logps/rejected": -329.6976318359375, + "loss": 0.1195, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7232345342636108, + "rewards/margins": 1.7567001581192017, + "rewards/rejected": -0.03346557542681694, + "step": 7382 + }, + { + "epoch": 0.43, + "learning_rate": 6.364955505768668e-08, + "logits/chosen": -2.0416600704193115, + "logits/rejected": -2.002009630203247, + "logps/chosen": -32.37015914916992, + "logps/rejected": -532.6160888671875, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3520832061767578, + "rewards/margins": 8.388924598693848, + "rewards/rejected": -8.03684139251709, + "step": 7383 + }, + { + "epoch": 0.43, + "learning_rate": 6.364048874811847e-08, + "logits/chosen": -1.8638904094696045, + "logits/rejected": -1.8602511882781982, + "logps/chosen": -27.600032806396484, + "logps/rejected": -212.58370971679688, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.346343994140625, + "rewards/margins": 2.0707168579101562, + "rewards/rejected": -1.7243728637695312, + "step": 7384 + }, + { + "epoch": 0.43, + "learning_rate": 6.36314219539734e-08, + "logits/chosen": -1.842455506324768, + "logits/rejected": -1.7494245767593384, + "logps/chosen": -190.656982421875, + "logps/rejected": -498.94708251953125, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4745117127895355, + "rewards/margins": 1.3832061290740967, + "rewards/rejected": -0.9086944460868835, + "step": 7385 + }, + { + "epoch": 0.43, + "learning_rate": 6.362235467557352e-08, + "logits/chosen": -1.8600636720657349, + "logits/rejected": -1.8498954772949219, + "logps/chosen": -7.5814127922058105, + "logps/rejected": -162.76846313476562, + "loss": 0.3812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28009095788002014, + "rewards/margins": 1.4351248741149902, + "rewards/rejected": -1.1550339460372925, + "step": 7386 + }, + { + "epoch": 0.43, + "learning_rate": 6.361328691324098e-08, + "logits/chosen": -1.9885351657867432, + "logits/rejected": -1.9736193418502808, + "logps/chosen": -220.348388671875, + "logps/rejected": -273.7505187988281, + "loss": 0.1994, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2604522705078125, + "rewards/margins": 0.9693480730056763, + "rewards/rejected": 1.2911041975021362, + "step": 7387 + }, + { + "epoch": 0.43, + "learning_rate": 6.360421866729789e-08, + "logits/chosen": -1.7190625667572021, + "logits/rejected": -1.7730929851531982, + "logps/chosen": -273.0171813964844, + "logps/rejected": -261.905029296875, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0113983154296875, + "rewards/margins": 2.373587131500244, + "rewards/rejected": 0.6378113031387329, + "step": 7388 + }, + { + "epoch": 0.43, + "learning_rate": 6.359514993806642e-08, + "logits/chosen": -1.9411263465881348, + "logits/rejected": -1.9419629573822021, + "logps/chosen": -10.625070571899414, + "logps/rejected": -50.70569610595703, + "loss": 0.394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05000047758221626, + "rewards/margins": 1.8392033576965332, + "rewards/rejected": -1.7892029285430908, + "step": 7389 + }, + { + "epoch": 0.43, + "learning_rate": 6.35860807258687e-08, + "logits/chosen": -1.8546934127807617, + "logits/rejected": -1.8458260297775269, + "logps/chosen": -41.5574836730957, + "logps/rejected": -221.90704345703125, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19879722595214844, + "rewards/margins": 3.3752193450927734, + "rewards/rejected": -3.176422119140625, + "step": 7390 + }, + { + "epoch": 0.43, + "learning_rate": 6.357701103102698e-08, + "logits/chosen": -1.949049711227417, + "logits/rejected": -1.9709137678146362, + "logps/chosen": -279.11981201171875, + "logps/rejected": -301.9969482421875, + "loss": 0.2234, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.602728247642517, + "rewards/margins": 0.9102111458778381, + "rewards/rejected": 0.692517101764679, + "step": 7391 + }, + { + "epoch": 0.43, + "learning_rate": 6.356794085386337e-08, + "logits/chosen": -2.072021007537842, + "logits/rejected": -2.0987699031829834, + "logps/chosen": -198.31912231445312, + "logps/rejected": -234.0277862548828, + "loss": 0.1248, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0446503162384033, + "rewards/margins": 1.7624664306640625, + "rewards/rejected": 0.28218385577201843, + "step": 7392 + }, + { + "epoch": 0.43, + "learning_rate": 6.355887019470016e-08, + "logits/chosen": -1.733852505683899, + "logits/rejected": -1.7839957475662231, + "logps/chosen": -217.67843627929688, + "logps/rejected": -312.03936767578125, + "loss": 0.1128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.343029737472534, + "rewards/margins": 1.60931396484375, + "rewards/rejected": 0.733715832233429, + "step": 7393 + }, + { + "epoch": 0.43, + "learning_rate": 6.354979905385954e-08, + "logits/chosen": -1.981321930885315, + "logits/rejected": -1.9654651880264282, + "logps/chosen": -49.40537643432617, + "logps/rejected": -272.78424072265625, + "loss": 0.3976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016071701422333717, + "rewards/margins": 2.358703374862671, + "rewards/rejected": -2.3426315784454346, + "step": 7394 + }, + { + "epoch": 0.43, + "learning_rate": 6.354072743166378e-08, + "logits/chosen": -1.823393702507019, + "logits/rejected": -1.7990559339523315, + "logps/chosen": -523.2116088867188, + "logps/rejected": -586.854736328125, + "loss": 0.4863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02595825307071209, + "rewards/margins": 0.12645873427391052, + "rewards/rejected": -0.15241698920726776, + "step": 7395 + }, + { + "epoch": 0.43, + "learning_rate": 6.353165532843517e-08, + "logits/chosen": -1.9053114652633667, + "logits/rejected": -1.9072291851043701, + "logps/chosen": -186.7903594970703, + "logps/rejected": -497.78826904296875, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5762131214141846, + "rewards/margins": 3.633183479309082, + "rewards/rejected": -2.0569703578948975, + "step": 7396 + }, + { + "epoch": 0.43, + "learning_rate": 6.352258274449593e-08, + "logits/chosen": -2.101266860961914, + "logits/rejected": -2.099548816680908, + "logps/chosen": -24.372529983520508, + "logps/rejected": -153.71737670898438, + "loss": 0.4951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3561130464076996, + "rewards/margins": 1.8496395349502563, + "rewards/rejected": -2.2057526111602783, + "step": 7397 + }, + { + "epoch": 0.43, + "learning_rate": 6.351350968016844e-08, + "logits/chosen": -1.9524595737457275, + "logits/rejected": -1.9674421548843384, + "logps/chosen": -431.1314697265625, + "logps/rejected": -444.62432861328125, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7965056896209717, + "rewards/margins": 2.5238189697265625, + "rewards/rejected": 0.27268677949905396, + "step": 7398 + }, + { + "epoch": 0.43, + "learning_rate": 6.350443613577498e-08, + "logits/chosen": -1.8596532344818115, + "logits/rejected": -1.8576723337173462, + "logps/chosen": -46.52970886230469, + "logps/rejected": -282.5625305175781, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05367622524499893, + "rewards/margins": 4.190410137176514, + "rewards/rejected": -4.1367340087890625, + "step": 7399 + }, + { + "epoch": 0.43, + "learning_rate": 6.349536211163787e-08, + "logits/chosen": -2.084751605987549, + "logits/rejected": -2.073298931121826, + "logps/chosen": -45.658119201660156, + "logps/rejected": -186.48748779296875, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41323205828666687, + "rewards/margins": 1.7018886804580688, + "rewards/rejected": -1.2886565923690796, + "step": 7400 + }, + { + "epoch": 0.43, + "learning_rate": 6.34862876080795e-08, + "logits/chosen": -1.9004430770874023, + "logits/rejected": -1.912976622581482, + "logps/chosen": -0.524773120880127, + "logps/rejected": -193.9145050048828, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037491556257009506, + "rewards/margins": 3.1540305614471436, + "rewards/rejected": -3.1165390014648438, + "step": 7401 + }, + { + "epoch": 0.43, + "learning_rate": 6.347721262542223e-08, + "logits/chosen": -1.7919079065322876, + "logits/rejected": -1.7928134202957153, + "logps/chosen": -0.07000990211963654, + "logps/rejected": -133.86300659179688, + "loss": 0.4097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008949466282501817, + "rewards/margins": 1.968653917312622, + "rewards/rejected": -1.9677590131759644, + "step": 7402 + }, + { + "epoch": 0.43, + "learning_rate": 6.346813716398843e-08, + "logits/chosen": -1.9406973123550415, + "logits/rejected": -1.9293975830078125, + "logps/chosen": -152.97396850585938, + "logps/rejected": -259.57452392578125, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7431458234786987, + "rewards/margins": 1.7256958484649658, + "rewards/rejected": 0.01744995079934597, + "step": 7403 + }, + { + "epoch": 0.43, + "learning_rate": 6.345906122410053e-08, + "logits/chosen": -1.9794127941131592, + "logits/rejected": -1.9934195280075073, + "logps/chosen": -267.259521484375, + "logps/rejected": -519.4534912109375, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0279481410980225, + "rewards/margins": 3.3671205043792725, + "rewards/rejected": -0.33917236328125, + "step": 7404 + }, + { + "epoch": 0.43, + "learning_rate": 6.344998480608094e-08, + "logits/chosen": -2.115662097930908, + "logits/rejected": -2.102036476135254, + "logps/chosen": -81.17857360839844, + "logps/rejected": -185.5189208984375, + "loss": 0.2608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8680969476699829, + "rewards/margins": 1.5922913551330566, + "rewards/rejected": -0.724194347858429, + "step": 7405 + }, + { + "epoch": 0.43, + "learning_rate": 6.344090791025211e-08, + "logits/chosen": -2.170097827911377, + "logits/rejected": -2.1635026931762695, + "logps/chosen": -25.5382080078125, + "logps/rejected": -200.3634033203125, + "loss": 0.5275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3849853575229645, + "rewards/margins": 2.1509153842926025, + "rewards/rejected": -2.535900831222534, + "step": 7406 + }, + { + "epoch": 0.43, + "learning_rate": 6.343183053693646e-08, + "logits/chosen": -1.9788388013839722, + "logits/rejected": -1.9848437309265137, + "logps/chosen": -135.48031616210938, + "logps/rejected": -200.74258422851562, + "loss": 0.4302, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1407486200332642, + "rewards/margins": 0.2086990475654602, + "rewards/rejected": 0.932049572467804, + "step": 7407 + }, + { + "epoch": 0.43, + "learning_rate": 6.34227526864565e-08, + "logits/chosen": -2.0654714107513428, + "logits/rejected": -2.0683774948120117, + "logps/chosen": -18.359455108642578, + "logps/rejected": -145.95089721679688, + "loss": 0.499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13736343383789062, + "rewards/margins": 1.2228355407714844, + "rewards/rejected": -1.360198974609375, + "step": 7408 + }, + { + "epoch": 0.43, + "learning_rate": 6.34136743591347e-08, + "logits/chosen": -1.8220969438552856, + "logits/rejected": -1.805819034576416, + "logps/chosen": -176.93634033203125, + "logps/rejected": -374.8875732421875, + "loss": 0.1614, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6138886213302612, + "rewards/margins": 1.4939453601837158, + "rewards/rejected": 0.11994323879480362, + "step": 7409 + }, + { + "epoch": 0.43, + "learning_rate": 6.340459555529359e-08, + "logits/chosen": -1.9753564596176147, + "logits/rejected": -1.966407299041748, + "logps/chosen": -14.211552619934082, + "logps/rejected": -105.13538360595703, + "loss": 0.4924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20131663978099823, + "rewards/margins": 1.568645715713501, + "rewards/rejected": -1.7699623107910156, + "step": 7410 + }, + { + "epoch": 0.43, + "learning_rate": 6.339551627525565e-08, + "logits/chosen": -2.0253078937530518, + "logits/rejected": -2.0235435962677, + "logps/chosen": -6.435210227966309, + "logps/rejected": -46.65592575073242, + "loss": 0.6557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015485763549804688, + "rewards/margins": 0.17031823098659515, + "rewards/rejected": -0.18580399453639984, + "step": 7411 + }, + { + "epoch": 0.43, + "learning_rate": 6.338643651934348e-08, + "logits/chosen": -2.0514655113220215, + "logits/rejected": -2.058500051498413, + "logps/chosen": -0.772620677947998, + "logps/rejected": -189.94100952148438, + "loss": 0.4146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008619392290711403, + "rewards/margins": 1.9167417287826538, + "rewards/rejected": -1.9081223011016846, + "step": 7412 + }, + { + "epoch": 0.43, + "learning_rate": 6.337735628787957e-08, + "logits/chosen": -1.9997859001159668, + "logits/rejected": -1.99708092212677, + "logps/chosen": -51.51192855834961, + "logps/rejected": -311.14056396484375, + "loss": 0.2164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7092728018760681, + "rewards/margins": 2.8643693923950195, + "rewards/rejected": -2.1550965309143066, + "step": 7413 + }, + { + "epoch": 0.43, + "learning_rate": 6.336827558118656e-08, + "logits/chosen": -2.0802459716796875, + "logits/rejected": -2.0750935077667236, + "logps/chosen": -61.09968566894531, + "logps/rejected": -221.97402954101562, + "loss": 0.2211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48381349444389343, + "rewards/margins": 2.4601516723632812, + "rewards/rejected": -1.976338267326355, + "step": 7414 + }, + { + "epoch": 0.43, + "learning_rate": 6.335919439958699e-08, + "logits/chosen": -1.9856375455856323, + "logits/rejected": -1.9511276483535767, + "logps/chosen": -94.14049530029297, + "logps/rejected": -352.0019226074219, + "loss": 0.1831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.885248601436615, + "rewards/margins": 1.4037361145019531, + "rewards/rejected": -0.5184875726699829, + "step": 7415 + }, + { + "epoch": 0.43, + "learning_rate": 6.335011274340349e-08, + "logits/chosen": -1.9247474670410156, + "logits/rejected": -1.919049620628357, + "logps/chosen": -155.989013671875, + "logps/rejected": -210.55203247070312, + "loss": 0.1397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8166275024414062, + "rewards/margins": 1.872003197669983, + "rewards/rejected": -0.05537567287683487, + "step": 7416 + }, + { + "epoch": 0.43, + "learning_rate": 6.334103061295868e-08, + "logits/chosen": -1.8505628108978271, + "logits/rejected": -1.844535231590271, + "logps/chosen": -13.795856475830078, + "logps/rejected": -90.20726013183594, + "loss": 0.517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2589431703090668, + "rewards/margins": 1.4684860706329346, + "rewards/rejected": -1.7274292707443237, + "step": 7417 + }, + { + "epoch": 0.43, + "learning_rate": 6.333194800857521e-08, + "logits/chosen": -2.1022026538848877, + "logits/rejected": -2.1037487983703613, + "logps/chosen": -31.545866012573242, + "logps/rejected": -118.81674194335938, + "loss": 0.2097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.091636061668396, + "rewards/margins": 1.7866647243499756, + "rewards/rejected": -0.6950287222862244, + "step": 7418 + }, + { + "epoch": 0.43, + "learning_rate": 6.332286493057573e-08, + "logits/chosen": -1.9367198944091797, + "logits/rejected": -1.9126423597335815, + "logps/chosen": -226.78656005859375, + "logps/rejected": -380.5311279296875, + "loss": 0.1112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.58074951171875, + "rewards/margins": 2.6727631092071533, + "rewards/rejected": -1.0920135974884033, + "step": 7419 + }, + { + "epoch": 0.43, + "learning_rate": 6.331378137928292e-08, + "logits/chosen": -1.9472805261611938, + "logits/rejected": -1.91996169090271, + "logps/chosen": -24.234779357910156, + "logps/rejected": -302.3997497558594, + "loss": 0.2602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27924615144729614, + "rewards/margins": 2.6663219928741455, + "rewards/rejected": -2.387075901031494, + "step": 7420 + }, + { + "epoch": 0.43, + "learning_rate": 6.330469735501948e-08, + "logits/chosen": -2.0344650745391846, + "logits/rejected": -2.030174732208252, + "logps/chosen": -39.17219924926758, + "logps/rejected": -232.61090087890625, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2135089933872223, + "rewards/margins": 3.6568410396575928, + "rewards/rejected": -3.4433319568634033, + "step": 7421 + }, + { + "epoch": 0.43, + "learning_rate": 6.32956128581081e-08, + "logits/chosen": -1.794521450996399, + "logits/rejected": -1.791356086730957, + "logps/chosen": -14.630475997924805, + "logps/rejected": -152.28123474121094, + "loss": 0.5333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31233349442481995, + "rewards/margins": 0.3820478618144989, + "rewards/rejected": -0.06971435993909836, + "step": 7422 + }, + { + "epoch": 0.43, + "learning_rate": 6.328652788887153e-08, + "logits/chosen": -2.0930466651916504, + "logits/rejected": -2.0786402225494385, + "logps/chosen": -84.67768859863281, + "logps/rejected": -255.8372344970703, + "loss": 0.2463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8970733880996704, + "rewards/margins": 1.7584457397460938, + "rewards/rejected": -0.8613724112510681, + "step": 7423 + }, + { + "epoch": 0.43, + "learning_rate": 6.327744244763246e-08, + "logits/chosen": -1.8667689561843872, + "logits/rejected": -1.8693512678146362, + "logps/chosen": -2.5453264713287354, + "logps/rejected": -80.87649536132812, + "loss": 0.4494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015250325202941895, + "rewards/margins": 1.2772735357284546, + "rewards/rejected": -1.2620232105255127, + "step": 7424 + }, + { + "epoch": 0.43, + "learning_rate": 6.326835653471373e-08, + "logits/chosen": -1.9486942291259766, + "logits/rejected": -1.947195291519165, + "logps/chosen": -3.7755417823791504, + "logps/rejected": -161.06216430664062, + "loss": 0.406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012036943808197975, + "rewards/margins": 1.904334306716919, + "rewards/rejected": -1.892297387123108, + "step": 7425 + }, + { + "epoch": 0.43, + "learning_rate": 6.325927015043807e-08, + "logits/chosen": -2.053107500076294, + "logits/rejected": -2.0724849700927734, + "logps/chosen": -13.461808204650879, + "logps/rejected": -221.0526123046875, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017647553235292435, + "rewards/margins": 2.6601614952087402, + "rewards/rejected": -2.6425139904022217, + "step": 7426 + }, + { + "epoch": 0.43, + "learning_rate": 6.325018329512825e-08, + "logits/chosen": -1.9489820003509521, + "logits/rejected": -1.9479860067367554, + "logps/chosen": -9.004374504089355, + "logps/rejected": -98.83702087402344, + "loss": 0.5953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2198467254638672, + "rewards/margins": 0.09020805358886719, + "rewards/rejected": 0.129638671875, + "step": 7427 + }, + { + "epoch": 0.43, + "learning_rate": 6.324109596910711e-08, + "logits/chosen": -1.8982477188110352, + "logits/rejected": -1.8878421783447266, + "logps/chosen": -0.07034935057163239, + "logps/rejected": -237.07086181640625, + "loss": 0.3832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016125105321407318, + "rewards/margins": 2.627875328063965, + "rewards/rejected": -2.6294877529144287, + "step": 7428 + }, + { + "epoch": 0.43, + "learning_rate": 6.32320081726975e-08, + "logits/chosen": -1.9502896070480347, + "logits/rejected": -1.9319427013397217, + "logps/chosen": -293.98907470703125, + "logps/rejected": -406.830810546875, + "loss": 0.0448, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9488343000411987, + "rewards/margins": 3.0624146461486816, + "rewards/rejected": -1.113580346107483, + "step": 7429 + }, + { + "epoch": 0.43, + "learning_rate": 6.32229199062222e-08, + "logits/chosen": -1.9154341220855713, + "logits/rejected": -1.9224038124084473, + "logps/chosen": -201.49441528320312, + "logps/rejected": -661.0910034179688, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6741348505020142, + "rewards/margins": 6.873146057128906, + "rewards/rejected": -5.199011325836182, + "step": 7430 + }, + { + "epoch": 0.43, + "learning_rate": 6.321383117000413e-08, + "logits/chosen": -1.8127455711364746, + "logits/rejected": -1.8087114095687866, + "logps/chosen": -68.2831802368164, + "logps/rejected": -386.30938720703125, + "loss": 0.479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14709626138210297, + "rewards/margins": 0.27962493896484375, + "rewards/rejected": -0.13252869248390198, + "step": 7431 + }, + { + "epoch": 0.43, + "learning_rate": 6.320474196436612e-08, + "logits/chosen": -1.8141179084777832, + "logits/rejected": -1.8221373558044434, + "logps/chosen": -160.0513916015625, + "logps/rejected": -327.9178466796875, + "loss": 0.0838, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.98980712890625, + "rewards/margins": 2.111438035964966, + "rewards/rejected": -0.12163086235523224, + "step": 7432 + }, + { + "epoch": 0.43, + "learning_rate": 6.319565228963111e-08, + "logits/chosen": -2.012824535369873, + "logits/rejected": -2.007978677749634, + "logps/chosen": -0.00011396177433198318, + "logps/rejected": -154.7437286376953, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3364671051240293e-06, + "rewards/margins": 3.3280487060546875, + "rewards/rejected": -3.3280510902404785, + "step": 7433 + }, + { + "epoch": 0.43, + "learning_rate": 6.318656214612197e-08, + "logits/chosen": -1.9852354526519775, + "logits/rejected": -1.974074125289917, + "logps/chosen": -202.1415557861328, + "logps/rejected": -556.7593383789062, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4360549449920654, + "rewards/margins": 5.0404558181762695, + "rewards/rejected": -2.604400634765625, + "step": 7434 + }, + { + "epoch": 0.43, + "learning_rate": 6.317747153416165e-08, + "logits/chosen": -1.9806904792785645, + "logits/rejected": -1.963752269744873, + "logps/chosen": -220.63174438476562, + "logps/rejected": -362.39825439453125, + "loss": 0.0492, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7762008905410767, + "rewards/margins": 3.6634140014648438, + "rewards/rejected": -1.887213110923767, + "step": 7435 + }, + { + "epoch": 0.43, + "learning_rate": 6.316838045407306e-08, + "logits/chosen": -2.1213083267211914, + "logits/rejected": -2.120417594909668, + "logps/chosen": -1.84253990650177, + "logps/rejected": -184.0465087890625, + "loss": 0.44, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0911850780248642, + "rewards/margins": 1.6178160905838013, + "rewards/rejected": -1.7090011835098267, + "step": 7436 + }, + { + "epoch": 0.43, + "learning_rate": 6.315928890617918e-08, + "logits/chosen": -1.6791644096374512, + "logits/rejected": -1.6803749799728394, + "logps/chosen": -118.61250305175781, + "logps/rejected": -292.3224182128906, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8088485598564148, + "rewards/margins": 1.4444137811660767, + "rewards/rejected": -0.6355652213096619, + "step": 7437 + }, + { + "epoch": 0.43, + "learning_rate": 6.315019689080301e-08, + "logits/chosen": -1.942376971244812, + "logits/rejected": -1.9866899251937866, + "logps/chosen": -209.5574188232422, + "logps/rejected": -253.00205993652344, + "loss": 0.2549, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5052703619003296, + "rewards/margins": 0.9329696297645569, + "rewards/rejected": 0.5723007321357727, + "step": 7438 + }, + { + "epoch": 0.43, + "learning_rate": 6.314110440826751e-08, + "logits/chosen": -2.113492488861084, + "logits/rejected": -2.1100497245788574, + "logps/chosen": -9.089778900146484, + "logps/rejected": -139.72463989257812, + "loss": 0.5034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31643494963645935, + "rewards/margins": 0.4596203565597534, + "rewards/rejected": -0.14318542182445526, + "step": 7439 + }, + { + "epoch": 0.43, + "learning_rate": 6.31320114588957e-08, + "logits/chosen": -1.8604397773742676, + "logits/rejected": -1.815118670463562, + "logps/chosen": -223.26919555664062, + "logps/rejected": -726.8936767578125, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.030308485031128, + "rewards/margins": 6.61802864074707, + "rewards/rejected": -4.587719917297363, + "step": 7440 + }, + { + "epoch": 0.43, + "learning_rate": 6.312291804301063e-08, + "logits/chosen": -1.9169714450836182, + "logits/rejected": -1.9194104671478271, + "logps/chosen": -24.18627166748047, + "logps/rejected": -79.31083679199219, + "loss": 0.7372, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21245498955249786, + "rewards/margins": -0.45866626501083374, + "rewards/rejected": 0.6711212396621704, + "step": 7441 + }, + { + "epoch": 0.43, + "learning_rate": 6.311382416093531e-08, + "logits/chosen": -2.025907278060913, + "logits/rejected": -2.060790538787842, + "logps/chosen": -251.99087524414062, + "logps/rejected": -398.3974609375, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8648895025253296, + "rewards/margins": 5.407635688781738, + "rewards/rejected": -3.542746067047119, + "step": 7442 + }, + { + "epoch": 0.43, + "learning_rate": 6.31047298129928e-08, + "logits/chosen": -1.8014895915985107, + "logits/rejected": -1.8062360286712646, + "logps/chosen": -88.43647003173828, + "logps/rejected": -204.3422088623047, + "loss": 0.3669, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1757004261016846, + "rewards/margins": 0.4259582757949829, + "rewards/rejected": 0.7497421503067017, + "step": 7443 + }, + { + "epoch": 0.43, + "learning_rate": 6.309563499950619e-08, + "logits/chosen": -2.096445322036743, + "logits/rejected": -2.08793044090271, + "logps/chosen": -50.018741607666016, + "logps/rejected": -314.5262145996094, + "loss": 0.2058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6018592715263367, + "rewards/margins": 4.275174617767334, + "rewards/rejected": -3.6733155250549316, + "step": 7444 + }, + { + "epoch": 0.43, + "learning_rate": 6.308653972079856e-08, + "logits/chosen": -1.8762603998184204, + "logits/rejected": -1.8573925495147705, + "logps/chosen": -69.79150390625, + "logps/rejected": -263.4662170410156, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15552674233913422, + "rewards/margins": 1.373945713043213, + "rewards/rejected": -1.2184189558029175, + "step": 7445 + }, + { + "epoch": 0.43, + "learning_rate": 6.307744397719303e-08, + "logits/chosen": -2.1459715366363525, + "logits/rejected": -2.1448004245758057, + "logps/chosen": -35.00850296020508, + "logps/rejected": -253.08908081054688, + "loss": 0.1338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.999237060546875, + "rewards/margins": 3.7625396251678467, + "rewards/rejected": -2.7633025646209717, + "step": 7446 + }, + { + "epoch": 0.43, + "learning_rate": 6.306834776901274e-08, + "logits/chosen": -1.8703442811965942, + "logits/rejected": -1.785954475402832, + "logps/chosen": -291.12274169921875, + "logps/rejected": -527.6015014648438, + "loss": 0.1905, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5746887922286987, + "rewards/margins": 1.537438988685608, + "rewards/rejected": 0.03724975511431694, + "step": 7447 + }, + { + "epoch": 0.43, + "learning_rate": 6.30592510965808e-08, + "logits/chosen": -2.052011013031006, + "logits/rejected": -2.0903773307800293, + "logps/chosen": -158.54698181152344, + "logps/rejected": -250.76028442382812, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.397064208984375, + "rewards/margins": 2.152792453765869, + "rewards/rejected": 0.24427185952663422, + "step": 7448 + }, + { + "epoch": 0.43, + "learning_rate": 6.305015396022036e-08, + "logits/chosen": -2.0404772758483887, + "logits/rejected": -2.029181957244873, + "logps/chosen": -70.59066772460938, + "logps/rejected": -283.5892333984375, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6164779663085938, + "rewards/margins": 1.8417739868164062, + "rewards/rejected": -1.2252960205078125, + "step": 7449 + }, + { + "epoch": 0.43, + "learning_rate": 6.304105636025465e-08, + "logits/chosen": -1.9990161657333374, + "logits/rejected": -1.9931997060775757, + "logps/chosen": -51.79030990600586, + "logps/rejected": -204.4863739013672, + "loss": 0.6896, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.276763916015625, + "rewards/margins": -0.30496519804000854, + "rewards/rejected": 0.5817291140556335, + "step": 7450 + }, + { + "epoch": 0.43, + "learning_rate": 6.30319582970068e-08, + "logits/chosen": -1.791223168373108, + "logits/rejected": -1.758357286453247, + "logps/chosen": -180.22418212890625, + "logps/rejected": -327.907958984375, + "loss": 0.1894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.649012804031372, + "rewards/margins": 1.48564612865448, + "rewards/rejected": 0.16336670517921448, + "step": 7451 + }, + { + "epoch": 0.43, + "learning_rate": 6.302285977080006e-08, + "logits/chosen": -1.9836876392364502, + "logits/rejected": -1.9855999946594238, + "logps/chosen": -39.525482177734375, + "logps/rejected": -178.8191680908203, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6207275390625, + "rewards/margins": 2.2319273948669434, + "rewards/rejected": -1.611199975013733, + "step": 7452 + }, + { + "epoch": 0.43, + "learning_rate": 6.301376078195763e-08, + "logits/chosen": -1.8747789859771729, + "logits/rejected": -1.8683043718338013, + "logps/chosen": -201.4919891357422, + "logps/rejected": -298.5863037109375, + "loss": 0.137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5255142450332642, + "rewards/margins": 2.1373398303985596, + "rewards/rejected": -0.6118255853652954, + "step": 7453 + }, + { + "epoch": 0.43, + "learning_rate": 6.300466133080276e-08, + "logits/chosen": -2.071074962615967, + "logits/rejected": -2.0546624660491943, + "logps/chosen": -0.00017010708688758314, + "logps/rejected": -246.37318420410156, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.052883014082909e-06, + "rewards/margins": 6.8357462882995605, + "rewards/rejected": -6.835750102996826, + "step": 7454 + }, + { + "epoch": 0.43, + "learning_rate": 6.299556141765871e-08, + "logits/chosen": -2.0419182777404785, + "logits/rejected": -2.040196657180786, + "logps/chosen": -165.89315795898438, + "logps/rejected": -413.6662902832031, + "loss": 0.115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2061065435409546, + "rewards/margins": 3.9690065383911133, + "rewards/rejected": -2.762899875640869, + "step": 7455 + }, + { + "epoch": 0.43, + "learning_rate": 6.298646104284877e-08, + "logits/chosen": -1.824049949645996, + "logits/rejected": -1.8219380378723145, + "logps/chosen": -172.2335662841797, + "logps/rejected": -242.52256774902344, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.219874620437622, + "rewards/margins": 0.12975454330444336, + "rewards/rejected": 2.0901200771331787, + "step": 7456 + }, + { + "epoch": 0.43, + "learning_rate": 6.297736020669619e-08, + "logits/chosen": -1.7528290748596191, + "logits/rejected": -1.7247859239578247, + "logps/chosen": -201.12445068359375, + "logps/rejected": -261.97723388671875, + "loss": 0.2669, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7852309942245483, + "rewards/margins": 0.7146224975585938, + "rewards/rejected": 1.0706084966659546, + "step": 7457 + }, + { + "epoch": 0.43, + "learning_rate": 6.296825890952428e-08, + "logits/chosen": -1.8070600032806396, + "logits/rejected": -1.8058102130889893, + "logps/chosen": -21.54451560974121, + "logps/rejected": -103.39652252197266, + "loss": 0.584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13666458427906036, + "rewards/margins": 0.5825287103652954, + "rewards/rejected": -0.7191932797431946, + "step": 7458 + }, + { + "epoch": 0.43, + "learning_rate": 6.29591571516564e-08, + "logits/chosen": -2.026970624923706, + "logits/rejected": -2.01581072807312, + "logps/chosen": -224.55908203125, + "logps/rejected": -477.32470703125, + "loss": 0.1132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1079742908477783, + "rewards/margins": 2.0916872024536133, + "rewards/rejected": -0.9837127923965454, + "step": 7459 + }, + { + "epoch": 0.43, + "learning_rate": 6.295005493341587e-08, + "logits/chosen": -1.9644145965576172, + "logits/rejected": -1.9496604204177856, + "logps/chosen": -49.381004333496094, + "logps/rejected": -197.38308715820312, + "loss": 0.2026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2105640172958374, + "rewards/margins": 1.591774344444275, + "rewards/rejected": -0.3812103271484375, + "step": 7460 + }, + { + "epoch": 0.43, + "learning_rate": 6.294095225512604e-08, + "logits/chosen": -2.0815911293029785, + "logits/rejected": -2.0857319831848145, + "logps/chosen": -0.1033371314406395, + "logps/rejected": -85.30300903320312, + "loss": 0.4742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018615076318383217, + "rewards/margins": 1.194223165512085, + "rewards/rejected": -1.1756080389022827, + "step": 7461 + }, + { + "epoch": 0.43, + "learning_rate": 6.293184911711028e-08, + "logits/chosen": -2.199739933013916, + "logits/rejected": -2.186838388442993, + "logps/chosen": -74.353271484375, + "logps/rejected": -279.8473815917969, + "loss": 0.6495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00812377966940403, + "rewards/margins": 0.8682189583778381, + "rewards/rejected": -0.860095202922821, + "step": 7462 + }, + { + "epoch": 0.43, + "learning_rate": 6.292274551969198e-08, + "logits/chosen": -2.1685733795166016, + "logits/rejected": -2.160412549972534, + "logps/chosen": -27.454557418823242, + "logps/rejected": -203.38296508789062, + "loss": 0.2461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44410935044288635, + "rewards/margins": 2.651247501373291, + "rewards/rejected": -2.2071380615234375, + "step": 7463 + }, + { + "epoch": 0.43, + "learning_rate": 6.291364146319455e-08, + "logits/chosen": -2.0989575386047363, + "logits/rejected": -2.101658821105957, + "logps/chosen": -22.55681610107422, + "logps/rejected": -159.05911254882812, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.362758070230484, + "rewards/margins": 1.9229615926742554, + "rewards/rejected": -1.5602035522460938, + "step": 7464 + }, + { + "epoch": 0.43, + "learning_rate": 6.290453694794142e-08, + "logits/chosen": -1.806963562965393, + "logits/rejected": -1.806714415550232, + "logps/chosen": -54.07240295410156, + "logps/rejected": -164.8483428955078, + "loss": 0.5053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35111960768699646, + "rewards/margins": 1.4565112590789795, + "rewards/rejected": -1.8076308965682983, + "step": 7465 + }, + { + "epoch": 0.43, + "learning_rate": 6.2895431974256e-08, + "logits/chosen": -2.038140296936035, + "logits/rejected": -2.03981614112854, + "logps/chosen": -0.00018381403060629964, + "logps/rejected": -298.36541748046875, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4210243029519916e-06, + "rewards/margins": 5.564949035644531, + "rewards/rejected": -5.564952373504639, + "step": 7466 + }, + { + "epoch": 0.43, + "learning_rate": 6.288632654246178e-08, + "logits/chosen": -2.16389536857605, + "logits/rejected": -2.155160665512085, + "logps/chosen": -5.03052506246604e-05, + "logps/rejected": -84.49678802490234, + "loss": 0.6589, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3113059083025291e-07, + "rewards/margins": 0.11194471269845963, + "rewards/rejected": -0.11194457858800888, + "step": 7467 + }, + { + "epoch": 0.43, + "learning_rate": 6.287722065288217e-08, + "logits/chosen": -2.0110175609588623, + "logits/rejected": -1.9889546632766724, + "logps/chosen": -84.66130828857422, + "logps/rejected": -266.29461669921875, + "loss": 0.4417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27255478501319885, + "rewards/margins": 1.7343515157699585, + "rewards/rejected": -2.006906270980835, + "step": 7468 + }, + { + "epoch": 0.43, + "learning_rate": 6.286811430584073e-08, + "logits/chosen": -2.1206469535827637, + "logits/rejected": -2.117979049682617, + "logps/chosen": -35.355648040771484, + "logps/rejected": -195.64398193359375, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18583030998706818, + "rewards/margins": 2.4193508625030518, + "rewards/rejected": -2.2335205078125, + "step": 7469 + }, + { + "epoch": 0.43, + "learning_rate": 6.285900750166091e-08, + "logits/chosen": -2.0284247398376465, + "logits/rejected": -2.0076565742492676, + "logps/chosen": -199.14791870117188, + "logps/rejected": -272.3802490234375, + "loss": 0.3185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6075454950332642, + "rewards/margins": 0.6721634268760681, + "rewards/rejected": -0.06461792439222336, + "step": 7470 + }, + { + "epoch": 0.43, + "learning_rate": 6.284990024066625e-08, + "logits/chosen": -2.1425564289093018, + "logits/rejected": -2.1344780921936035, + "logps/chosen": -4.681317329406738, + "logps/rejected": -163.55386352539062, + "loss": 0.3788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05306582525372505, + "rewards/margins": 2.6354336738586426, + "rewards/rejected": -2.6884994506835938, + "step": 7471 + }, + { + "epoch": 0.43, + "learning_rate": 6.284079252318026e-08, + "logits/chosen": -2.071882486343384, + "logits/rejected": -2.063335418701172, + "logps/chosen": -72.63861083984375, + "logps/rejected": -285.22564697265625, + "loss": 0.149, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2122443914413452, + "rewards/margins": 2.5162787437438965, + "rewards/rejected": -1.3040344715118408, + "step": 7472 + }, + { + "epoch": 0.43, + "learning_rate": 6.283168434952653e-08, + "logits/chosen": -1.9708563089370728, + "logits/rejected": -1.9629853963851929, + "logps/chosen": -0.04864836484193802, + "logps/rejected": -91.60659790039062, + "loss": 0.6304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003030814230442047, + "rewards/margins": 0.2678142786026001, + "rewards/rejected": -0.26478347182273865, + "step": 7473 + }, + { + "epoch": 0.43, + "learning_rate": 6.282257572002858e-08, + "logits/chosen": -1.875829815864563, + "logits/rejected": -1.8666527271270752, + "logps/chosen": -165.47628784179688, + "logps/rejected": -406.82049560546875, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.098771810531616, + "rewards/margins": 5.6816086769104, + "rewards/rejected": -3.582836866378784, + "step": 7474 + }, + { + "epoch": 0.43, + "learning_rate": 6.281346663501004e-08, + "logits/chosen": -2.00692081451416, + "logits/rejected": -2.0045278072357178, + "logps/chosen": -0.4853341579437256, + "logps/rejected": -169.17181396484375, + "loss": 0.4654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06399722397327423, + "rewards/margins": 1.1199604272842407, + "rewards/rejected": -1.055963158607483, + "step": 7475 + }, + { + "epoch": 0.44, + "learning_rate": 6.280435709479448e-08, + "logits/chosen": -1.9175982475280762, + "logits/rejected": -1.9102627038955688, + "logps/chosen": -18.62569808959961, + "logps/rejected": -129.82957458496094, + "loss": 0.5888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27430248260498047, + "rewards/margins": 0.8036623001098633, + "rewards/rejected": -1.0779647827148438, + "step": 7476 + }, + { + "epoch": 0.44, + "learning_rate": 6.279524709970553e-08, + "logits/chosen": -1.857995867729187, + "logits/rejected": -1.8655173778533936, + "logps/chosen": -152.1923370361328, + "logps/rejected": -195.44586181640625, + "loss": 0.4355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8285858035087585, + "rewards/margins": 0.2283172607421875, + "rewards/rejected": 0.600268542766571, + "step": 7477 + }, + { + "epoch": 0.44, + "learning_rate": 6.278613665006679e-08, + "logits/chosen": -2.0092029571533203, + "logits/rejected": -2.0134167671203613, + "logps/chosen": -2.926011562347412, + "logps/rejected": -71.08111572265625, + "loss": 0.5531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0343838706612587, + "rewards/margins": 0.517999529838562, + "rewards/rejected": -0.5523834228515625, + "step": 7478 + }, + { + "epoch": 0.44, + "learning_rate": 6.277702574620196e-08, + "logits/chosen": -1.7840429544448853, + "logits/rejected": -1.822853684425354, + "logps/chosen": -214.0554962158203, + "logps/rejected": -287.871826171875, + "loss": 0.1234, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.303964376449585, + "rewards/margins": 1.5233826637268066, + "rewards/rejected": 0.7805816531181335, + "step": 7479 + }, + { + "epoch": 0.44, + "learning_rate": 6.276791438843466e-08, + "logits/chosen": -1.981704831123352, + "logits/rejected": -1.98471200466156, + "logps/chosen": -0.00014853056927677244, + "logps/rejected": -149.56671142578125, + "loss": 0.3656, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4744134861975908e-05, + "rewards/margins": 2.7564244270324707, + "rewards/rejected": -2.756399631500244, + "step": 7480 + }, + { + "epoch": 0.44, + "learning_rate": 6.27588025770886e-08, + "logits/chosen": -2.0647385120391846, + "logits/rejected": -2.053727865219116, + "logps/chosen": -15.860878944396973, + "logps/rejected": -81.6619644165039, + "loss": 0.4185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011304664425551891, + "rewards/margins": 1.9460493326187134, + "rewards/rejected": -1.957353949546814, + "step": 7481 + }, + { + "epoch": 0.44, + "learning_rate": 6.274969031248745e-08, + "logits/chosen": -1.7816232442855835, + "logits/rejected": -1.7814807891845703, + "logps/chosen": -53.94499588012695, + "logps/rejected": -177.31539916992188, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24378128349781036, + "rewards/margins": 1.6256996393203735, + "rewards/rejected": -1.3819183111190796, + "step": 7482 + }, + { + "epoch": 0.44, + "learning_rate": 6.274057759495494e-08, + "logits/chosen": -2.157541513442993, + "logits/rejected": -2.1538100242614746, + "logps/chosen": -76.95664978027344, + "logps/rejected": -214.31285095214844, + "loss": 0.0863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6395965814590454, + "rewards/margins": 3.2791473865509033, + "rewards/rejected": -1.639550805091858, + "step": 7483 + }, + { + "epoch": 0.44, + "learning_rate": 6.27314644248148e-08, + "logits/chosen": -1.8315534591674805, + "logits/rejected": -1.8360507488250732, + "logps/chosen": -65.94203186035156, + "logps/rejected": -174.74679565429688, + "loss": 0.3574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30696946382522583, + "rewards/margins": 1.4607048034667969, + "rewards/rejected": -1.1537353992462158, + "step": 7484 + }, + { + "epoch": 0.44, + "learning_rate": 6.272235080239076e-08, + "logits/chosen": -2.0699565410614014, + "logits/rejected": -2.071786642074585, + "logps/chosen": -190.07479858398438, + "logps/rejected": -296.5383605957031, + "loss": 0.3987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3659195005893707, + "rewards/margins": 0.3288742005825043, + "rewards/rejected": 0.03704528883099556, + "step": 7485 + }, + { + "epoch": 0.44, + "learning_rate": 6.271323672800659e-08, + "logits/chosen": -1.8928370475769043, + "logits/rejected": -1.9271373748779297, + "logps/chosen": -213.92974853515625, + "logps/rejected": -251.12286376953125, + "loss": 0.1045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6015655994415283, + "rewards/margins": 1.6034424304962158, + "rewards/rejected": 0.9981231689453125, + "step": 7486 + }, + { + "epoch": 0.44, + "learning_rate": 6.270412220198605e-08, + "logits/chosen": -1.9775015115737915, + "logits/rejected": -1.973831295967102, + "logps/chosen": -2.7313313484191895, + "logps/rejected": -32.762474060058594, + "loss": 0.5904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07756540924310684, + "rewards/margins": 0.2805466055870056, + "rewards/rejected": -0.20298118889331818, + "step": 7487 + }, + { + "epoch": 0.44, + "learning_rate": 6.269500722465297e-08, + "logits/chosen": -1.831048607826233, + "logits/rejected": -1.787489414215088, + "logps/chosen": -290.7628479003906, + "logps/rejected": -427.39788818359375, + "loss": 0.107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9515045881271362, + "rewards/margins": 2.252011299133301, + "rewards/rejected": -0.300506591796875, + "step": 7488 + }, + { + "epoch": 0.44, + "learning_rate": 6.268589179633113e-08, + "logits/chosen": -1.995361566543579, + "logits/rejected": -1.988595724105835, + "logps/chosen": -20.471385955810547, + "logps/rejected": -148.95704650878906, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2011260986328125, + "rewards/margins": 2.377127170562744, + "rewards/rejected": -2.1760010719299316, + "step": 7489 + }, + { + "epoch": 0.44, + "learning_rate": 6.267677591734435e-08, + "logits/chosen": -1.8646267652511597, + "logits/rejected": -1.8558073043823242, + "logps/chosen": -233.32070922851562, + "logps/rejected": -394.4898986816406, + "loss": 0.0758, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.085272192955017, + "rewards/margins": 3.3740601539611816, + "rewards/rejected": -2.288787841796875, + "step": 7490 + }, + { + "epoch": 0.44, + "learning_rate": 6.266765958801649e-08, + "logits/chosen": -1.8551726341247559, + "logits/rejected": -1.8297936916351318, + "logps/chosen": -179.77218627929688, + "logps/rejected": -311.5628967285156, + "loss": 0.3039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7940430045127869, + "rewards/margins": 1.1666626930236816, + "rewards/rejected": -0.37261962890625, + "step": 7491 + }, + { + "epoch": 0.44, + "learning_rate": 6.265854280867139e-08, + "logits/chosen": -1.8717846870422363, + "logits/rejected": -1.8486624956130981, + "logps/chosen": -279.12939453125, + "logps/rejected": -514.306640625, + "loss": 0.0961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9518219232559204, + "rewards/margins": 2.148977756500244, + "rewards/rejected": -0.19715575873851776, + "step": 7492 + }, + { + "epoch": 0.44, + "learning_rate": 6.264942557963293e-08, + "logits/chosen": -1.9470237493515015, + "logits/rejected": -1.9208611249923706, + "logps/chosen": -201.75860595703125, + "logps/rejected": -346.59796142578125, + "loss": 0.0718, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7326080799102783, + "rewards/margins": 2.4898529052734375, + "rewards/rejected": -0.757244884967804, + "step": 7493 + }, + { + "epoch": 0.44, + "learning_rate": 6.2640307901225e-08, + "logits/chosen": -1.9094477891921997, + "logits/rejected": -1.93087899684906, + "logps/chosen": -153.42166137695312, + "logps/rejected": -417.89398193359375, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.734887719154358, + "rewards/margins": 5.5541534423828125, + "rewards/rejected": -3.819265842437744, + "step": 7494 + }, + { + "epoch": 0.44, + "learning_rate": 6.26311897737715e-08, + "logits/chosen": -1.9127697944641113, + "logits/rejected": -1.926844835281372, + "logps/chosen": -140.67214965820312, + "logps/rejected": -302.3326721191406, + "loss": 0.1243, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6453369855880737, + "rewards/margins": 1.6755127906799316, + "rewards/rejected": -0.03017578087747097, + "step": 7495 + }, + { + "epoch": 0.44, + "learning_rate": 6.262207119759637e-08, + "logits/chosen": -1.8294706344604492, + "logits/rejected": -1.8415584564208984, + "logps/chosen": -114.23104858398438, + "logps/rejected": -319.6507568359375, + "loss": 0.2404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5053604245185852, + "rewards/margins": 3.2541823387145996, + "rewards/rejected": -2.748821973800659, + "step": 7496 + }, + { + "epoch": 0.44, + "learning_rate": 6.261295217302352e-08, + "logits/chosen": -1.9083527326583862, + "logits/rejected": -1.9158979654312134, + "logps/chosen": -90.66401672363281, + "logps/rejected": -145.54824829101562, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44512939453125, + "rewards/margins": 1.2107864618301392, + "rewards/rejected": -0.7656570672988892, + "step": 7497 + }, + { + "epoch": 0.44, + "learning_rate": 6.26038327003769e-08, + "logits/chosen": -2.042741060256958, + "logits/rejected": -2.0413312911987305, + "logps/chosen": -92.72282409667969, + "logps/rejected": -216.4626007080078, + "loss": 0.1341, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.207862138748169, + "rewards/margins": 2.614910125732422, + "rewards/rejected": -1.4070481061935425, + "step": 7498 + }, + { + "epoch": 0.44, + "learning_rate": 6.25947127799805e-08, + "logits/chosen": -1.9532470703125, + "logits/rejected": -1.9534118175506592, + "logps/chosen": -30.676143646240234, + "logps/rejected": -114.71015930175781, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5554096102714539, + "rewards/margins": 1.4040485620498657, + "rewards/rejected": -0.8486389517784119, + "step": 7499 + }, + { + "epoch": 0.44, + "learning_rate": 6.25855924121583e-08, + "logits/chosen": -1.7354177236557007, + "logits/rejected": -1.7273651361465454, + "logps/chosen": -157.19044494628906, + "logps/rejected": -247.73939514160156, + "loss": 0.1634, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8277695178985596, + "rewards/margins": 1.2500625848770142, + "rewards/rejected": 0.5777069330215454, + "step": 7500 + }, + { + "epoch": 0.44, + "learning_rate": 6.257647159723427e-08, + "logits/chosen": -1.9490162134170532, + "logits/rejected": -1.9252045154571533, + "logps/chosen": -280.5063781738281, + "logps/rejected": -402.1792907714844, + "loss": 0.1196, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.318817138671875, + "rewards/margins": 1.4257354736328125, + "rewards/rejected": 0.8930816650390625, + "step": 7501 + }, + { + "epoch": 0.44, + "learning_rate": 6.256735033553248e-08, + "logits/chosen": -1.8788220882415771, + "logits/rejected": -1.8730523586273193, + "logps/chosen": -134.68771362304688, + "logps/rejected": -649.9191284179688, + "loss": 0.0611, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7119110822677612, + "rewards/margins": 5.7102508544921875, + "rewards/rejected": -3.998339891433716, + "step": 7502 + }, + { + "epoch": 0.44, + "learning_rate": 6.255822862737691e-08, + "logits/chosen": -2.1721513271331787, + "logits/rejected": -2.166262149810791, + "logps/chosen": -62.06309127807617, + "logps/rejected": -227.49130249023438, + "loss": 0.4514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41592904925346375, + "rewards/margins": 0.6781604886054993, + "rewards/rejected": -0.2622314393520355, + "step": 7503 + }, + { + "epoch": 0.44, + "learning_rate": 6.254910647309166e-08, + "logits/chosen": -1.7627029418945312, + "logits/rejected": -1.7801191806793213, + "logps/chosen": -204.92172241210938, + "logps/rejected": -368.912109375, + "loss": 0.1487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3208389282226562, + "rewards/margins": 1.6948349475860596, + "rewards/rejected": -0.37399598956108093, + "step": 7504 + }, + { + "epoch": 0.44, + "learning_rate": 6.253998387300075e-08, + "logits/chosen": -1.9425745010375977, + "logits/rejected": -1.9776192903518677, + "logps/chosen": -235.92654418945312, + "logps/rejected": -422.7524719238281, + "loss": 0.066, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2973785400390625, + "rewards/margins": 2.9017577171325684, + "rewards/rejected": -0.6043792963027954, + "step": 7505 + }, + { + "epoch": 0.44, + "learning_rate": 6.253086082742825e-08, + "logits/chosen": -1.7967380285263062, + "logits/rejected": -1.7866159677505493, + "logps/chosen": -309.2851867675781, + "logps/rejected": -480.6595764160156, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.682348608970642, + "rewards/margins": 3.2446868419647217, + "rewards/rejected": -1.5623382329940796, + "step": 7506 + }, + { + "epoch": 0.44, + "learning_rate": 6.25217373366983e-08, + "logits/chosen": -1.8757802248001099, + "logits/rejected": -1.8273124694824219, + "logps/chosen": -210.2019500732422, + "logps/rejected": -391.0892333984375, + "loss": 0.1628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6364212036132812, + "rewards/margins": 1.2357864379882812, + "rewards/rejected": 0.400634765625, + "step": 7507 + }, + { + "epoch": 0.44, + "learning_rate": 6.251261340113499e-08, + "logits/chosen": -2.092197895050049, + "logits/rejected": -2.100886344909668, + "logps/chosen": -208.69383239746094, + "logps/rejected": -302.60992431640625, + "loss": 0.1154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2205429077148438, + "rewards/margins": 2.0487351417541504, + "rewards/rejected": -0.8281921744346619, + "step": 7508 + }, + { + "epoch": 0.44, + "learning_rate": 6.250348902106245e-08, + "logits/chosen": -2.070657968521118, + "logits/rejected": -2.0707123279571533, + "logps/chosen": -58.558746337890625, + "logps/rejected": -141.36752319335938, + "loss": 0.3172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26514893770217896, + "rewards/margins": 2.4899849891662598, + "rewards/rejected": -2.2248361110687256, + "step": 7509 + }, + { + "epoch": 0.44, + "learning_rate": 6.249436419680481e-08, + "logits/chosen": -1.8986995220184326, + "logits/rejected": -1.9044996500015259, + "logps/chosen": -244.03286743164062, + "logps/rejected": -317.1850280761719, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.097747802734375, + "rewards/margins": 0.7250640392303467, + "rewards/rejected": 0.37268373370170593, + "step": 7510 + }, + { + "epoch": 0.44, + "learning_rate": 6.248523892868624e-08, + "logits/chosen": -1.869307518005371, + "logits/rejected": -1.8706494569778442, + "logps/chosen": -12.355188369750977, + "logps/rejected": -80.3678207397461, + "loss": 0.6458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03466367721557617, + "rewards/margins": 0.1071818396449089, + "rewards/rejected": -0.07251816242933273, + "step": 7511 + }, + { + "epoch": 0.44, + "learning_rate": 6.247611321703091e-08, + "logits/chosen": -2.015192747116089, + "logits/rejected": -2.0181591510772705, + "logps/chosen": -23.560184478759766, + "logps/rejected": -135.15780639648438, + "loss": 0.2288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5409852862358093, + "rewards/margins": 3.196434259414673, + "rewards/rejected": -2.6554489135742188, + "step": 7512 + }, + { + "epoch": 0.44, + "learning_rate": 6.246698706216303e-08, + "logits/chosen": -2.0763397216796875, + "logits/rejected": -2.0599985122680664, + "logps/chosen": -159.9000701904297, + "logps/rejected": -238.83840942382812, + "loss": 0.2026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2128632068634033, + "rewards/margins": 1.9534072875976562, + "rewards/rejected": -0.7405441403388977, + "step": 7513 + }, + { + "epoch": 0.44, + "learning_rate": 6.245786046440675e-08, + "logits/chosen": -2.0783400535583496, + "logits/rejected": -2.0562281608581543, + "logps/chosen": -231.29080200195312, + "logps/rejected": -419.4977111816406, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.799490451812744, + "rewards/margins": 2.80609130859375, + "rewards/rejected": 0.9933990836143494, + "step": 7514 + }, + { + "epoch": 0.44, + "learning_rate": 6.244873342408636e-08, + "logits/chosen": -1.931767225265503, + "logits/rejected": -1.9328885078430176, + "logps/chosen": -1.7965102195739746, + "logps/rejected": -115.99397277832031, + "loss": 0.4644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030336523428559303, + "rewards/margins": 0.9187871217727661, + "rewards/rejected": -0.8884506225585938, + "step": 7515 + }, + { + "epoch": 0.44, + "learning_rate": 6.243960594152604e-08, + "logits/chosen": -1.9106563329696655, + "logits/rejected": -1.8994145393371582, + "logps/chosen": -239.77406311035156, + "logps/rejected": -633.8731689453125, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3711776733398438, + "rewards/margins": 4.577104568481445, + "rewards/rejected": -2.2059266567230225, + "step": 7516 + }, + { + "epoch": 0.44, + "learning_rate": 6.24304780170501e-08, + "logits/chosen": -1.9655709266662598, + "logits/rejected": -1.9637705087661743, + "logps/chosen": -10.542797088623047, + "logps/rejected": -172.95187377929688, + "loss": 0.5429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2756354510784149, + "rewards/margins": 0.2423774003982544, + "rewards/rejected": 0.03325805813074112, + "step": 7517 + }, + { + "epoch": 0.44, + "learning_rate": 6.242134965098275e-08, + "logits/chosen": -1.9274303913116455, + "logits/rejected": -1.9783108234405518, + "logps/chosen": -290.982177734375, + "logps/rejected": -316.10791015625, + "loss": 0.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8030518293380737, + "rewards/margins": 1.7475464344024658, + "rewards/rejected": 0.05550537258386612, + "step": 7518 + }, + { + "epoch": 0.44, + "learning_rate": 6.24122208436483e-08, + "logits/chosen": -1.8140202760696411, + "logits/rejected": -1.8191457986831665, + "logps/chosen": -0.0032475371845066547, + "logps/rejected": -310.2098083496094, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018659366469364613, + "rewards/margins": 5.5105695724487305, + "rewards/rejected": -5.510756015777588, + "step": 7519 + }, + { + "epoch": 0.44, + "learning_rate": 6.240309159537105e-08, + "logits/chosen": -1.886586308479309, + "logits/rejected": -1.8693618774414062, + "logps/chosen": -188.8006591796875, + "logps/rejected": -359.7914123535156, + "loss": 0.0918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.878021240234375, + "rewards/margins": 2.010894775390625, + "rewards/rejected": -0.13287353515625, + "step": 7520 + }, + { + "epoch": 0.44, + "learning_rate": 6.239396190647534e-08, + "logits/chosen": -1.952404260635376, + "logits/rejected": -1.9484837055206299, + "logps/chosen": -0.5357551574707031, + "logps/rejected": -175.6036376953125, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003346043871715665, + "rewards/margins": 3.9064395427703857, + "rewards/rejected": -3.909785509109497, + "step": 7521 + }, + { + "epoch": 0.44, + "learning_rate": 6.238483177728546e-08, + "logits/chosen": -1.7577062845230103, + "logits/rejected": -1.785383701324463, + "logps/chosen": -226.13400268554688, + "logps/rejected": -223.16400146484375, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7924515008926392, + "rewards/margins": 1.1969269514083862, + "rewards/rejected": -0.4044754207134247, + "step": 7522 + }, + { + "epoch": 0.44, + "learning_rate": 6.237570120812578e-08, + "logits/chosen": -2.058751344680786, + "logits/rejected": -2.060551643371582, + "logps/chosen": -40.12532043457031, + "logps/rejected": -184.5786590576172, + "loss": 0.3238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.073430635035038, + "rewards/margins": 2.282724618911743, + "rewards/rejected": -2.209294080734253, + "step": 7523 + }, + { + "epoch": 0.44, + "learning_rate": 6.236657019932065e-08, + "logits/chosen": -1.9134596586227417, + "logits/rejected": -1.976747989654541, + "logps/chosen": -159.50131225585938, + "logps/rejected": -306.42724609375, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9787139892578125, + "rewards/margins": 1.9278656244277954, + "rewards/rejected": -0.9491516351699829, + "step": 7524 + }, + { + "epoch": 0.44, + "learning_rate": 6.235743875119446e-08, + "logits/chosen": -2.0275728702545166, + "logits/rejected": -2.007669448852539, + "logps/chosen": -70.15404510498047, + "logps/rejected": -454.83642578125, + "loss": 0.1778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6146408319473267, + "rewards/margins": 2.0314438343048096, + "rewards/rejected": -1.416803002357483, + "step": 7525 + }, + { + "epoch": 0.44, + "learning_rate": 6.234830686407159e-08, + "logits/chosen": -1.8811852931976318, + "logits/rejected": -1.8805094957351685, + "logps/chosen": -65.13496398925781, + "logps/rejected": -233.19288635253906, + "loss": 0.1548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8744072318077087, + "rewards/margins": 3.6263420581817627, + "rewards/rejected": -2.751934766769409, + "step": 7526 + }, + { + "epoch": 0.44, + "learning_rate": 6.233917453827648e-08, + "logits/chosen": -2.0518414974212646, + "logits/rejected": -2.0825119018554688, + "logps/chosen": -240.95530700683594, + "logps/rejected": -296.81524658203125, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9259567260742188, + "rewards/margins": 2.732426404953003, + "rewards/rejected": -0.806469738483429, + "step": 7527 + }, + { + "epoch": 0.44, + "learning_rate": 6.233004177413353e-08, + "logits/chosen": -1.972608208656311, + "logits/rejected": -1.9350947141647339, + "logps/chosen": -213.71435546875, + "logps/rejected": -340.9499206542969, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6211410760879517, + "rewards/margins": 0.27523958683013916, + "rewards/rejected": 1.3459014892578125, + "step": 7528 + }, + { + "epoch": 0.44, + "learning_rate": 6.232090857196715e-08, + "logits/chosen": -1.9811346530914307, + "logits/rejected": -1.979590654373169, + "logps/chosen": -5.6625471115112305, + "logps/rejected": -35.40609359741211, + "loss": 0.7653, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.025092411786317825, + "rewards/margins": -0.3116414248943329, + "rewards/rejected": 0.28654900193214417, + "step": 7529 + }, + { + "epoch": 0.44, + "learning_rate": 6.231177493210187e-08, + "logits/chosen": -2.0798237323760986, + "logits/rejected": -2.075585126876831, + "logps/chosen": -77.61581420898438, + "logps/rejected": -274.7972106933594, + "loss": 0.4209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20533600449562073, + "rewards/margins": 1.9999269247055054, + "rewards/rejected": -2.2052628993988037, + "step": 7530 + }, + { + "epoch": 0.44, + "learning_rate": 6.23026408548621e-08, + "logits/chosen": -1.582050085067749, + "logits/rejected": -1.5412263870239258, + "logps/chosen": -140.5819091796875, + "logps/rejected": -361.51092529296875, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4698212146759033, + "rewards/margins": 2.6129486560821533, + "rewards/rejected": -0.14312744140625, + "step": 7531 + }, + { + "epoch": 0.44, + "learning_rate": 6.229350634057236e-08, + "logits/chosen": -2.023512363433838, + "logits/rejected": -2.008117437362671, + "logps/chosen": -51.97472381591797, + "logps/rejected": -231.47705078125, + "loss": 0.1725, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.146074652671814, + "rewards/margins": 1.9506981372833252, + "rewards/rejected": -0.8046234250068665, + "step": 7532 + }, + { + "epoch": 0.44, + "learning_rate": 6.228437138955712e-08, + "logits/chosen": -1.8080116510391235, + "logits/rejected": -1.8065879344940186, + "logps/chosen": -43.971466064453125, + "logps/rejected": -218.79446411132812, + "loss": 0.2498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7228439450263977, + "rewards/margins": 1.6972031593322754, + "rewards/rejected": -0.9743591547012329, + "step": 7533 + }, + { + "epoch": 0.44, + "learning_rate": 6.227523600214094e-08, + "logits/chosen": -1.7613388299942017, + "logits/rejected": -1.7462705373764038, + "logps/chosen": -95.86161804199219, + "logps/rejected": -214.09722900390625, + "loss": 0.2397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7470604181289673, + "rewards/margins": 0.9030967950820923, + "rewards/rejected": 0.843963623046875, + "step": 7534 + }, + { + "epoch": 0.44, + "learning_rate": 6.226610017864832e-08, + "logits/chosen": -1.8971434831619263, + "logits/rejected": -1.9106324911117554, + "logps/chosen": -22.811838150024414, + "logps/rejected": -125.71235656738281, + "loss": 0.4588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11597499996423721, + "rewards/margins": 1.0152807235717773, + "rewards/rejected": -0.8993057608604431, + "step": 7535 + }, + { + "epoch": 0.44, + "learning_rate": 6.225696391940382e-08, + "logits/chosen": -1.9975301027297974, + "logits/rejected": -2.008122444152832, + "logps/chosen": -7.313478469848633, + "logps/rejected": -213.07040405273438, + "loss": 0.3522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04887714609503746, + "rewards/margins": 5.733499526977539, + "rewards/rejected": -5.684622287750244, + "step": 7536 + }, + { + "epoch": 0.44, + "learning_rate": 6.224782722473201e-08, + "logits/chosen": -2.0062990188598633, + "logits/rejected": -1.9962719678878784, + "logps/chosen": -8.248610496520996, + "logps/rejected": -206.82421875, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.186625674366951, + "rewards/margins": 1.886213779449463, + "rewards/rejected": -1.6995880603790283, + "step": 7537 + }, + { + "epoch": 0.44, + "learning_rate": 6.223869009495748e-08, + "logits/chosen": -1.9851824045181274, + "logits/rejected": -1.9788215160369873, + "logps/chosen": -339.2156677246094, + "logps/rejected": -597.3348388671875, + "loss": 0.0674, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4365265369415283, + "rewards/margins": 2.192880392074585, + "rewards/rejected": 0.24364624917507172, + "step": 7538 + }, + { + "epoch": 0.44, + "learning_rate": 6.222955253040478e-08, + "logits/chosen": -1.8170793056488037, + "logits/rejected": -1.820121169090271, + "logps/chosen": -25.246463775634766, + "logps/rejected": -206.43727111816406, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3798172175884247, + "rewards/margins": 2.39422607421875, + "rewards/rejected": -2.014408826828003, + "step": 7539 + }, + { + "epoch": 0.44, + "learning_rate": 6.222041453139859e-08, + "logits/chosen": -1.9218928813934326, + "logits/rejected": -1.915048360824585, + "logps/chosen": -155.21697998046875, + "logps/rejected": -209.59820556640625, + "loss": 0.1036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.692974805831909, + "rewards/margins": 1.75843346118927, + "rewards/rejected": 0.9345413446426392, + "step": 7540 + }, + { + "epoch": 0.44, + "learning_rate": 6.221127609826348e-08, + "logits/chosen": -2.12266206741333, + "logits/rejected": -2.1234679222106934, + "logps/chosen": -0.00019442429766058922, + "logps/rejected": -94.38320922851562, + "loss": 0.7109, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.02103443472879e-06, + "rewards/margins": -0.06991821527481079, + "rewards/rejected": 0.06991119682788849, + "step": 7541 + }, + { + "epoch": 0.44, + "learning_rate": 6.220213723132411e-08, + "logits/chosen": -2.146789312362671, + "logits/rejected": -2.1547203063964844, + "logps/chosen": -0.8302919268608093, + "logps/rejected": -195.95755004882812, + "loss": 0.4457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012774157337844372, + "rewards/margins": 1.4757177829742432, + "rewards/rejected": -1.4884918928146362, + "step": 7542 + }, + { + "epoch": 0.44, + "learning_rate": 6.219299793090514e-08, + "logits/chosen": -1.8117488622665405, + "logits/rejected": -1.8129138946533203, + "logps/chosen": -52.224853515625, + "logps/rejected": -358.53973388671875, + "loss": 0.3024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1811695098876953, + "rewards/margins": 2.7785022258758545, + "rewards/rejected": -2.597332715988159, + "step": 7543 + }, + { + "epoch": 0.44, + "learning_rate": 6.218385819733125e-08, + "logits/chosen": -1.9068468809127808, + "logits/rejected": -1.9106724262237549, + "logps/chosen": -301.45379638671875, + "logps/rejected": -270.63763427734375, + "loss": 0.2569, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4630982875823975, + "rewards/margins": 0.4554901123046875, + "rewards/rejected": 2.00760817527771, + "step": 7544 + }, + { + "epoch": 0.44, + "learning_rate": 6.217471803092711e-08, + "logits/chosen": -1.866930365562439, + "logits/rejected": -1.858939528465271, + "logps/chosen": -171.04122924804688, + "logps/rejected": -237.15414428710938, + "loss": 0.2025, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7901535034179688, + "rewards/margins": 1.1037094593048096, + "rewards/rejected": 0.686444103717804, + "step": 7545 + }, + { + "epoch": 0.44, + "learning_rate": 6.216557743201743e-08, + "logits/chosen": -2.086010456085205, + "logits/rejected": -2.0868189334869385, + "logps/chosen": -18.353214263916016, + "logps/rejected": -289.96514892578125, + "loss": 0.265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15597668290138245, + "rewards/margins": 4.161216735839844, + "rewards/rejected": -4.005239963531494, + "step": 7546 + }, + { + "epoch": 0.44, + "learning_rate": 6.215643640092695e-08, + "logits/chosen": -2.0075061321258545, + "logits/rejected": -2.0053086280822754, + "logps/chosen": -217.8159942626953, + "logps/rejected": -280.94915771484375, + "loss": 0.323, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7668594121932983, + "rewards/margins": 0.27241051197052, + "rewards/rejected": 1.4944489002227783, + "step": 7547 + }, + { + "epoch": 0.44, + "learning_rate": 6.214729493798037e-08, + "logits/chosen": -1.8234697580337524, + "logits/rejected": -1.7740854024887085, + "logps/chosen": -238.22332763671875, + "logps/rejected": -512.1324462890625, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4729583263397217, + "rewards/margins": 3.0238373279571533, + "rewards/rejected": -0.5508789420127869, + "step": 7548 + }, + { + "epoch": 0.44, + "learning_rate": 6.213815304350246e-08, + "logits/chosen": -1.9199907779693604, + "logits/rejected": -1.9135164022445679, + "logps/chosen": -186.53488159179688, + "logps/rejected": -452.6345520019531, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.178305149078369, + "rewards/margins": 5.195709228515625, + "rewards/rejected": -3.017404317855835, + "step": 7549 + }, + { + "epoch": 0.44, + "learning_rate": 6.212901071781796e-08, + "logits/chosen": -2.0643815994262695, + "logits/rejected": -2.0548458099365234, + "logps/chosen": -135.4039306640625, + "logps/rejected": -313.63092041015625, + "loss": 0.1509, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0241730213165283, + "rewards/margins": 1.5664582252502441, + "rewards/rejected": 0.45771485567092896, + "step": 7550 + }, + { + "epoch": 0.44, + "learning_rate": 6.21198679612517e-08, + "logits/chosen": -2.0344443321228027, + "logits/rejected": -2.0258285999298096, + "logps/chosen": -3.145848512649536, + "logps/rejected": -221.68405151367188, + "loss": 0.412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08428948372602463, + "rewards/margins": 2.229379415512085, + "rewards/rejected": -2.313668966293335, + "step": 7551 + }, + { + "epoch": 0.44, + "learning_rate": 6.211072477412844e-08, + "logits/chosen": -1.977326512336731, + "logits/rejected": -1.958318829536438, + "logps/chosen": -219.57342529296875, + "logps/rejected": -352.8160400390625, + "loss": 0.0545, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.900762915611267, + "rewards/margins": 2.692535400390625, + "rewards/rejected": -0.7917724847793579, + "step": 7552 + }, + { + "epoch": 0.44, + "learning_rate": 6.210158115677299e-08, + "logits/chosen": -2.0020053386688232, + "logits/rejected": -2.0113015174865723, + "logps/chosen": -41.738975524902344, + "logps/rejected": -164.13214111328125, + "loss": 0.2255, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4115475416183472, + "rewards/margins": 1.12076997756958, + "rewards/rejected": 0.2907775938510895, + "step": 7553 + }, + { + "epoch": 0.44, + "learning_rate": 6.209243710951017e-08, + "logits/chosen": -1.9576336145401, + "logits/rejected": -1.8581739664077759, + "logps/chosen": -200.23696899414062, + "logps/rejected": -649.3128051757812, + "loss": 0.1388, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.08885657787323, + "rewards/margins": 3.5631608963012695, + "rewards/rejected": -2.47430419921875, + "step": 7554 + }, + { + "epoch": 0.44, + "learning_rate": 6.208329263266486e-08, + "logits/chosen": -1.9416489601135254, + "logits/rejected": -1.9321476221084595, + "logps/chosen": -34.217864990234375, + "logps/rejected": -223.44619750976562, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2127552032470703, + "rewards/margins": 1.8402622938156128, + "rewards/rejected": -1.6275070905685425, + "step": 7555 + }, + { + "epoch": 0.44, + "learning_rate": 6.207414772656185e-08, + "logits/chosen": -1.98174250125885, + "logits/rejected": -1.970619559288025, + "logps/chosen": -353.1495361328125, + "logps/rejected": -500.02691650390625, + "loss": 0.0381, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6672241687774658, + "rewards/margins": 4.268035888671875, + "rewards/rejected": -2.600811719894409, + "step": 7556 + }, + { + "epoch": 0.44, + "learning_rate": 6.206500239152609e-08, + "logits/chosen": -1.77351713180542, + "logits/rejected": -1.7710916996002197, + "logps/chosen": -24.277423858642578, + "logps/rejected": -89.96354675292969, + "loss": 0.7956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9805742502212524, + "rewards/margins": 0.9389852285385132, + "rewards/rejected": -1.9195594787597656, + "step": 7557 + }, + { + "epoch": 0.44, + "learning_rate": 6.20558566278824e-08, + "logits/chosen": -1.9364465475082397, + "logits/rejected": -1.9439566135406494, + "logps/chosen": -57.72903823852539, + "logps/rejected": -378.125244140625, + "loss": 0.13, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8999500274658203, + "rewards/margins": 2.672111988067627, + "rewards/rejected": -1.772161841392517, + "step": 7558 + }, + { + "epoch": 0.44, + "learning_rate": 6.204671043595574e-08, + "logits/chosen": -1.9524827003479004, + "logits/rejected": -1.917480230331421, + "logps/chosen": -223.05567932128906, + "logps/rejected": -427.9593505859375, + "loss": 0.7838, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.30510711669921875, + "rewards/margins": -0.7695999145507812, + "rewards/rejected": 1.07470703125, + "step": 7559 + }, + { + "epoch": 0.44, + "learning_rate": 6.203756381607097e-08, + "logits/chosen": -2.0294930934906006, + "logits/rejected": -2.011993169784546, + "logps/chosen": -60.06895065307617, + "logps/rejected": -318.5455017089844, + "loss": 0.2348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43870124220848083, + "rewards/margins": 5.2035112380981445, + "rewards/rejected": -4.764810085296631, + "step": 7560 + }, + { + "epoch": 0.44, + "learning_rate": 6.202841676855307e-08, + "logits/chosen": -1.8210958242416382, + "logits/rejected": -1.8228623867034912, + "logps/chosen": -212.46853637695312, + "logps/rejected": -342.82659912109375, + "loss": 0.2632, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7611236572265625, + "rewards/margins": 0.6619536876678467, + "rewards/rejected": 1.0991699695587158, + "step": 7561 + }, + { + "epoch": 0.44, + "learning_rate": 6.201926929372696e-08, + "logits/chosen": -1.927085041999817, + "logits/rejected": -1.9297758340835571, + "logps/chosen": -51.67702102661133, + "logps/rejected": -386.63653564453125, + "loss": 0.2522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4081276059150696, + "rewards/margins": 4.495887279510498, + "rewards/rejected": -4.087759494781494, + "step": 7562 + }, + { + "epoch": 0.44, + "learning_rate": 6.201012139191762e-08, + "logits/chosen": -1.942643642425537, + "logits/rejected": -1.9688804149627686, + "logps/chosen": -246.07656860351562, + "logps/rejected": -449.6012878417969, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.462872266769409, + "rewards/margins": 5.756735324859619, + "rewards/rejected": -3.29386305809021, + "step": 7563 + }, + { + "epoch": 0.44, + "learning_rate": 6.200097306345002e-08, + "logits/chosen": -1.9476823806762695, + "logits/rejected": -1.9629451036453247, + "logps/chosen": -327.2320556640625, + "logps/rejected": -447.68402099609375, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6563355922698975, + "rewards/margins": 5.524322509765625, + "rewards/rejected": -2.8679871559143066, + "step": 7564 + }, + { + "epoch": 0.44, + "learning_rate": 6.199182430864914e-08, + "logits/chosen": -1.7872064113616943, + "logits/rejected": -1.7481218576431274, + "logps/chosen": -177.9873504638672, + "logps/rejected": -256.287841796875, + "loss": 0.6076, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5801727175712585, + "rewards/margins": -0.20621037483215332, + "rewards/rejected": 0.7863830924034119, + "step": 7565 + }, + { + "epoch": 0.44, + "learning_rate": 6.198267512784001e-08, + "logits/chosen": -2.000877857208252, + "logits/rejected": -2.0302717685699463, + "logps/chosen": -173.4580841064453, + "logps/rejected": -282.8853759765625, + "loss": 0.1925, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2299057245254517, + "rewards/margins": 1.0374497175216675, + "rewards/rejected": 0.19245605170726776, + "step": 7566 + }, + { + "epoch": 0.44, + "learning_rate": 6.197352552134765e-08, + "logits/chosen": -1.9854521751403809, + "logits/rejected": -1.9594402313232422, + "logps/chosen": -153.28256225585938, + "logps/rejected": -413.09912109375, + "loss": 0.0558, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.807641625404358, + "rewards/margins": 3.374896287918091, + "rewards/rejected": -1.567254662513733, + "step": 7567 + }, + { + "epoch": 0.44, + "learning_rate": 6.196437548949709e-08, + "logits/chosen": -1.9014590978622437, + "logits/rejected": -1.884843111038208, + "logps/chosen": -98.35313415527344, + "logps/rejected": -248.66932678222656, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5214958190917969, + "rewards/margins": 1.495398759841919, + "rewards/rejected": -0.9739028811454773, + "step": 7568 + }, + { + "epoch": 0.44, + "learning_rate": 6.195522503261339e-08, + "logits/chosen": -2.0251708030700684, + "logits/rejected": -2.0183680057525635, + "logps/chosen": -0.06548038870096207, + "logps/rejected": -208.65542602539062, + "loss": 0.3637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00498300651088357, + "rewards/margins": 3.401779890060425, + "rewards/rejected": -3.4067628383636475, + "step": 7569 + }, + { + "epoch": 0.44, + "learning_rate": 6.194607415102161e-08, + "logits/chosen": -1.8221232891082764, + "logits/rejected": -1.7778186798095703, + "logps/chosen": -271.1016845703125, + "logps/rejected": -556.538818359375, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4207916259765625, + "rewards/margins": 0.0348968505859375, + "rewards/rejected": 2.385894775390625, + "step": 7570 + }, + { + "epoch": 0.44, + "learning_rate": 6.193692284504686e-08, + "logits/chosen": -1.774930477142334, + "logits/rejected": -1.7700170278549194, + "logps/chosen": -236.0306854248047, + "logps/rejected": -223.80654907226562, + "loss": 0.0962, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2973344326019287, + "rewards/margins": 2.247981309890747, + "rewards/rejected": 0.04935302957892418, + "step": 7571 + }, + { + "epoch": 0.44, + "learning_rate": 6.192777111501421e-08, + "logits/chosen": -1.8699078559875488, + "logits/rejected": -1.8825479745864868, + "logps/chosen": -192.66525268554688, + "logps/rejected": -387.3989562988281, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1283493041992188, + "rewards/margins": 4.5355119705200195, + "rewards/rejected": -2.4071624279022217, + "step": 7572 + }, + { + "epoch": 0.44, + "learning_rate": 6.191861896124879e-08, + "logits/chosen": -2.0766284465789795, + "logits/rejected": -2.0616798400878906, + "logps/chosen": -59.46038818359375, + "logps/rejected": -318.4836730957031, + "loss": 0.0954, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.254571557044983, + "rewards/margins": 3.540872097015381, + "rewards/rejected": -2.2863006591796875, + "step": 7573 + }, + { + "epoch": 0.44, + "learning_rate": 6.190946638407572e-08, + "logits/chosen": -1.885608434677124, + "logits/rejected": -1.8956763744354248, + "logps/chosen": -54.67210006713867, + "logps/rejected": -300.1341247558594, + "loss": 0.2531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7731181979179382, + "rewards/margins": 1.4125560522079468, + "rewards/rejected": -0.6394378542900085, + "step": 7574 + }, + { + "epoch": 0.44, + "learning_rate": 6.190031338382014e-08, + "logits/chosen": -1.6609572172164917, + "logits/rejected": -1.6660566329956055, + "logps/chosen": -41.601104736328125, + "logps/rejected": -175.6099853515625, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5040332674980164, + "rewards/margins": 2.780806541442871, + "rewards/rejected": -2.27677321434021, + "step": 7575 + }, + { + "epoch": 0.44, + "learning_rate": 6.189115996080724e-08, + "logits/chosen": -1.944191336631775, + "logits/rejected": -1.9375722408294678, + "logps/chosen": -0.009973968379199505, + "logps/rejected": -94.36032104492188, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007134905899874866, + "rewards/margins": 1.4392884969711304, + "rewards/rejected": -1.438575029373169, + "step": 7576 + }, + { + "epoch": 0.44, + "learning_rate": 6.188200611536215e-08, + "logits/chosen": -1.8776767253875732, + "logits/rejected": -1.8675400018692017, + "logps/chosen": -207.68917846679688, + "logps/rejected": -446.8577575683594, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9264999628067017, + "rewards/margins": 2.084681749343872, + "rewards/rejected": -0.15818177163600922, + "step": 7577 + }, + { + "epoch": 0.44, + "learning_rate": 6.18728518478101e-08, + "logits/chosen": -1.6954002380371094, + "logits/rejected": -1.670078158378601, + "logps/chosen": -207.99591064453125, + "logps/rejected": -366.4988708496094, + "loss": 0.1582, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4605621099472046, + "rewards/margins": 1.7687225341796875, + "rewards/rejected": -0.3081603944301605, + "step": 7578 + }, + { + "epoch": 0.44, + "learning_rate": 6.186369715847625e-08, + "logits/chosen": -1.975297451019287, + "logits/rejected": -1.9750163555145264, + "logps/chosen": -20.29793930053711, + "logps/rejected": -201.21234130859375, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14744797348976135, + "rewards/margins": 2.1529030799865723, + "rewards/rejected": -2.0054550170898438, + "step": 7579 + }, + { + "epoch": 0.44, + "learning_rate": 6.185454204768586e-08, + "logits/chosen": -1.786101222038269, + "logits/rejected": -1.774423360824585, + "logps/chosen": -59.639434814453125, + "logps/rejected": -335.7975158691406, + "loss": 0.1654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6354660391807556, + "rewards/margins": 5.110823154449463, + "rewards/rejected": -4.4753570556640625, + "step": 7580 + }, + { + "epoch": 0.44, + "learning_rate": 6.184538651576415e-08, + "logits/chosen": -2.0732805728912354, + "logits/rejected": -2.072145462036133, + "logps/chosen": -26.248916625976562, + "logps/rejected": -139.22317504882812, + "loss": 0.5304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0282453540712595, + "rewards/margins": 0.6755750775337219, + "rewards/rejected": -0.6473297476768494, + "step": 7581 + }, + { + "epoch": 0.44, + "learning_rate": 6.183623056303638e-08, + "logits/chosen": -2.0228874683380127, + "logits/rejected": -2.0200698375701904, + "logps/chosen": -4.756420821649954e-05, + "logps/rejected": -174.08203125, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1682412832669797e-06, + "rewards/margins": 5.170548915863037, + "rewards/rejected": -5.1705498695373535, + "step": 7582 + }, + { + "epoch": 0.44, + "learning_rate": 6.182707418982779e-08, + "logits/chosen": -2.1911613941192627, + "logits/rejected": -2.17460298538208, + "logps/chosen": -34.27549743652344, + "logps/rejected": -287.267822265625, + "loss": 0.4114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37123337388038635, + "rewards/margins": 2.793415069580078, + "rewards/rejected": -3.1646485328674316, + "step": 7583 + }, + { + "epoch": 0.44, + "learning_rate": 6.181791739646368e-08, + "logits/chosen": -1.7250149250030518, + "logits/rejected": -1.7179032564163208, + "logps/chosen": -137.08047485351562, + "logps/rejected": -347.645263671875, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3667312860488892, + "rewards/margins": 3.1514482498168945, + "rewards/rejected": -1.7847168445587158, + "step": 7584 + }, + { + "epoch": 0.44, + "learning_rate": 6.180876018326933e-08, + "logits/chosen": -1.9773048162460327, + "logits/rejected": -1.9616501331329346, + "logps/chosen": -34.011932373046875, + "logps/rejected": -199.21868896484375, + "loss": 0.3766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23134461045265198, + "rewards/margins": 1.4657471179962158, + "rewards/rejected": -1.2344025373458862, + "step": 7585 + }, + { + "epoch": 0.44, + "learning_rate": 6.179960255057006e-08, + "logits/chosen": -1.977699637413025, + "logits/rejected": -1.9707118272781372, + "logps/chosen": -39.12227249145508, + "logps/rejected": -210.85723876953125, + "loss": 0.1466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.886313259601593, + "rewards/margins": 3.3675618171691895, + "rewards/rejected": -2.481248617172241, + "step": 7586 + }, + { + "epoch": 0.44, + "learning_rate": 6.179044449869119e-08, + "logits/chosen": -2.167937755584717, + "logits/rejected": -2.120244026184082, + "logps/chosen": -171.44638061523438, + "logps/rejected": -366.4693603515625, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.087393283843994, + "rewards/margins": 0.40418708324432373, + "rewards/rejected": 1.6832062005996704, + "step": 7587 + }, + { + "epoch": 0.44, + "learning_rate": 6.178128602795805e-08, + "logits/chosen": -2.080772876739502, + "logits/rejected": -2.0787289142608643, + "logps/chosen": -41.40993881225586, + "logps/rejected": -193.7823486328125, + "loss": 0.3829, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26869621872901917, + "rewards/margins": 1.0992473363876343, + "rewards/rejected": -0.8305511474609375, + "step": 7588 + }, + { + "epoch": 0.44, + "learning_rate": 6.177212713869601e-08, + "logits/chosen": -2.0309112071990967, + "logits/rejected": -2.02069354057312, + "logps/chosen": -37.37028503417969, + "logps/rejected": -245.0460205078125, + "loss": 0.3703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002979278564453125, + "rewards/margins": 3.2387855052948, + "rewards/rejected": -3.241764783859253, + "step": 7589 + }, + { + "epoch": 0.44, + "learning_rate": 6.176296783123042e-08, + "logits/chosen": -1.9742943048477173, + "logits/rejected": -1.9635282754898071, + "logps/chosen": -46.26557540893555, + "logps/rejected": -194.78628540039062, + "loss": 0.5393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5097114443778992, + "rewards/margins": 1.667478084564209, + "rewards/rejected": -2.177189588546753, + "step": 7590 + }, + { + "epoch": 0.44, + "learning_rate": 6.175380810588668e-08, + "logits/chosen": -1.9366261959075928, + "logits/rejected": -1.9369611740112305, + "logps/chosen": -13.34223461151123, + "logps/rejected": -121.28173065185547, + "loss": 0.7509, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10128288716077805, + "rewards/margins": -0.45015066862106323, + "rewards/rejected": 0.5514335632324219, + "step": 7591 + }, + { + "epoch": 0.44, + "learning_rate": 6.174464796299019e-08, + "logits/chosen": -1.9521464109420776, + "logits/rejected": -1.9835178852081299, + "logps/chosen": -215.09725952148438, + "logps/rejected": -482.3187255859375, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3646668195724487, + "rewards/margins": 3.697579860687256, + "rewards/rejected": -2.3329131603240967, + "step": 7592 + }, + { + "epoch": 0.44, + "learning_rate": 6.173548740286635e-08, + "logits/chosen": -1.9027128219604492, + "logits/rejected": -1.8871225118637085, + "logps/chosen": -196.18988037109375, + "logps/rejected": -281.6077575683594, + "loss": 0.2594, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4835221767425537, + "rewards/margins": 0.44778919219970703, + "rewards/rejected": 2.0357329845428467, + "step": 7593 + }, + { + "epoch": 0.44, + "learning_rate": 6.172632642584058e-08, + "logits/chosen": -1.9737045764923096, + "logits/rejected": -1.9737783670425415, + "logps/chosen": -166.53533935546875, + "logps/rejected": -229.90048217773438, + "loss": 0.2703, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4309204816818237, + "rewards/margins": 0.9222824573516846, + "rewards/rejected": 0.5086380243301392, + "step": 7594 + }, + { + "epoch": 0.44, + "learning_rate": 6.171716503223836e-08, + "logits/chosen": -1.997727870941162, + "logits/rejected": -1.9684922695159912, + "logps/chosen": -56.706912994384766, + "logps/rejected": -251.63836669921875, + "loss": 0.1563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6483379602432251, + "rewards/margins": 3.332310199737549, + "rewards/rejected": -2.683972120285034, + "step": 7595 + }, + { + "epoch": 0.44, + "learning_rate": 6.17080032223851e-08, + "logits/chosen": -1.9766863584518433, + "logits/rejected": -1.983633279800415, + "logps/chosen": -201.48150634765625, + "logps/rejected": -305.2508544921875, + "loss": 0.1469, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.207928419113159, + "rewards/margins": 1.3362243175506592, + "rewards/rejected": 0.8717041015625, + "step": 7596 + }, + { + "epoch": 0.44, + "learning_rate": 6.169884099660632e-08, + "logits/chosen": -1.9183472394943237, + "logits/rejected": -1.9423922300338745, + "logps/chosen": -233.50079345703125, + "logps/rejected": -531.1845703125, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5911377668380737, + "rewards/margins": 4.944909572601318, + "rewards/rejected": -3.353771924972534, + "step": 7597 + }, + { + "epoch": 0.44, + "learning_rate": 6.168967835522745e-08, + "logits/chosen": -2.0511245727539062, + "logits/rejected": -2.0495026111602783, + "logps/chosen": -88.80207824707031, + "logps/rejected": -299.88470458984375, + "loss": 0.1846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8018936514854431, + "rewards/margins": 4.469172477722168, + "rewards/rejected": -3.667279005050659, + "step": 7598 + }, + { + "epoch": 0.44, + "learning_rate": 6.168051529857405e-08, + "logits/chosen": -2.036492347717285, + "logits/rejected": -1.9759358167648315, + "logps/chosen": -175.3994140625, + "logps/rejected": -294.8890075683594, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.109356641769409, + "rewards/margins": 2.8242766857147217, + "rewards/rejected": -0.7149200439453125, + "step": 7599 + }, + { + "epoch": 0.44, + "learning_rate": 6.167135182697158e-08, + "logits/chosen": -1.8876417875289917, + "logits/rejected": -1.8759750127792358, + "logps/chosen": -76.6668701171875, + "logps/rejected": -254.6312255859375, + "loss": 0.1088, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2785767316818237, + "rewards/margins": 3.4417452812194824, + "rewards/rejected": -2.163168430328369, + "step": 7600 + }, + { + "epoch": 0.44, + "learning_rate": 6.166218794074564e-08, + "logits/chosen": -1.9473451375961304, + "logits/rejected": -1.9166840314865112, + "logps/chosen": -115.7862319946289, + "logps/rejected": -277.7124328613281, + "loss": 0.2045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4338417053222656, + "rewards/margins": 4.223104476928711, + "rewards/rejected": -3.789262533187866, + "step": 7601 + }, + { + "epoch": 0.44, + "learning_rate": 6.165302364022172e-08, + "logits/chosen": -1.8367350101470947, + "logits/rejected": -1.8412292003631592, + "logps/chosen": -2.331287384033203, + "logps/rejected": -62.42823791503906, + "loss": 0.6512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021938873454928398, + "rewards/margins": 0.16281569004058838, + "rewards/rejected": -0.18475456535816193, + "step": 7602 + }, + { + "epoch": 0.44, + "learning_rate": 6.164385892572541e-08, + "logits/chosen": -2.0886826515197754, + "logits/rejected": -2.073823928833008, + "logps/chosen": -78.70887756347656, + "logps/rejected": -481.05084228515625, + "loss": 0.0792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5872681140899658, + "rewards/margins": 5.999636650085449, + "rewards/rejected": -4.4123687744140625, + "step": 7603 + }, + { + "epoch": 0.44, + "learning_rate": 6.163469379758226e-08, + "logits/chosen": -1.9911084175109863, + "logits/rejected": -1.9924777746200562, + "logps/chosen": -112.85679626464844, + "logps/rejected": -191.87844848632812, + "loss": 0.8869, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.28185731172561646, + "rewards/margins": -0.7708496451377869, + "rewards/rejected": 1.0527069568634033, + "step": 7604 + }, + { + "epoch": 0.44, + "learning_rate": 6.162552825611789e-08, + "logits/chosen": -1.8781766891479492, + "logits/rejected": -1.8852338790893555, + "logps/chosen": -38.11132049560547, + "logps/rejected": -408.47662353515625, + "loss": 0.2187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6917125582695007, + "rewards/margins": 5.079975605010986, + "rewards/rejected": -4.38826322555542, + "step": 7605 + }, + { + "epoch": 0.44, + "learning_rate": 6.161636230165787e-08, + "logits/chosen": -1.9016622304916382, + "logits/rejected": -1.8527597188949585, + "logps/chosen": -205.76141357421875, + "logps/rejected": -392.2532958984375, + "loss": 0.0359, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.523555040359497, + "rewards/margins": 2.911360263824463, + "rewards/rejected": 0.612194836139679, + "step": 7606 + }, + { + "epoch": 0.44, + "learning_rate": 6.160719593452786e-08, + "logits/chosen": -1.97183358669281, + "logits/rejected": -1.9993761777877808, + "logps/chosen": -225.02529907226562, + "logps/rejected": -365.50909423828125, + "loss": 0.2234, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0940841436386108, + "rewards/margins": 1.1765273809432983, + "rewards/rejected": -0.0824432373046875, + "step": 7607 + }, + { + "epoch": 0.44, + "learning_rate": 6.159802915505346e-08, + "logits/chosen": -2.0334300994873047, + "logits/rejected": -1.9846299886703491, + "logps/chosen": -112.03811645507812, + "logps/rejected": -378.07574462890625, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3544601202011108, + "rewards/margins": 2.5342636108398438, + "rewards/rejected": -1.179803490638733, + "step": 7608 + }, + { + "epoch": 0.44, + "learning_rate": 6.158886196356034e-08, + "logits/chosen": -2.0537450313568115, + "logits/rejected": -2.0524070262908936, + "logps/chosen": -16.262775421142578, + "logps/rejected": -229.3389892578125, + "loss": 0.3714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06783294677734375, + "rewards/margins": 3.2749695777893066, + "rewards/rejected": -3.3428025245666504, + "step": 7609 + }, + { + "epoch": 0.44, + "learning_rate": 6.157969436037415e-08, + "logits/chosen": -1.8102506399154663, + "logits/rejected": -1.8415981531143188, + "logps/chosen": -209.45611572265625, + "logps/rejected": -430.25927734375, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0249359607696533, + "rewards/margins": 2.177142381668091, + "rewards/rejected": -0.1522064208984375, + "step": 7610 + }, + { + "epoch": 0.44, + "learning_rate": 6.157052634582058e-08, + "logits/chosen": -1.9969476461410522, + "logits/rejected": -1.9940259456634521, + "logps/chosen": -5.7696845033206046e-05, + "logps/rejected": -139.17218017578125, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2159216566942632e-06, + "rewards/margins": 1.7597438097000122, + "rewards/rejected": -1.7597450017929077, + "step": 7611 + }, + { + "epoch": 0.44, + "learning_rate": 6.156135792022532e-08, + "logits/chosen": -1.9107130765914917, + "logits/rejected": -1.8764712810516357, + "logps/chosen": -106.83015441894531, + "logps/rejected": -304.38763427734375, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3724464476108551, + "rewards/margins": 3.935697317123413, + "rewards/rejected": -3.563250780105591, + "step": 7612 + }, + { + "epoch": 0.44, + "learning_rate": 6.155218908391406e-08, + "logits/chosen": -1.8143287897109985, + "logits/rejected": -1.8129209280014038, + "logps/chosen": -237.04052734375, + "logps/rejected": -329.9128723144531, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.316326856613159, + "rewards/margins": 3.7854583263397217, + "rewards/rejected": -1.4691314697265625, + "step": 7613 + }, + { + "epoch": 0.44, + "learning_rate": 6.154301983721255e-08, + "logits/chosen": -2.052042007446289, + "logits/rejected": -2.0463852882385254, + "logps/chosen": -125.48471069335938, + "logps/rejected": -319.04412841796875, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3242263793945312, + "rewards/margins": 2.058638095855713, + "rewards/rejected": -0.7344116568565369, + "step": 7614 + }, + { + "epoch": 0.44, + "learning_rate": 6.15338501804465e-08, + "logits/chosen": -1.9053804874420166, + "logits/rejected": -1.901245355606079, + "logps/chosen": -177.34396362304688, + "logps/rejected": -294.557373046875, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8589950799942017, + "rewards/margins": 0.7116775512695312, + "rewards/rejected": 0.14731751382350922, + "step": 7615 + }, + { + "epoch": 0.44, + "learning_rate": 6.15246801139417e-08, + "logits/chosen": -2.104003667831421, + "logits/rejected": -2.0950634479522705, + "logps/chosen": -109.00901794433594, + "logps/rejected": -310.6768493652344, + "loss": 0.2582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16287003457546234, + "rewards/margins": 3.1423377990722656, + "rewards/rejected": -2.9794678688049316, + "step": 7616 + }, + { + "epoch": 0.44, + "learning_rate": 6.151550963802386e-08, + "logits/chosen": -2.0165319442749023, + "logits/rejected": -2.024726629257202, + "logps/chosen": -236.9166717529297, + "logps/rejected": -389.493408203125, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3305680751800537, + "rewards/margins": 3.555128574371338, + "rewards/rejected": -1.2245606184005737, + "step": 7617 + }, + { + "epoch": 0.44, + "learning_rate": 6.150633875301879e-08, + "logits/chosen": -1.8353426456451416, + "logits/rejected": -1.8035707473754883, + "logps/chosen": -297.8907775878906, + "logps/rejected": -407.9920349121094, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0849273204803467, + "rewards/margins": 0.36837756633758545, + "rewards/rejected": 1.7165497541427612, + "step": 7618 + }, + { + "epoch": 0.44, + "learning_rate": 6.14971674592523e-08, + "logits/chosen": -2.1046204566955566, + "logits/rejected": -2.0943262577056885, + "logps/chosen": -227.28651428222656, + "logps/rejected": -371.7528381347656, + "loss": 0.0686, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2048537731170654, + "rewards/margins": 3.127854824066162, + "rewards/rejected": -0.9230011105537415, + "step": 7619 + }, + { + "epoch": 0.44, + "learning_rate": 6.148799575705017e-08, + "logits/chosen": -2.0736703872680664, + "logits/rejected": -2.0605862140655518, + "logps/chosen": -72.93110656738281, + "logps/rejected": -162.70465087890625, + "loss": 0.5599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2700538635253906, + "rewards/margins": 0.8505882024765015, + "rewards/rejected": -1.120642066001892, + "step": 7620 + }, + { + "epoch": 0.44, + "learning_rate": 6.147882364673823e-08, + "logits/chosen": -1.8813189268112183, + "logits/rejected": -1.8848919868469238, + "logps/chosen": -2.436816692352295, + "logps/rejected": -13.219228744506836, + "loss": 0.6849, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04910022020339966, + "rewards/margins": -0.01722804084420204, + "rewards/rejected": -0.03187217935919762, + "step": 7621 + }, + { + "epoch": 0.44, + "learning_rate": 6.146965112864235e-08, + "logits/chosen": -2.0470900535583496, + "logits/rejected": -2.0530364513397217, + "logps/chosen": -0.2152102291584015, + "logps/rejected": -45.46774673461914, + "loss": 0.6301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005226698704063892, + "rewards/margins": 0.10359593480825424, + "rewards/rejected": -0.10882263630628586, + "step": 7622 + }, + { + "epoch": 0.44, + "learning_rate": 6.146047820308833e-08, + "logits/chosen": -1.9685595035552979, + "logits/rejected": -1.962826132774353, + "logps/chosen": -189.84652709960938, + "logps/rejected": -344.1219482421875, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0129059553146362, + "rewards/margins": 0.9878754615783691, + "rewards/rejected": 0.02503051795065403, + "step": 7623 + }, + { + "epoch": 0.44, + "learning_rate": 6.145130487040209e-08, + "logits/chosen": -2.0568060874938965, + "logits/rejected": -2.0527660846710205, + "logps/chosen": -50.02330017089844, + "logps/rejected": -243.702392578125, + "loss": 0.1935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6751427054405212, + "rewards/margins": 3.0062355995178223, + "rewards/rejected": -2.3310928344726562, + "step": 7624 + }, + { + "epoch": 0.44, + "learning_rate": 6.144213113090947e-08, + "logits/chosen": -1.8479030132293701, + "logits/rejected": -1.7271448373794556, + "logps/chosen": -241.43075561523438, + "logps/rejected": -529.2009887695312, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9614319801330566, + "rewards/margins": 3.6077942848205566, + "rewards/rejected": -0.6463623046875, + "step": 7625 + }, + { + "epoch": 0.44, + "learning_rate": 6.143295698493639e-08, + "logits/chosen": -1.8958121538162231, + "logits/rejected": -1.9079351425170898, + "logps/chosen": -192.8858642578125, + "logps/rejected": -308.29205322265625, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.382045030593872, + "rewards/margins": 2.4961533546447754, + "rewards/rejected": -1.1141083240509033, + "step": 7626 + }, + { + "epoch": 0.44, + "learning_rate": 6.142378243280874e-08, + "logits/chosen": -1.9362355470657349, + "logits/rejected": -1.934105634689331, + "logps/chosen": -23.667896270751953, + "logps/rejected": -170.01980590820312, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27408885955810547, + "rewards/margins": 1.6566511392593384, + "rewards/rejected": -1.382562279701233, + "step": 7627 + }, + { + "epoch": 0.44, + "learning_rate": 6.141460747485248e-08, + "logits/chosen": -1.9201467037200928, + "logits/rejected": -1.8034429550170898, + "logps/chosen": -277.51287841796875, + "logps/rejected": -935.6343994140625, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.519018530845642, + "rewards/margins": 7.354559421539307, + "rewards/rejected": -5.835540771484375, + "step": 7628 + }, + { + "epoch": 0.44, + "learning_rate": 6.140543211139352e-08, + "logits/chosen": -2.041104316711426, + "logits/rejected": -2.0362613201141357, + "logps/chosen": -14.97281551361084, + "logps/rejected": -153.29408264160156, + "loss": 0.5191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018741894513368607, + "rewards/margins": 0.8043162822723389, + "rewards/rejected": -0.7855743765830994, + "step": 7629 + }, + { + "epoch": 0.44, + "learning_rate": 6.139625634275782e-08, + "logits/chosen": -1.7820219993591309, + "logits/rejected": -1.7789552211761475, + "logps/chosen": -11.654393196105957, + "logps/rejected": -181.56907653808594, + "loss": 0.305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21418876945972443, + "rewards/margins": 3.402529716491699, + "rewards/rejected": -3.188340902328491, + "step": 7630 + }, + { + "epoch": 0.44, + "learning_rate": 6.138708016927136e-08, + "logits/chosen": -1.8783425092697144, + "logits/rejected": -1.898123860359192, + "logps/chosen": -280.6187744140625, + "logps/rejected": -381.2490234375, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.69830322265625, + "rewards/margins": 2.168750047683716, + "rewards/rejected": 0.529553234577179, + "step": 7631 + }, + { + "epoch": 0.44, + "learning_rate": 6.137790359126009e-08, + "logits/chosen": -1.9471218585968018, + "logits/rejected": -1.935549020767212, + "logps/chosen": -0.0065523614175617695, + "logps/rejected": -276.51873779296875, + "loss": 0.3476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005101685528643429, + "rewards/margins": 3.528611183166504, + "rewards/rejected": -3.5291213989257812, + "step": 7632 + }, + { + "epoch": 0.44, + "learning_rate": 6.136872660905005e-08, + "logits/chosen": -1.9531406164169312, + "logits/rejected": -1.9450840950012207, + "logps/chosen": -9.934598922729492, + "logps/rejected": -239.0584716796875, + "loss": 0.3528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01857147179543972, + "rewards/margins": 4.072569370269775, + "rewards/rejected": -4.053997993469238, + "step": 7633 + }, + { + "epoch": 0.44, + "learning_rate": 6.135954922296722e-08, + "logits/chosen": -1.8524285554885864, + "logits/rejected": -1.8510996103286743, + "logps/chosen": -45.41077423095703, + "logps/rejected": -244.87875366210938, + "loss": 0.2456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2923969328403473, + "rewards/margins": 2.118147611618042, + "rewards/rejected": -1.825750708580017, + "step": 7634 + }, + { + "epoch": 0.44, + "learning_rate": 6.135037143333763e-08, + "logits/chosen": -1.7337009906768799, + "logits/rejected": -1.6670328378677368, + "logps/chosen": -221.83163452148438, + "logps/rejected": -585.55419921875, + "loss": 0.0758, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8097671270370483, + "rewards/margins": 2.6143202781677246, + "rewards/rejected": -0.804553210735321, + "step": 7635 + }, + { + "epoch": 0.44, + "learning_rate": 6.134119324048734e-08, + "logits/chosen": -1.9856245517730713, + "logits/rejected": -2.0269858837127686, + "logps/chosen": -292.52886962890625, + "logps/rejected": -513.5458984375, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.999841332435608, + "rewards/margins": 4.752368450164795, + "rewards/rejected": -2.7525269985198975, + "step": 7636 + }, + { + "epoch": 0.44, + "learning_rate": 6.133201464474238e-08, + "logits/chosen": -1.728727102279663, + "logits/rejected": -1.7326815128326416, + "logps/chosen": -41.66785430908203, + "logps/rejected": -109.9828872680664, + "loss": 0.4537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4173656404018402, + "rewards/margins": 0.5546611547470093, + "rewards/rejected": -0.13729552924633026, + "step": 7637 + }, + { + "epoch": 0.44, + "learning_rate": 6.132283564642883e-08, + "logits/chosen": -1.640671730041504, + "logits/rejected": -1.6415661573410034, + "logps/chosen": -216.76553344726562, + "logps/rejected": -308.7115478515625, + "loss": 0.1105, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.36118483543396, + "rewards/margins": 1.8685517311096191, + "rewards/rejected": 0.49263307452201843, + "step": 7638 + }, + { + "epoch": 0.44, + "learning_rate": 6.131365624587277e-08, + "logits/chosen": -1.8235007524490356, + "logits/rejected": -1.8287901878356934, + "logps/chosen": -11.106958389282227, + "logps/rejected": -261.41900634765625, + "loss": 0.4168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14187327027320862, + "rewards/margins": 2.9254958629608154, + "rewards/rejected": -3.067369222640991, + "step": 7639 + }, + { + "epoch": 0.44, + "learning_rate": 6.13044764434003e-08, + "logits/chosen": -2.1567635536193848, + "logits/rejected": -2.155822992324829, + "logps/chosen": -2.1578900814056396, + "logps/rejected": -66.02074432373047, + "loss": 0.5195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0625544935464859, + "rewards/margins": 0.8943554759025574, + "rewards/rejected": -0.9569099545478821, + "step": 7640 + }, + { + "epoch": 0.44, + "learning_rate": 6.129529623933753e-08, + "logits/chosen": -1.9632043838500977, + "logits/rejected": -1.9950617551803589, + "logps/chosen": -171.44229125976562, + "logps/rejected": -293.0601806640625, + "loss": 0.2055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9693481922149658, + "rewards/margins": 0.8382294178009033, + "rewards/rejected": 1.1311187744140625, + "step": 7641 + }, + { + "epoch": 0.44, + "learning_rate": 6.128611563401058e-08, + "logits/chosen": -2.0045721530914307, + "logits/rejected": -2.005871534347534, + "logps/chosen": -4.943370819091797, + "logps/rejected": -51.18364715576172, + "loss": 0.6053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3527350127696991, + "rewards/margins": 0.9046612977981567, + "rewards/rejected": -1.2573963403701782, + "step": 7642 + }, + { + "epoch": 0.44, + "learning_rate": 6.127693462774562e-08, + "logits/chosen": -1.973303198814392, + "logits/rejected": -1.9681581258773804, + "logps/chosen": -21.806547164916992, + "logps/rejected": -146.17039489746094, + "loss": 0.2422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41628894209861755, + "rewards/margins": 2.8744401931762695, + "rewards/rejected": -2.458151340484619, + "step": 7643 + }, + { + "epoch": 0.44, + "learning_rate": 6.126775322086876e-08, + "logits/chosen": -2.128812074661255, + "logits/rejected": -2.0813093185424805, + "logps/chosen": -172.61270141601562, + "logps/rejected": -304.9355163574219, + "loss": 0.09, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1750290393829346, + "rewards/margins": 2.5323777198791504, + "rewards/rejected": -0.35734865069389343, + "step": 7644 + }, + { + "epoch": 0.44, + "learning_rate": 6.12585714137062e-08, + "logits/chosen": -2.0110294818878174, + "logits/rejected": -2.0133512020111084, + "logps/chosen": -89.30816650390625, + "logps/rejected": -236.7180938720703, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2743164002895355, + "rewards/margins": 4.159493923187256, + "rewards/rejected": -3.8851776123046875, + "step": 7645 + }, + { + "epoch": 0.44, + "learning_rate": 6.12493892065841e-08, + "logits/chosen": -1.8495166301727295, + "logits/rejected": -1.828744649887085, + "logps/chosen": -254.04055786132812, + "logps/rejected": -371.3623046875, + "loss": 0.2277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1782257556915283, + "rewards/margins": 1.3839203119277954, + "rewards/rejected": -0.20569458603858948, + "step": 7646 + }, + { + "epoch": 0.45, + "learning_rate": 6.124020659982868e-08, + "logits/chosen": -1.868434190750122, + "logits/rejected": -1.852925419807434, + "logps/chosen": -56.377838134765625, + "logps/rejected": -183.568115234375, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4791141450405121, + "rewards/margins": 2.9136784076690674, + "rewards/rejected": -2.4345643520355225, + "step": 7647 + }, + { + "epoch": 0.45, + "learning_rate": 6.123102359376614e-08, + "logits/chosen": -1.8797141313552856, + "logits/rejected": -1.8815144300460815, + "logps/chosen": -5.459451198577881, + "logps/rejected": -147.9071807861328, + "loss": 0.5365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12638822197914124, + "rewards/margins": 0.9265644550323486, + "rewards/rejected": -1.0529526472091675, + "step": 7648 + }, + { + "epoch": 0.45, + "learning_rate": 6.122184018872269e-08, + "logits/chosen": -2.0103719234466553, + "logits/rejected": -2.013239860534668, + "logps/chosen": -63.31970977783203, + "logps/rejected": -227.40447998046875, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.403631567955017, + "rewards/margins": 3.071938991546631, + "rewards/rejected": -1.6683075428009033, + "step": 7649 + }, + { + "epoch": 0.45, + "learning_rate": 6.121265638502461e-08, + "logits/chosen": -1.9414726495742798, + "logits/rejected": -1.9416511058807373, + "logps/chosen": -2.2511839866638184, + "logps/rejected": -98.01494598388672, + "loss": 0.3742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22918234765529633, + "rewards/margins": 1.6799697875976562, + "rewards/rejected": -1.4507874250411987, + "step": 7650 + }, + { + "epoch": 0.45, + "learning_rate": 6.120347218299811e-08, + "logits/chosen": -1.7517255544662476, + "logits/rejected": -1.747029185295105, + "logps/chosen": -33.2645263671875, + "logps/rejected": -221.78952026367188, + "loss": 0.1116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2297229766845703, + "rewards/margins": 3.8274312019348145, + "rewards/rejected": -2.597708225250244, + "step": 7651 + }, + { + "epoch": 0.45, + "learning_rate": 6.119428758296947e-08, + "logits/chosen": -1.682973861694336, + "logits/rejected": -1.6422938108444214, + "logps/chosen": -213.09500122070312, + "logps/rejected": -331.6166687011719, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7944886684417725, + "rewards/margins": 2.6265718936920166, + "rewards/rejected": 0.16791687905788422, + "step": 7652 + }, + { + "epoch": 0.45, + "learning_rate": 6.118510258526499e-08, + "logits/chosen": -1.9610954523086548, + "logits/rejected": -1.9473384618759155, + "logps/chosen": -215.97637939453125, + "logps/rejected": -353.28924560546875, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8954452276229858, + "rewards/margins": 2.273960828781128, + "rewards/rejected": -0.3785156309604645, + "step": 7653 + }, + { + "epoch": 0.45, + "learning_rate": 6.117591719021096e-08, + "logits/chosen": -1.749433994293213, + "logits/rejected": -1.7419579029083252, + "logps/chosen": -150.55242919921875, + "logps/rejected": -428.23291015625, + "loss": 0.1926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.642596423625946, + "rewards/margins": 1.8432250022888184, + "rewards/rejected": -1.200628638267517, + "step": 7654 + }, + { + "epoch": 0.45, + "learning_rate": 6.116673139813366e-08, + "logits/chosen": -1.8326807022094727, + "logits/rejected": -1.848400592803955, + "logps/chosen": -227.90875244140625, + "logps/rejected": -451.7269287109375, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4023683071136475, + "rewards/margins": 3.5693359375, + "rewards/rejected": -1.166967749595642, + "step": 7655 + }, + { + "epoch": 0.45, + "learning_rate": 6.115754520935947e-08, + "logits/chosen": -1.9243738651275635, + "logits/rejected": -1.9207425117492676, + "logps/chosen": -184.99398803710938, + "logps/rejected": -346.0132141113281, + "loss": 0.2064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8879913091659546, + "rewards/margins": 0.7952971458435059, + "rewards/rejected": 1.0926941633224487, + "step": 7656 + }, + { + "epoch": 0.45, + "learning_rate": 6.114835862421468e-08, + "logits/chosen": -1.7810211181640625, + "logits/rejected": -1.7722381353378296, + "logps/chosen": -55.540245056152344, + "logps/rejected": -209.97962951660156, + "loss": 0.1778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.164955973625183, + "rewards/margins": 1.9803810119628906, + "rewards/rejected": -0.8154250979423523, + "step": 7657 + }, + { + "epoch": 0.45, + "learning_rate": 6.113917164302566e-08, + "logits/chosen": -1.9069939851760864, + "logits/rejected": -1.8945879936218262, + "logps/chosen": -58.48430252075195, + "logps/rejected": -165.6575927734375, + "loss": 0.2107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0847148895263672, + "rewards/margins": 2.0278499126434326, + "rewards/rejected": -0.9431350827217102, + "step": 7658 + }, + { + "epoch": 0.45, + "learning_rate": 6.112998426611877e-08, + "logits/chosen": -1.9079418182373047, + "logits/rejected": -1.9100788831710815, + "logps/chosen": -6.128891944885254, + "logps/rejected": -195.03689575195312, + "loss": 0.3719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027248144149780273, + "rewards/margins": 1.4111944437026978, + "rewards/rejected": -1.3839462995529175, + "step": 7659 + }, + { + "epoch": 0.45, + "learning_rate": 6.11207964938204e-08, + "logits/chosen": -2.0129499435424805, + "logits/rejected": -2.011784553527832, + "logps/chosen": -5.811822891235352, + "logps/rejected": -76.1420669555664, + "loss": 0.624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28085023164749146, + "rewards/margins": 0.012381762266159058, + "rewards/rejected": 0.2684684693813324, + "step": 7660 + }, + { + "epoch": 0.45, + "learning_rate": 6.111160832645693e-08, + "logits/chosen": -1.7673084735870361, + "logits/rejected": -1.7661865949630737, + "logps/chosen": -4.114749431610107, + "logps/rejected": -136.19070434570312, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1897173374891281, + "rewards/margins": 1.6674273014068604, + "rewards/rejected": -1.4777100086212158, + "step": 7661 + }, + { + "epoch": 0.45, + "learning_rate": 6.11024197643548e-08, + "logits/chosen": -1.9644179344177246, + "logits/rejected": -1.9703483581542969, + "logps/chosen": -48.934017181396484, + "logps/rejected": -153.41348266601562, + "loss": 0.515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5175861716270447, + "rewards/margins": 0.26837658882141113, + "rewards/rejected": 0.24920959770679474, + "step": 7662 + }, + { + "epoch": 0.45, + "learning_rate": 6.10932308078404e-08, + "logits/chosen": -1.9458677768707275, + "logits/rejected": -1.9332467317581177, + "logps/chosen": -0.0007809749222360551, + "logps/rejected": -149.1219482421875, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002517943794373423, + "rewards/margins": 2.8762779235839844, + "rewards/rejected": -2.876026153564453, + "step": 7663 + }, + { + "epoch": 0.45, + "learning_rate": 6.108404145724019e-08, + "logits/chosen": -1.767958641052246, + "logits/rejected": -1.7459251880645752, + "logps/chosen": -206.4429931640625, + "logps/rejected": -351.0595703125, + "loss": 0.1132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0786621570587158, + "rewards/margins": 1.91416335105896, + "rewards/rejected": -0.8355011343955994, + "step": 7664 + }, + { + "epoch": 0.45, + "learning_rate": 6.107485171288061e-08, + "logits/chosen": -1.9118298292160034, + "logits/rejected": -1.8979095220565796, + "logps/chosen": -146.0703125, + "logps/rejected": -245.21505737304688, + "loss": 0.246, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.73895263671875, + "rewards/margins": 0.6709411144256592, + "rewards/rejected": 1.0680115222930908, + "step": 7665 + }, + { + "epoch": 0.45, + "learning_rate": 6.106566157508811e-08, + "logits/chosen": -2.103685140609741, + "logits/rejected": -2.0955495834350586, + "logps/chosen": -0.0002497209352441132, + "logps/rejected": -330.4719543457031, + "loss": 0.3642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2991908079129644e-05, + "rewards/margins": 3.55900502204895, + "rewards/rejected": -3.5590178966522217, + "step": 7666 + }, + { + "epoch": 0.45, + "learning_rate": 6.105647104418918e-08, + "logits/chosen": -1.9416700601577759, + "logits/rejected": -1.928776741027832, + "logps/chosen": -237.83807373046875, + "logps/rejected": -396.2837829589844, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0856873989105225, + "rewards/margins": 2.8458101749420166, + "rewards/rejected": -0.7601227164268494, + "step": 7667 + }, + { + "epoch": 0.45, + "learning_rate": 6.104728012051034e-08, + "logits/chosen": -1.9554470777511597, + "logits/rejected": -1.9555448293685913, + "logps/chosen": -26.54663848876953, + "logps/rejected": -137.45700073242188, + "loss": 0.4828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07778777927160263, + "rewards/margins": 1.1751892566680908, + "rewards/rejected": -1.2529770135879517, + "step": 7668 + }, + { + "epoch": 0.45, + "learning_rate": 6.103808880437806e-08, + "logits/chosen": -1.7812936305999756, + "logits/rejected": -1.7868753671646118, + "logps/chosen": -30.24983024597168, + "logps/rejected": -180.8955078125, + "loss": 0.3834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11246433109045029, + "rewards/margins": 2.182877779006958, + "rewards/rejected": -2.07041335105896, + "step": 7669 + }, + { + "epoch": 0.45, + "learning_rate": 6.102889709611884e-08, + "logits/chosen": -1.9729177951812744, + "logits/rejected": -1.9734336137771606, + "logps/chosen": -34.49861145019531, + "logps/rejected": -193.41281127929688, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22467003762722015, + "rewards/margins": 2.1302762031555176, + "rewards/rejected": -1.9056061506271362, + "step": 7670 + }, + { + "epoch": 0.45, + "learning_rate": 6.10197049960593e-08, + "logits/chosen": -1.878239393234253, + "logits/rejected": -1.8750190734863281, + "logps/chosen": -85.26414489746094, + "logps/rejected": -226.54014587402344, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9837669730186462, + "rewards/margins": 0.34608232975006104, + "rewards/rejected": 0.6376846432685852, + "step": 7671 + }, + { + "epoch": 0.45, + "learning_rate": 6.10105125045259e-08, + "logits/chosen": -2.008406400680542, + "logits/rejected": -2.0027945041656494, + "logps/chosen": -13.431571006774902, + "logps/rejected": -240.22560119628906, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007097912020981312, + "rewards/margins": 3.6491870880126953, + "rewards/rejected": -3.656285047531128, + "step": 7672 + }, + { + "epoch": 0.45, + "learning_rate": 6.100131962184526e-08, + "logits/chosen": -1.8257675170898438, + "logits/rejected": -1.8781182765960693, + "logps/chosen": -248.31228637695312, + "logps/rejected": -428.1085205078125, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1034424304962158, + "rewards/margins": 4.495175361633301, + "rewards/rejected": -3.391732931137085, + "step": 7673 + }, + { + "epoch": 0.45, + "learning_rate": 6.099212634834392e-08, + "logits/chosen": -1.8437000513076782, + "logits/rejected": -1.8233829736709595, + "logps/chosen": -2.8609943910851143e-05, + "logps/rejected": -161.29647827148438, + "loss": 0.3714, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.192329346366705e-08, + "rewards/margins": 2.61041259765625, + "rewards/rejected": -2.61041259765625, + "step": 7674 + }, + { + "epoch": 0.45, + "learning_rate": 6.098293268434849e-08, + "logits/chosen": -2.0038623809814453, + "logits/rejected": -1.9960212707519531, + "logps/chosen": -97.0123291015625, + "logps/rejected": -302.0015869140625, + "loss": 0.4127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06416473537683487, + "rewards/margins": 2.0285356044769287, + "rewards/rejected": -2.092700242996216, + "step": 7675 + }, + { + "epoch": 0.45, + "learning_rate": 6.097373863018555e-08, + "logits/chosen": -2.1142234802246094, + "logits/rejected": -2.1086065769195557, + "logps/chosen": -0.00015818017709534615, + "logps/rejected": -75.40484619140625, + "loss": 0.4267, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.914012712717522e-06, + "rewards/margins": 1.610707402229309, + "rewards/rejected": -1.6106995344161987, + "step": 7676 + }, + { + "epoch": 0.45, + "learning_rate": 6.096454418618176e-08, + "logits/chosen": -1.992950201034546, + "logits/rejected": -1.9802448749542236, + "logps/chosen": -0.0002481818664819002, + "logps/rejected": -121.48696899414062, + "loss": 0.5943, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1633902431640308e-05, + "rewards/margins": 0.44340041279792786, + "rewards/rejected": -0.44341203570365906, + "step": 7677 + }, + { + "epoch": 0.45, + "learning_rate": 6.095534935266372e-08, + "logits/chosen": -1.831247329711914, + "logits/rejected": -1.8224729299545288, + "logps/chosen": -170.4615478515625, + "logps/rejected": -297.20458984375, + "loss": 0.2607, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9352463483810425, + "rewards/margins": 0.7376450300216675, + "rewards/rejected": 1.197601318359375, + "step": 7678 + }, + { + "epoch": 0.45, + "learning_rate": 6.094615412995808e-08, + "logits/chosen": -2.106330633163452, + "logits/rejected": -2.103114128112793, + "logps/chosen": -176.41226196289062, + "logps/rejected": -297.71063232421875, + "loss": 0.0762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9939956665039062, + "rewards/margins": 2.203639268875122, + "rewards/rejected": -0.20964355766773224, + "step": 7679 + }, + { + "epoch": 0.45, + "learning_rate": 6.09369585183915e-08, + "logits/chosen": -2.1021344661712646, + "logits/rejected": -2.0915868282318115, + "logps/chosen": -34.04292297363281, + "logps/rejected": -195.03619384765625, + "loss": 0.4012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22700805962085724, + "rewards/margins": 1.0723479986190796, + "rewards/rejected": -0.8453399538993835, + "step": 7680 + }, + { + "epoch": 0.45, + "learning_rate": 6.092776251829066e-08, + "logits/chosen": -2.1053221225738525, + "logits/rejected": -2.098510503768921, + "logps/chosen": -56.014827728271484, + "logps/rejected": -214.88348388671875, + "loss": 0.1445, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2541645765304565, + "rewards/margins": 2.413483142852783, + "rewards/rejected": -1.1593185663223267, + "step": 7681 + }, + { + "epoch": 0.45, + "learning_rate": 6.091856612998223e-08, + "logits/chosen": -1.754111886024475, + "logits/rejected": -1.7485545873641968, + "logps/chosen": -149.3489532470703, + "logps/rejected": -201.49655151367188, + "loss": 0.0632, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.466160535812378, + "rewards/margins": 2.8468430042266846, + "rewards/rejected": -0.3806823790073395, + "step": 7682 + }, + { + "epoch": 0.45, + "learning_rate": 6.090936935379294e-08, + "logits/chosen": -1.910718560218811, + "logits/rejected": -2.0064337253570557, + "logps/chosen": -188.906005859375, + "logps/rejected": -262.9728698730469, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3132386207580566, + "rewards/margins": 2.782916307449341, + "rewards/rejected": -0.46967774629592896, + "step": 7683 + }, + { + "epoch": 0.45, + "learning_rate": 6.090017219004947e-08, + "logits/chosen": -2.0799360275268555, + "logits/rejected": -2.0562233924865723, + "logps/chosen": -179.26080322265625, + "logps/rejected": -289.2111511230469, + "loss": 0.2742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49302980303764343, + "rewards/margins": 1.1249908208847046, + "rewards/rejected": -0.6319610476493835, + "step": 7684 + }, + { + "epoch": 0.45, + "learning_rate": 6.089097463907857e-08, + "logits/chosen": -1.9705058336257935, + "logits/rejected": -1.9884897470474243, + "logps/chosen": -253.86676025390625, + "logps/rejected": -255.43557739257812, + "loss": 0.0634, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5879180431365967, + "rewards/margins": 2.339529275894165, + "rewards/rejected": 0.24838867783546448, + "step": 7685 + }, + { + "epoch": 0.45, + "learning_rate": 6.088177670120697e-08, + "logits/chosen": -1.9329558610916138, + "logits/rejected": -1.9213321208953857, + "logps/chosen": -149.6409454345703, + "logps/rejected": -231.84947204589844, + "loss": 0.2271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4504715204238892, + "rewards/margins": 0.8056259155273438, + "rewards/rejected": 0.6448456048965454, + "step": 7686 + }, + { + "epoch": 0.45, + "learning_rate": 6.087257837676147e-08, + "logits/chosen": -1.8901498317718506, + "logits/rejected": -1.8934499025344849, + "logps/chosen": -33.873435974121094, + "logps/rejected": -124.44827270507812, + "loss": 0.4908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4621311128139496, + "rewards/margins": 0.5175792574882507, + "rewards/rejected": -0.055448152124881744, + "step": 7687 + }, + { + "epoch": 0.45, + "learning_rate": 6.086337966606876e-08, + "logits/chosen": -1.982718586921692, + "logits/rejected": -1.96188223361969, + "logps/chosen": -17.087018966674805, + "logps/rejected": -285.8154296875, + "loss": 0.3813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030146408826112747, + "rewards/margins": 4.017576694488525, + "rewards/rejected": -4.047723293304443, + "step": 7688 + }, + { + "epoch": 0.45, + "learning_rate": 6.08541805694557e-08, + "logits/chosen": -2.1758246421813965, + "logits/rejected": -2.1507749557495117, + "logps/chosen": -114.9744873046875, + "logps/rejected": -212.92539978027344, + "loss": 0.6089, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8319076895713806, + "rewards/margins": -0.30392760038375854, + "rewards/rejected": 1.1358352899551392, + "step": 7689 + }, + { + "epoch": 0.45, + "learning_rate": 6.084498108724902e-08, + "logits/chosen": -1.9068257808685303, + "logits/rejected": -1.9025547504425049, + "logps/chosen": -0.9491561055183411, + "logps/rejected": -82.0637435913086, + "loss": 0.4965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18886180222034454, + "rewards/margins": 0.7150977849960327, + "rewards/rejected": -0.5262359976768494, + "step": 7690 + }, + { + "epoch": 0.45, + "learning_rate": 6.083578121977558e-08, + "logits/chosen": -1.992550253868103, + "logits/rejected": -1.9810681343078613, + "logps/chosen": -37.291725158691406, + "logps/rejected": -248.3649139404297, + "loss": 0.2721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27850762009620667, + "rewards/margins": 4.171862602233887, + "rewards/rejected": -3.893354892730713, + "step": 7691 + }, + { + "epoch": 0.45, + "learning_rate": 6.082658096736219e-08, + "logits/chosen": -2.0322322845458984, + "logits/rejected": -2.0413665771484375, + "logps/chosen": -151.9267578125, + "logps/rejected": -287.9963073730469, + "loss": 0.1361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4862213134765625, + "rewards/margins": 1.694280982017517, + "rewards/rejected": -0.20805969834327698, + "step": 7692 + }, + { + "epoch": 0.45, + "learning_rate": 6.081738033033567e-08, + "logits/chosen": -2.0508692264556885, + "logits/rejected": -2.089012384414673, + "logps/chosen": -183.94277954101562, + "logps/rejected": -342.36859130859375, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48849794268608093, + "rewards/margins": 1.0089935064315796, + "rewards/rejected": -0.520495593547821, + "step": 7693 + }, + { + "epoch": 0.45, + "learning_rate": 6.080817930902291e-08, + "logits/chosen": -2.067570924758911, + "logits/rejected": -2.0671496391296387, + "logps/chosen": -0.00018703298701439053, + "logps/rejected": -158.75564575195312, + "loss": 0.4417, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0610514209474786e-06, + "rewards/margins": 1.5616322755813599, + "rewards/rejected": -1.5616333484649658, + "step": 7694 + }, + { + "epoch": 0.45, + "learning_rate": 6.079897790375073e-08, + "logits/chosen": -1.7471935749053955, + "logits/rejected": -1.7286251783370972, + "logps/chosen": -294.25128173828125, + "logps/rejected": -390.8197021484375, + "loss": 0.3865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3319275379180908, + "rewards/margins": 0.00533139705657959, + "rewards/rejected": 1.3265961408615112, + "step": 7695 + }, + { + "epoch": 0.45, + "learning_rate": 6.078977611484606e-08, + "logits/chosen": -1.8625565767288208, + "logits/rejected": -1.8633586168289185, + "logps/chosen": -1.8745359182357788, + "logps/rejected": -66.40684509277344, + "loss": 0.6718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011026322841644287, + "rewards/margins": 0.20740769803524017, + "rewards/rejected": -0.1963813751935959, + "step": 7696 + }, + { + "epoch": 0.45, + "learning_rate": 6.078057394263574e-08, + "logits/chosen": -2.1231491565704346, + "logits/rejected": -2.1161906719207764, + "logps/chosen": -78.85230255126953, + "logps/rejected": -293.07794189453125, + "loss": 0.2476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3625297546386719, + "rewards/margins": 3.5373284816741943, + "rewards/rejected": -3.1747987270355225, + "step": 7697 + }, + { + "epoch": 0.45, + "learning_rate": 6.07713713874467e-08, + "logits/chosen": -1.7663160562515259, + "logits/rejected": -1.7589750289916992, + "logps/chosen": -32.889801025390625, + "logps/rejected": -197.60635375976562, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3455253541469574, + "rewards/margins": 2.065126895904541, + "rewards/rejected": -1.7196015119552612, + "step": 7698 + }, + { + "epoch": 0.45, + "learning_rate": 6.076216844960587e-08, + "logits/chosen": -1.8600646257400513, + "logits/rejected": -1.856002926826477, + "logps/chosen": -177.66348266601562, + "logps/rejected": -207.4241943359375, + "loss": 0.3919, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.175799608230591, + "rewards/margins": -0.09769582748413086, + "rewards/rejected": 2.2734954357147217, + "step": 7699 + }, + { + "epoch": 0.45, + "learning_rate": 6.075296512944017e-08, + "logits/chosen": -2.130932331085205, + "logits/rejected": -2.1125295162200928, + "logps/chosen": -10.204020500183105, + "logps/rejected": -440.8883361816406, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12980671226978302, + "rewards/margins": 8.494479179382324, + "rewards/rejected": -8.364672660827637, + "step": 7700 + }, + { + "epoch": 0.45, + "learning_rate": 6.074376142727656e-08, + "logits/chosen": -2.0182483196258545, + "logits/rejected": -1.9485034942626953, + "logps/chosen": -120.75621032714844, + "logps/rejected": -393.96246337890625, + "loss": 0.1907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8545746207237244, + "rewards/margins": 1.2807374000549316, + "rewards/rejected": -0.4261627197265625, + "step": 7701 + }, + { + "epoch": 0.45, + "learning_rate": 6.073455734344196e-08, + "logits/chosen": -1.6019887924194336, + "logits/rejected": -1.6181973218917847, + "logps/chosen": -0.002179476898163557, + "logps/rejected": -141.42523193359375, + "loss": 0.4682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001729640644043684, + "rewards/margins": 1.1604105234146118, + "rewards/rejected": -1.16058349609375, + "step": 7702 + }, + { + "epoch": 0.45, + "learning_rate": 6.072535287826339e-08, + "logits/chosen": -1.8897813558578491, + "logits/rejected": -1.9034943580627441, + "logps/chosen": -151.32748413085938, + "logps/rejected": -480.6876220703125, + "loss": 0.1005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1258667707443237, + "rewards/margins": 3.071310520172119, + "rewards/rejected": -1.9454437494277954, + "step": 7703 + }, + { + "epoch": 0.45, + "learning_rate": 6.071614803206784e-08, + "logits/chosen": -1.8577697277069092, + "logits/rejected": -1.8609511852264404, + "logps/chosen": -6.358108997344971, + "logps/rejected": -56.79167556762695, + "loss": 0.6269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2652420401573181, + "rewards/margins": 0.4948310852050781, + "rewards/rejected": -0.7600731253623962, + "step": 7704 + }, + { + "epoch": 0.45, + "learning_rate": 6.070694280518227e-08, + "logits/chosen": -2.0326623916625977, + "logits/rejected": -2.0241966247558594, + "logps/chosen": -47.305755615234375, + "logps/rejected": -216.95864868164062, + "loss": 0.4132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5196739435195923, + "rewards/margins": 0.9523963928222656, + "rewards/rejected": -0.4327224791049957, + "step": 7705 + }, + { + "epoch": 0.45, + "learning_rate": 6.069773719793374e-08, + "logits/chosen": -1.8663814067840576, + "logits/rejected": -1.861354112625122, + "logps/chosen": -304.1094665527344, + "logps/rejected": -504.1671142578125, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.921518087387085, + "rewards/margins": 2.6412689685821533, + "rewards/rejected": 0.2802490293979645, + "step": 7706 + }, + { + "epoch": 0.45, + "learning_rate": 6.068853121064923e-08, + "logits/chosen": -1.9804632663726807, + "logits/rejected": -1.9579601287841797, + "logps/chosen": -160.53802490234375, + "logps/rejected": -602.1934814453125, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.060467481613159, + "rewards/margins": 8.856585502624512, + "rewards/rejected": -6.796118259429932, + "step": 7707 + }, + { + "epoch": 0.45, + "learning_rate": 6.067932484365583e-08, + "logits/chosen": -1.5940860509872437, + "logits/rejected": -1.5334454774856567, + "logps/chosen": -278.3126220703125, + "logps/rejected": -397.73974609375, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0250244140625, + "rewards/margins": 2.104879856109619, + "rewards/rejected": 0.9201446771621704, + "step": 7708 + }, + { + "epoch": 0.45, + "learning_rate": 6.067011809728056e-08, + "logits/chosen": -1.8812862634658813, + "logits/rejected": -1.8717708587646484, + "logps/chosen": -45.60598373413086, + "logps/rejected": -280.4031066894531, + "loss": 0.1756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4655494689941406, + "rewards/margins": 2.953707218170166, + "rewards/rejected": -2.4881577491760254, + "step": 7709 + }, + { + "epoch": 0.45, + "learning_rate": 6.066091097185053e-08, + "logits/chosen": -1.9967923164367676, + "logits/rejected": -1.9974312782287598, + "logps/chosen": -20.950672149658203, + "logps/rejected": -262.93182373046875, + "loss": 0.2318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5173134207725525, + "rewards/margins": 2.2183234691619873, + "rewards/rejected": -1.7010101079940796, + "step": 7710 + }, + { + "epoch": 0.45, + "learning_rate": 6.065170346769276e-08, + "logits/chosen": -1.914794683456421, + "logits/rejected": -1.9259936809539795, + "logps/chosen": -238.0047149658203, + "logps/rejected": -572.6505126953125, + "loss": 0.066, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.86723792552948, + "rewards/margins": 3.1041276454925537, + "rewards/rejected": -1.2368897199630737, + "step": 7711 + }, + { + "epoch": 0.45, + "learning_rate": 6.064249558513439e-08, + "logits/chosen": -2.055753231048584, + "logits/rejected": -2.063955783843994, + "logps/chosen": -149.23150634765625, + "logps/rejected": -239.50656127929688, + "loss": 0.1104, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4068740606307983, + "rewards/margins": 2.0810348987579346, + "rewards/rejected": -0.6741607785224915, + "step": 7712 + }, + { + "epoch": 0.45, + "learning_rate": 6.063328732450252e-08, + "logits/chosen": -1.8762060403823853, + "logits/rejected": -1.831207513809204, + "logps/chosen": -266.8045959472656, + "logps/rejected": -543.4619750976562, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3961853981018066, + "rewards/margins": 3.6844606399536133, + "rewards/rejected": -1.288275122642517, + "step": 7713 + }, + { + "epoch": 0.45, + "learning_rate": 6.062407868612427e-08, + "logits/chosen": -1.9028067588806152, + "logits/rejected": -1.9527117013931274, + "logps/chosen": -193.04156494140625, + "logps/rejected": -490.60394287109375, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6022385358810425, + "rewards/margins": 5.8833699226379395, + "rewards/rejected": -4.281131267547607, + "step": 7714 + }, + { + "epoch": 0.45, + "learning_rate": 6.061486967032678e-08, + "logits/chosen": -2.098189115524292, + "logits/rejected": -2.100003719329834, + "logps/chosen": -25.938901901245117, + "logps/rejected": -105.86846160888672, + "loss": 0.6277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019294356927275658, + "rewards/margins": 0.009693527594208717, + "rewards/rejected": -0.028987884521484375, + "step": 7715 + }, + { + "epoch": 0.45, + "learning_rate": 6.06056602774372e-08, + "logits/chosen": -1.8811354637145996, + "logits/rejected": -1.8790717124938965, + "logps/chosen": -62.63423156738281, + "logps/rejected": -353.326904296875, + "loss": 0.0827, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3533165454864502, + "rewards/margins": 4.380843162536621, + "rewards/rejected": -3.02752685546875, + "step": 7716 + }, + { + "epoch": 0.45, + "learning_rate": 6.059645050778269e-08, + "logits/chosen": -1.9491705894470215, + "logits/rejected": -1.9201046228408813, + "logps/chosen": -163.0347900390625, + "logps/rejected": -205.1165771484375, + "loss": 0.4683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8400421142578125, + "rewards/margins": 0.23405152559280396, + "rewards/rejected": 0.6059905886650085, + "step": 7717 + }, + { + "epoch": 0.45, + "learning_rate": 6.058724036169042e-08, + "logits/chosen": -1.9376826286315918, + "logits/rejected": -1.9364049434661865, + "logps/chosen": -139.3108673095703, + "logps/rejected": -307.94219970703125, + "loss": 0.1791, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6598129272460938, + "rewards/margins": 1.2685348987579346, + "rewards/rejected": 0.39127808809280396, + "step": 7718 + }, + { + "epoch": 0.45, + "learning_rate": 6.057802983948759e-08, + "logits/chosen": -2.135356903076172, + "logits/rejected": -2.1322391033172607, + "logps/chosen": -62.77499771118164, + "logps/rejected": -268.024658203125, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6935421228408813, + "rewards/margins": 3.0857691764831543, + "rewards/rejected": -2.3922271728515625, + "step": 7719 + }, + { + "epoch": 0.45, + "learning_rate": 6.056881894150139e-08, + "logits/chosen": -1.932205080986023, + "logits/rejected": -1.9137502908706665, + "logps/chosen": -166.43536376953125, + "logps/rejected": -355.1670227050781, + "loss": 0.189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4912445545196533, + "rewards/margins": 1.1437561511993408, + "rewards/rejected": 0.3474884033203125, + "step": 7720 + }, + { + "epoch": 0.45, + "learning_rate": 6.055960766805904e-08, + "logits/chosen": -2.1583473682403564, + "logits/rejected": -2.1431214809417725, + "logps/chosen": -46.93585205078125, + "logps/rejected": -292.90087890625, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3845924437046051, + "rewards/margins": 2.9623818397521973, + "rewards/rejected": -2.577789306640625, + "step": 7721 + }, + { + "epoch": 0.45, + "learning_rate": 6.055039601948777e-08, + "logits/chosen": -1.7425017356872559, + "logits/rejected": -1.749972939491272, + "logps/chosen": -43.569149017333984, + "logps/rejected": -184.32968139648438, + "loss": 0.4738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18864670395851135, + "rewards/margins": 1.0575264692306519, + "rewards/rejected": -0.8688797354698181, + "step": 7722 + }, + { + "epoch": 0.45, + "learning_rate": 6.054118399611484e-08, + "logits/chosen": -1.8285518884658813, + "logits/rejected": -1.8215886354446411, + "logps/chosen": -190.88870239257812, + "logps/rejected": -542.917724609375, + "loss": 0.048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.291986107826233, + "rewards/margins": 3.1864748001098633, + "rewards/rejected": -1.8944885730743408, + "step": 7723 + }, + { + "epoch": 0.45, + "learning_rate": 6.053197159826749e-08, + "logits/chosen": -1.9148831367492676, + "logits/rejected": -1.9114103317260742, + "logps/chosen": -15.263090133666992, + "logps/rejected": -191.65396118164062, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16136112809181213, + "rewards/margins": 4.048360824584961, + "rewards/rejected": -3.8869996070861816, + "step": 7724 + }, + { + "epoch": 0.45, + "learning_rate": 6.052275882627298e-08, + "logits/chosen": -2.0894229412078857, + "logits/rejected": -2.090583562850952, + "logps/chosen": -7.973170757293701, + "logps/rejected": -131.56121826171875, + "loss": 0.6609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10928540676832199, + "rewards/margins": 0.04175610840320587, + "rewards/rejected": 0.06752929836511612, + "step": 7725 + }, + { + "epoch": 0.45, + "learning_rate": 6.051354568045861e-08, + "logits/chosen": -1.9369806051254272, + "logits/rejected": -1.933210849761963, + "logps/chosen": -20.09087562561035, + "logps/rejected": -198.90206909179688, + "loss": 0.4131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16862927377223969, + "rewards/margins": 1.538564920425415, + "rewards/rejected": -1.3699356317520142, + "step": 7726 + }, + { + "epoch": 0.45, + "learning_rate": 6.050433216115168e-08, + "logits/chosen": -1.850917100906372, + "logits/rejected": -1.8494365215301514, + "logps/chosen": -50.5561637878418, + "logps/rejected": -261.6095886230469, + "loss": 0.1502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2033360004425049, + "rewards/margins": 3.1901659965515137, + "rewards/rejected": -1.9868301153182983, + "step": 7727 + }, + { + "epoch": 0.45, + "learning_rate": 6.049511826867948e-08, + "logits/chosen": -1.9879249334335327, + "logits/rejected": -1.9788318872451782, + "logps/chosen": -50.39855194091797, + "logps/rejected": -236.55078125, + "loss": 0.4764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07724342495203018, + "rewards/margins": 1.3936748504638672, + "rewards/rejected": -1.4709182977676392, + "step": 7728 + }, + { + "epoch": 0.45, + "learning_rate": 6.048590400336935e-08, + "logits/chosen": -1.8791989088058472, + "logits/rejected": -1.8809278011322021, + "logps/chosen": -71.30355834960938, + "logps/rejected": -163.36227416992188, + "loss": 0.8463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1046310663223267, + "rewards/margins": 1.147598385810852, + "rewards/rejected": -2.2522294521331787, + "step": 7729 + }, + { + "epoch": 0.45, + "learning_rate": 6.04766893655486e-08, + "logits/chosen": -1.8405252695083618, + "logits/rejected": -1.8406864404678345, + "logps/chosen": -36.341102600097656, + "logps/rejected": -116.56376647949219, + "loss": 0.3881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10738182067871094, + "rewards/margins": 1.246554970741272, + "rewards/rejected": -1.139173150062561, + "step": 7730 + }, + { + "epoch": 0.45, + "learning_rate": 6.04674743555446e-08, + "logits/chosen": -1.9883923530578613, + "logits/rejected": -1.9541677236557007, + "logps/chosen": -163.27210998535156, + "logps/rejected": -274.19403076171875, + "loss": 0.2388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9423538446426392, + "rewards/margins": 1.401484727859497, + "rewards/rejected": -0.4591308534145355, + "step": 7731 + }, + { + "epoch": 0.45, + "learning_rate": 6.045825897368472e-08, + "logits/chosen": -1.9348417520523071, + "logits/rejected": -1.9167077541351318, + "logps/chosen": -170.83848571777344, + "logps/rejected": -323.71746826171875, + "loss": 0.2379, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1022567749023438, + "rewards/margins": 0.5926864147186279, + "rewards/rejected": 1.5095703601837158, + "step": 7732 + }, + { + "epoch": 0.45, + "learning_rate": 6.044904322029633e-08, + "logits/chosen": -1.923047423362732, + "logits/rejected": -1.9231373071670532, + "logps/chosen": -21.857402801513672, + "logps/rejected": -245.84518432617188, + "loss": 0.1547, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0140994787216187, + "rewards/margins": 4.155984878540039, + "rewards/rejected": -3.14188551902771, + "step": 7733 + }, + { + "epoch": 0.45, + "learning_rate": 6.043982709570681e-08, + "logits/chosen": -1.8511136770248413, + "logits/rejected": -1.904842495918274, + "logps/chosen": -263.9757080078125, + "logps/rejected": -442.25604248046875, + "loss": 0.0554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1113159656524658, + "rewards/margins": 2.9569029808044434, + "rewards/rejected": -1.845587134361267, + "step": 7734 + }, + { + "epoch": 0.45, + "learning_rate": 6.043061060024355e-08, + "logits/chosen": -2.0855600833892822, + "logits/rejected": -2.082270622253418, + "logps/chosen": -17.890972137451172, + "logps/rejected": -100.43292236328125, + "loss": 0.6046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.137172132730484, + "rewards/margins": 0.23682042956352234, + "rewards/rejected": -0.09964828938245773, + "step": 7735 + }, + { + "epoch": 0.45, + "learning_rate": 6.0421393734234e-08, + "logits/chosen": -1.6687341928482056, + "logits/rejected": -1.61184561252594, + "logps/chosen": -191.12677001953125, + "logps/rejected": -282.100830078125, + "loss": 0.2398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4610871076583862, + "rewards/margins": 1.1611511707305908, + "rewards/rejected": 0.299935907125473, + "step": 7736 + }, + { + "epoch": 0.45, + "learning_rate": 6.041217649800555e-08, + "logits/chosen": -2.076608657836914, + "logits/rejected": -2.0693588256835938, + "logps/chosen": -44.369598388671875, + "logps/rejected": -163.26068115234375, + "loss": 0.4485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5207130312919617, + "rewards/margins": 0.6090736389160156, + "rewards/rejected": -0.08836060017347336, + "step": 7737 + }, + { + "epoch": 0.45, + "learning_rate": 6.040295889188567e-08, + "logits/chosen": -2.0533175468444824, + "logits/rejected": -2.0419540405273438, + "logps/chosen": -143.3244171142578, + "logps/rejected": -277.3936462402344, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09806976467370987, + "rewards/margins": 2.5849382877349854, + "rewards/rejected": -2.6830079555511475, + "step": 7738 + }, + { + "epoch": 0.45, + "learning_rate": 6.039374091620179e-08, + "logits/chosen": -1.9207531213760376, + "logits/rejected": -1.922379493713379, + "logps/chosen": -212.0821533203125, + "logps/rejected": -313.3565673828125, + "loss": 0.0894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9514983892440796, + "rewards/margins": 2.2172882556915283, + "rewards/rejected": -0.26578980684280396, + "step": 7739 + }, + { + "epoch": 0.45, + "learning_rate": 6.038452257128142e-08, + "logits/chosen": -1.9000781774520874, + "logits/rejected": -1.8874263763427734, + "logps/chosen": -214.35073852539062, + "logps/rejected": -349.810546875, + "loss": 0.1041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.686909556388855, + "rewards/margins": 2.608747959136963, + "rewards/rejected": -0.9218384027481079, + "step": 7740 + }, + { + "epoch": 0.45, + "learning_rate": 6.037530385745198e-08, + "logits/chosen": -2.0217292308807373, + "logits/rejected": -2.0185420513153076, + "logps/chosen": -2.233752965927124, + "logps/rejected": -122.85810852050781, + "loss": 0.3781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09329428523778915, + "rewards/margins": 2.171184539794922, + "rewards/rejected": -2.077890157699585, + "step": 7741 + }, + { + "epoch": 0.45, + "learning_rate": 6.036608477504101e-08, + "logits/chosen": -1.879814863204956, + "logits/rejected": -1.8738538026809692, + "logps/chosen": -228.5851593017578, + "logps/rejected": -312.91607666015625, + "loss": 0.0622, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3828797340393066, + "rewards/margins": 2.331814765930176, + "rewards/rejected": 0.05106506496667862, + "step": 7742 + }, + { + "epoch": 0.45, + "learning_rate": 6.0356865324376e-08, + "logits/chosen": -2.0136358737945557, + "logits/rejected": -2.0143845081329346, + "logps/chosen": -9.190678247250617e-05, + "logps/rejected": -202.11614990234375, + "loss": 0.3424, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.969422323280014e-06, + "rewards/margins": 3.614481210708618, + "rewards/rejected": -3.614485263824463, + "step": 7743 + }, + { + "epoch": 0.45, + "learning_rate": 6.034764550578447e-08, + "logits/chosen": -1.9835466146469116, + "logits/rejected": -1.9833462238311768, + "logps/chosen": -29.86600112915039, + "logps/rejected": -103.29595184326172, + "loss": 0.4675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5273982882499695, + "rewards/margins": 0.494240939617157, + "rewards/rejected": 0.0331573486328125, + "step": 7744 + }, + { + "epoch": 0.45, + "learning_rate": 6.033842531959395e-08, + "logits/chosen": -1.7026828527450562, + "logits/rejected": -1.6898137331008911, + "logps/chosen": -220.69515991210938, + "logps/rejected": -380.3446044921875, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3743927478790283, + "rewards/margins": 4.113009452819824, + "rewards/rejected": -1.738616943359375, + "step": 7745 + }, + { + "epoch": 0.45, + "learning_rate": 6.032920476613202e-08, + "logits/chosen": -2.079308032989502, + "logits/rejected": -2.0800282955169678, + "logps/chosen": -7.143954753875732, + "logps/rejected": -203.67124938964844, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02334432676434517, + "rewards/margins": 2.6873397827148438, + "rewards/rejected": -2.6639955043792725, + "step": 7746 + }, + { + "epoch": 0.45, + "learning_rate": 6.031998384572619e-08, + "logits/chosen": -1.9634850025177002, + "logits/rejected": -1.9279204607009888, + "logps/chosen": -282.60302734375, + "logps/rejected": -401.49542236328125, + "loss": 0.1514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.45269775390625, + "rewards/margins": 1.2292052507400513, + "rewards/rejected": 1.2234925031661987, + "step": 7747 + }, + { + "epoch": 0.45, + "learning_rate": 6.031076255870406e-08, + "logits/chosen": -1.8423268795013428, + "logits/rejected": -1.8623414039611816, + "logps/chosen": -181.13235473632812, + "logps/rejected": -213.99537658691406, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2007904052734375, + "rewards/margins": 0.5884917974472046, + "rewards/rejected": 0.6122986078262329, + "step": 7748 + }, + { + "epoch": 0.45, + "learning_rate": 6.03015409053932e-08, + "logits/chosen": -1.8830327987670898, + "logits/rejected": -1.8933298587799072, + "logps/chosen": -30.398456573486328, + "logps/rejected": -275.989013671875, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21180497109889984, + "rewards/margins": 2.987804412841797, + "rewards/rejected": -2.7759995460510254, + "step": 7749 + }, + { + "epoch": 0.45, + "learning_rate": 6.029231888612123e-08, + "logits/chosen": -1.9855260848999023, + "logits/rejected": -1.966115117073059, + "logps/chosen": -222.08712768554688, + "logps/rejected": -432.755859375, + "loss": 0.1027, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1474578380584717, + "rewards/margins": 2.0933258533477783, + "rewards/rejected": 0.05413208156824112, + "step": 7750 + }, + { + "epoch": 0.45, + "learning_rate": 6.028309650121573e-08, + "logits/chosen": -1.9892281293869019, + "logits/rejected": -1.9832324981689453, + "logps/chosen": -25.57135009765625, + "logps/rejected": -237.76116943359375, + "loss": 0.3527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013058090582489967, + "rewards/margins": 4.969048023223877, + "rewards/rejected": -4.9821062088012695, + "step": 7751 + }, + { + "epoch": 0.45, + "learning_rate": 6.027387375100435e-08, + "logits/chosen": -1.8409723043441772, + "logits/rejected": -1.8418853282928467, + "logps/chosen": -165.70045471191406, + "logps/rejected": -380.1480712890625, + "loss": 0.266, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9582901000976562, + "rewards/margins": 0.6750686168670654, + "rewards/rejected": 1.2832214832305908, + "step": 7752 + }, + { + "epoch": 0.45, + "learning_rate": 6.026465063581473e-08, + "logits/chosen": -1.8622426986694336, + "logits/rejected": -1.8307039737701416, + "logps/chosen": -205.71286010742188, + "logps/rejected": -382.0457763671875, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.197468638420105, + "rewards/margins": 1.22789466381073, + "rewards/rejected": -0.030426025390625, + "step": 7753 + }, + { + "epoch": 0.45, + "learning_rate": 6.025542715597449e-08, + "logits/chosen": -2.086015462875366, + "logits/rejected": -2.05395770072937, + "logps/chosen": -174.5242919921875, + "logps/rejected": -310.92547607421875, + "loss": 0.1766, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7053924798965454, + "rewards/margins": 1.1891968250274658, + "rewards/rejected": 0.5161957144737244, + "step": 7754 + }, + { + "epoch": 0.45, + "learning_rate": 6.024620331181133e-08, + "logits/chosen": -1.9294447898864746, + "logits/rejected": -1.928584337234497, + "logps/chosen": -55.30550765991211, + "logps/rejected": -241.07290649414062, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12925072014331818, + "rewards/margins": 2.760183334350586, + "rewards/rejected": -2.630932569503784, + "step": 7755 + }, + { + "epoch": 0.45, + "learning_rate": 6.02369791036529e-08, + "logits/chosen": -1.9385212659835815, + "logits/rejected": -1.9371470212936401, + "logps/chosen": -59.7076416015625, + "logps/rejected": -112.24092864990234, + "loss": 1.3851, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.7229923009872437, + "rewards/margins": -0.6434855461120605, + "rewards/rejected": -1.079506754875183, + "step": 7756 + }, + { + "epoch": 0.45, + "learning_rate": 6.022775453182692e-08, + "logits/chosen": -1.7521915435791016, + "logits/rejected": -1.75027334690094, + "logps/chosen": -2.956692695617676, + "logps/rejected": -90.33301544189453, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10631251335144043, + "rewards/margins": 1.3207496404647827, + "rewards/rejected": -1.2144371271133423, + "step": 7757 + }, + { + "epoch": 0.45, + "learning_rate": 6.021852959666104e-08, + "logits/chosen": -1.82412588596344, + "logits/rejected": -1.8234422206878662, + "logps/chosen": -227.46775817871094, + "logps/rejected": -384.4945068359375, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1879289150238037, + "rewards/margins": 2.120008945465088, + "rewards/rejected": 1.0679199695587158, + "step": 7758 + }, + { + "epoch": 0.45, + "learning_rate": 6.020930429848301e-08, + "logits/chosen": -1.9153025150299072, + "logits/rejected": -1.8884146213531494, + "logps/chosen": -48.82429504394531, + "logps/rejected": -290.02105712890625, + "loss": 0.2256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40491676330566406, + "rewards/margins": 4.108051300048828, + "rewards/rejected": -3.703134298324585, + "step": 7759 + }, + { + "epoch": 0.45, + "learning_rate": 6.020007863762056e-08, + "logits/chosen": -1.8379404544830322, + "logits/rejected": -1.8395066261291504, + "logps/chosen": -89.70431518554688, + "logps/rejected": -429.7716064453125, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13793258368968964, + "rewards/margins": 7.6616387367248535, + "rewards/rejected": -7.523705959320068, + "step": 7760 + }, + { + "epoch": 0.45, + "learning_rate": 6.019085261440142e-08, + "logits/chosen": -2.098731756210327, + "logits/rejected": -2.098360061645508, + "logps/chosen": -5.962735176086426, + "logps/rejected": -175.14083862304688, + "loss": 0.4578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08234019577503204, + "rewards/margins": 1.5322502851486206, + "rewards/rejected": -1.6145905256271362, + "step": 7761 + }, + { + "epoch": 0.45, + "learning_rate": 6.018162622915335e-08, + "logits/chosen": -1.8051079511642456, + "logits/rejected": -1.7969313859939575, + "logps/chosen": -174.89454650878906, + "logps/rejected": -225.70391845703125, + "loss": 0.248, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.084437608718872, + "rewards/margins": 0.6755783557891846, + "rewards/rejected": 1.4088592529296875, + "step": 7762 + }, + { + "epoch": 0.45, + "learning_rate": 6.01723994822041e-08, + "logits/chosen": -1.8793437480926514, + "logits/rejected": -1.865983009338379, + "logps/chosen": -47.894630432128906, + "logps/rejected": -227.61080932617188, + "loss": 0.4299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17863693833351135, + "rewards/margins": 0.9737023115158081, + "rewards/rejected": -0.7950653433799744, + "step": 7763 + }, + { + "epoch": 0.45, + "learning_rate": 6.016317237388146e-08, + "logits/chosen": -1.9539543390274048, + "logits/rejected": -1.9576116800308228, + "logps/chosen": -63.143218994140625, + "logps/rejected": -198.33360290527344, + "loss": 0.4297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2095893919467926, + "rewards/margins": 2.118173837661743, + "rewards/rejected": -2.327763319015503, + "step": 7764 + }, + { + "epoch": 0.45, + "learning_rate": 6.015394490451324e-08, + "logits/chosen": -1.9384887218475342, + "logits/rejected": -1.9948469400405884, + "logps/chosen": -276.8533935546875, + "logps/rejected": -253.76736450195312, + "loss": 0.7482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8694214224815369, + "rewards/margins": 1.5386595726013184, + "rewards/rejected": -2.4080810546875, + "step": 7765 + }, + { + "epoch": 0.45, + "learning_rate": 6.014471707442722e-08, + "logits/chosen": -2.095571517944336, + "logits/rejected": -2.077230215072632, + "logps/chosen": -196.6582489013672, + "logps/rejected": -371.3756103515625, + "loss": 0.1182, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.190425157546997, + "rewards/margins": 1.520787000656128, + "rewards/rejected": 0.6696380972862244, + "step": 7766 + }, + { + "epoch": 0.45, + "learning_rate": 6.013548888395123e-08, + "logits/chosen": -2.136841297149658, + "logits/rejected": -2.117926836013794, + "logps/chosen": -89.86454772949219, + "logps/rejected": -145.4439697265625, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5907333493232727, + "rewards/margins": 1.4112625122070312, + "rewards/rejected": -0.8205291628837585, + "step": 7767 + }, + { + "epoch": 0.45, + "learning_rate": 6.01262603334131e-08, + "logits/chosen": -2.011981725692749, + "logits/rejected": -2.023540735244751, + "logps/chosen": -0.0005301611963659525, + "logps/rejected": -142.5364990234375, + "loss": 0.391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.210719347000122e-08, + "rewards/margins": 2.250051259994507, + "rewards/rejected": -2.250051259994507, + "step": 7768 + }, + { + "epoch": 0.45, + "learning_rate": 6.011703142314067e-08, + "logits/chosen": -1.8716920614242554, + "logits/rejected": -1.8431549072265625, + "logps/chosen": -326.1168212890625, + "logps/rejected": -342.1978759765625, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8895325660705566, + "rewards/margins": 2.8704376220703125, + "rewards/rejected": 0.01909484900534153, + "step": 7769 + }, + { + "epoch": 0.45, + "learning_rate": 6.01078021534618e-08, + "logits/chosen": -2.174189805984497, + "logits/rejected": -2.1619980335235596, + "logps/chosen": -2.9171464443206787, + "logps/rejected": -297.6927185058594, + "loss": 0.2655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27627599239349365, + "rewards/margins": 4.269259929656982, + "rewards/rejected": -3.9929840564727783, + "step": 7770 + }, + { + "epoch": 0.45, + "learning_rate": 6.009857252470434e-08, + "logits/chosen": -1.8812874555587769, + "logits/rejected": -1.8750003576278687, + "logps/chosen": -6.246659755706787, + "logps/rejected": -162.02001953125, + "loss": 0.4135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07119212299585342, + "rewards/margins": 1.9028053283691406, + "rewards/rejected": -1.8316131830215454, + "step": 7771 + }, + { + "epoch": 0.45, + "learning_rate": 6.00893425371962e-08, + "logits/chosen": -1.8665955066680908, + "logits/rejected": -1.8641616106033325, + "logps/chosen": -7.629754066467285, + "logps/rejected": -88.29426574707031, + "loss": 0.4079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0879269614815712, + "rewards/margins": 1.6625936031341553, + "rewards/rejected": -1.5746666193008423, + "step": 7772 + }, + { + "epoch": 0.45, + "learning_rate": 6.008011219126524e-08, + "logits/chosen": -2.0214807987213135, + "logits/rejected": -1.9945225715637207, + "logps/chosen": -139.5781707763672, + "logps/rejected": -256.4914855957031, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8346970081329346, + "rewards/margins": 0.5280106067657471, + "rewards/rejected": 1.3066864013671875, + "step": 7773 + }, + { + "epoch": 0.45, + "learning_rate": 6.007088148723941e-08, + "logits/chosen": -1.7329286336898804, + "logits/rejected": -1.7244783639907837, + "logps/chosen": -0.17354105412960052, + "logps/rejected": -113.44969177246094, + "loss": 0.5098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006248666439205408, + "rewards/margins": 0.7713392376899719, + "rewards/rejected": -0.777587890625, + "step": 7774 + }, + { + "epoch": 0.45, + "learning_rate": 6.00616504254466e-08, + "logits/chosen": -2.021308183670044, + "logits/rejected": -2.0457775592803955, + "logps/chosen": -124.56251525878906, + "logps/rejected": -393.426025390625, + "loss": 0.0545, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1749054193496704, + "rewards/margins": 4.965918064117432, + "rewards/rejected": -3.7910125255584717, + "step": 7775 + }, + { + "epoch": 0.45, + "learning_rate": 6.005241900621475e-08, + "logits/chosen": -1.894582986831665, + "logits/rejected": -1.89544677734375, + "logps/chosen": -18.526592254638672, + "logps/rejected": -276.62200927734375, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17225094139575958, + "rewards/margins": 4.3092122077941895, + "rewards/rejected": -4.136961460113525, + "step": 7776 + }, + { + "epoch": 0.45, + "learning_rate": 6.00431872298718e-08, + "logits/chosen": -1.6615254878997803, + "logits/rejected": -1.635571002960205, + "logps/chosen": -190.69949340820312, + "logps/rejected": -211.38230895996094, + "loss": 0.259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5454437732696533, + "rewards/margins": 0.9061142206192017, + "rewards/rejected": 0.6393295526504517, + "step": 7777 + }, + { + "epoch": 0.45, + "learning_rate": 6.003395509674572e-08, + "logits/chosen": -2.1652767658233643, + "logits/rejected": -2.1648693084716797, + "logps/chosen": -21.021793365478516, + "logps/rejected": -282.74530029296875, + "loss": 0.205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6488623023033142, + "rewards/margins": 5.770834922790527, + "rewards/rejected": -5.121972560882568, + "step": 7778 + }, + { + "epoch": 0.45, + "learning_rate": 6.002472260716447e-08, + "logits/chosen": -1.9392398595809937, + "logits/rejected": -1.8949967622756958, + "logps/chosen": -399.084716796875, + "logps/rejected": -808.674072265625, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7837036848068237, + "rewards/margins": 6.211102485656738, + "rewards/rejected": -4.427398681640625, + "step": 7779 + }, + { + "epoch": 0.45, + "learning_rate": 6.001548976145606e-08, + "logits/chosen": -1.7465914487838745, + "logits/rejected": -1.7483855485916138, + "logps/chosen": -11.329326629638672, + "logps/rejected": -59.10722351074219, + "loss": 0.6006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11559238284826279, + "rewards/margins": 0.27160969376564026, + "rewards/rejected": -0.15601730346679688, + "step": 7780 + }, + { + "epoch": 0.45, + "learning_rate": 6.000625655994842e-08, + "logits/chosen": -1.8150893449783325, + "logits/rejected": -1.8135261535644531, + "logps/chosen": -180.18539428710938, + "logps/rejected": -421.6032409667969, + "loss": 0.0713, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5557235479354858, + "rewards/margins": 2.841679334640503, + "rewards/rejected": -1.285955786705017, + "step": 7781 + }, + { + "epoch": 0.45, + "learning_rate": 5.999702300296964e-08, + "logits/chosen": -1.880188226699829, + "logits/rejected": -1.8626371622085571, + "logps/chosen": -167.19952392578125, + "logps/rejected": -271.4170837402344, + "loss": 0.1379, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4764724969863892, + "rewards/margins": 2.1270675659179688, + "rewards/rejected": -0.6505951285362244, + "step": 7782 + }, + { + "epoch": 0.45, + "learning_rate": 5.998778909084767e-08, + "logits/chosen": -1.8764735460281372, + "logits/rejected": -1.8645106554031372, + "logps/chosen": -39.81195831298828, + "logps/rejected": -257.7036437988281, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18626824021339417, + "rewards/margins": 1.9565792083740234, + "rewards/rejected": -1.7703109979629517, + "step": 7783 + }, + { + "epoch": 0.45, + "learning_rate": 5.997855482391058e-08, + "logits/chosen": -2.0661044120788574, + "logits/rejected": -2.0477325916290283, + "logps/chosen": -6.0185866355896, + "logps/rejected": -192.42742919921875, + "loss": 0.3635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1037578135728836, + "rewards/margins": 4.372589588165283, + "rewards/rejected": -4.47634744644165, + "step": 7784 + }, + { + "epoch": 0.45, + "learning_rate": 5.996932020248642e-08, + "logits/chosen": -1.9252538681030273, + "logits/rejected": -1.921376347541809, + "logps/chosen": -1.921002984046936, + "logps/rejected": -32.32192611694336, + "loss": 0.7014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12274330854415894, + "rewards/margins": 0.08643752336502075, + "rewards/rejected": -0.2091808319091797, + "step": 7785 + }, + { + "epoch": 0.45, + "learning_rate": 5.996008522690326e-08, + "logits/chosen": -1.8597944974899292, + "logits/rejected": -1.7954704761505127, + "logps/chosen": -141.057373046875, + "logps/rejected": -326.30279541015625, + "loss": 0.1054, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.784993052482605, + "rewards/margins": 1.789814829826355, + "rewards/rejected": -0.00482177734375, + "step": 7786 + }, + { + "epoch": 0.45, + "learning_rate": 5.995084989748913e-08, + "logits/chosen": -1.9157813787460327, + "logits/rejected": -1.9151700735092163, + "logps/chosen": -41.39251708984375, + "logps/rejected": -226.59994506835938, + "loss": 0.3501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5212478637695312, + "rewards/margins": 1.0398895740509033, + "rewards/rejected": -0.5186416506767273, + "step": 7787 + }, + { + "epoch": 0.45, + "learning_rate": 5.994161421457214e-08, + "logits/chosen": -2.0092549324035645, + "logits/rejected": -1.9832552671432495, + "logps/chosen": -242.65289306640625, + "logps/rejected": -475.374755859375, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.838409423828125, + "rewards/margins": 3.869915723800659, + "rewards/rejected": -2.031506299972534, + "step": 7788 + }, + { + "epoch": 0.45, + "learning_rate": 5.99323781784804e-08, + "logits/chosen": -1.8527610301971436, + "logits/rejected": -1.8184545040130615, + "logps/chosen": -151.13851928710938, + "logps/rejected": -226.87159729003906, + "loss": 0.5729, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6087097525596619, + "rewards/margins": -0.08002471923828125, + "rewards/rejected": 0.6887344717979431, + "step": 7789 + }, + { + "epoch": 0.45, + "learning_rate": 5.992314178954198e-08, + "logits/chosen": -1.7495651245117188, + "logits/rejected": -1.7062792778015137, + "logps/chosen": -204.79425048828125, + "logps/rejected": -261.02203369140625, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.861175537109375, + "rewards/margins": 1.3908295631408691, + "rewards/rejected": -0.5296539664268494, + "step": 7790 + }, + { + "epoch": 0.45, + "learning_rate": 5.991390504808503e-08, + "logits/chosen": -1.9284635782241821, + "logits/rejected": -1.9280116558074951, + "logps/chosen": -46.49535369873047, + "logps/rejected": -171.43002319335938, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4254188537597656, + "rewards/margins": 2.069242000579834, + "rewards/rejected": -1.643823266029358, + "step": 7791 + }, + { + "epoch": 0.45, + "learning_rate": 5.99046679544377e-08, + "logits/chosen": -1.8817427158355713, + "logits/rejected": -1.8954882621765137, + "logps/chosen": -2.604649066925049, + "logps/rejected": -66.62395477294922, + "loss": 0.5172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1345878690481186, + "rewards/margins": 0.6813762784004211, + "rewards/rejected": -0.5467883944511414, + "step": 7792 + }, + { + "epoch": 0.45, + "learning_rate": 5.989543050892809e-08, + "logits/chosen": -1.9938277006149292, + "logits/rejected": -1.9863959550857544, + "logps/chosen": -72.91798400878906, + "logps/rejected": -350.1885986328125, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03055572509765625, + "rewards/margins": 2.5725326538085938, + "rewards/rejected": -2.60308837890625, + "step": 7793 + }, + { + "epoch": 0.45, + "learning_rate": 5.988619271188439e-08, + "logits/chosen": -2.057701826095581, + "logits/rejected": -2.046891450881958, + "logps/chosen": -61.22383499145508, + "logps/rejected": -262.10040283203125, + "loss": 0.2622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2565010190010071, + "rewards/margins": 3.8733603954315186, + "rewards/rejected": -3.6168594360351562, + "step": 7794 + }, + { + "epoch": 0.45, + "learning_rate": 5.987695456363477e-08, + "logits/chosen": -1.9367483854293823, + "logits/rejected": -1.9312409162521362, + "logps/chosen": -0.7498984336853027, + "logps/rejected": -99.92047882080078, + "loss": 0.5483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008544296026229858, + "rewards/margins": 0.65730881690979, + "rewards/rejected": -0.6658531427383423, + "step": 7795 + }, + { + "epoch": 0.45, + "learning_rate": 5.98677160645074e-08, + "logits/chosen": -1.993644118309021, + "logits/rejected": -2.0369577407836914, + "logps/chosen": -272.1580810546875, + "logps/rejected": -389.8197021484375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2921998500823975, + "rewards/margins": 3.290454149246216, + "rewards/rejected": 0.0017456054920330644, + "step": 7796 + }, + { + "epoch": 0.45, + "learning_rate": 5.985847721483051e-08, + "logits/chosen": -1.9491230249404907, + "logits/rejected": -1.9735239744186401, + "logps/chosen": -220.04254150390625, + "logps/rejected": -199.81053161621094, + "loss": 0.4487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5882629752159119, + "rewards/margins": 0.37165379524230957, + "rewards/rejected": 0.2166091948747635, + "step": 7797 + }, + { + "epoch": 0.45, + "learning_rate": 5.984923801493226e-08, + "logits/chosen": -1.817012071609497, + "logits/rejected": -1.8209439516067505, + "logps/chosen": -3.390810966491699, + "logps/rejected": -104.25609588623047, + "loss": 0.455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16317486763000488, + "rewards/margins": 0.9087780714035034, + "rewards/rejected": -0.7456032037734985, + "step": 7798 + }, + { + "epoch": 0.45, + "learning_rate": 5.983999846514092e-08, + "logits/chosen": -1.9341552257537842, + "logits/rejected": -1.9344128370285034, + "logps/chosen": -31.625247955322266, + "logps/rejected": -219.44378662109375, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35125237703323364, + "rewards/margins": 2.0998547077178955, + "rewards/rejected": -1.748602271080017, + "step": 7799 + }, + { + "epoch": 0.45, + "learning_rate": 5.98307585657847e-08, + "logits/chosen": -1.9385757446289062, + "logits/rejected": -1.9517451524734497, + "logps/chosen": -49.02806091308594, + "logps/rejected": -150.23910522460938, + "loss": 0.7452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7956100702285767, + "rewards/margins": 0.9964004755020142, + "rewards/rejected": -1.7920105457305908, + "step": 7800 + }, + { + "epoch": 0.45, + "learning_rate": 5.982151831719183e-08, + "logits/chosen": -2.106741428375244, + "logits/rejected": -2.103572368621826, + "logps/chosen": -18.521320343017578, + "logps/rejected": -255.09320068359375, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3324180543422699, + "rewards/margins": 2.368802547454834, + "rewards/rejected": -2.0363845825195312, + "step": 7801 + }, + { + "epoch": 0.45, + "learning_rate": 5.98122777196906e-08, + "logits/chosen": -1.9896066188812256, + "logits/rejected": -1.9748769998550415, + "logps/chosen": -82.63987731933594, + "logps/rejected": -342.42877197265625, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023812104016542435, + "rewards/margins": 3.8639352321624756, + "rewards/rejected": -3.887747287750244, + "step": 7802 + }, + { + "epoch": 0.45, + "learning_rate": 5.980303677360927e-08, + "logits/chosen": -1.8281526565551758, + "logits/rejected": -1.8620904684066772, + "logps/chosen": -122.02208709716797, + "logps/rejected": -350.38690185546875, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8456764221191406, + "rewards/margins": 3.7455239295959473, + "rewards/rejected": -1.899847388267517, + "step": 7803 + }, + { + "epoch": 0.45, + "learning_rate": 5.97937954792761e-08, + "logits/chosen": -1.8737454414367676, + "logits/rejected": -1.8679133653640747, + "logps/chosen": -285.5457763671875, + "logps/rejected": -413.61822509765625, + "loss": 0.1263, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1431946754455566, + "rewards/margins": 1.617523193359375, + "rewards/rejected": 0.5256714224815369, + "step": 7804 + }, + { + "epoch": 0.45, + "learning_rate": 5.978455383701944e-08, + "logits/chosen": -1.8309365510940552, + "logits/rejected": -1.822519302368164, + "logps/chosen": -309.2184143066406, + "logps/rejected": -463.31622314453125, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7014068365097046, + "rewards/margins": 4.775790214538574, + "rewards/rejected": -3.074383497238159, + "step": 7805 + }, + { + "epoch": 0.45, + "learning_rate": 5.977531184716753e-08, + "logits/chosen": -2.085965871810913, + "logits/rejected": -2.087850332260132, + "logps/chosen": -54.24378204345703, + "logps/rejected": -217.97702026367188, + "loss": 0.2798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07468147575855255, + "rewards/margins": 3.9413747787475586, + "rewards/rejected": -3.8666932582855225, + "step": 7806 + }, + { + "epoch": 0.45, + "learning_rate": 5.976606951004877e-08, + "logits/chosen": -2.0192244052886963, + "logits/rejected": -2.016605854034424, + "logps/chosen": -31.905899047851562, + "logps/rejected": -294.0311279296875, + "loss": 0.2037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4656509459018707, + "rewards/margins": 3.066288948059082, + "rewards/rejected": -2.600637912750244, + "step": 7807 + }, + { + "epoch": 0.45, + "learning_rate": 5.975682682599142e-08, + "logits/chosen": -1.8183084726333618, + "logits/rejected": -1.8235408067703247, + "logps/chosen": -270.34478759765625, + "logps/rejected": -402.0726623535156, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.937432885169983, + "rewards/margins": 2.547903537750244, + "rewards/rejected": -0.6104705929756165, + "step": 7808 + }, + { + "epoch": 0.45, + "learning_rate": 5.974758379532386e-08, + "logits/chosen": -1.9508250951766968, + "logits/rejected": -1.9507324695587158, + "logps/chosen": -133.36929321289062, + "logps/rejected": -192.18190002441406, + "loss": 0.553, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.55914306640625, + "rewards/margins": -0.1835128664970398, + "rewards/rejected": 0.7426559329032898, + "step": 7809 + }, + { + "epoch": 0.45, + "learning_rate": 5.973834041837445e-08, + "logits/chosen": -2.036560535430908, + "logits/rejected": -2.0390584468841553, + "logps/chosen": -17.361793518066406, + "logps/rejected": -186.38223266601562, + "loss": 0.3778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3048256039619446, + "rewards/margins": 1.2518622875213623, + "rewards/rejected": -0.9470367431640625, + "step": 7810 + }, + { + "epoch": 0.45, + "learning_rate": 5.972909669547156e-08, + "logits/chosen": -1.9085534811019897, + "logits/rejected": -1.9470444917678833, + "logps/chosen": -244.4656219482422, + "logps/rejected": -494.86761474609375, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8727524280548096, + "rewards/margins": 3.573786973953247, + "rewards/rejected": -1.7010345458984375, + "step": 7811 + }, + { + "epoch": 0.45, + "learning_rate": 5.971985262694355e-08, + "logits/chosen": -2.004551410675049, + "logits/rejected": -2.0025811195373535, + "logps/chosen": -25.097803115844727, + "logps/rejected": -208.03726196289062, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12569446861743927, + "rewards/margins": 2.4762561321258545, + "rewards/rejected": -2.3505616188049316, + "step": 7812 + }, + { + "epoch": 0.45, + "learning_rate": 5.971060821311884e-08, + "logits/chosen": -1.8986637592315674, + "logits/rejected": -1.907045602798462, + "logps/chosen": -5.122730731964111, + "logps/rejected": -196.20916748046875, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14165793359279633, + "rewards/margins": 2.2267072200775146, + "rewards/rejected": -2.0850493907928467, + "step": 7813 + }, + { + "epoch": 0.45, + "learning_rate": 5.970136345432582e-08, + "logits/chosen": -1.9753199815750122, + "logits/rejected": -1.961356282234192, + "logps/chosen": -202.82412719726562, + "logps/rejected": -257.5910339355469, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.566359043121338, + "rewards/margins": 0.01554727554321289, + "rewards/rejected": 2.550811767578125, + "step": 7814 + }, + { + "epoch": 0.45, + "learning_rate": 5.969211835089291e-08, + "logits/chosen": -1.9725093841552734, + "logits/rejected": -1.9842405319213867, + "logps/chosen": -43.68125915527344, + "logps/rejected": -227.12789916992188, + "loss": 0.4341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3628353178501129, + "rewards/margins": 0.8251827359199524, + "rewards/rejected": -0.4623474180698395, + "step": 7815 + }, + { + "epoch": 0.45, + "learning_rate": 5.968287290314857e-08, + "logits/chosen": -1.924298882484436, + "logits/rejected": -1.9456369876861572, + "logps/chosen": -182.0504608154297, + "logps/rejected": -240.74020385742188, + "loss": 0.1256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.75731360912323, + "rewards/margins": 1.7428498268127441, + "rewards/rejected": 0.01446380652487278, + "step": 7816 + }, + { + "epoch": 0.45, + "learning_rate": 5.967362711142118e-08, + "logits/chosen": -2.1291491985321045, + "logits/rejected": -2.112422466278076, + "logps/chosen": -24.80280113220215, + "logps/rejected": -265.4330139160156, + "loss": 0.2571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47840481996536255, + "rewards/margins": 3.4286611080169678, + "rewards/rejected": -2.95025634765625, + "step": 7817 + }, + { + "epoch": 0.45, + "learning_rate": 5.966438097603928e-08, + "logits/chosen": -1.9028584957122803, + "logits/rejected": -1.9169069528579712, + "logps/chosen": -19.3076171875, + "logps/rejected": -243.98902893066406, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3601970672607422, + "rewards/margins": 5.165698051452637, + "rewards/rejected": -4.8055009841918945, + "step": 7818 + }, + { + "epoch": 0.46, + "learning_rate": 5.965513449733126e-08, + "logits/chosen": -1.642759084701538, + "logits/rejected": -1.6415038108825684, + "logps/chosen": -0.007608811371028423, + "logps/rejected": -88.59585571289062, + "loss": 0.3955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0024866738822311163, + "rewards/margins": 1.9597376585006714, + "rewards/rejected": -1.957250952720642, + "step": 7819 + }, + { + "epoch": 0.46, + "learning_rate": 5.964588767562566e-08, + "logits/chosen": -1.9744317531585693, + "logits/rejected": -1.9661318063735962, + "logps/chosen": -182.22096252441406, + "logps/rejected": -257.7814025878906, + "loss": 0.1309, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3248519897460938, + "rewards/margins": 1.6777725219726562, + "rewards/rejected": 0.6470794677734375, + "step": 7820 + }, + { + "epoch": 0.46, + "learning_rate": 5.963664051125094e-08, + "logits/chosen": -2.0454261302948, + "logits/rejected": -1.9964967966079712, + "logps/chosen": -315.74822998046875, + "logps/rejected": -571.9882202148438, + "loss": 0.1316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7534546256065369, + "rewards/margins": 4.712677001953125, + "rewards/rejected": -3.9592225551605225, + "step": 7821 + }, + { + "epoch": 0.46, + "learning_rate": 5.962739300453561e-08, + "logits/chosen": -2.1612753868103027, + "logits/rejected": -2.1499288082122803, + "logps/chosen": -12.989105224609375, + "logps/rejected": -83.0424575805664, + "loss": 0.5448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03423652797937393, + "rewards/margins": 0.5698909759521484, + "rewards/rejected": -0.6041275262832642, + "step": 7822 + }, + { + "epoch": 0.46, + "learning_rate": 5.961814515580817e-08, + "logits/chosen": -1.9797693490982056, + "logits/rejected": -1.9714103937149048, + "logps/chosen": -6.690465450286865, + "logps/rejected": -170.08592224121094, + "loss": 0.6402, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.15895819664001465, + "rewards/margins": -0.0023684054613113403, + "rewards/rejected": 0.161326602101326, + "step": 7823 + }, + { + "epoch": 0.46, + "learning_rate": 5.960889696539718e-08, + "logits/chosen": -1.9449275732040405, + "logits/rejected": -1.9482955932617188, + "logps/chosen": -52.4321403503418, + "logps/rejected": -249.90760803222656, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25560417771339417, + "rewards/margins": 2.3615782260894775, + "rewards/rejected": -2.105973958969116, + "step": 7824 + }, + { + "epoch": 0.46, + "learning_rate": 5.959964843363117e-08, + "logits/chosen": -2.0607399940490723, + "logits/rejected": -2.051565408706665, + "logps/chosen": -280.5540771484375, + "logps/rejected": -489.7201843261719, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6692473888397217, + "rewards/margins": 5.021566867828369, + "rewards/rejected": -2.3523194789886475, + "step": 7825 + }, + { + "epoch": 0.46, + "learning_rate": 5.959039956083869e-08, + "logits/chosen": -2.011536121368408, + "logits/rejected": -1.9882270097732544, + "logps/chosen": -229.59754943847656, + "logps/rejected": -528.3428344726562, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.384085178375244, + "rewards/margins": 5.143582344055176, + "rewards/rejected": -2.7594971656799316, + "step": 7826 + }, + { + "epoch": 0.46, + "learning_rate": 5.958115034734831e-08, + "logits/chosen": -1.9246010780334473, + "logits/rejected": -1.9611488580703735, + "logps/chosen": -259.6846618652344, + "logps/rejected": -473.9208679199219, + "loss": 0.0392, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9306609630584717, + "rewards/margins": 3.0367431640625, + "rewards/rejected": -0.10608215630054474, + "step": 7827 + }, + { + "epoch": 0.46, + "learning_rate": 5.9571900793488594e-08, + "logits/chosen": -1.923687219619751, + "logits/rejected": -1.916165828704834, + "logps/chosen": -64.1180648803711, + "logps/rejected": -284.45819091796875, + "loss": 0.1589, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.202797770500183, + "rewards/margins": 1.907814860343933, + "rewards/rejected": -0.70501708984375, + "step": 7828 + }, + { + "epoch": 0.46, + "learning_rate": 5.956265089958814e-08, + "logits/chosen": -2.045427083969116, + "logits/rejected": -2.0379059314727783, + "logps/chosen": -23.25574493408203, + "logps/rejected": -202.46202087402344, + "loss": 0.5692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1267332136631012, + "rewards/margins": 0.2908170819282532, + "rewards/rejected": -0.16408386826515198, + "step": 7829 + }, + { + "epoch": 0.46, + "learning_rate": 5.9553400665975564e-08, + "logits/chosen": -2.0266523361206055, + "logits/rejected": -2.0220563411712646, + "logps/chosen": -0.7709522247314453, + "logps/rejected": -90.15351867675781, + "loss": 0.4592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06438275426626205, + "rewards/margins": 1.1925101280212402, + "rewards/rejected": -1.1281273365020752, + "step": 7830 + }, + { + "epoch": 0.46, + "learning_rate": 5.9544150092979464e-08, + "logits/chosen": -2.115753173828125, + "logits/rejected": -2.1027274131774902, + "logps/chosen": -0.0009925422491505742, + "logps/rejected": -183.0631103515625, + "loss": 0.3768, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.378289981512353e-05, + "rewards/margins": 2.7526140213012695, + "rewards/rejected": -2.7526779174804688, + "step": 7831 + }, + { + "epoch": 0.46, + "learning_rate": 5.9534899180928455e-08, + "logits/chosen": -1.8987669944763184, + "logits/rejected": -1.905312180519104, + "logps/chosen": -195.9890899658203, + "logps/rejected": -209.57276916503906, + "loss": 0.2355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2105331420898438, + "rewards/margins": 0.8992782831192017, + "rewards/rejected": 0.3112548887729645, + "step": 7832 + }, + { + "epoch": 0.46, + "learning_rate": 5.952564793015119e-08, + "logits/chosen": -1.8878533840179443, + "logits/rejected": -1.8850172758102417, + "logps/chosen": -47.93695831298828, + "logps/rejected": -109.82897186279297, + "loss": 0.401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8173195123672485, + "rewards/margins": 0.3925277888774872, + "rewards/rejected": 0.42479172348976135, + "step": 7833 + }, + { + "epoch": 0.46, + "learning_rate": 5.9516396340976325e-08, + "logits/chosen": -1.9507160186767578, + "logits/rejected": -1.8437879085540771, + "logps/chosen": -252.20265197753906, + "logps/rejected": -449.18017578125, + "loss": 0.1467, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6696456670761108, + "rewards/margins": 1.9488967657089233, + "rewards/rejected": -0.2792510986328125, + "step": 7834 + }, + { + "epoch": 0.46, + "learning_rate": 5.95071444137325e-08, + "logits/chosen": -1.9167760610580444, + "logits/rejected": -1.922922968864441, + "logps/chosen": -47.82526779174805, + "logps/rejected": -227.88238525390625, + "loss": 0.196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9429680109024048, + "rewards/margins": 2.066725969314575, + "rewards/rejected": -1.1237579584121704, + "step": 7835 + }, + { + "epoch": 0.46, + "learning_rate": 5.9497892148748395e-08, + "logits/chosen": -2.023648500442505, + "logits/rejected": -2.019087791442871, + "logps/chosen": -5.7338398619322106e-05, + "logps/rejected": -80.29771423339844, + "loss": 0.7413, + "rewards/accuracies": 0.0, + "rewards/chosen": 4.172110550371144e-07, + "rewards/margins": -0.18845249712467194, + "rewards/rejected": 0.18845291435718536, + "step": 7836 + }, + { + "epoch": 0.46, + "learning_rate": 5.9488639546352727e-08, + "logits/chosen": -1.9964059591293335, + "logits/rejected": -2.005094051361084, + "logps/chosen": -223.50445556640625, + "logps/rejected": -400.36932373046875, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.812448263168335, + "rewards/margins": 4.746945381164551, + "rewards/rejected": -1.9344971179962158, + "step": 7837 + }, + { + "epoch": 0.46, + "learning_rate": 5.947938660687415e-08, + "logits/chosen": -1.9963265657424927, + "logits/rejected": -2.0033986568450928, + "logps/chosen": -201.39666748046875, + "logps/rejected": -280.8648681640625, + "loss": 0.0992, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1364288330078125, + "rewards/margins": 1.6819915771484375, + "rewards/rejected": 0.454437255859375, + "step": 7838 + }, + { + "epoch": 0.46, + "learning_rate": 5.9470133330641404e-08, + "logits/chosen": -2.0451231002807617, + "logits/rejected": -2.050396680831909, + "logps/chosen": -37.85907745361328, + "logps/rejected": -167.84063720703125, + "loss": 0.4494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31971970200538635, + "rewards/margins": 2.131699323654175, + "rewards/rejected": -2.4514191150665283, + "step": 7839 + }, + { + "epoch": 0.46, + "learning_rate": 5.946087971798318e-08, + "logits/chosen": -1.9896997213363647, + "logits/rejected": -1.9885931015014648, + "logps/chosen": -52.63768005371094, + "logps/rejected": -110.56159210205078, + "loss": 0.3821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44697457551956177, + "rewards/margins": 0.898098349571228, + "rewards/rejected": -0.45112380385398865, + "step": 7840 + }, + { + "epoch": 0.46, + "learning_rate": 5.9451625769228244e-08, + "logits/chosen": -1.8819512128829956, + "logits/rejected": -1.8561313152313232, + "logps/chosen": -197.87513732910156, + "logps/rejected": -363.2740783691406, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3760970830917358, + "rewards/margins": 0.33404994010925293, + "rewards/rejected": 1.042047142982483, + "step": 7841 + }, + { + "epoch": 0.46, + "learning_rate": 5.944237148470532e-08, + "logits/chosen": -2.1037113666534424, + "logits/rejected": -2.0298142433166504, + "logps/chosen": -295.8075866699219, + "logps/rejected": -633.4072265625, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7329314947128296, + "rewards/margins": 1.6909271478652954, + "rewards/rejected": 0.04200439527630806, + "step": 7842 + }, + { + "epoch": 0.46, + "learning_rate": 5.943311686474319e-08, + "logits/chosen": -1.8459692001342773, + "logits/rejected": -1.8627971410751343, + "logps/chosen": -186.18423461914062, + "logps/rejected": -274.8087158203125, + "loss": 0.1162, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6026413440704346, + "rewards/margins": 1.616337537765503, + "rewards/rejected": 0.9863037467002869, + "step": 7843 + }, + { + "epoch": 0.46, + "learning_rate": 5.942386190967058e-08, + "logits/chosen": -1.9279078245162964, + "logits/rejected": -1.9920052289962769, + "logps/chosen": -328.9852294921875, + "logps/rejected": -460.6626281738281, + "loss": 0.0762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5595825910568237, + "rewards/margins": 2.13358473777771, + "rewards/rejected": -0.5740020871162415, + "step": 7844 + }, + { + "epoch": 0.46, + "learning_rate": 5.941460661981632e-08, + "logits/chosen": -2.0245089530944824, + "logits/rejected": -2.030961751937866, + "logps/chosen": -0.00039285660022869706, + "logps/rejected": -112.95692443847656, + "loss": 0.4561, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4408906028838828e-05, + "rewards/margins": 1.3001426458358765, + "rewards/rejected": -1.3001670837402344, + "step": 7845 + }, + { + "epoch": 0.46, + "learning_rate": 5.940535099550916e-08, + "logits/chosen": -1.8238435983657837, + "logits/rejected": -1.8255434036254883, + "logps/chosen": -0.8143119812011719, + "logps/rejected": -56.50703811645508, + "loss": 0.529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035259928554296494, + "rewards/margins": 0.673005223274231, + "rewards/rejected": -0.6377453207969666, + "step": 7846 + }, + { + "epoch": 0.46, + "learning_rate": 5.939609503707795e-08, + "logits/chosen": -2.2843191623687744, + "logits/rejected": -2.2657570838928223, + "logps/chosen": -0.25428926944732666, + "logps/rejected": -122.55876922607422, + "loss": 0.5125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017662405967712402, + "rewards/margins": 0.8469715118408203, + "rewards/rejected": -0.8293091058731079, + "step": 7847 + }, + { + "epoch": 0.46, + "learning_rate": 5.938683874485146e-08, + "logits/chosen": -1.9698612689971924, + "logits/rejected": -1.9681670665740967, + "logps/chosen": -25.299419403076172, + "logps/rejected": -180.94314575195312, + "loss": 0.3346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23914757370948792, + "rewards/margins": 2.0731563568115234, + "rewards/rejected": -1.834008812904358, + "step": 7848 + }, + { + "epoch": 0.46, + "learning_rate": 5.9377582119158567e-08, + "logits/chosen": -2.0242362022399902, + "logits/rejected": -2.023198127746582, + "logps/chosen": -103.57106018066406, + "logps/rejected": -316.47747802734375, + "loss": 0.2219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8665710687637329, + "rewards/margins": 1.358306884765625, + "rewards/rejected": -0.4917358458042145, + "step": 7849 + }, + { + "epoch": 0.46, + "learning_rate": 5.936832516032807e-08, + "logits/chosen": -1.9765722751617432, + "logits/rejected": -1.9477901458740234, + "logps/chosen": -211.04908752441406, + "logps/rejected": -407.2089538574219, + "loss": 0.086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4362090826034546, + "rewards/margins": 4.280932426452637, + "rewards/rejected": -2.8447234630584717, + "step": 7850 + }, + { + "epoch": 0.46, + "learning_rate": 5.935906786868885e-08, + "logits/chosen": -1.9306985139846802, + "logits/rejected": -1.9174847602844238, + "logps/chosen": -64.28271484375, + "logps/rejected": -327.57293701171875, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5734550356864929, + "rewards/margins": 4.943767547607422, + "rewards/rejected": -4.370312690734863, + "step": 7851 + }, + { + "epoch": 0.46, + "learning_rate": 5.934981024456974e-08, + "logits/chosen": -2.085400104522705, + "logits/rejected": -2.0732226371765137, + "logps/chosen": -15.269189834594727, + "logps/rejected": -200.84732055664062, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.372213751077652, + "rewards/margins": 2.408770799636841, + "rewards/rejected": -2.0365569591522217, + "step": 7852 + }, + { + "epoch": 0.46, + "learning_rate": 5.934055228829966e-08, + "logits/chosen": -2.0524444580078125, + "logits/rejected": -2.039904832839966, + "logps/chosen": -78.52693176269531, + "logps/rejected": -277.3936767578125, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25690993666648865, + "rewards/margins": 1.3408424854278564, + "rewards/rejected": -1.0839325189590454, + "step": 7853 + }, + { + "epoch": 0.46, + "learning_rate": 5.9331294000207455e-08, + "logits/chosen": -1.7560354471206665, + "logits/rejected": -1.7514790296554565, + "logps/chosen": -22.8624210357666, + "logps/rejected": -216.21234130859375, + "loss": 0.2023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1443380117416382, + "rewards/margins": 1.8126776218414307, + "rewards/rejected": -0.6683395504951477, + "step": 7854 + }, + { + "epoch": 0.46, + "learning_rate": 5.932203538062204e-08, + "logits/chosen": -1.9787708520889282, + "logits/rejected": -2.0296614170074463, + "logps/chosen": -300.83770751953125, + "logps/rejected": -352.3092041015625, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8161133527755737, + "rewards/margins": 3.3629212379455566, + "rewards/rejected": -1.546807885169983, + "step": 7855 + }, + { + "epoch": 0.46, + "learning_rate": 5.9312776429872335e-08, + "logits/chosen": -1.8735191822052002, + "logits/rejected": -1.8148350715637207, + "logps/chosen": -368.4295654296875, + "logps/rejected": -578.06494140625, + "loss": 0.0609, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5317444801330566, + "rewards/margins": 2.1014466285705566, + "rewards/rejected": 1.4302978515625, + "step": 7856 + }, + { + "epoch": 0.46, + "learning_rate": 5.930351714828726e-08, + "logits/chosen": -2.002042055130005, + "logits/rejected": -1.997822880744934, + "logps/chosen": -49.409019470214844, + "logps/rejected": -335.9877624511719, + "loss": 0.2494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4367092251777649, + "rewards/margins": 4.808004856109619, + "rewards/rejected": -4.37129545211792, + "step": 7857 + }, + { + "epoch": 0.46, + "learning_rate": 5.9294257536195745e-08, + "logits/chosen": -2.0177736282348633, + "logits/rejected": -2.016038656234741, + "logps/chosen": -1.6512064933776855, + "logps/rejected": -43.99385452270508, + "loss": 0.6838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07030274718999863, + "rewards/margins": 0.15512946248054504, + "rewards/rejected": -0.22543220221996307, + "step": 7858 + }, + { + "epoch": 0.46, + "learning_rate": 5.9284997593926744e-08, + "logits/chosen": -1.9944549798965454, + "logits/rejected": -1.980913758277893, + "logps/chosen": -21.22952651977539, + "logps/rejected": -212.87025451660156, + "loss": 0.2581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4330497682094574, + "rewards/margins": 2.9793617725372314, + "rewards/rejected": -2.546312093734741, + "step": 7859 + }, + { + "epoch": 0.46, + "learning_rate": 5.927573732180922e-08, + "logits/chosen": -1.760711431503296, + "logits/rejected": -1.7554675340652466, + "logps/chosen": -0.057620301842689514, + "logps/rejected": -47.57947540283203, + "loss": 0.4412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006666012341156602, + "rewards/margins": 1.5407793521881104, + "rewards/rejected": -1.5401127338409424, + "step": 7860 + }, + { + "epoch": 0.46, + "learning_rate": 5.926647672017211e-08, + "logits/chosen": -1.9060308933258057, + "logits/rejected": -1.899903655052185, + "logps/chosen": -0.0027403878048062325, + "logps/rejected": -298.0823974609375, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002001691173063591, + "rewards/margins": 5.926131725311279, + "rewards/rejected": -5.926331996917725, + "step": 7861 + }, + { + "epoch": 0.46, + "learning_rate": 5.9257215789344436e-08, + "logits/chosen": -1.963241457939148, + "logits/rejected": -1.9632844924926758, + "logps/chosen": -26.92449188232422, + "logps/rejected": -117.86027526855469, + "loss": 0.4392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26859208941459656, + "rewards/margins": 1.0236091613769531, + "rewards/rejected": -0.755017101764679, + "step": 7862 + }, + { + "epoch": 0.46, + "learning_rate": 5.924795452965516e-08, + "logits/chosen": -1.7290129661560059, + "logits/rejected": -1.7202050685882568, + "logps/chosen": -255.1656036376953, + "logps/rejected": -305.96435546875, + "loss": 0.1708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7877731323242188, + "rewards/margins": 1.530705213546753, + "rewards/rejected": 0.25706788897514343, + "step": 7863 + }, + { + "epoch": 0.46, + "learning_rate": 5.923869294143332e-08, + "logits/chosen": -1.9594581127166748, + "logits/rejected": -1.941094160079956, + "logps/chosen": -66.98269653320312, + "logps/rejected": -241.3668670654297, + "loss": 0.6334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46080857515335083, + "rewards/margins": 0.8396537899971008, + "rewards/rejected": -1.3004623651504517, + "step": 7864 + }, + { + "epoch": 0.46, + "learning_rate": 5.922943102500789e-08, + "logits/chosen": -1.6940767765045166, + "logits/rejected": -1.7111517190933228, + "logps/chosen": -0.00011503359564812854, + "logps/rejected": -92.72745513916016, + "loss": 0.6089, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.145687147958597e-07, + "rewards/margins": 0.3022756576538086, + "rewards/rejected": -0.3022758662700653, + "step": 7865 + }, + { + "epoch": 0.46, + "learning_rate": 5.922016878070795e-08, + "logits/chosen": -2.039733409881592, + "logits/rejected": -2.043639898300171, + "logps/chosen": -0.00014101881242822856, + "logps/rejected": -130.68588256835938, + "loss": 0.409, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4493001799564809e-05, + "rewards/margins": 2.011693000793457, + "rewards/rejected": -2.011678457260132, + "step": 7866 + }, + { + "epoch": 0.46, + "learning_rate": 5.92109062088625e-08, + "logits/chosen": -1.7955704927444458, + "logits/rejected": -1.793333649635315, + "logps/chosen": -9.664886474609375, + "logps/rejected": -42.415992736816406, + "loss": 0.6114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06833944469690323, + "rewards/margins": 0.26744911074638367, + "rewards/rejected": -0.19910965859889984, + "step": 7867 + }, + { + "epoch": 0.46, + "learning_rate": 5.920164330980062e-08, + "logits/chosen": -2.058002471923828, + "logits/rejected": -2.051978588104248, + "logps/chosen": -5.411177635192871, + "logps/rejected": -200.056396484375, + "loss": 0.4243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18053412437438965, + "rewards/margins": 3.3532228469848633, + "rewards/rejected": -3.533756971359253, + "step": 7868 + }, + { + "epoch": 0.46, + "learning_rate": 5.9192380083851345e-08, + "logits/chosen": -2.0585665702819824, + "logits/rejected": -2.0580315589904785, + "logps/chosen": -0.048632584512233734, + "logps/rejected": -161.09814453125, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003756476566195488, + "rewards/margins": 3.494252920150757, + "rewards/rejected": -3.498009443283081, + "step": 7869 + }, + { + "epoch": 0.46, + "learning_rate": 5.918311653134378e-08, + "logits/chosen": -1.9565907716751099, + "logits/rejected": -1.9520704746246338, + "logps/chosen": -0.0008648543152958155, + "logps/rejected": -157.0396728515625, + "loss": 0.3691, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6264204279868864e-05, + "rewards/margins": 2.7309343814849854, + "rewards/rejected": -2.730908155441284, + "step": 7870 + }, + { + "epoch": 0.46, + "learning_rate": 5.917385265260698e-08, + "logits/chosen": -1.8238697052001953, + "logits/rejected": -1.809739112854004, + "logps/chosen": -0.041754938662052155, + "logps/rejected": -117.28380584716797, + "loss": 0.4315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0038571550976485014, + "rewards/margins": 1.5611966848373413, + "rewards/rejected": -1.5573395490646362, + "step": 7871 + }, + { + "epoch": 0.46, + "learning_rate": 5.916458844797008e-08, + "logits/chosen": -1.899045467376709, + "logits/rejected": -1.9071033000946045, + "logps/chosen": -155.9352264404297, + "logps/rejected": -206.7083282470703, + "loss": 0.1891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.48711097240448, + "rewards/margins": 1.076196312904358, + "rewards/rejected": 0.4109146296977997, + "step": 7872 + }, + { + "epoch": 0.46, + "learning_rate": 5.915532391776215e-08, + "logits/chosen": -2.0323941707611084, + "logits/rejected": -2.0381603240966797, + "logps/chosen": -18.25001335144043, + "logps/rejected": -146.573486328125, + "loss": 0.4662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1149498000741005, + "rewards/margins": 1.0273689031600952, + "rewards/rejected": -0.9124191403388977, + "step": 7873 + }, + { + "epoch": 0.46, + "learning_rate": 5.914605906231234e-08, + "logits/chosen": -1.892967700958252, + "logits/rejected": -1.8949726819992065, + "logps/chosen": -110.08880615234375, + "logps/rejected": -191.0206298828125, + "loss": 0.2083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.096899390220642, + "rewards/margins": 1.5854156017303467, + "rewards/rejected": -0.488516241312027, + "step": 7874 + }, + { + "epoch": 0.46, + "learning_rate": 5.913679388194976e-08, + "logits/chosen": -1.8775911331176758, + "logits/rejected": -1.8321505784988403, + "logps/chosen": -269.0464782714844, + "logps/rejected": -425.46826171875, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.40130615234375, + "rewards/margins": 3.195941209793091, + "rewards/rejected": -0.794634997844696, + "step": 7875 + }, + { + "epoch": 0.46, + "learning_rate": 5.912752837700358e-08, + "logits/chosen": -1.9818103313446045, + "logits/rejected": -1.9797520637512207, + "logps/chosen": -9.013278007507324, + "logps/rejected": -143.54119873046875, + "loss": 0.402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18680143356323242, + "rewards/margins": 1.3072634935379028, + "rewards/rejected": -1.1204620599746704, + "step": 7876 + }, + { + "epoch": 0.46, + "learning_rate": 5.911826254780295e-08, + "logits/chosen": -1.8757529258728027, + "logits/rejected": -1.8490962982177734, + "logps/chosen": -178.23165893554688, + "logps/rejected": -341.39093017578125, + "loss": 0.2026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.289642333984375, + "rewards/margins": 1.1496460437774658, + "rewards/rejected": 0.13999633491039276, + "step": 7877 + }, + { + "epoch": 0.46, + "learning_rate": 5.910899639467702e-08, + "logits/chosen": -2.0765602588653564, + "logits/rejected": -2.0651087760925293, + "logps/chosen": -48.4985466003418, + "logps/rejected": -286.901611328125, + "loss": 0.2031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5046612024307251, + "rewards/margins": 4.038129806518555, + "rewards/rejected": -3.533468723297119, + "step": 7878 + }, + { + "epoch": 0.46, + "learning_rate": 5.9099729917955e-08, + "logits/chosen": -1.9166253805160522, + "logits/rejected": -1.9184213876724243, + "logps/chosen": -10.166348457336426, + "logps/rejected": -163.0614013671875, + "loss": 0.3025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3087390959262848, + "rewards/margins": 2.503754138946533, + "rewards/rejected": -2.1950149536132812, + "step": 7879 + }, + { + "epoch": 0.46, + "learning_rate": 5.909046311796604e-08, + "logits/chosen": -2.1288373470306396, + "logits/rejected": -2.128528356552124, + "logps/chosen": -0.0001797614968381822, + "logps/rejected": -40.64216232299805, + "loss": 0.5298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7041660385075375e-06, + "rewards/margins": 0.7189699411392212, + "rewards/rejected": -0.7189682126045227, + "step": 7880 + }, + { + "epoch": 0.46, + "learning_rate": 5.908119599503938e-08, + "logits/chosen": -1.8686474561691284, + "logits/rejected": -1.8734086751937866, + "logps/chosen": -8.354601860046387, + "logps/rejected": -184.1160888671875, + "loss": 0.3625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40106335282325745, + "rewards/margins": 1.1501966714859009, + "rewards/rejected": -0.749133288860321, + "step": 7881 + }, + { + "epoch": 0.46, + "learning_rate": 5.9071928549504205e-08, + "logits/chosen": -2.173988103866577, + "logits/rejected": -2.172050952911377, + "logps/chosen": -34.989830017089844, + "logps/rejected": -259.35968017578125, + "loss": 0.3541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23027725517749786, + "rewards/margins": 1.8232094049453735, + "rewards/rejected": -1.592932105064392, + "step": 7882 + }, + { + "epoch": 0.46, + "learning_rate": 5.9062660781689764e-08, + "logits/chosen": -1.864335536956787, + "logits/rejected": -1.832985281944275, + "logps/chosen": -177.08599853515625, + "logps/rejected": -224.34239196777344, + "loss": 0.0818, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.022757053375244, + "rewards/margins": 3.26519775390625, + "rewards/rejected": -1.2424408197402954, + "step": 7883 + }, + { + "epoch": 0.46, + "learning_rate": 5.905339269192527e-08, + "logits/chosen": -1.8917862176895142, + "logits/rejected": -1.9509011507034302, + "logps/chosen": -411.82781982421875, + "logps/rejected": -427.33233642578125, + "loss": 0.0769, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0213043689727783, + "rewards/margins": 2.084817409515381, + "rewards/rejected": 0.9364868402481079, + "step": 7884 + }, + { + "epoch": 0.46, + "learning_rate": 5.904412428053999e-08, + "logits/chosen": -2.0427188873291016, + "logits/rejected": -2.0369863510131836, + "logps/chosen": -144.09036254882812, + "logps/rejected": -379.11090087890625, + "loss": 0.2845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.495126336812973, + "rewards/margins": 1.3606903553009033, + "rewards/rejected": -0.8655639886856079, + "step": 7885 + }, + { + "epoch": 0.46, + "learning_rate": 5.903485554786316e-08, + "logits/chosen": -1.855883240699768, + "logits/rejected": -1.8510205745697021, + "logps/chosen": -12.375334739685059, + "logps/rejected": -133.22848510742188, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22503434121608734, + "rewards/margins": 2.2844398021698, + "rewards/rejected": -2.059405565261841, + "step": 7886 + }, + { + "epoch": 0.46, + "learning_rate": 5.9025586494224086e-08, + "logits/chosen": -2.0114293098449707, + "logits/rejected": -2.010859489440918, + "logps/chosen": -12.84519100189209, + "logps/rejected": -105.95443725585938, + "loss": 0.5405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06168670579791069, + "rewards/margins": 0.5953254699707031, + "rewards/rejected": -0.5336387753486633, + "step": 7887 + }, + { + "epoch": 0.46, + "learning_rate": 5.901631711995203e-08, + "logits/chosen": -1.8506548404693604, + "logits/rejected": -1.8517651557922363, + "logps/chosen": -397.43603515625, + "logps/rejected": -406.2803955078125, + "loss": 0.1203, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3323943614959717, + "rewards/margins": 1.8151580095291138, + "rewards/rejected": 0.5172363519668579, + "step": 7888 + }, + { + "epoch": 0.46, + "learning_rate": 5.900704742537628e-08, + "logits/chosen": -2.0642263889312744, + "logits/rejected": -2.048907518386841, + "logps/chosen": -24.119831085205078, + "logps/rejected": -215.06427001953125, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021419525146484375, + "rewards/margins": 4.09335994720459, + "rewards/rejected": -4.095501899719238, + "step": 7889 + }, + { + "epoch": 0.46, + "learning_rate": 5.899777741082614e-08, + "logits/chosen": -2.0297021865844727, + "logits/rejected": -2.0222413539886475, + "logps/chosen": -198.87615966796875, + "logps/rejected": -424.79803466796875, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8962067365646362, + "rewards/margins": 5.501748561859131, + "rewards/rejected": -3.605541944503784, + "step": 7890 + }, + { + "epoch": 0.46, + "learning_rate": 5.8988507076630956e-08, + "logits/chosen": -2.029202938079834, + "logits/rejected": -2.027125358581543, + "logps/chosen": -51.570404052734375, + "logps/rejected": -221.1195831298828, + "loss": 0.5297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06140899658203125, + "rewards/margins": 0.7768005728721619, + "rewards/rejected": -0.7153915762901306, + "step": 7891 + }, + { + "epoch": 0.46, + "learning_rate": 5.897923642312001e-08, + "logits/chosen": -1.96670663356781, + "logits/rejected": -1.9607793092727661, + "logps/chosen": -0.04863143712282181, + "logps/rejected": -55.80518341064453, + "loss": 0.4634, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.722046125811175e-07, + "rewards/margins": 1.294706106185913, + "rewards/rejected": -1.2947067022323608, + "step": 7892 + }, + { + "epoch": 0.46, + "learning_rate": 5.896996545062268e-08, + "logits/chosen": -1.6876020431518555, + "logits/rejected": -1.686814308166504, + "logps/chosen": -8.904818969313055e-05, + "logps/rejected": -70.98615264892578, + "loss": 0.7026, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960246198810637e-07, + "rewards/margins": -0.06790220737457275, + "rewards/rejected": 0.067901611328125, + "step": 7893 + }, + { + "epoch": 0.46, + "learning_rate": 5.8960694159468295e-08, + "logits/chosen": -2.041398286819458, + "logits/rejected": -2.0386033058166504, + "logps/chosen": -45.573543548583984, + "logps/rejected": -226.9842987060547, + "loss": 0.2652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8062359094619751, + "rewards/margins": 1.2394039630889893, + "rewards/rejected": -0.4331680238246918, + "step": 7894 + }, + { + "epoch": 0.46, + "learning_rate": 5.895142254998622e-08, + "logits/chosen": -2.0339908599853516, + "logits/rejected": -2.026538372039795, + "logps/chosen": -9.715293708723038e-05, + "logps/rejected": -289.26239013671875, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.007001391277299e-07, + "rewards/margins": 5.895752429962158, + "rewards/rejected": -5.895751953125, + "step": 7895 + }, + { + "epoch": 0.46, + "learning_rate": 5.894215062250583e-08, + "logits/chosen": -1.9702496528625488, + "logits/rejected": -1.9330132007598877, + "logps/chosen": -271.0128173828125, + "logps/rejected": -344.9449768066406, + "loss": 0.0585, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.14615797996521, + "rewards/margins": 2.247830390930176, + "rewards/rejected": 0.898327648639679, + "step": 7896 + }, + { + "epoch": 0.46, + "learning_rate": 5.893287837735651e-08, + "logits/chosen": -1.8647301197052002, + "logits/rejected": -1.835157036781311, + "logps/chosen": -170.1525115966797, + "logps/rejected": -322.6069641113281, + "loss": 0.2208, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.142146348953247, + "rewards/margins": 1.4255173206329346, + "rewards/rejected": -0.2833709716796875, + "step": 7897 + }, + { + "epoch": 0.46, + "learning_rate": 5.892360581486766e-08, + "logits/chosen": -1.8226431608200073, + "logits/rejected": -1.8051940202713013, + "logps/chosen": -293.70819091796875, + "logps/rejected": -489.2939453125, + "loss": 0.0773, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.353271484375, + "rewards/margins": 2.22735595703125, + "rewards/rejected": 0.12591552734375, + "step": 7898 + }, + { + "epoch": 0.46, + "learning_rate": 5.891433293536867e-08, + "logits/chosen": -1.7916101217269897, + "logits/rejected": -1.7800959348678589, + "logps/chosen": -194.3048095703125, + "logps/rejected": -324.0251770019531, + "loss": 0.2114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2699768543243408, + "rewards/margins": 1.0125336647033691, + "rewards/rejected": 0.25744324922561646, + "step": 7899 + }, + { + "epoch": 0.46, + "learning_rate": 5.8905059739188976e-08, + "logits/chosen": -1.8719640970230103, + "logits/rejected": -1.8710483312606812, + "logps/chosen": -2.04614520072937, + "logps/rejected": -49.74404525756836, + "loss": 0.5332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0070223333314061165, + "rewards/margins": 0.45206284523010254, + "rewards/rejected": -0.4450405240058899, + "step": 7900 + }, + { + "epoch": 0.46, + "learning_rate": 5.8895786226657994e-08, + "logits/chosen": -1.9915833473205566, + "logits/rejected": -1.9726530313491821, + "logps/chosen": -61.89996337890625, + "logps/rejected": -288.25048828125, + "loss": 0.1568, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.298614501953125, + "rewards/margins": 2.239938259124756, + "rewards/rejected": -0.9413238763809204, + "step": 7901 + }, + { + "epoch": 0.46, + "learning_rate": 5.888651239810518e-08, + "logits/chosen": -1.7622753381729126, + "logits/rejected": -1.7441935539245605, + "logps/chosen": -207.1022491455078, + "logps/rejected": -312.5193176269531, + "loss": 0.1083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5906966924667358, + "rewards/margins": 1.6997696161270142, + "rewards/rejected": -0.10907287895679474, + "step": 7902 + }, + { + "epoch": 0.46, + "learning_rate": 5.887723825385997e-08, + "logits/chosen": -1.9399586915969849, + "logits/rejected": -1.944055438041687, + "logps/chosen": -195.24945068359375, + "logps/rejected": -371.5995178222656, + "loss": 0.0726, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.082594394683838, + "rewards/margins": 2.370924472808838, + "rewards/rejected": -0.288330078125, + "step": 7903 + }, + { + "epoch": 0.46, + "learning_rate": 5.886796379425185e-08, + "logits/chosen": -1.744516134262085, + "logits/rejected": -1.7366461753845215, + "logps/chosen": -163.64227294921875, + "logps/rejected": -183.9333953857422, + "loss": 0.3898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.815966784954071, + "rewards/margins": 0.5233535766601562, + "rewards/rejected": 0.2926132380962372, + "step": 7904 + }, + { + "epoch": 0.46, + "learning_rate": 5.885868901961025e-08, + "logits/chosen": -1.8789234161376953, + "logits/rejected": -1.867132544517517, + "logps/chosen": -257.4750671386719, + "logps/rejected": -442.6202697753906, + "loss": 0.0621, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8284790515899658, + "rewards/margins": 2.5612518787384033, + "rewards/rejected": -0.7327728271484375, + "step": 7905 + }, + { + "epoch": 0.46, + "learning_rate": 5.88494139302647e-08, + "logits/chosen": -2.0953071117401123, + "logits/rejected": -2.09037446975708, + "logps/chosen": -0.00012182809587102383, + "logps/rejected": -417.212646484375, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.697911092283903e-06, + "rewards/margins": 8.631546974182129, + "rewards/rejected": -8.631552696228027, + "step": 7906 + }, + { + "epoch": 0.46, + "learning_rate": 5.884013852654468e-08, + "logits/chosen": -2.1017496585845947, + "logits/rejected": -2.1027090549468994, + "logps/chosen": -13.907617568969727, + "logps/rejected": -23.351043701171875, + "loss": 0.6886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01093139685690403, + "rewards/margins": 0.011389732360839844, + "rewards/rejected": -0.00045833588228560984, + "step": 7907 + }, + { + "epoch": 0.46, + "learning_rate": 5.8830862808779703e-08, + "logits/chosen": -2.029468297958374, + "logits/rejected": -2.025301933288574, + "logps/chosen": -97.27980041503906, + "logps/rejected": -158.62733459472656, + "loss": 0.184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9550697207450867, + "rewards/margins": 2.2507407665252686, + "rewards/rejected": -1.2956711053848267, + "step": 7908 + }, + { + "epoch": 0.46, + "learning_rate": 5.882158677729927e-08, + "logits/chosen": -1.7697069644927979, + "logits/rejected": -1.7739031314849854, + "logps/chosen": -171.33468627929688, + "logps/rejected": -343.8936462402344, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.056173801422119, + "rewards/margins": 2.573050022125244, + "rewards/rejected": -0.516876220703125, + "step": 7909 + }, + { + "epoch": 0.46, + "learning_rate": 5.881231043243294e-08, + "logits/chosen": -1.9687334299087524, + "logits/rejected": -1.9702390432357788, + "logps/chosen": -85.9913330078125, + "logps/rejected": -232.98394775390625, + "loss": 0.2577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8014152646064758, + "rewards/margins": 1.8854668140411377, + "rewards/rejected": -1.084051489830017, + "step": 7910 + }, + { + "epoch": 0.46, + "learning_rate": 5.8803033774510226e-08, + "logits/chosen": -1.9423480033874512, + "logits/rejected": -1.9807933568954468, + "logps/chosen": -197.037353515625, + "logps/rejected": -357.20574951171875, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6571197509765625, + "rewards/margins": 3.051107883453369, + "rewards/rejected": -1.393988013267517, + "step": 7911 + }, + { + "epoch": 0.46, + "learning_rate": 5.879375680386071e-08, + "logits/chosen": -2.0748651027679443, + "logits/rejected": -2.0691351890563965, + "logps/chosen": -45.808563232421875, + "logps/rejected": -121.60169982910156, + "loss": 0.5716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5259525179862976, + "rewards/margins": 1.1265842914581299, + "rewards/rejected": -1.6525367498397827, + "step": 7912 + }, + { + "epoch": 0.46, + "learning_rate": 5.8784479520813913e-08, + "logits/chosen": -2.0632617473602295, + "logits/rejected": -2.0568103790283203, + "logps/chosen": -0.0013925725361332297, + "logps/rejected": -82.26203918457031, + "loss": 0.7221, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0005095465457998216, + "rewards/margins": -0.14454355835914612, + "rewards/rejected": 0.14505310356616974, + "step": 7913 + }, + { + "epoch": 0.46, + "learning_rate": 5.877520192569945e-08, + "logits/chosen": -1.982017159461975, + "logits/rejected": -1.9666613340377808, + "logps/chosen": -30.533592224121094, + "logps/rejected": -137.97991943359375, + "loss": 0.2214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7072151303291321, + "rewards/margins": 2.1709001064300537, + "rewards/rejected": -1.4636849164962769, + "step": 7914 + }, + { + "epoch": 0.46, + "learning_rate": 5.876592401884687e-08, + "logits/chosen": -1.9585752487182617, + "logits/rejected": -1.9554340839385986, + "logps/chosen": -0.10221397876739502, + "logps/rejected": -84.9344253540039, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04804506525397301, + "rewards/margins": 3.0091631412506104, + "rewards/rejected": -2.96111798286438, + "step": 7915 + }, + { + "epoch": 0.46, + "learning_rate": 5.8756645800585814e-08, + "logits/chosen": -1.7333357334136963, + "logits/rejected": -1.7387726306915283, + "logps/chosen": -240.82684326171875, + "logps/rejected": -356.6923828125, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.918975830078125, + "rewards/margins": 2.6178619861602783, + "rewards/rejected": -0.6988860964775085, + "step": 7916 + }, + { + "epoch": 0.46, + "learning_rate": 5.874736727124585e-08, + "logits/chosen": -2.0708861351013184, + "logits/rejected": -2.0600922107696533, + "logps/chosen": -126.64530181884766, + "logps/rejected": -384.9414978027344, + "loss": 0.44, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5185905694961548, + "rewards/margins": 6.2363200187683105, + "rewards/rejected": -6.754910469055176, + "step": 7917 + }, + { + "epoch": 0.46, + "learning_rate": 5.8738088431156606e-08, + "logits/chosen": -1.9422215223312378, + "logits/rejected": -1.9370074272155762, + "logps/chosen": -191.8933563232422, + "logps/rejected": -212.77537536621094, + "loss": 0.2315, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.123378038406372, + "rewards/margins": 0.9008820056915283, + "rewards/rejected": 1.2224960327148438, + "step": 7918 + }, + { + "epoch": 0.46, + "learning_rate": 5.872880928064774e-08, + "logits/chosen": -1.7684307098388672, + "logits/rejected": -1.817144751548767, + "logps/chosen": -246.97935485839844, + "logps/rejected": -232.1345977783203, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2687820196151733, + "rewards/margins": 1.9118072986602783, + "rewards/rejected": -0.6430252194404602, + "step": 7919 + }, + { + "epoch": 0.46, + "learning_rate": 5.8719529820048864e-08, + "logits/chosen": -2.12491512298584, + "logits/rejected": -2.114656448364258, + "logps/chosen": -39.1540641784668, + "logps/rejected": -156.5757293701172, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26075172424316406, + "rewards/margins": 2.482701539993286, + "rewards/rejected": -2.221949815750122, + "step": 7920 + }, + { + "epoch": 0.46, + "learning_rate": 5.8710250049689634e-08, + "logits/chosen": -1.99482262134552, + "logits/rejected": -1.992240071296692, + "logps/chosen": -0.03823690488934517, + "logps/rejected": -85.96659088134766, + "loss": 0.4966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044538677320815623, + "rewards/margins": 0.9048753976821899, + "rewards/rejected": -0.9053207635879517, + "step": 7921 + }, + { + "epoch": 0.46, + "learning_rate": 5.870096996989972e-08, + "logits/chosen": -2.1422300338745117, + "logits/rejected": -2.1188833713531494, + "logps/chosen": -222.276611328125, + "logps/rejected": -383.79815673828125, + "loss": 0.0971, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8176711797714233, + "rewards/margins": 2.029780626296997, + "rewards/rejected": -0.21210937201976776, + "step": 7922 + }, + { + "epoch": 0.46, + "learning_rate": 5.869168958100879e-08, + "logits/chosen": -1.972962498664856, + "logits/rejected": -1.9849859476089478, + "logps/chosen": -54.73662567138672, + "logps/rejected": -196.22402954101562, + "loss": 0.429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1707908660173416, + "rewards/margins": 1.7913886308670044, + "rewards/rejected": -1.9621795415878296, + "step": 7923 + }, + { + "epoch": 0.46, + "learning_rate": 5.868240888334653e-08, + "logits/chosen": -1.989336371421814, + "logits/rejected": -1.9845000505447388, + "logps/chosen": -60.534332275390625, + "logps/rejected": -266.8983459472656, + "loss": 0.2423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44203874468803406, + "rewards/margins": 1.7143563032150269, + "rewards/rejected": -1.2723175287246704, + "step": 7924 + }, + { + "epoch": 0.46, + "learning_rate": 5.867312787724263e-08, + "logits/chosen": -2.0428357124328613, + "logits/rejected": -2.0431230068206787, + "logps/chosen": -3.322350263595581, + "logps/rejected": -124.18870544433594, + "loss": 0.6191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07697403430938721, + "rewards/margins": 0.23136933147907257, + "rewards/rejected": -0.15439529716968536, + "step": 7925 + }, + { + "epoch": 0.46, + "learning_rate": 5.866384656302679e-08, + "logits/chosen": -2.0133492946624756, + "logits/rejected": -1.9784578084945679, + "logps/chosen": -225.18661499023438, + "logps/rejected": -537.9088134765625, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7285401821136475, + "rewards/margins": 3.3643555641174316, + "rewards/rejected": -0.635815441608429, + "step": 7926 + }, + { + "epoch": 0.46, + "learning_rate": 5.865456494102876e-08, + "logits/chosen": -1.9535493850708008, + "logits/rejected": -1.9526914358139038, + "logps/chosen": -0.0019760483410209417, + "logps/rejected": -252.3672637939453, + "loss": 0.3545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011221231397939846, + "rewards/margins": 5.457708358764648, + "rewards/rejected": -5.457820415496826, + "step": 7927 + }, + { + "epoch": 0.46, + "learning_rate": 5.864528301157825e-08, + "logits/chosen": -1.939418077468872, + "logits/rejected": -1.8815752267837524, + "logps/chosen": -163.93948364257812, + "logps/rejected": -404.77911376953125, + "loss": 0.1271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.65704345703125, + "rewards/margins": 2.1718201637268066, + "rewards/rejected": -0.5147766470909119, + "step": 7928 + }, + { + "epoch": 0.46, + "learning_rate": 5.8636000775005e-08, + "logits/chosen": -1.9887738227844238, + "logits/rejected": -1.9845666885375977, + "logps/chosen": -6.453357696533203, + "logps/rejected": -124.0130615234375, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2765377163887024, + "rewards/margins": 1.496307134628296, + "rewards/rejected": -1.2197693586349487, + "step": 7929 + }, + { + "epoch": 0.46, + "learning_rate": 5.862671823163875e-08, + "logits/chosen": -1.9526221752166748, + "logits/rejected": -1.953069806098938, + "logps/chosen": -18.742300033569336, + "logps/rejected": -142.65118408203125, + "loss": 0.2748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44729405641555786, + "rewards/margins": 2.872544288635254, + "rewards/rejected": -2.425250291824341, + "step": 7930 + }, + { + "epoch": 0.46, + "learning_rate": 5.861743538180928e-08, + "logits/chosen": -1.9196068048477173, + "logits/rejected": -1.9083722829818726, + "logps/chosen": -209.69256591796875, + "logps/rejected": -356.23193359375, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2811310291290283, + "rewards/margins": 3.170736789703369, + "rewards/rejected": -0.889605700969696, + "step": 7931 + }, + { + "epoch": 0.46, + "learning_rate": 5.860815222584634e-08, + "logits/chosen": -1.936156153678894, + "logits/rejected": -1.946698784828186, + "logps/chosen": -45.52788162231445, + "logps/rejected": -182.44796752929688, + "loss": 0.4411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2941959500312805, + "rewards/margins": 0.637381374835968, + "rewards/rejected": -0.3431854248046875, + "step": 7932 + }, + { + "epoch": 0.46, + "learning_rate": 5.859886876407975e-08, + "logits/chosen": -2.006039619445801, + "logits/rejected": -2.004642963409424, + "logps/chosen": -11.992753982543945, + "logps/rejected": -81.07970428466797, + "loss": 0.6008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048403263092041016, + "rewards/margins": 0.2362038642168045, + "rewards/rejected": -0.1878006011247635, + "step": 7933 + }, + { + "epoch": 0.46, + "learning_rate": 5.858958499683926e-08, + "logits/chosen": -1.8149367570877075, + "logits/rejected": -1.8209527730941772, + "logps/chosen": -0.00020491064060479403, + "logps/rejected": -91.46229553222656, + "loss": 0.6029, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.899379124864936e-05, + "rewards/margins": 0.40078091621398926, + "rewards/rejected": -0.4007019102573395, + "step": 7934 + }, + { + "epoch": 0.46, + "learning_rate": 5.858030092445469e-08, + "logits/chosen": -1.863896369934082, + "logits/rejected": -1.8793270587921143, + "logps/chosen": -179.14817810058594, + "logps/rejected": -388.9820251464844, + "loss": 0.078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4244095087051392, + "rewards/margins": 3.2594194412231445, + "rewards/rejected": -1.8350098133087158, + "step": 7935 + }, + { + "epoch": 0.46, + "learning_rate": 5.857101654725587e-08, + "logits/chosen": -1.8814020156860352, + "logits/rejected": -1.8698468208312988, + "logps/chosen": -166.6768341064453, + "logps/rejected": -340.33355712890625, + "loss": 0.2333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5875839591026306, + "rewards/margins": 1.5543015003204346, + "rewards/rejected": -0.966717541217804, + "step": 7936 + }, + { + "epoch": 0.46, + "learning_rate": 5.856173186557263e-08, + "logits/chosen": -1.9276851415634155, + "logits/rejected": -1.9268819093704224, + "logps/chosen": -175.39630126953125, + "logps/rejected": -316.8525390625, + "loss": 0.1544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.301312208175659, + "rewards/margins": 1.1534209251403809, + "rewards/rejected": 1.1478912830352783, + "step": 7937 + }, + { + "epoch": 0.46, + "learning_rate": 5.8552446879734775e-08, + "logits/chosen": -1.8692201375961304, + "logits/rejected": -1.8705710172653198, + "logps/chosen": -19.822965621948242, + "logps/rejected": -90.86204528808594, + "loss": 0.4588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13437919318675995, + "rewards/margins": 1.0250240564346313, + "rewards/rejected": -0.8906448483467102, + "step": 7938 + }, + { + "epoch": 0.46, + "learning_rate": 5.854316159007217e-08, + "logits/chosen": -1.956592321395874, + "logits/rejected": -1.9509872198104858, + "logps/chosen": -5.245161082712002e-05, + "logps/rejected": -239.13436889648438, + "loss": 0.3448, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0530105138714134e-07, + "rewards/margins": 7.423636436462402, + "rewards/rejected": -7.423635959625244, + "step": 7939 + }, + { + "epoch": 0.46, + "learning_rate": 5.853387599691468e-08, + "logits/chosen": -1.875564694404602, + "logits/rejected": -1.8787422180175781, + "logps/chosen": -273.37896728515625, + "logps/rejected": -471.6165466308594, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8007324934005737, + "rewards/margins": 4.1622467041015625, + "rewards/rejected": -2.3615143299102783, + "step": 7940 + }, + { + "epoch": 0.46, + "learning_rate": 5.8524590100592175e-08, + "logits/chosen": -2.075615882873535, + "logits/rejected": -2.052760601043701, + "logps/chosen": -94.74333953857422, + "logps/rejected": -254.4313201904297, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3492134213447571, + "rewards/margins": 1.2945854663848877, + "rewards/rejected": -0.9453720450401306, + "step": 7941 + }, + { + "epoch": 0.46, + "learning_rate": 5.8515303901434544e-08, + "logits/chosen": -2.0023765563964844, + "logits/rejected": -1.9931002855300903, + "logps/chosen": -21.367746353149414, + "logps/rejected": -127.09276580810547, + "loss": 0.3962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.150349423289299, + "rewards/margins": 1.4541022777557373, + "rewards/rejected": -1.3037528991699219, + "step": 7942 + }, + { + "epoch": 0.46, + "learning_rate": 5.850601739977165e-08, + "logits/chosen": -2.066052198410034, + "logits/rejected": -2.0775070190429688, + "logps/chosen": -236.9243621826172, + "logps/rejected": -302.51788330078125, + "loss": 0.221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2546889781951904, + "rewards/margins": 0.7421157360076904, + "rewards/rejected": 1.5125732421875, + "step": 7943 + }, + { + "epoch": 0.46, + "learning_rate": 5.8496730595933416e-08, + "logits/chosen": -2.075864791870117, + "logits/rejected": -2.0650248527526855, + "logps/chosen": -80.03533172607422, + "logps/rejected": -268.142578125, + "loss": 0.4269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019264984875917435, + "rewards/margins": 1.769178032875061, + "rewards/rejected": -1.7884429693222046, + "step": 7944 + }, + { + "epoch": 0.46, + "learning_rate": 5.8487443490249744e-08, + "logits/chosen": -1.9196394681930542, + "logits/rejected": -1.9116123914718628, + "logps/chosen": -198.99696350097656, + "logps/rejected": -412.1963806152344, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4160339832305908, + "rewards/margins": 3.213308811187744, + "rewards/rejected": -1.7972748279571533, + "step": 7945 + }, + { + "epoch": 0.46, + "learning_rate": 5.8478156083050554e-08, + "logits/chosen": -1.8066645860671997, + "logits/rejected": -1.8121830224990845, + "logps/chosen": -82.50840759277344, + "logps/rejected": -237.69720458984375, + "loss": 0.4127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08091812580823898, + "rewards/margins": 1.9287910461425781, + "rewards/rejected": -1.8478729724884033, + "step": 7946 + }, + { + "epoch": 0.46, + "learning_rate": 5.8468868374665804e-08, + "logits/chosen": -1.8300511837005615, + "logits/rejected": -1.8311351537704468, + "logps/chosen": -249.76589965820312, + "logps/rejected": -430.9342041015625, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6267242431640625, + "rewards/margins": 4.585339546203613, + "rewards/rejected": -1.9586151838302612, + "step": 7947 + }, + { + "epoch": 0.46, + "learning_rate": 5.845958036542542e-08, + "logits/chosen": -1.753394365310669, + "logits/rejected": -1.7491354942321777, + "logps/chosen": -1.495847225189209, + "logps/rejected": -100.04869079589844, + "loss": 0.6093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032635449897497892, + "rewards/margins": 0.32309603691101074, + "rewards/rejected": -0.3263595700263977, + "step": 7948 + }, + { + "epoch": 0.46, + "learning_rate": 5.845029205565935e-08, + "logits/chosen": -1.901026964187622, + "logits/rejected": -1.8689557313919067, + "logps/chosen": -157.75433349609375, + "logps/rejected": -369.5766906738281, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9379669427871704, + "rewards/margins": 3.4768309593200684, + "rewards/rejected": -1.5388641357421875, + "step": 7949 + }, + { + "epoch": 0.46, + "learning_rate": 5.844100344569759e-08, + "logits/chosen": -1.931535005569458, + "logits/rejected": -1.9281152486801147, + "logps/chosen": -24.30002784729004, + "logps/rejected": -207.08245849609375, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5630252957344055, + "rewards/margins": 1.5980045795440674, + "rewards/rejected": -1.034979224205017, + "step": 7950 + }, + { + "epoch": 0.46, + "learning_rate": 5.843171453587009e-08, + "logits/chosen": -1.8494535684585571, + "logits/rejected": -1.921783447265625, + "logps/chosen": -223.06619262695312, + "logps/rejected": -341.2218933105469, + "loss": 0.1828, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1182129383087158, + "rewards/margins": 1.4369721412658691, + "rewards/rejected": -0.31875917315483093, + "step": 7951 + }, + { + "epoch": 0.46, + "learning_rate": 5.8422425326506854e-08, + "logits/chosen": -1.8974993228912354, + "logits/rejected": -1.914139747619629, + "logps/chosen": -212.1179962158203, + "logps/rejected": -356.34759521484375, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.810864269733429, + "rewards/margins": 0.6778228878974915, + "rewards/rejected": 0.1330413818359375, + "step": 7952 + }, + { + "epoch": 0.46, + "learning_rate": 5.841313581793785e-08, + "logits/chosen": -2.0924136638641357, + "logits/rejected": -2.091280698776245, + "logps/chosen": -18.847412109375, + "logps/rejected": -174.98583984375, + "loss": 0.711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5264133810997009, + "rewards/margins": 0.39848899841308594, + "rewards/rejected": -0.9249023795127869, + "step": 7953 + }, + { + "epoch": 0.46, + "learning_rate": 5.840384601049313e-08, + "logits/chosen": -2.0553596019744873, + "logits/rejected": -2.0625500679016113, + "logps/chosen": -0.00542968837544322, + "logps/rejected": -97.37850189208984, + "loss": 0.5293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0035506263375282288, + "rewards/margins": 0.7071189284324646, + "rewards/rejected": -0.7035682797431946, + "step": 7954 + }, + { + "epoch": 0.46, + "learning_rate": 5.8394555904502685e-08, + "logits/chosen": -2.0550880432128906, + "logits/rejected": -2.0567245483398438, + "logps/chosen": -54.978248596191406, + "logps/rejected": -230.5595245361328, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09986648708581924, + "rewards/margins": 2.9422645568847656, + "rewards/rejected": -2.842398166656494, + "step": 7955 + }, + { + "epoch": 0.46, + "learning_rate": 5.8385265500296554e-08, + "logits/chosen": -1.7495145797729492, + "logits/rejected": -1.812212347984314, + "logps/chosen": -216.27984619140625, + "logps/rejected": -353.2841796875, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8884400129318237, + "rewards/margins": 2.112478733062744, + "rewards/rejected": -0.22403870522975922, + "step": 7956 + }, + { + "epoch": 0.46, + "learning_rate": 5.837597479820478e-08, + "logits/chosen": -2.182678699493408, + "logits/rejected": -2.1698615550994873, + "logps/chosen": -91.6162338256836, + "logps/rejected": -223.53358459472656, + "loss": 0.24, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5883651971817017, + "rewards/margins": 1.854882836341858, + "rewards/rejected": -1.2665176391601562, + "step": 7957 + }, + { + "epoch": 0.46, + "learning_rate": 5.8366683798557395e-08, + "logits/chosen": -1.9391978979110718, + "logits/rejected": -1.9412007331848145, + "logps/chosen": -1.3336305618286133, + "logps/rejected": -186.82366943359375, + "loss": 0.3791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08380582183599472, + "rewards/margins": 2.0557754039764404, + "rewards/rejected": -1.9719696044921875, + "step": 7958 + }, + { + "epoch": 0.46, + "learning_rate": 5.8357392501684476e-08, + "logits/chosen": -1.9997421503067017, + "logits/rejected": -1.9930920600891113, + "logps/chosen": -115.18171691894531, + "logps/rejected": -278.1832275390625, + "loss": 0.2136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4010253846645355, + "rewards/margins": 2.4072844982147217, + "rewards/rejected": -2.0062592029571533, + "step": 7959 + }, + { + "epoch": 0.46, + "learning_rate": 5.834810090791611e-08, + "logits/chosen": -1.998539686203003, + "logits/rejected": -1.893009901046753, + "logps/chosen": -236.67242431640625, + "logps/rejected": -694.363037109375, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3226394653320312, + "rewards/margins": 4.983980178833008, + "rewards/rejected": -3.6613404750823975, + "step": 7960 + }, + { + "epoch": 0.46, + "learning_rate": 5.833880901758236e-08, + "logits/chosen": -1.8380191326141357, + "logits/rejected": -1.823447346687317, + "logps/chosen": -257.2543029785156, + "logps/rejected": -285.704833984375, + "loss": 0.1405, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.423004150390625, + "rewards/margins": 1.4393310546875, + "rewards/rejected": 0.983673095703125, + "step": 7961 + }, + { + "epoch": 0.46, + "learning_rate": 5.8329516831013316e-08, + "logits/chosen": -1.914344072341919, + "logits/rejected": -1.9147499799728394, + "logps/chosen": -101.91203308105469, + "logps/rejected": -250.4027862548828, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5181075930595398, + "rewards/margins": 3.275412082672119, + "rewards/rejected": -2.7573044300079346, + "step": 7962 + }, + { + "epoch": 0.46, + "learning_rate": 5.83202243485391e-08, + "logits/chosen": -1.8699862957000732, + "logits/rejected": -1.9288920164108276, + "logps/chosen": -247.0913848876953, + "logps/rejected": -404.1687316894531, + "loss": 0.0826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1900008916854858, + "rewards/margins": 3.860081672668457, + "rewards/rejected": -2.6700806617736816, + "step": 7963 + }, + { + "epoch": 0.46, + "learning_rate": 5.8310931570489805e-08, + "logits/chosen": -1.9396326541900635, + "logits/rejected": -1.9150111675262451, + "logps/chosen": -194.48348999023438, + "logps/rejected": -320.727294921875, + "loss": 0.2349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0397690534591675, + "rewards/margins": 1.7198350429534912, + "rewards/rejected": -0.680065929889679, + "step": 7964 + }, + { + "epoch": 0.46, + "learning_rate": 5.830163849719558e-08, + "logits/chosen": -1.9031611680984497, + "logits/rejected": -1.8865009546279907, + "logps/chosen": -159.71815490722656, + "logps/rejected": -251.2822265625, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2316147089004517, + "rewards/margins": 0.5279647707939148, + "rewards/rejected": 0.7036499381065369, + "step": 7965 + }, + { + "epoch": 0.46, + "learning_rate": 5.829234512898653e-08, + "logits/chosen": -2.1105334758758545, + "logits/rejected": -2.082456111907959, + "logps/chosen": -100.41380310058594, + "logps/rejected": -418.35577392578125, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16721343994140625, + "rewards/margins": 7.081251621246338, + "rewards/rejected": -7.248465061187744, + "step": 7966 + }, + { + "epoch": 0.46, + "learning_rate": 5.828305146619283e-08, + "logits/chosen": -1.9233366250991821, + "logits/rejected": -1.9296393394470215, + "logps/chosen": -0.001987461932003498, + "logps/rejected": -40.87276840209961, + "loss": 0.581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00026936756330542266, + "rewards/margins": 0.4871002733707428, + "rewards/rejected": -0.4868309199810028, + "step": 7967 + }, + { + "epoch": 0.46, + "learning_rate": 5.827375750914463e-08, + "logits/chosen": -2.000138282775879, + "logits/rejected": -1.98220694065094, + "logps/chosen": -4.8075480461120605, + "logps/rejected": -193.87477111816406, + "loss": 0.4035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05797071382403374, + "rewards/margins": 2.1585118770599365, + "rewards/rejected": -2.216482639312744, + "step": 7968 + }, + { + "epoch": 0.46, + "learning_rate": 5.826446325817209e-08, + "logits/chosen": -2.022804021835327, + "logits/rejected": -2.0232059955596924, + "logps/chosen": -21.014446258544922, + "logps/rejected": -206.93443298339844, + "loss": 0.208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4969806671142578, + "rewards/margins": 3.3503055572509766, + "rewards/rejected": -2.8533248901367188, + "step": 7969 + }, + { + "epoch": 0.46, + "learning_rate": 5.825516871360539e-08, + "logits/chosen": -1.969172716140747, + "logits/rejected": -1.961856484413147, + "logps/chosen": -129.85910034179688, + "logps/rejected": -252.87979125976562, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3877930641174316, + "rewards/margins": 0.2748596668243408, + "rewards/rejected": 2.112933397293091, + "step": 7970 + }, + { + "epoch": 0.46, + "learning_rate": 5.824587387577472e-08, + "logits/chosen": -1.7721340656280518, + "logits/rejected": -1.7733259201049805, + "logps/chosen": -28.013912200927734, + "logps/rejected": -189.872802734375, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2075706571340561, + "rewards/margins": 1.519544243812561, + "rewards/rejected": -1.3119735717773438, + "step": 7971 + }, + { + "epoch": 0.46, + "learning_rate": 5.823657874501027e-08, + "logits/chosen": -2.0366644859313965, + "logits/rejected": -2.0302317142486572, + "logps/chosen": -190.369140625, + "logps/rejected": -288.2331237792969, + "loss": 0.1523, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6272248029708862, + "rewards/margins": 1.4213868379592896, + "rewards/rejected": 0.20583800971508026, + "step": 7972 + }, + { + "epoch": 0.46, + "learning_rate": 5.822728332164225e-08, + "logits/chosen": -1.9003512859344482, + "logits/rejected": -1.9082950353622437, + "logps/chosen": -60.416038513183594, + "logps/rejected": -112.12216186523438, + "loss": 0.3749, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5973579287528992, + "rewards/margins": 1.0893882513046265, + "rewards/rejected": -0.4920303523540497, + "step": 7973 + }, + { + "epoch": 0.46, + "learning_rate": 5.8217987606000886e-08, + "logits/chosen": -2.0323941707611084, + "logits/rejected": -2.031794309616089, + "logps/chosen": -0.000329947448335588, + "logps/rejected": -138.57911682128906, + "loss": 0.393, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.761724524840247e-05, + "rewards/margins": 2.3635642528533936, + "rewards/rejected": -2.363581895828247, + "step": 7974 + }, + { + "epoch": 0.46, + "learning_rate": 5.82086915984164e-08, + "logits/chosen": -2.0874388217926025, + "logits/rejected": -2.0782763957977295, + "logps/chosen": -39.415191650390625, + "logps/rejected": -340.829345703125, + "loss": 0.1769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6058597564697266, + "rewards/margins": 2.444446086883545, + "rewards/rejected": -1.838586449623108, + "step": 7975 + }, + { + "epoch": 0.46, + "learning_rate": 5.8199395299219035e-08, + "logits/chosen": -1.979978322982788, + "logits/rejected": -1.9616883993148804, + "logps/chosen": -229.6041259765625, + "logps/rejected": -352.0669860839844, + "loss": 0.3008, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8913848400115967, + "rewards/margins": 0.24498271942138672, + "rewards/rejected": 2.64640212059021, + "step": 7976 + }, + { + "epoch": 0.46, + "learning_rate": 5.8190098708739054e-08, + "logits/chosen": -1.826866865158081, + "logits/rejected": -1.8900455236434937, + "logps/chosen": -244.81381225585938, + "logps/rejected": -395.97369384765625, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7578400373458862, + "rewards/margins": 3.3418610095977783, + "rewards/rejected": -1.584020972251892, + "step": 7977 + }, + { + "epoch": 0.46, + "learning_rate": 5.818080182730669e-08, + "logits/chosen": -1.8376445770263672, + "logits/rejected": -1.6473662853240967, + "logps/chosen": -285.8363342285156, + "logps/rejected": -734.385498046875, + "loss": 0.0791, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7127898931503296, + "rewards/margins": 3.273263454437256, + "rewards/rejected": -1.5604736804962158, + "step": 7978 + }, + { + "epoch": 0.46, + "learning_rate": 5.817150465525222e-08, + "logits/chosen": -1.8768693208694458, + "logits/rejected": -1.876521348953247, + "logps/chosen": -15.074830055236816, + "logps/rejected": -104.89725494384766, + "loss": 0.6007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1037323996424675, + "rewards/margins": 0.23006603121757507, + "rewards/rejected": -0.12633362412452698, + "step": 7979 + }, + { + "epoch": 0.46, + "learning_rate": 5.816220719290597e-08, + "logits/chosen": -1.8739657402038574, + "logits/rejected": -1.8993029594421387, + "logps/chosen": -293.2130432128906, + "logps/rejected": -284.75469970703125, + "loss": 0.1897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3365936279296875, + "rewards/margins": 1.5028350353240967, + "rewards/rejected": -0.16624145209789276, + "step": 7980 + }, + { + "epoch": 0.46, + "learning_rate": 5.815290944059816e-08, + "logits/chosen": -1.893107533454895, + "logits/rejected": -1.8965067863464355, + "logps/chosen": -238.50962829589844, + "logps/rejected": -471.5471496582031, + "loss": 0.062, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.007890462875366, + "rewards/margins": 2.6774613857269287, + "rewards/rejected": -0.6695709228515625, + "step": 7981 + }, + { + "epoch": 0.46, + "learning_rate": 5.814361139865914e-08, + "logits/chosen": -2.0849578380584717, + "logits/rejected": -2.080789804458618, + "logps/chosen": -46.88684844970703, + "logps/rejected": -117.59275817871094, + "loss": 0.5571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06318702548742294, + "rewards/margins": 0.6348698139190674, + "rewards/rejected": -0.6980568170547485, + "step": 7982 + }, + { + "epoch": 0.46, + "learning_rate": 5.8134313067419204e-08, + "logits/chosen": -2.025043487548828, + "logits/rejected": -1.9997873306274414, + "logps/chosen": -206.80679321289062, + "logps/rejected": -419.23681640625, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.922445774078369, + "rewards/margins": 4.046048164367676, + "rewards/rejected": -1.123602271080017, + "step": 7983 + }, + { + "epoch": 0.46, + "learning_rate": 5.81250144472087e-08, + "logits/chosen": -1.9119901657104492, + "logits/rejected": -1.9084357023239136, + "logps/chosen": -67.1167984008789, + "logps/rejected": -213.70675659179688, + "loss": 0.4804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05383148416876793, + "rewards/margins": 1.4581161737442017, + "rewards/rejected": -1.5119476318359375, + "step": 7984 + }, + { + "epoch": 0.46, + "learning_rate": 5.811571553835791e-08, + "logits/chosen": -1.7837885618209839, + "logits/rejected": -1.773215651512146, + "logps/chosen": -0.3167991638183594, + "logps/rejected": -118.0500259399414, + "loss": 0.3695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19271795451641083, + "rewards/margins": 1.9123903512954712, + "rewards/rejected": -1.719672441482544, + "step": 7985 + }, + { + "epoch": 0.46, + "learning_rate": 5.810641634119722e-08, + "logits/chosen": -1.9625790119171143, + "logits/rejected": -1.9499378204345703, + "logps/chosen": -128.7467041015625, + "logps/rejected": -257.2872009277344, + "loss": 0.0946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2591370344161987, + "rewards/margins": 2.548121690750122, + "rewards/rejected": -1.2889846563339233, + "step": 7986 + }, + { + "epoch": 0.46, + "learning_rate": 5.809711685605695e-08, + "logits/chosen": -1.8399213552474976, + "logits/rejected": -1.837896466255188, + "logps/chosen": -7.8928422927856445, + "logps/rejected": -110.43309020996094, + "loss": 0.3744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2546748220920563, + "rewards/margins": 0.9545985460281372, + "rewards/rejected": -0.6999236941337585, + "step": 7987 + }, + { + "epoch": 0.46, + "learning_rate": 5.808781708326751e-08, + "logits/chosen": -1.861147165298462, + "logits/rejected": -1.8689724206924438, + "logps/chosen": -155.28001403808594, + "logps/rejected": -197.53182983398438, + "loss": 0.1626, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5855728387832642, + "rewards/margins": 1.5638153553009033, + "rewards/rejected": 0.02175750769674778, + "step": 7988 + }, + { + "epoch": 0.46, + "learning_rate": 5.8078517023159226e-08, + "logits/chosen": -1.838379979133606, + "logits/rejected": -1.8379799127578735, + "logps/chosen": -148.64663696289062, + "logps/rejected": -182.19989013671875, + "loss": 0.2611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.971203625202179, + "rewards/margins": 1.4248428344726562, + "rewards/rejected": -0.4536392390727997, + "step": 7989 + }, + { + "epoch": 0.46, + "learning_rate": 5.8069216676062515e-08, + "logits/chosen": -1.934285283088684, + "logits/rejected": -1.936322569847107, + "logps/chosen": -0.0007088923011906445, + "logps/rejected": -283.0461120605469, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.889847953862045e-05, + "rewards/margins": 6.312653541564941, + "rewards/rejected": -6.3126726150512695, + "step": 7990 + }, + { + "epoch": 0.47, + "learning_rate": 5.8059916042307745e-08, + "logits/chosen": -2.073072910308838, + "logits/rejected": -2.0539743900299072, + "logps/chosen": -134.87818908691406, + "logps/rejected": -493.017578125, + "loss": 0.4739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7873382568359375, + "rewards/margins": 10.555584907531738, + "rewards/rejected": -11.342923164367676, + "step": 7991 + }, + { + "epoch": 0.47, + "learning_rate": 5.8050615122225347e-08, + "logits/chosen": -1.9972190856933594, + "logits/rejected": -2.0452301502227783, + "logps/chosen": -221.99180603027344, + "logps/rejected": -437.97039794921875, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9352279901504517, + "rewards/margins": 6.342625617980957, + "rewards/rejected": -4.407397747039795, + "step": 7992 + }, + { + "epoch": 0.47, + "learning_rate": 5.8041313916145695e-08, + "logits/chosen": -1.9829473495483398, + "logits/rejected": -1.9707412719726562, + "logps/chosen": -201.13441467285156, + "logps/rejected": -324.74395751953125, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8105239868164062, + "rewards/margins": 0.44705355167388916, + "rewards/rejected": 1.363470435142517, + "step": 7993 + }, + { + "epoch": 0.47, + "learning_rate": 5.803201242439926e-08, + "logits/chosen": -2.20320725440979, + "logits/rejected": -2.1916019916534424, + "logps/chosen": -17.95687484741211, + "logps/rejected": -243.9716796875, + "loss": 0.3985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015398216433823109, + "rewards/margins": 2.5349555015563965, + "rewards/rejected": -2.519557237625122, + "step": 7994 + }, + { + "epoch": 0.47, + "learning_rate": 5.8022710647316445e-08, + "logits/chosen": -2.0756986141204834, + "logits/rejected": -2.080693483352661, + "logps/chosen": -99.43533325195312, + "logps/rejected": -191.30789184570312, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3967987298965454, + "rewards/margins": 0.760974109172821, + "rewards/rejected": 0.6358246207237244, + "step": 7995 + }, + { + "epoch": 0.47, + "learning_rate": 5.801340858522772e-08, + "logits/chosen": -1.9434988498687744, + "logits/rejected": -1.9587420225143433, + "logps/chosen": -250.87171936035156, + "logps/rejected": -294.63134765625, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5775375366210938, + "rewards/margins": 2.0263962745666504, + "rewards/rejected": 0.5511413812637329, + "step": 7996 + }, + { + "epoch": 0.47, + "learning_rate": 5.80041062384635e-08, + "logits/chosen": -1.7845345735549927, + "logits/rejected": -1.71715247631073, + "logps/chosen": -249.29281616210938, + "logps/rejected": -375.1753845214844, + "loss": 0.4151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.485556036233902, + "rewards/margins": 0.9024200439453125, + "rewards/rejected": -0.4168640077114105, + "step": 7997 + }, + { + "epoch": 0.47, + "learning_rate": 5.799480360735429e-08, + "logits/chosen": -1.9863508939743042, + "logits/rejected": -2.0420100688934326, + "logps/chosen": -185.3488006591797, + "logps/rejected": -182.3185577392578, + "loss": 0.1075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6649338006973267, + "rewards/margins": 1.8375214338302612, + "rewards/rejected": -0.172587588429451, + "step": 7998 + }, + { + "epoch": 0.47, + "learning_rate": 5.7985500692230536e-08, + "logits/chosen": -1.9548766613006592, + "logits/rejected": -1.9536757469177246, + "logps/chosen": -162.86395263671875, + "logps/rejected": -337.5365905761719, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6394577026367188, + "rewards/margins": 3.2029953002929688, + "rewards/rejected": -0.56353759765625, + "step": 7999 + }, + { + "epoch": 0.47, + "learning_rate": 5.7976197493422754e-08, + "logits/chosen": -1.9092445373535156, + "logits/rejected": -1.9056124687194824, + "logps/chosen": -14.381611824035645, + "logps/rejected": -123.64772033691406, + "loss": 0.4497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02694540098309517, + "rewards/margins": 1.260830044746399, + "rewards/rejected": -1.2877753973007202, + "step": 8000 + }, + { + "epoch": 0.47, + "learning_rate": 5.7966894011261414e-08, + "logits/chosen": -1.8685826063156128, + "logits/rejected": -1.868067741394043, + "logps/chosen": -19.868539810180664, + "logps/rejected": -98.33518981933594, + "loss": 0.6039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03751182556152344, + "rewards/margins": 0.23190346360206604, + "rewards/rejected": -0.2694152891635895, + "step": 8001 + }, + { + "epoch": 0.47, + "learning_rate": 5.795759024607702e-08, + "logits/chosen": -1.8613120317459106, + "logits/rejected": -1.8655587434768677, + "logps/chosen": -218.97698974609375, + "logps/rejected": -286.576416015625, + "loss": 0.154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1539276838302612, + "rewards/margins": 1.2753692865371704, + "rewards/rejected": -0.12144165486097336, + "step": 8002 + }, + { + "epoch": 0.47, + "learning_rate": 5.79482861982001e-08, + "logits/chosen": -2.0674118995666504, + "logits/rejected": -2.110926628112793, + "logps/chosen": -163.26315307617188, + "logps/rejected": -393.5341796875, + "loss": 0.0856, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3465241193771362, + "rewards/margins": 2.6459412574768066, + "rewards/rejected": -1.2994171380996704, + "step": 8003 + }, + { + "epoch": 0.47, + "learning_rate": 5.7938981867961177e-08, + "logits/chosen": -1.9186387062072754, + "logits/rejected": -1.8487218618392944, + "logps/chosen": -304.11932373046875, + "logps/rejected": -434.1368408203125, + "loss": 0.1116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.943603515625, + "rewards/margins": 1.9589049816131592, + "rewards/rejected": -0.015301513485610485, + "step": 8004 + }, + { + "epoch": 0.47, + "learning_rate": 5.792967725569079e-08, + "logits/chosen": -1.6538081169128418, + "logits/rejected": -1.6554771661758423, + "logps/chosen": -83.59172058105469, + "logps/rejected": -214.06031799316406, + "loss": 0.2615, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6519005298614502, + "rewards/margins": 0.8133781552314758, + "rewards/rejected": 0.8385223746299744, + "step": 8005 + }, + { + "epoch": 0.47, + "learning_rate": 5.792037236171947e-08, + "logits/chosen": -2.2293338775634766, + "logits/rejected": -2.22223162651062, + "logps/chosen": -0.0023524421267211437, + "logps/rejected": -388.579345703125, + "loss": 0.3519, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.388111796695739e-05, + "rewards/margins": 5.989044666290283, + "rewards/rejected": -5.989108562469482, + "step": 8006 + }, + { + "epoch": 0.47, + "learning_rate": 5.791106718637778e-08, + "logits/chosen": -1.9192566871643066, + "logits/rejected": -1.914392352104187, + "logps/chosen": -9.958454132080078, + "logps/rejected": -158.3826446533203, + "loss": 0.3855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07797183841466904, + "rewards/margins": 1.748024582862854, + "rewards/rejected": -1.8259963989257812, + "step": 8007 + }, + { + "epoch": 0.47, + "learning_rate": 5.790176172999629e-08, + "logits/chosen": -2.0783979892730713, + "logits/rejected": -2.079314708709717, + "logps/chosen": -13.419194221496582, + "logps/rejected": -108.02540588378906, + "loss": 0.7042, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3180808126926422, + "rewards/margins": -0.2724083960056305, + "rewards/rejected": 0.5904892086982727, + "step": 8008 + }, + { + "epoch": 0.47, + "learning_rate": 5.7892455992905576e-08, + "logits/chosen": -1.9225202798843384, + "logits/rejected": -1.9383758306503296, + "logps/chosen": -169.59552001953125, + "logps/rejected": -417.2626647949219, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3550598621368408, + "rewards/margins": 3.7064239978790283, + "rewards/rejected": -2.3513641357421875, + "step": 8009 + }, + { + "epoch": 0.47, + "learning_rate": 5.788314997543621e-08, + "logits/chosen": -1.9257478713989258, + "logits/rejected": -1.9315786361694336, + "logps/chosen": -252.09597778320312, + "logps/rejected": -336.0632629394531, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3613861799240112, + "rewards/margins": 2.44512939453125, + "rewards/rejected": -1.0837433338165283, + "step": 8010 + }, + { + "epoch": 0.47, + "learning_rate": 5.787384367791881e-08, + "logits/chosen": -1.9155585765838623, + "logits/rejected": -1.9163600206375122, + "logps/chosen": -0.0005041610565967858, + "logps/rejected": -151.56272888183594, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.313822005817201e-05, + "rewards/margins": 2.8237338066101074, + "rewards/rejected": -2.8237106800079346, + "step": 8011 + }, + { + "epoch": 0.47, + "learning_rate": 5.786453710068395e-08, + "logits/chosen": -2.1530942916870117, + "logits/rejected": -2.143253803253174, + "logps/chosen": -7.784155604895204e-05, + "logps/rejected": -143.44056701660156, + "loss": 0.4446, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.00676833325997e-07, + "rewards/margins": 1.5292760133743286, + "rewards/rejected": -1.5292755365371704, + "step": 8012 + }, + { + "epoch": 0.47, + "learning_rate": 5.785523024406228e-08, + "logits/chosen": -2.0425631999969482, + "logits/rejected": -2.0350253582000732, + "logps/chosen": -22.750818252563477, + "logps/rejected": -140.43678283691406, + "loss": 0.4105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16877594590187073, + "rewards/margins": 1.4547119140625, + "rewards/rejected": -1.2859359979629517, + "step": 8013 + }, + { + "epoch": 0.47, + "learning_rate": 5.7845923108384396e-08, + "logits/chosen": -1.8482270240783691, + "logits/rejected": -1.8233227729797363, + "logps/chosen": -223.33572387695312, + "logps/rejected": -449.858642578125, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7402405738830566, + "rewards/margins": 3.856802463531494, + "rewards/rejected": -1.1165618896484375, + "step": 8014 + }, + { + "epoch": 0.47, + "learning_rate": 5.783661569398095e-08, + "logits/chosen": -2.0218443870544434, + "logits/rejected": -2.0231032371520996, + "logps/chosen": -50.37294006347656, + "logps/rejected": -240.52688598632812, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3849845826625824, + "rewards/margins": 1.7603508234024048, + "rewards/rejected": -1.3753662109375, + "step": 8015 + }, + { + "epoch": 0.47, + "learning_rate": 5.7827308001182575e-08, + "logits/chosen": -1.9032598733901978, + "logits/rejected": -1.8947875499725342, + "logps/chosen": -159.232421875, + "logps/rejected": -404.04339599609375, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9432464838027954, + "rewards/margins": 5.790505886077881, + "rewards/rejected": -3.847259521484375, + "step": 8016 + }, + { + "epoch": 0.47, + "learning_rate": 5.7818000030319943e-08, + "logits/chosen": -1.9128390550613403, + "logits/rejected": -1.8952672481536865, + "logps/chosen": -245.42608642578125, + "logps/rejected": -343.6802978515625, + "loss": 0.2236, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3970764875411987, + "rewards/margins": 0.8151856064796448, + "rewards/rejected": 0.581890881061554, + "step": 8017 + }, + { + "epoch": 0.47, + "learning_rate": 5.780869178172369e-08, + "logits/chosen": -1.9788438081741333, + "logits/rejected": -1.975117802619934, + "logps/chosen": -21.138988494873047, + "logps/rejected": -313.03192138671875, + "loss": 0.261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20778141915798187, + "rewards/margins": 7.365450382232666, + "rewards/rejected": -7.1576690673828125, + "step": 8018 + }, + { + "epoch": 0.47, + "learning_rate": 5.7799383255724534e-08, + "logits/chosen": -2.0122334957122803, + "logits/rejected": -2.015939474105835, + "logps/chosen": -20.03934669494629, + "logps/rejected": -156.69476318359375, + "loss": 0.4094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16102848947048187, + "rewards/margins": 1.4375101327896118, + "rewards/rejected": -1.2764816284179688, + "step": 8019 + }, + { + "epoch": 0.47, + "learning_rate": 5.779007445265312e-08, + "logits/chosen": -1.9041000604629517, + "logits/rejected": -1.8706663846969604, + "logps/chosen": -208.2003936767578, + "logps/rejected": -356.02496337890625, + "loss": 0.147, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4976364374160767, + "rewards/margins": 1.3437789678573608, + "rewards/rejected": 0.15385742485523224, + "step": 8020 + }, + { + "epoch": 0.47, + "learning_rate": 5.778076537284015e-08, + "logits/chosen": -1.7359408140182495, + "logits/rejected": -1.6858806610107422, + "logps/chosen": -224.5406494140625, + "logps/rejected": -500.36572265625, + "loss": 0.2421, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3586333990097046, + "rewards/margins": 0.9286925792694092, + "rewards/rejected": 0.429940789937973, + "step": 8021 + }, + { + "epoch": 0.47, + "learning_rate": 5.777145601661634e-08, + "logits/chosen": -1.9734677076339722, + "logits/rejected": -1.9430346488952637, + "logps/chosen": -224.05111694335938, + "logps/rejected": -413.0974426269531, + "loss": 0.0791, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2123138904571533, + "rewards/margins": 3.764556884765625, + "rewards/rejected": -2.5522429943084717, + "step": 8022 + }, + { + "epoch": 0.47, + "learning_rate": 5.776214638431239e-08, + "logits/chosen": -1.9117069244384766, + "logits/rejected": -1.9042197465896606, + "logps/chosen": -282.49957275390625, + "logps/rejected": -392.414794921875, + "loss": 0.1338, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9814026355743408, + "rewards/margins": 1.3740601539611816, + "rewards/rejected": 0.607342541217804, + "step": 8023 + }, + { + "epoch": 0.47, + "learning_rate": 5.7752836476259046e-08, + "logits/chosen": -2.0608208179473877, + "logits/rejected": -2.0594470500946045, + "logps/chosen": -0.4723092317581177, + "logps/rejected": -181.70465087890625, + "loss": 0.3938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.044000692665576935, + "rewards/margins": 2.104675769805908, + "rewards/rejected": -2.0606751441955566, + "step": 8024 + }, + { + "epoch": 0.47, + "learning_rate": 5.7743526292787015e-08, + "logits/chosen": -1.7883046865463257, + "logits/rejected": -1.7857129573822021, + "logps/chosen": -11.422954559326172, + "logps/rejected": -120.08350372314453, + "loss": 0.386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11412363499403, + "rewards/margins": 1.8173744678497314, + "rewards/rejected": -1.7032508850097656, + "step": 8025 + }, + { + "epoch": 0.47, + "learning_rate": 5.773421583422707e-08, + "logits/chosen": -1.9079923629760742, + "logits/rejected": -1.912217617034912, + "logps/chosen": -42.77545166015625, + "logps/rejected": -247.44139099121094, + "loss": 0.2996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37192612886428833, + "rewards/margins": 3.2747809886932373, + "rewards/rejected": -2.9028549194335938, + "step": 8026 + }, + { + "epoch": 0.47, + "learning_rate": 5.7724905100909915e-08, + "logits/chosen": -1.8631483316421509, + "logits/rejected": -1.8605384826660156, + "logps/chosen": -0.00013076912728138268, + "logps/rejected": -154.25625610351562, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.24790219363058e-05, + "rewards/margins": 2.9301555156707764, + "rewards/rejected": -2.930133104324341, + "step": 8027 + }, + { + "epoch": 0.47, + "learning_rate": 5.771559409316637e-08, + "logits/chosen": -1.8492614030838013, + "logits/rejected": -1.8219430446624756, + "logps/chosen": -250.34002685546875, + "logps/rejected": -430.4366455078125, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5771758556365967, + "rewards/margins": 2.1686432361602783, + "rewards/rejected": 0.4085327088832855, + "step": 8028 + }, + { + "epoch": 0.47, + "learning_rate": 5.7706282811327165e-08, + "logits/chosen": -1.7926697731018066, + "logits/rejected": -1.7975867986679077, + "logps/chosen": -145.621337890625, + "logps/rejected": -232.8323516845703, + "loss": 0.3963, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3667694330215454, + "rewards/margins": -0.01530611515045166, + "rewards/rejected": 1.382075548171997, + "step": 8029 + }, + { + "epoch": 0.47, + "learning_rate": 5.769697125572313e-08, + "logits/chosen": -2.0331690311431885, + "logits/rejected": -2.0312461853027344, + "logps/chosen": -0.0002291024720761925, + "logps/rejected": -71.72183227539062, + "loss": 0.5275, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.30805708776461e-06, + "rewards/margins": 0.8300607204437256, + "rewards/rejected": -0.8300514221191406, + "step": 8030 + }, + { + "epoch": 0.47, + "learning_rate": 5.7687659426685e-08, + "logits/chosen": -1.938138723373413, + "logits/rejected": -1.938755750656128, + "logps/chosen": -3.058152914047241, + "logps/rejected": -188.0547332763672, + "loss": 0.3709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1052260622382164, + "rewards/margins": 2.534820556640625, + "rewards/rejected": -2.4295945167541504, + "step": 8031 + }, + { + "epoch": 0.47, + "learning_rate": 5.767834732454361e-08, + "logits/chosen": -1.9434175491333008, + "logits/rejected": -2.012075662612915, + "logps/chosen": -313.18048095703125, + "logps/rejected": -406.72259521484375, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.576892137527466, + "rewards/margins": 4.521881103515625, + "rewards/rejected": -1.9449890851974487, + "step": 8032 + }, + { + "epoch": 0.47, + "learning_rate": 5.7669034949629756e-08, + "logits/chosen": -1.8802762031555176, + "logits/rejected": -1.8875632286071777, + "logps/chosen": -211.3370361328125, + "logps/rejected": -289.20635986328125, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.990373373031616, + "rewards/margins": 3.4312639236450195, + "rewards/rejected": -0.44089052081108093, + "step": 8033 + }, + { + "epoch": 0.47, + "learning_rate": 5.765972230227428e-08, + "logits/chosen": -1.9029202461242676, + "logits/rejected": -1.8736008405685425, + "logps/chosen": -140.5893096923828, + "logps/rejected": -226.39109802246094, + "loss": 0.2428, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1449158191680908, + "rewards/margins": 1.3735779523849487, + "rewards/rejected": -0.22866211831569672, + "step": 8034 + }, + { + "epoch": 0.47, + "learning_rate": 5.765040938280798e-08, + "logits/chosen": -1.867240309715271, + "logits/rejected": -1.8813481330871582, + "logps/chosen": -271.4953918457031, + "logps/rejected": -526.7099609375, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.117095947265625, + "rewards/margins": 9.124120712280273, + "rewards/rejected": -6.007025241851807, + "step": 8035 + }, + { + "epoch": 0.47, + "learning_rate": 5.7641096191561746e-08, + "logits/chosen": -1.943111538887024, + "logits/rejected": -1.9370516538619995, + "logps/chosen": -195.65304565429688, + "logps/rejected": -276.906494140625, + "loss": 0.0561, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.067462205886841, + "rewards/margins": 2.6483154296875, + "rewards/rejected": 0.41914674639701843, + "step": 8036 + }, + { + "epoch": 0.47, + "learning_rate": 5.763178272886637e-08, + "logits/chosen": -1.9420863389968872, + "logits/rejected": -1.9534804821014404, + "logps/chosen": -1.8954240658786148e-05, + "logps/rejected": -173.97247314453125, + "loss": 0.3843, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.417450428372831e-07, + "rewards/margins": 1.995719313621521, + "rewards/rejected": -1.9957183599472046, + "step": 8037 + }, + { + "epoch": 0.47, + "learning_rate": 5.7622468995052764e-08, + "logits/chosen": -2.00070858001709, + "logits/rejected": -1.992483139038086, + "logps/chosen": -143.59317016601562, + "logps/rejected": -286.5591735839844, + "loss": 0.0908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5448745489120483, + "rewards/margins": 4.344850063323975, + "rewards/rejected": -2.799975633621216, + "step": 8038 + }, + { + "epoch": 0.47, + "learning_rate": 5.761315499045176e-08, + "logits/chosen": -2.0150623321533203, + "logits/rejected": -2.007925271987915, + "logps/chosen": -23.595455169677734, + "logps/rejected": -231.033935546875, + "loss": 0.2401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6004205942153931, + "rewards/margins": 2.6926846504211426, + "rewards/rejected": -2.09226393699646, + "step": 8039 + }, + { + "epoch": 0.47, + "learning_rate": 5.760384071539426e-08, + "logits/chosen": -1.6082346439361572, + "logits/rejected": -1.6102973222732544, + "logps/chosen": -16.20672035217285, + "logps/rejected": -43.32836151123047, + "loss": 0.6608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11422424763441086, + "rewards/margins": 0.2880447208881378, + "rewards/rejected": -0.4022689759731293, + "step": 8040 + }, + { + "epoch": 0.47, + "learning_rate": 5.7594526170211133e-08, + "logits/chosen": -1.8415336608886719, + "logits/rejected": -1.8879226446151733, + "logps/chosen": -226.28424072265625, + "logps/rejected": -424.3479309082031, + "loss": 0.1385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.87548828125, + "rewards/margins": 1.3727385997772217, + "rewards/rejected": 0.5027496218681335, + "step": 8041 + }, + { + "epoch": 0.47, + "learning_rate": 5.758521135523329e-08, + "logits/chosen": -1.799651861190796, + "logits/rejected": -1.7023069858551025, + "logps/chosen": -270.72296142578125, + "logps/rejected": -830.8081665039062, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1279296875, + "rewards/margins": 9.954681396484375, + "rewards/rejected": -7.826751708984375, + "step": 8042 + }, + { + "epoch": 0.47, + "learning_rate": 5.757589627079165e-08, + "logits/chosen": -2.06805682182312, + "logits/rejected": -2.064671277999878, + "logps/chosen": -44.492942810058594, + "logps/rejected": -318.9521789550781, + "loss": 0.146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0671157836914062, + "rewards/margins": 2.6055893898010254, + "rewards/rejected": -1.5384734869003296, + "step": 8043 + }, + { + "epoch": 0.47, + "learning_rate": 5.756658091721709e-08, + "logits/chosen": -1.9680191278457642, + "logits/rejected": -1.9968626499176025, + "logps/chosen": -227.05702209472656, + "logps/rejected": -324.7548828125, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.229144334793091, + "rewards/margins": 4.717633247375488, + "rewards/rejected": -2.4884889125823975, + "step": 8044 + }, + { + "epoch": 0.47, + "learning_rate": 5.7557265294840594e-08, + "logits/chosen": -2.0054502487182617, + "logits/rejected": -2.0122008323669434, + "logps/chosen": -104.87326049804688, + "logps/rejected": -366.587158203125, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5367523431777954, + "rewards/margins": 3.9589357376098633, + "rewards/rejected": -2.4221832752227783, + "step": 8045 + }, + { + "epoch": 0.47, + "learning_rate": 5.7547949403993046e-08, + "logits/chosen": -1.8861045837402344, + "logits/rejected": -1.9028199911117554, + "logps/chosen": -165.64010620117188, + "logps/rejected": -272.0850830078125, + "loss": 0.4301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9811294674873352, + "rewards/margins": 0.3212173581123352, + "rewards/rejected": 0.659912109375, + "step": 8046 + }, + { + "epoch": 0.47, + "learning_rate": 5.753863324500543e-08, + "logits/chosen": -1.8232342004776, + "logits/rejected": -1.8234879970550537, + "logps/chosen": -60.03920364379883, + "logps/rejected": -102.07887268066406, + "loss": 0.3363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5778324007987976, + "rewards/margins": 1.2006686925888062, + "rewards/rejected": -0.6228362917900085, + "step": 8047 + }, + { + "epoch": 0.47, + "learning_rate": 5.752931681820865e-08, + "logits/chosen": -1.9241710901260376, + "logits/rejected": -1.9255218505859375, + "logps/chosen": -7.307767391204834, + "logps/rejected": -30.41944122314453, + "loss": 0.4714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5031668543815613, + "rewards/margins": 0.512144148349762, + "rewards/rejected": -0.008977318182587624, + "step": 8048 + }, + { + "epoch": 0.47, + "learning_rate": 5.752000012393374e-08, + "logits/chosen": -2.097860336303711, + "logits/rejected": -2.09985613822937, + "logps/chosen": -224.14463806152344, + "logps/rejected": -331.8409423828125, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8079391717910767, + "rewards/margins": 2.7519028186798096, + "rewards/rejected": -0.9439636468887329, + "step": 8049 + }, + { + "epoch": 0.47, + "learning_rate": 5.751068316251164e-08, + "logits/chosen": -1.9697425365447998, + "logits/rejected": -2.0091423988342285, + "logps/chosen": -280.5562744140625, + "logps/rejected": -444.4775390625, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5989716053009033, + "rewards/margins": 7.662442207336426, + "rewards/rejected": -5.063470363616943, + "step": 8050 + }, + { + "epoch": 0.47, + "learning_rate": 5.7501365934273324e-08, + "logits/chosen": -1.889256238937378, + "logits/rejected": -1.8841124773025513, + "logps/chosen": -163.1133270263672, + "logps/rejected": -234.16285705566406, + "loss": 0.2214, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1074326038360596, + "rewards/margins": 0.7178223133087158, + "rewards/rejected": 1.3896102905273438, + "step": 8051 + }, + { + "epoch": 0.47, + "learning_rate": 5.74920484395498e-08, + "logits/chosen": -2.0636627674102783, + "logits/rejected": -2.065633535385132, + "logps/chosen": -17.287670135498047, + "logps/rejected": -116.80010986328125, + "loss": 0.2126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9122602343559265, + "rewards/margins": 2.2647032737731934, + "rewards/rejected": -1.352442979812622, + "step": 8052 + }, + { + "epoch": 0.47, + "learning_rate": 5.7482730678672085e-08, + "logits/chosen": -1.7445056438446045, + "logits/rejected": -1.7357110977172852, + "logps/chosen": -51.36716079711914, + "logps/rejected": -234.8842010498047, + "loss": 0.2441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8292434811592102, + "rewards/margins": 1.6513047218322754, + "rewards/rejected": -0.8220611810684204, + "step": 8053 + }, + { + "epoch": 0.47, + "learning_rate": 5.747341265197115e-08, + "logits/chosen": -2.0652575492858887, + "logits/rejected": -2.054025888442993, + "logps/chosen": -89.98281860351562, + "logps/rejected": -410.34100341796875, + "loss": 0.1356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.984387218952179, + "rewards/margins": 3.9348907470703125, + "rewards/rejected": -2.9505035877227783, + "step": 8054 + }, + { + "epoch": 0.47, + "learning_rate": 5.746409435977806e-08, + "logits/chosen": -1.8052740097045898, + "logits/rejected": -1.8298406600952148, + "logps/chosen": -217.1756591796875, + "logps/rejected": -318.3694763183594, + "loss": 0.1489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1288528442382812, + "rewards/margins": 1.485661268234253, + "rewards/rejected": -0.35680848360061646, + "step": 8055 + }, + { + "epoch": 0.47, + "learning_rate": 5.7454775802423817e-08, + "logits/chosen": -1.9813101291656494, + "logits/rejected": -1.988000512123108, + "logps/chosen": -43.80488586425781, + "logps/rejected": -227.35142517089844, + "loss": 0.1108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5869648456573486, + "rewards/margins": 2.4526174068450928, + "rewards/rejected": -0.8656525015830994, + "step": 8056 + }, + { + "epoch": 0.47, + "learning_rate": 5.744545698023948e-08, + "logits/chosen": -1.94772469997406, + "logits/rejected": -1.9419208765029907, + "logps/chosen": -172.71676635742188, + "logps/rejected": -313.408447265625, + "loss": 0.127, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.267828345298767, + "rewards/margins": 2.867025852203369, + "rewards/rejected": -1.5991973876953125, + "step": 8057 + }, + { + "epoch": 0.47, + "learning_rate": 5.743613789355608e-08, + "logits/chosen": -2.0191075801849365, + "logits/rejected": -2.0089492797851562, + "logps/chosen": -6.559053897857666, + "logps/rejected": -170.16168212890625, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4329654276371002, + "rewards/margins": 3.055229663848877, + "rewards/rejected": -2.6222641468048096, + "step": 8058 + }, + { + "epoch": 0.47, + "learning_rate": 5.74268185427047e-08, + "logits/chosen": -1.9038618803024292, + "logits/rejected": -1.8843873739242554, + "logps/chosen": -33.86820983886719, + "logps/rejected": -234.9402618408203, + "loss": 0.4658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35544511675834656, + "rewards/margins": 2.148073673248291, + "rewards/rejected": -2.50351881980896, + "step": 8059 + }, + { + "epoch": 0.47, + "learning_rate": 5.741749892801638e-08, + "logits/chosen": -1.842549443244934, + "logits/rejected": -1.8353506326675415, + "logps/chosen": -14.630273818969727, + "logps/rejected": -241.82623291015625, + "loss": 0.2222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6503240466117859, + "rewards/margins": 2.6469366550445557, + "rewards/rejected": -1.996612548828125, + "step": 8060 + }, + { + "epoch": 0.47, + "learning_rate": 5.740817904982223e-08, + "logits/chosen": -1.9305551052093506, + "logits/rejected": -1.9549990892410278, + "logps/chosen": -243.64393615722656, + "logps/rejected": -366.2283935546875, + "loss": 0.5759, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1017913818359375, + "rewards/margins": 0.6275910139083862, + "rewards/rejected": -1.7293823957443237, + "step": 8061 + }, + { + "epoch": 0.47, + "learning_rate": 5.7398858908453316e-08, + "logits/chosen": -2.0608394145965576, + "logits/rejected": -2.0622847080230713, + "logps/chosen": -25.78195571899414, + "logps/rejected": -211.9319610595703, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03198413923382759, + "rewards/margins": 5.490017414093018, + "rewards/rejected": -5.52200174331665, + "step": 8062 + }, + { + "epoch": 0.47, + "learning_rate": 5.738953850424074e-08, + "logits/chosen": -2.1170735359191895, + "logits/rejected": -2.1135406494140625, + "logps/chosen": -58.41886901855469, + "logps/rejected": -145.70114135742188, + "loss": 0.1095, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6604797840118408, + "rewards/margins": 2.776576280593872, + "rewards/rejected": -1.1160964965820312, + "step": 8063 + }, + { + "epoch": 0.47, + "learning_rate": 5.738021783751562e-08, + "logits/chosen": -1.9245245456695557, + "logits/rejected": -1.9183660745620728, + "logps/chosen": -36.58386993408203, + "logps/rejected": -141.449951171875, + "loss": 0.3643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33777695894241333, + "rewards/margins": 1.4325218200683594, + "rewards/rejected": -1.0947449207305908, + "step": 8064 + }, + { + "epoch": 0.47, + "learning_rate": 5.737089690860906e-08, + "logits/chosen": -1.8154170513153076, + "logits/rejected": -1.7792489528656006, + "logps/chosen": -194.2508544921875, + "logps/rejected": -383.05157470703125, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.180258274078369, + "rewards/margins": 4.380093574523926, + "rewards/rejected": -2.1998353004455566, + "step": 8065 + }, + { + "epoch": 0.47, + "learning_rate": 5.7361575717852185e-08, + "logits/chosen": -1.9689196348190308, + "logits/rejected": -1.9400148391723633, + "logps/chosen": -167.84414672851562, + "logps/rejected": -338.35400390625, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6371307373046875, + "rewards/margins": 3.712207078933716, + "rewards/rejected": -0.07507629692554474, + "step": 8066 + }, + { + "epoch": 0.47, + "learning_rate": 5.735225426557613e-08, + "logits/chosen": -1.8238455057144165, + "logits/rejected": -1.802460789680481, + "logps/chosen": -297.8788757324219, + "logps/rejected": -480.13311767578125, + "loss": 0.142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7868744134902954, + "rewards/margins": 2.124673366546631, + "rewards/rejected": -1.337799072265625, + "step": 8067 + }, + { + "epoch": 0.47, + "learning_rate": 5.734293255211202e-08, + "logits/chosen": -1.8475629091262817, + "logits/rejected": -1.8313692808151245, + "logps/chosen": -84.04347229003906, + "logps/rejected": -270.824462890625, + "loss": 0.1587, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0453780889511108, + "rewards/margins": 2.173802137374878, + "rewards/rejected": -1.128424048423767, + "step": 8068 + }, + { + "epoch": 0.47, + "learning_rate": 5.733361057779105e-08, + "logits/chosen": -1.8924369812011719, + "logits/rejected": -1.8961663246154785, + "logps/chosen": -10.750102043151855, + "logps/rejected": -210.284912109375, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12499971687793732, + "rewards/margins": 4.746151447296143, + "rewards/rejected": -4.621151924133301, + "step": 8069 + }, + { + "epoch": 0.47, + "learning_rate": 5.732428834294436e-08, + "logits/chosen": -2.117331027984619, + "logits/rejected": -2.1174798011779785, + "logps/chosen": -11.62166690826416, + "logps/rejected": -117.11224365234375, + "loss": 0.4028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019266797229647636, + "rewards/margins": 1.9512889385223389, + "rewards/rejected": -1.9320220947265625, + "step": 8070 + }, + { + "epoch": 0.47, + "learning_rate": 5.731496584790312e-08, + "logits/chosen": -1.9290621280670166, + "logits/rejected": -1.9305578470230103, + "logps/chosen": -204.92369079589844, + "logps/rejected": -361.4330139160156, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2813888788223267, + "rewards/margins": 3.963292121887207, + "rewards/rejected": -2.681903123855591, + "step": 8071 + }, + { + "epoch": 0.47, + "learning_rate": 5.730564309299851e-08, + "logits/chosen": -1.8214530944824219, + "logits/rejected": -1.7940911054611206, + "logps/chosen": -285.1458740234375, + "logps/rejected": -506.5355529785156, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.802539110183716, + "rewards/margins": 3.387725830078125, + "rewards/rejected": -0.585186779499054, + "step": 8072 + }, + { + "epoch": 0.47, + "learning_rate": 5.729632007856171e-08, + "logits/chosen": -2.1179912090301514, + "logits/rejected": -2.1216793060302734, + "logps/chosen": -2.347459554672241, + "logps/rejected": -141.78382873535156, + "loss": 0.5379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0245638620108366, + "rewards/margins": 0.5490099787712097, + "rewards/rejected": -0.5244461297988892, + "step": 8073 + }, + { + "epoch": 0.47, + "learning_rate": 5.728699680492395e-08, + "logits/chosen": -2.1435723304748535, + "logits/rejected": -2.1494805812835693, + "logps/chosen": -0.0004725820617750287, + "logps/rejected": -158.97921752929688, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003122217894997448, + "rewards/margins": 3.003814220428467, + "rewards/rejected": -3.0035018920898438, + "step": 8074 + }, + { + "epoch": 0.47, + "learning_rate": 5.727767327241641e-08, + "logits/chosen": -1.8766801357269287, + "logits/rejected": -1.8811628818511963, + "logps/chosen": -8.892830848693848, + "logps/rejected": -244.64732360839844, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0842968001961708, + "rewards/margins": 2.2365200519561768, + "rewards/rejected": -2.1522233486175537, + "step": 8075 + }, + { + "epoch": 0.47, + "learning_rate": 5.726834948137033e-08, + "logits/chosen": -1.9481146335601807, + "logits/rejected": -1.943440318107605, + "logps/chosen": -35.602867126464844, + "logps/rejected": -188.9174041748047, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6347118616104126, + "rewards/margins": 1.0864925384521484, + "rewards/rejected": -0.4517807066440582, + "step": 8076 + }, + { + "epoch": 0.47, + "learning_rate": 5.725902543211689e-08, + "logits/chosen": -1.7190395593643188, + "logits/rejected": -1.7062550783157349, + "logps/chosen": -274.0311279296875, + "logps/rejected": -350.5213623046875, + "loss": 0.165, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.998388648033142, + "rewards/margins": 1.070928931236267, + "rewards/rejected": 0.927459716796875, + "step": 8077 + }, + { + "epoch": 0.47, + "learning_rate": 5.7249701124987395e-08, + "logits/chosen": -1.9992461204528809, + "logits/rejected": -2.0026462078094482, + "logps/chosen": -0.009839536622166634, + "logps/rejected": -251.15576171875, + "loss": 0.3534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003139448817819357, + "rewards/margins": 3.9319140911102295, + "rewards/rejected": -3.9322280883789062, + "step": 8078 + }, + { + "epoch": 0.47, + "learning_rate": 5.724037656031303e-08, + "logits/chosen": -2.047454595565796, + "logits/rejected": -2.0437488555908203, + "logps/chosen": -67.28466033935547, + "logps/rejected": -117.73168182373047, + "loss": 0.473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4574294984340668, + "rewards/margins": 0.4239349365234375, + "rewards/rejected": 0.03349456936120987, + "step": 8079 + }, + { + "epoch": 0.47, + "learning_rate": 5.7231051738425084e-08, + "logits/chosen": -1.93157160282135, + "logits/rejected": -1.9203413724899292, + "logps/chosen": -60.14754867553711, + "logps/rejected": -211.42605590820312, + "loss": 0.3991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08006401360034943, + "rewards/margins": 1.6153271198272705, + "rewards/rejected": -1.5352630615234375, + "step": 8080 + }, + { + "epoch": 0.47, + "learning_rate": 5.72217266596548e-08, + "logits/chosen": -1.758920669555664, + "logits/rejected": -1.7209744453430176, + "logps/chosen": -225.8370361328125, + "logps/rejected": -621.175537109375, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8887542486190796, + "rewards/margins": 3.762551784515381, + "rewards/rejected": -1.8737976551055908, + "step": 8081 + }, + { + "epoch": 0.47, + "learning_rate": 5.721240132433346e-08, + "logits/chosen": -2.1460258960723877, + "logits/rejected": -2.12355375289917, + "logps/chosen": -3.3020573027897626e-05, + "logps/rejected": -159.39532470703125, + "loss": 0.4285, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.145629025562812e-07, + "rewards/margins": 1.5641309022903442, + "rewards/rejected": -1.5641311407089233, + "step": 8082 + }, + { + "epoch": 0.47, + "learning_rate": 5.720307573279236e-08, + "logits/chosen": -1.8668947219848633, + "logits/rejected": -1.8775919675827026, + "logps/chosen": -154.77267456054688, + "logps/rejected": -228.679931640625, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8739410638809204, + "rewards/margins": 0.6513214111328125, + "rewards/rejected": 0.22261963784694672, + "step": 8083 + }, + { + "epoch": 0.47, + "learning_rate": 5.719374988536276e-08, + "logits/chosen": -2.0318660736083984, + "logits/rejected": -2.0344932079315186, + "logps/chosen": -53.527015686035156, + "logps/rejected": -137.1421661376953, + "loss": 0.3651, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.089276909828186, + "rewards/margins": 0.3832206726074219, + "rewards/rejected": 0.7060562372207642, + "step": 8084 + }, + { + "epoch": 0.47, + "learning_rate": 5.718442378237599e-08, + "logits/chosen": -2.0556533336639404, + "logits/rejected": -2.047886610031128, + "logps/chosen": -26.095521926879883, + "logps/rejected": -317.5614929199219, + "loss": 0.8853, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.014395833015442, + "rewards/margins": 0.38693785667419434, + "rewards/rejected": -1.4013336896896362, + "step": 8085 + }, + { + "epoch": 0.47, + "learning_rate": 5.717509742416333e-08, + "logits/chosen": -1.7856019735336304, + "logits/rejected": -1.765208125114441, + "logps/chosen": -306.9794006347656, + "logps/rejected": -596.2656860351562, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2059783935546875, + "rewards/margins": 5.750124931335449, + "rewards/rejected": -3.544146776199341, + "step": 8086 + }, + { + "epoch": 0.47, + "learning_rate": 5.716577081105613e-08, + "logits/chosen": -1.8321547508239746, + "logits/rejected": -1.8222616910934448, + "logps/chosen": -59.47900390625, + "logps/rejected": -223.24562072753906, + "loss": 0.3534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12236938625574112, + "rewards/margins": 1.967036485671997, + "rewards/rejected": -1.8446670770645142, + "step": 8087 + }, + { + "epoch": 0.47, + "learning_rate": 5.715644394338567e-08, + "logits/chosen": -2.0299007892608643, + "logits/rejected": -2.016796588897705, + "logps/chosen": -67.90363311767578, + "logps/rejected": -216.17613220214844, + "loss": 0.4841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3533432185649872, + "rewards/margins": 0.4233642816543579, + "rewards/rejected": -0.07002105563879013, + "step": 8088 + }, + { + "epoch": 0.47, + "learning_rate": 5.714711682148334e-08, + "logits/chosen": -1.9686427116394043, + "logits/rejected": -1.9582607746124268, + "logps/chosen": -43.3944091796875, + "logps/rejected": -234.1077117919922, + "loss": 0.2582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4630092680454254, + "rewards/margins": 2.4350109100341797, + "rewards/rejected": -1.9720016717910767, + "step": 8089 + }, + { + "epoch": 0.47, + "learning_rate": 5.713778944568045e-08, + "logits/chosen": -1.862473726272583, + "logits/rejected": -1.8624756336212158, + "logps/chosen": -165.56451416015625, + "logps/rejected": -206.6431121826172, + "loss": 0.3981, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5126450061798096, + "rewards/margins": -0.10668635368347168, + "rewards/rejected": 1.6193313598632812, + "step": 8090 + }, + { + "epoch": 0.47, + "learning_rate": 5.7128461816308374e-08, + "logits/chosen": -2.0166232585906982, + "logits/rejected": -2.008227586746216, + "logps/chosen": -43.9357795715332, + "logps/rejected": -216.54339599609375, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40955850481987, + "rewards/margins": 1.5228139162063599, + "rewards/rejected": -1.1132553815841675, + "step": 8091 + }, + { + "epoch": 0.47, + "learning_rate": 5.711913393369846e-08, + "logits/chosen": -1.9399845600128174, + "logits/rejected": -1.9453226327896118, + "logps/chosen": -0.0018195885932072997, + "logps/rejected": -94.65658569335938, + "loss": 0.4511, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.352357842028141e-05, + "rewards/margins": 1.421703577041626, + "rewards/rejected": -1.4217270612716675, + "step": 8092 + }, + { + "epoch": 0.47, + "learning_rate": 5.710980579818209e-08, + "logits/chosen": -1.9986457824707031, + "logits/rejected": -2.0013256072998047, + "logps/chosen": -0.0412357933819294, + "logps/rejected": -173.7747802734375, + "loss": 0.4991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006521476898342371, + "rewards/margins": 1.0138258934020996, + "rewards/rejected": -1.0073044300079346, + "step": 8093 + }, + { + "epoch": 0.47, + "learning_rate": 5.710047741009063e-08, + "logits/chosen": -2.0721561908721924, + "logits/rejected": -2.0717103481292725, + "logps/chosen": -6.0025858879089355, + "logps/rejected": -62.698760986328125, + "loss": 0.4647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11233282089233398, + "rewards/margins": 1.0852347612380981, + "rewards/rejected": -0.9729019403457642, + "step": 8094 + }, + { + "epoch": 0.47, + "learning_rate": 5.709114876975549e-08, + "logits/chosen": -2.125089406967163, + "logits/rejected": -2.1059529781341553, + "logps/chosen": -67.30815124511719, + "logps/rejected": -181.59027099609375, + "loss": 0.2595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9175872802734375, + "rewards/margins": 1.2262649536132812, + "rewards/rejected": -0.30867767333984375, + "step": 8095 + }, + { + "epoch": 0.47, + "learning_rate": 5.708181987750804e-08, + "logits/chosen": -1.9533532857894897, + "logits/rejected": -1.9386104345321655, + "logps/chosen": -49.11442565917969, + "logps/rejected": -297.01177978515625, + "loss": 0.1476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8506038784980774, + "rewards/margins": 3.2248775959014893, + "rewards/rejected": -2.3742737770080566, + "step": 8096 + }, + { + "epoch": 0.47, + "learning_rate": 5.7072490733679736e-08, + "logits/chosen": -1.8039922714233398, + "logits/rejected": -1.76024329662323, + "logps/chosen": -205.54098510742188, + "logps/rejected": -495.2648620605469, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7392090559005737, + "rewards/margins": 4.868356227874756, + "rewards/rejected": -3.1291472911834717, + "step": 8097 + }, + { + "epoch": 0.47, + "learning_rate": 5.706316133860194e-08, + "logits/chosen": -2.02705717086792, + "logits/rejected": -2.0230917930603027, + "logps/chosen": -33.013519287109375, + "logps/rejected": -224.37664794921875, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8175453543663025, + "rewards/margins": 0.9227028489112854, + "rewards/rejected": -0.10515747219324112, + "step": 8098 + }, + { + "epoch": 0.47, + "learning_rate": 5.705383169260613e-08, + "logits/chosen": -2.0558595657348633, + "logits/rejected": -2.0502049922943115, + "logps/chosen": -2.293107748031616, + "logps/rejected": -176.91778564453125, + "loss": 0.3645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1020890474319458, + "rewards/margins": 2.239511489868164, + "rewards/rejected": -2.1374223232269287, + "step": 8099 + }, + { + "epoch": 0.47, + "learning_rate": 5.704450179602369e-08, + "logits/chosen": -2.001817226409912, + "logits/rejected": -2.0032620429992676, + "logps/chosen": -54.48381042480469, + "logps/rejected": -307.05963134765625, + "loss": 0.4744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10256347805261612, + "rewards/margins": 1.6035277843475342, + "rewards/rejected": -1.706091284751892, + "step": 8100 + }, + { + "epoch": 0.47, + "learning_rate": 5.70351716491861e-08, + "logits/chosen": -2.0378386974334717, + "logits/rejected": -2.0783698558807373, + "logps/chosen": -170.0286407470703, + "logps/rejected": -402.55694580078125, + "loss": 0.1113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.985913097858429, + "rewards/margins": 2.400115966796875, + "rewards/rejected": -1.4142029285430908, + "step": 8101 + }, + { + "epoch": 0.47, + "learning_rate": 5.70258412524248e-08, + "logits/chosen": -2.12178373336792, + "logits/rejected": -2.1081066131591797, + "logps/chosen": -141.67294311523438, + "logps/rejected": -446.0306396484375, + "loss": 0.0909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0220718383789062, + "rewards/margins": 3.491450548171997, + "rewards/rejected": -2.469378709793091, + "step": 8102 + }, + { + "epoch": 0.47, + "learning_rate": 5.701651060607125e-08, + "logits/chosen": -1.9999853372573853, + "logits/rejected": -2.008216142654419, + "logps/chosen": -276.4356384277344, + "logps/rejected": -340.31134033203125, + "loss": 0.0932, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9323761463165283, + "rewards/margins": 1.8031524419784546, + "rewards/rejected": 1.1292237043380737, + "step": 8103 + }, + { + "epoch": 0.47, + "learning_rate": 5.700717971045692e-08, + "logits/chosen": -1.8412200212478638, + "logits/rejected": -1.8406912088394165, + "logps/chosen": -234.50833129882812, + "logps/rejected": -338.82275390625, + "loss": 0.423, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.866003394126892, + "rewards/margins": -0.2128509283065796, + "rewards/rejected": 2.0788543224334717, + "step": 8104 + }, + { + "epoch": 0.47, + "learning_rate": 5.6997848565913277e-08, + "logits/chosen": -2.037348985671997, + "logits/rejected": -1.9804474115371704, + "logps/chosen": -309.0262145996094, + "logps/rejected": -421.7279052734375, + "loss": 0.1685, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3446563482284546, + "rewards/margins": 1.7711883783340454, + "rewards/rejected": -0.42653200030326843, + "step": 8105 + }, + { + "epoch": 0.47, + "learning_rate": 5.698851717277184e-08, + "logits/chosen": -1.7170425653457642, + "logits/rejected": -1.7206025123596191, + "logps/chosen": -49.82244873046875, + "logps/rejected": -206.3761444091797, + "loss": 0.5996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06514473259449005, + "rewards/margins": 0.12181740254163742, + "rewards/rejected": -0.05667266994714737, + "step": 8106 + }, + { + "epoch": 0.47, + "learning_rate": 5.6979185531364096e-08, + "logits/chosen": -1.9591560363769531, + "logits/rejected": -1.957134485244751, + "logps/chosen": -1.9632655382156372, + "logps/rejected": -140.32992553710938, + "loss": 0.4461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0659397765994072, + "rewards/margins": 1.1519582271575928, + "rewards/rejected": -1.086018443107605, + "step": 8107 + }, + { + "epoch": 0.47, + "learning_rate": 5.696985364202153e-08, + "logits/chosen": -1.8521780967712402, + "logits/rejected": -1.8485462665557861, + "logps/chosen": -10.70136833190918, + "logps/rejected": -131.597412109375, + "loss": 0.5053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05217733606696129, + "rewards/margins": 0.7777197957038879, + "rewards/rejected": -0.7255424857139587, + "step": 8108 + }, + { + "epoch": 0.47, + "learning_rate": 5.6960521505075654e-08, + "logits/chosen": -2.126453161239624, + "logits/rejected": -2.1257071495056152, + "logps/chosen": -10.765959739685059, + "logps/rejected": -126.6678466796875, + "loss": 0.4475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35200950503349304, + "rewards/margins": 0.7610106468200684, + "rewards/rejected": -0.4090011715888977, + "step": 8109 + }, + { + "epoch": 0.47, + "learning_rate": 5.695118912085803e-08, + "logits/chosen": -1.8721522092819214, + "logits/rejected": -1.8530112504959106, + "logps/chosen": -0.0016234496142715216, + "logps/rejected": -266.83209228515625, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5228719348669983e-05, + "rewards/margins": 3.236645460128784, + "rewards/rejected": -3.2366607189178467, + "step": 8110 + }, + { + "epoch": 0.47, + "learning_rate": 5.6941856489700156e-08, + "logits/chosen": -1.9763249158859253, + "logits/rejected": -2.0148019790649414, + "logps/chosen": -160.09222412109375, + "logps/rejected": -216.91783142089844, + "loss": 0.0789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8371429443359375, + "rewards/margins": 2.167083740234375, + "rewards/rejected": -0.3299407958984375, + "step": 8111 + }, + { + "epoch": 0.47, + "learning_rate": 5.6932523611933594e-08, + "logits/chosen": -1.8636561632156372, + "logits/rejected": -1.866874098777771, + "logps/chosen": -40.85138702392578, + "logps/rejected": -196.3829803466797, + "loss": 0.2591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7815048098564148, + "rewards/margins": 1.6466659307479858, + "rewards/rejected": -0.865161120891571, + "step": 8112 + }, + { + "epoch": 0.47, + "learning_rate": 5.692319048788987e-08, + "logits/chosen": -2.0825717449188232, + "logits/rejected": -2.0724422931671143, + "logps/chosen": -0.01962374895811081, + "logps/rejected": -159.20999145507812, + "loss": 0.3783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001344565418548882, + "rewards/margins": 3.244354009628296, + "rewards/rejected": -3.2456986904144287, + "step": 8113 + }, + { + "epoch": 0.47, + "learning_rate": 5.6913857117900574e-08, + "logits/chosen": -1.9959908723831177, + "logits/rejected": -1.9951250553131104, + "logps/chosen": -2.1337037086486816, + "logps/rejected": -160.83648681640625, + "loss": 0.4016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13011465966701508, + "rewards/margins": 3.115771770477295, + "rewards/rejected": -3.2458863258361816, + "step": 8114 + }, + { + "epoch": 0.47, + "learning_rate": 5.6904523502297244e-08, + "logits/chosen": -2.017697334289551, + "logits/rejected": -2.0171656608581543, + "logps/chosen": -72.36550903320312, + "logps/rejected": -198.74334716796875, + "loss": 0.2319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8207268118858337, + "rewards/margins": 1.7481743097305298, + "rewards/rejected": -0.927447497844696, + "step": 8115 + }, + { + "epoch": 0.47, + "learning_rate": 5.689518964141147e-08, + "logits/chosen": -1.7548452615737915, + "logits/rejected": -1.7514469623565674, + "logps/chosen": -22.130006790161133, + "logps/rejected": -198.6378631591797, + "loss": 0.383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2969655990600586, + "rewards/margins": 1.4783544540405273, + "rewards/rejected": -1.1813888549804688, + "step": 8116 + }, + { + "epoch": 0.47, + "learning_rate": 5.688585553557481e-08, + "logits/chosen": -1.8326256275177002, + "logits/rejected": -1.8313652276992798, + "logps/chosen": -1.8298139572143555, + "logps/rejected": -111.41600036621094, + "loss": 0.4229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1277451068162918, + "rewards/margins": 1.9038703441619873, + "rewards/rejected": -2.0316154956817627, + "step": 8117 + }, + { + "epoch": 0.47, + "learning_rate": 5.6876521185118895e-08, + "logits/chosen": -2.0139148235321045, + "logits/rejected": -2.0101237297058105, + "logps/chosen": -14.5878324508667, + "logps/rejected": -134.88937377929688, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10531578212976456, + "rewards/margins": 2.4467830657958984, + "rewards/rejected": -2.3414673805236816, + "step": 8118 + }, + { + "epoch": 0.47, + "learning_rate": 5.6867186590375316e-08, + "logits/chosen": -2.1328015327453613, + "logits/rejected": -2.1194045543670654, + "logps/chosen": -15.107305526733398, + "logps/rejected": -184.00967407226562, + "loss": 0.455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3294094204902649, + "rewards/margins": 0.8117977380752563, + "rewards/rejected": -0.48238831758499146, + "step": 8119 + }, + { + "epoch": 0.47, + "learning_rate": 5.685785175167568e-08, + "logits/chosen": -1.941284418106079, + "logits/rejected": -1.933587908744812, + "logps/chosen": -233.8685302734375, + "logps/rejected": -300.05767822265625, + "loss": 0.2606, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.093924045562744, + "rewards/margins": 0.4180114269256592, + "rewards/rejected": 2.675912618637085, + "step": 8120 + }, + { + "epoch": 0.47, + "learning_rate": 5.6848516669351587e-08, + "logits/chosen": -1.8938524723052979, + "logits/rejected": -1.9085302352905273, + "logps/chosen": -187.5022430419922, + "logps/rejected": -302.2343444824219, + "loss": 0.0967, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7949464321136475, + "rewards/margins": 1.70475172996521, + "rewards/rejected": 1.0901947021484375, + "step": 8121 + }, + { + "epoch": 0.47, + "learning_rate": 5.68391813437347e-08, + "logits/chosen": -1.8935089111328125, + "logits/rejected": -1.8716514110565186, + "logps/chosen": -353.567626953125, + "logps/rejected": -614.777099609375, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.96514892578125, + "rewards/margins": 5.436438083648682, + "rewards/rejected": -3.4712891578674316, + "step": 8122 + }, + { + "epoch": 0.47, + "learning_rate": 5.682984577515662e-08, + "logits/chosen": -1.8854881525039673, + "logits/rejected": -1.9140775203704834, + "logps/chosen": -169.5442657470703, + "logps/rejected": -449.0658874511719, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9180755615234375, + "rewards/margins": 3.525103807449341, + "rewards/rejected": -1.6070282459259033, + "step": 8123 + }, + { + "epoch": 0.47, + "learning_rate": 5.682050996394901e-08, + "logits/chosen": -2.041537284851074, + "logits/rejected": -1.9973242282867432, + "logps/chosen": -142.76812744140625, + "logps/rejected": -386.42852783203125, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.81622314453125, + "rewards/margins": 3.949139356613159, + "rewards/rejected": -2.132916212081909, + "step": 8124 + }, + { + "epoch": 0.47, + "learning_rate": 5.6811173910443545e-08, + "logits/chosen": -1.7762669324874878, + "logits/rejected": -1.7580064535140991, + "logps/chosen": -183.723876953125, + "logps/rejected": -374.6632080078125, + "loss": 0.1085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6403321027755737, + "rewards/margins": 2.0197083950042725, + "rewards/rejected": -0.37937623262405396, + "step": 8125 + }, + { + "epoch": 0.47, + "learning_rate": 5.6801837614971846e-08, + "logits/chosen": -1.9422807693481445, + "logits/rejected": -1.9346046447753906, + "logps/chosen": -4.533819198608398, + "logps/rejected": -306.2740478515625, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15241074562072754, + "rewards/margins": 6.315256118774414, + "rewards/rejected": -6.162845134735107, + "step": 8126 + }, + { + "epoch": 0.47, + "learning_rate": 5.6792501077865615e-08, + "logits/chosen": -2.0980641841888428, + "logits/rejected": -2.0946176052093506, + "logps/chosen": -0.4459909200668335, + "logps/rejected": -178.59066772460938, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08222109079360962, + "rewards/margins": 3.7659521102905273, + "rewards/rejected": -3.6837310791015625, + "step": 8127 + }, + { + "epoch": 0.47, + "learning_rate": 5.6783164299456507e-08, + "logits/chosen": -1.9553781747817993, + "logits/rejected": -1.9574679136276245, + "logps/chosen": -14.634733200073242, + "logps/rejected": -87.38702392578125, + "loss": 0.4354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0703771635890007, + "rewards/margins": 1.8415499925613403, + "rewards/rejected": -1.9119271039962769, + "step": 8128 + }, + { + "epoch": 0.47, + "learning_rate": 5.677382728007624e-08, + "logits/chosen": -1.9839811325073242, + "logits/rejected": -1.967938780784607, + "logps/chosen": -81.96199035644531, + "logps/rejected": -367.6546936035156, + "loss": 0.1565, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1390037536621094, + "rewards/margins": 3.881554365158081, + "rewards/rejected": -2.7425506114959717, + "step": 8129 + }, + { + "epoch": 0.47, + "learning_rate": 5.676449002005648e-08, + "logits/chosen": -1.86734139919281, + "logits/rejected": -1.8342359066009521, + "logps/chosen": -179.79586791992188, + "logps/rejected": -344.0157470703125, + "loss": 0.141, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.017681837081909, + "rewards/margins": 1.5694854259490967, + "rewards/rejected": 0.4481964111328125, + "step": 8130 + }, + { + "epoch": 0.47, + "learning_rate": 5.675515251972896e-08, + "logits/chosen": -2.0138916969299316, + "logits/rejected": -2.0119104385375977, + "logps/chosen": -0.2522609233856201, + "logps/rejected": -240.26751708984375, + "loss": 0.3444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03239358589053154, + "rewards/margins": 4.3928656578063965, + "rewards/rejected": -4.360472202301025, + "step": 8131 + }, + { + "epoch": 0.47, + "learning_rate": 5.674581477942535e-08, + "logits/chosen": -1.9484671354293823, + "logits/rejected": -1.9479973316192627, + "logps/chosen": -25.326133728027344, + "logps/rejected": -155.04714965820312, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5445749163627625, + "rewards/margins": 0.6390665173530579, + "rewards/rejected": -0.09449157863855362, + "step": 8132 + }, + { + "epoch": 0.47, + "learning_rate": 5.673647679947744e-08, + "logits/chosen": -1.9781662225723267, + "logits/rejected": -1.9676434993743896, + "logps/chosen": -38.06614303588867, + "logps/rejected": -296.0841979980469, + "loss": 0.2014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.573276937007904, + "rewards/margins": 4.6907148361206055, + "rewards/rejected": -4.117437839508057, + "step": 8133 + }, + { + "epoch": 0.47, + "learning_rate": 5.6727138580216894e-08, + "logits/chosen": -2.1253833770751953, + "logits/rejected": -2.126178026199341, + "logps/chosen": -0.9780402183532715, + "logps/rejected": -123.0303726196289, + "loss": 0.4779, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013511097989976406, + "rewards/margins": 1.2053163051605225, + "rewards/rejected": -1.1918052434921265, + "step": 8134 + }, + { + "epoch": 0.47, + "learning_rate": 5.671780012197549e-08, + "logits/chosen": -2.043832540512085, + "logits/rejected": -1.9420791864395142, + "logps/chosen": -230.70545959472656, + "logps/rejected": -480.17071533203125, + "loss": 0.0827, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.482351779937744, + "rewards/margins": 2.2061402797698975, + "rewards/rejected": 0.27621155977249146, + "step": 8135 + }, + { + "epoch": 0.47, + "learning_rate": 5.6708461425084966e-08, + "logits/chosen": -1.9210814237594604, + "logits/rejected": -1.9226843118667603, + "logps/chosen": -30.750267028808594, + "logps/rejected": -131.6759033203125, + "loss": 0.2296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1036227941513062, + "rewards/margins": 1.8816478252410889, + "rewards/rejected": -0.7780250906944275, + "step": 8136 + }, + { + "epoch": 0.47, + "learning_rate": 5.669912248987708e-08, + "logits/chosen": -2.0416228771209717, + "logits/rejected": -2.0169694423675537, + "logps/chosen": -126.87525939941406, + "logps/rejected": -174.28927612304688, + "loss": 0.3059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7043289542198181, + "rewards/margins": 0.6066864132881165, + "rewards/rejected": 0.09764251857995987, + "step": 8137 + }, + { + "epoch": 0.47, + "learning_rate": 5.668978331668359e-08, + "logits/chosen": -1.6409411430358887, + "logits/rejected": -1.6269981861114502, + "logps/chosen": -196.81680297851562, + "logps/rejected": -355.0860595703125, + "loss": 0.3355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1867080926895142, + "rewards/margins": 0.7265335321426392, + "rewards/rejected": 0.460174560546875, + "step": 8138 + }, + { + "epoch": 0.47, + "learning_rate": 5.6680443905836285e-08, + "logits/chosen": -1.8287084102630615, + "logits/rejected": -1.8417766094207764, + "logps/chosen": -254.45372009277344, + "logps/rejected": -362.5737609863281, + "loss": 0.3809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5431411862373352, + "rewards/margins": 0.46611785888671875, + "rewards/rejected": 0.07702331990003586, + "step": 8139 + }, + { + "epoch": 0.47, + "learning_rate": 5.667110425766691e-08, + "logits/chosen": -1.8426637649536133, + "logits/rejected": -1.8442881107330322, + "logps/chosen": -92.39291381835938, + "logps/rejected": -304.91363525390625, + "loss": 0.1445, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0041207075119019, + "rewards/margins": 3.489753246307373, + "rewards/rejected": -2.4856324195861816, + "step": 8140 + }, + { + "epoch": 0.47, + "learning_rate": 5.66617643725073e-08, + "logits/chosen": -1.7936067581176758, + "logits/rejected": -1.785325050354004, + "logps/chosen": -11.138757705688477, + "logps/rejected": -273.68914794921875, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25189515948295593, + "rewards/margins": 2.9061784744262695, + "rewards/rejected": -2.654283285140991, + "step": 8141 + }, + { + "epoch": 0.47, + "learning_rate": 5.6652424250689234e-08, + "logits/chosen": -1.9173604249954224, + "logits/rejected": -1.9127007722854614, + "logps/chosen": -211.8370361328125, + "logps/rejected": -331.12066650390625, + "loss": 0.2418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.894512951374054, + "rewards/margins": 1.8563232421875, + "rewards/rejected": -0.961810290813446, + "step": 8142 + }, + { + "epoch": 0.47, + "learning_rate": 5.664308389254451e-08, + "logits/chosen": -1.8142188787460327, + "logits/rejected": -1.8396570682525635, + "logps/chosen": -210.9879150390625, + "logps/rejected": -328.18280029296875, + "loss": 0.1871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4131561517715454, + "rewards/margins": 1.1874481439590454, + "rewards/rejected": 0.2257080078125, + "step": 8143 + }, + { + "epoch": 0.47, + "learning_rate": 5.663374329840496e-08, + "logits/chosen": -1.842125654220581, + "logits/rejected": -1.879165530204773, + "logps/chosen": -262.25701904296875, + "logps/rejected": -491.0292053222656, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2439606189727783, + "rewards/margins": 5.197772026062012, + "rewards/rejected": -2.9538116455078125, + "step": 8144 + }, + { + "epoch": 0.47, + "learning_rate": 5.6624402468602383e-08, + "logits/chosen": -2.111250877380371, + "logits/rejected": -2.108417272567749, + "logps/chosen": -0.005138065200299025, + "logps/rejected": -213.9510955810547, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003651495208032429, + "rewards/margins": 6.620991230010986, + "rewards/rejected": -6.62135648727417, + "step": 8145 + }, + { + "epoch": 0.47, + "learning_rate": 5.661506140346863e-08, + "logits/chosen": -1.8105603456497192, + "logits/rejected": -1.736786127090454, + "logps/chosen": -230.07659912109375, + "logps/rejected": -438.62457275390625, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.103621006011963, + "rewards/margins": 2.4482407569885254, + "rewards/rejected": -0.3446197509765625, + "step": 8146 + }, + { + "epoch": 0.47, + "learning_rate": 5.660572010333554e-08, + "logits/chosen": -2.0827109813690186, + "logits/rejected": -2.0768260955810547, + "logps/chosen": -0.35024237632751465, + "logps/rejected": -63.18890380859375, + "loss": 0.4715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015915602445602417, + "rewards/margins": 1.2039390802383423, + "rewards/rejected": -1.219854712486267, + "step": 8147 + }, + { + "epoch": 0.47, + "learning_rate": 5.6596378568534974e-08, + "logits/chosen": -1.871258020401001, + "logits/rejected": -1.8524757623672485, + "logps/chosen": -216.62066650390625, + "logps/rejected": -263.11383056640625, + "loss": 0.3896, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.0837433338165283, + "rewards/margins": -0.0781707763671875, + "rewards/rejected": 2.161914110183716, + "step": 8148 + }, + { + "epoch": 0.47, + "learning_rate": 5.658703679939876e-08, + "logits/chosen": -2.09515380859375, + "logits/rejected": -2.0863001346588135, + "logps/chosen": -6.186049461364746, + "logps/rejected": -88.27849578857422, + "loss": 0.4826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18033090233802795, + "rewards/margins": 1.5275022983551025, + "rewards/rejected": -1.707833170890808, + "step": 8149 + }, + { + "epoch": 0.47, + "learning_rate": 5.657769479625878e-08, + "logits/chosen": -2.1122958660125732, + "logits/rejected": -2.102477788925171, + "logps/chosen": -23.628925323486328, + "logps/rejected": -187.52044677734375, + "loss": 0.1579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8809284567832947, + "rewards/margins": 3.049593210220337, + "rewards/rejected": -2.1686646938323975, + "step": 8150 + }, + { + "epoch": 0.47, + "learning_rate": 5.6568352559446905e-08, + "logits/chosen": -1.9676827192306519, + "logits/rejected": -1.9644056558609009, + "logps/chosen": -281.2109069824219, + "logps/rejected": -375.31982421875, + "loss": 0.2166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9168609976768494, + "rewards/margins": 1.6159820556640625, + "rewards/rejected": -0.6991211175918579, + "step": 8151 + }, + { + "epoch": 0.47, + "learning_rate": 5.655901008929502e-08, + "logits/chosen": -1.9937717914581299, + "logits/rejected": -1.9865083694458008, + "logps/chosen": -137.3936004638672, + "logps/rejected": -322.9766845703125, + "loss": 0.1441, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.648402452468872, + "rewards/margins": 1.369276523590088, + "rewards/rejected": 0.27912598848342896, + "step": 8152 + }, + { + "epoch": 0.47, + "learning_rate": 5.6549667386135014e-08, + "logits/chosen": -1.931349754333496, + "logits/rejected": -1.928365707397461, + "logps/chosen": -15.459070205688477, + "logps/rejected": -123.66619873046875, + "loss": 0.4224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05663185194134712, + "rewards/margins": 1.675967812538147, + "rewards/rejected": -1.6193360090255737, + "step": 8153 + }, + { + "epoch": 0.47, + "learning_rate": 5.6540324450298795e-08, + "logits/chosen": -2.1042089462280273, + "logits/rejected": -2.0967371463775635, + "logps/chosen": -0.0017222573515027761, + "logps/rejected": -136.40914916992188, + "loss": 0.3625, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.150355380261317e-05, + "rewards/margins": 3.3908071517944336, + "rewards/rejected": -3.3908486366271973, + "step": 8154 + }, + { + "epoch": 0.47, + "learning_rate": 5.653098128211824e-08, + "logits/chosen": -2.083153247833252, + "logits/rejected": -2.132431745529175, + "logps/chosen": -133.36849975585938, + "logps/rejected": -463.4991455078125, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3546310663223267, + "rewards/margins": 6.742753505706787, + "rewards/rejected": -5.38812255859375, + "step": 8155 + }, + { + "epoch": 0.47, + "learning_rate": 5.65216378819253e-08, + "logits/chosen": -2.2195305824279785, + "logits/rejected": -2.215197801589966, + "logps/chosen": -0.005668820347636938, + "logps/rejected": -104.86396789550781, + "loss": 0.423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001656092790653929, + "rewards/margins": 1.6459877490997314, + "rewards/rejected": -1.6461533308029175, + "step": 8156 + }, + { + "epoch": 0.47, + "learning_rate": 5.651229425005187e-08, + "logits/chosen": -1.8482270240783691, + "logits/rejected": -1.8033279180526733, + "logps/chosen": -130.40151977539062, + "logps/rejected": -455.1654052734375, + "loss": 0.166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6157440543174744, + "rewards/margins": 1.6654725074768066, + "rewards/rejected": -1.0497283935546875, + "step": 8157 + }, + { + "epoch": 0.47, + "learning_rate": 5.6502950386829905e-08, + "logits/chosen": -1.774129867553711, + "logits/rejected": -1.7739781141281128, + "logps/chosen": -175.2965545654297, + "logps/rejected": -201.96287536621094, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5037765502929688, + "rewards/margins": 0.610089123249054, + "rewards/rejected": 0.8936874270439148, + "step": 8158 + }, + { + "epoch": 0.47, + "learning_rate": 5.649360629259132e-08, + "logits/chosen": -1.9803358316421509, + "logits/rejected": -1.978597640991211, + "logps/chosen": -23.902996063232422, + "logps/rejected": -132.48756408691406, + "loss": 0.4142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3950527310371399, + "rewards/margins": 0.9648265838623047, + "rewards/rejected": -0.5697738528251648, + "step": 8159 + }, + { + "epoch": 0.47, + "learning_rate": 5.648426196766809e-08, + "logits/chosen": -2.0497560501098633, + "logits/rejected": -2.043159008026123, + "logps/chosen": -0.2021206170320511, + "logps/rejected": -132.29751586914062, + "loss": 0.5223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01681123860180378, + "rewards/margins": 0.86207515001297, + "rewards/rejected": -0.8788864016532898, + "step": 8160 + }, + { + "epoch": 0.47, + "learning_rate": 5.647491741239214e-08, + "logits/chosen": -1.8077869415283203, + "logits/rejected": -1.7931956052780151, + "logps/chosen": -100.0357666015625, + "logps/rejected": -285.9317626953125, + "loss": 0.2803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06707916408777237, + "rewards/margins": 2.897718667984009, + "rewards/rejected": -2.830639600753784, + "step": 8161 + }, + { + "epoch": 0.47, + "learning_rate": 5.646557262709547e-08, + "logits/chosen": -1.989367127418518, + "logits/rejected": -1.9814780950546265, + "logps/chosen": -16.141090393066406, + "logps/rejected": -176.41787719726562, + "loss": 0.4949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19360600411891937, + "rewards/margins": 1.7281774282455444, + "rewards/rejected": -1.921783447265625, + "step": 8162 + }, + { + "epoch": 0.48, + "learning_rate": 5.6456227612110016e-08, + "logits/chosen": -1.8771235942840576, + "logits/rejected": -1.9097049236297607, + "logps/chosen": -323.3865051269531, + "logps/rejected": -456.4268798828125, + "loss": 0.1075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6519073843955994, + "rewards/margins": 4.482275485992432, + "rewards/rejected": -3.8303680419921875, + "step": 8163 + }, + { + "epoch": 0.48, + "learning_rate": 5.644688236776778e-08, + "logits/chosen": -1.954463243484497, + "logits/rejected": -1.9531371593475342, + "logps/chosen": -0.258764386177063, + "logps/rejected": -152.82130432128906, + "loss": 0.4986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04128975793719292, + "rewards/margins": 0.9907495975494385, + "rewards/rejected": -0.9494598507881165, + "step": 8164 + }, + { + "epoch": 0.48, + "learning_rate": 5.643753689440076e-08, + "logits/chosen": -2.031130075454712, + "logits/rejected": -2.0277647972106934, + "logps/chosen": -12.397930145263672, + "logps/rejected": -144.71084594726562, + "loss": 0.5302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0017543792491778731, + "rewards/margins": 1.1550136804580688, + "rewards/rejected": -1.15325927734375, + "step": 8165 + }, + { + "epoch": 0.48, + "learning_rate": 5.6428191192340925e-08, + "logits/chosen": -2.074679136276245, + "logits/rejected": -2.0695114135742188, + "logps/chosen": -46.10426712036133, + "logps/rejected": -140.1510467529297, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1621685028076172, + "rewards/margins": 1.7934528589248657, + "rewards/rejected": -1.6312843561172485, + "step": 8166 + }, + { + "epoch": 0.48, + "learning_rate": 5.641884526192031e-08, + "logits/chosen": -1.9815187454223633, + "logits/rejected": -1.9772597551345825, + "logps/chosen": -77.59385681152344, + "logps/rejected": -227.8383026123047, + "loss": 0.1924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0931075811386108, + "rewards/margins": 2.0456161499023438, + "rewards/rejected": -0.9525085687637329, + "step": 8167 + }, + { + "epoch": 0.48, + "learning_rate": 5.6409499103470894e-08, + "logits/chosen": -1.838147759437561, + "logits/rejected": -1.8586359024047852, + "logps/chosen": -267.02587890625, + "logps/rejected": -476.3319396972656, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8599518537521362, + "rewards/margins": 2.878527879714966, + "rewards/rejected": -1.0185760259628296, + "step": 8168 + }, + { + "epoch": 0.48, + "learning_rate": 5.640015271732473e-08, + "logits/chosen": -2.080461025238037, + "logits/rejected": -2.073192834854126, + "logps/chosen": -26.236602783203125, + "logps/rejected": -70.88267517089844, + "loss": 0.5513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3806625306606293, + "rewards/margins": 0.28187254071235657, + "rewards/rejected": 0.09878998249769211, + "step": 8169 + }, + { + "epoch": 0.48, + "learning_rate": 5.6390806103813823e-08, + "logits/chosen": -1.8398070335388184, + "logits/rejected": -1.8435988426208496, + "logps/chosen": -4.91420316696167, + "logps/rejected": -114.72816467285156, + "loss": 0.6559, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.042731620371341705, + "rewards/margins": -0.015325020998716354, + "rewards/rejected": 0.05805664137005806, + "step": 8170 + }, + { + "epoch": 0.48, + "learning_rate": 5.638145926327024e-08, + "logits/chosen": -2.021226644515991, + "logits/rejected": -2.0234413146972656, + "logps/chosen": -0.00016247498570010066, + "logps/rejected": -214.54266357421875, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.919127604807727e-05, + "rewards/margins": 3.4007785320281982, + "rewards/rejected": -3.4007294178009033, + "step": 8171 + }, + { + "epoch": 0.48, + "learning_rate": 5.6372112196026e-08, + "logits/chosen": -1.9319820404052734, + "logits/rejected": -1.9178454875946045, + "logps/chosen": -0.0017949659377336502, + "logps/rejected": -113.73939514160156, + "loss": 0.5149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016488203254994005, + "rewards/margins": 0.8431395888328552, + "rewards/rejected": -0.843304455280304, + "step": 8172 + }, + { + "epoch": 0.48, + "learning_rate": 5.636276490241316e-08, + "logits/chosen": -2.0453691482543945, + "logits/rejected": -2.061000108718872, + "logps/chosen": -207.8919219970703, + "logps/rejected": -281.1893005371094, + "loss": 0.2574, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.397297739982605, + "rewards/margins": 0.826942503452301, + "rewards/rejected": 0.570355236530304, + "step": 8173 + }, + { + "epoch": 0.48, + "learning_rate": 5.63534173827638e-08, + "logits/chosen": -1.9130789041519165, + "logits/rejected": -1.9103007316589355, + "logps/chosen": -193.31515502929688, + "logps/rejected": -341.625244140625, + "loss": 0.0525, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9149169921875, + "rewards/margins": 3.101226806640625, + "rewards/rejected": -1.186309814453125, + "step": 8174 + }, + { + "epoch": 0.48, + "learning_rate": 5.634406963740997e-08, + "logits/chosen": -2.009665012359619, + "logits/rejected": -2.0145373344421387, + "logps/chosen": -276.1158752441406, + "logps/rejected": -477.0835266113281, + "loss": 0.0771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.579870581626892, + "rewards/margins": 2.5701842308044434, + "rewards/rejected": -0.990313708782196, + "step": 8175 + }, + { + "epoch": 0.48, + "learning_rate": 5.633472166668376e-08, + "logits/chosen": -1.8180034160614014, + "logits/rejected": -1.80201256275177, + "logps/chosen": -175.94985961914062, + "logps/rejected": -324.9280090332031, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5980453491210938, + "rewards/margins": 0.5689681768417358, + "rewards/rejected": 1.029077172279358, + "step": 8176 + }, + { + "epoch": 0.48, + "learning_rate": 5.632537347091725e-08, + "logits/chosen": -1.975818395614624, + "logits/rejected": -1.96614408493042, + "logps/chosen": -34.932987213134766, + "logps/rejected": -268.5752868652344, + "loss": 0.2372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4354972839355469, + "rewards/margins": 4.460509777069092, + "rewards/rejected": -4.025012493133545, + "step": 8177 + }, + { + "epoch": 0.48, + "learning_rate": 5.631602505044252e-08, + "logits/chosen": -1.7723935842514038, + "logits/rejected": -1.7692523002624512, + "logps/chosen": -16.953611373901367, + "logps/rejected": -97.05137634277344, + "loss": 0.5971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011813354678452015, + "rewards/margins": 0.20908203721046448, + "rewards/rejected": -0.22089539468288422, + "step": 8178 + }, + { + "epoch": 0.48, + "learning_rate": 5.63066764055917e-08, + "logits/chosen": -1.8621968030929565, + "logits/rejected": -1.8609057664871216, + "logps/chosen": -4.887167453765869, + "logps/rejected": -84.22750091552734, + "loss": 0.545, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2522536814212799, + "rewards/margins": 0.395103394985199, + "rewards/rejected": -0.14284972846508026, + "step": 8179 + }, + { + "epoch": 0.48, + "learning_rate": 5.629732753669687e-08, + "logits/chosen": -1.7553867101669312, + "logits/rejected": -1.7580310106277466, + "logps/chosen": -156.84629821777344, + "logps/rejected": -437.07574462890625, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8449844121932983, + "rewards/margins": 3.653062343597412, + "rewards/rejected": -1.8080780506134033, + "step": 8180 + }, + { + "epoch": 0.48, + "learning_rate": 5.6287978444090176e-08, + "logits/chosen": -1.8094823360443115, + "logits/rejected": -1.8116109371185303, + "logps/chosen": -25.510452270507812, + "logps/rejected": -292.601806640625, + "loss": 0.2798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2881559431552887, + "rewards/margins": 7.938387870788574, + "rewards/rejected": -7.650231838226318, + "step": 8181 + }, + { + "epoch": 0.48, + "learning_rate": 5.627862912810373e-08, + "logits/chosen": -1.8736581802368164, + "logits/rejected": -1.8705008029937744, + "logps/chosen": -27.20819091796875, + "logps/rejected": -125.32975769042969, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5795974731445312, + "rewards/margins": 1.1335723400115967, + "rewards/rejected": -0.5539749264717102, + "step": 8182 + }, + { + "epoch": 0.48, + "learning_rate": 5.6269279589069664e-08, + "logits/chosen": -1.9014784097671509, + "logits/rejected": -1.9022494554519653, + "logps/chosen": -280.1107482910156, + "logps/rejected": -330.119140625, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9596832990646362, + "rewards/margins": 2.505636692047119, + "rewards/rejected": -0.5459533929824829, + "step": 8183 + }, + { + "epoch": 0.48, + "learning_rate": 5.6259929827320104e-08, + "logits/chosen": -1.8047505617141724, + "logits/rejected": -1.8083065748214722, + "logps/chosen": -318.16595458984375, + "logps/rejected": -479.2978515625, + "loss": 0.0619, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4948456287384033, + "rewards/margins": 2.4668030738830566, + "rewards/rejected": -0.9719573855400085, + "step": 8184 + }, + { + "epoch": 0.48, + "learning_rate": 5.625057984318722e-08, + "logits/chosen": -2.155208110809326, + "logits/rejected": -2.1526715755462646, + "logps/chosen": -58.29492950439453, + "logps/rejected": -283.6778564453125, + "loss": 0.2346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43344956636428833, + "rewards/margins": 2.723900556564331, + "rewards/rejected": -2.2904510498046875, + "step": 8185 + }, + { + "epoch": 0.48, + "learning_rate": 5.6241229637003184e-08, + "logits/chosen": -2.014169692993164, + "logits/rejected": -2.0095040798187256, + "logps/chosen": -27.172462463378906, + "logps/rejected": -223.20802307128906, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28104934096336365, + "rewards/margins": 3.259380340576172, + "rewards/rejected": -2.9783310890197754, + "step": 8186 + }, + { + "epoch": 0.48, + "learning_rate": 5.623187920910013e-08, + "logits/chosen": -2.14982533454895, + "logits/rejected": -2.1378743648529053, + "logps/chosen": -67.00086212158203, + "logps/rejected": -237.79147338867188, + "loss": 0.1857, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5193268060684204, + "rewards/margins": 1.2325057983398438, + "rewards/rejected": 0.2868209779262543, + "step": 8187 + }, + { + "epoch": 0.48, + "learning_rate": 5.6222528559810244e-08, + "logits/chosen": -2.074596643447876, + "logits/rejected": -2.060163736343384, + "logps/chosen": -49.29100036621094, + "logps/rejected": -236.021728515625, + "loss": 0.2622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39826318621635437, + "rewards/margins": 4.349543571472168, + "rewards/rejected": -3.951280355453491, + "step": 8188 + }, + { + "epoch": 0.48, + "learning_rate": 5.621317768946571e-08, + "logits/chosen": -1.8649358749389648, + "logits/rejected": -1.8540921211242676, + "logps/chosen": -52.97102355957031, + "logps/rejected": -286.7795104980469, + "loss": 0.1732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9685222506523132, + "rewards/margins": 3.041145086288452, + "rewards/rejected": -2.072622776031494, + "step": 8189 + }, + { + "epoch": 0.48, + "learning_rate": 5.6203826598398697e-08, + "logits/chosen": -2.020012140274048, + "logits/rejected": -1.9907294511795044, + "logps/chosen": -98.5671157836914, + "logps/rejected": -443.87060546875, + "loss": 0.1843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4649543762207031, + "rewards/margins": 6.983112335205078, + "rewards/rejected": -6.518157958984375, + "step": 8190 + }, + { + "epoch": 0.48, + "learning_rate": 5.619447528694142e-08, + "logits/chosen": -1.9430252313613892, + "logits/rejected": -1.9391754865646362, + "logps/chosen": -14.125086784362793, + "logps/rejected": -321.1939697265625, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08750257641077042, + "rewards/margins": 5.733504772186279, + "rewards/rejected": -5.646002292633057, + "step": 8191 + }, + { + "epoch": 0.48, + "learning_rate": 5.61851237554261e-08, + "logits/chosen": -1.8800462484359741, + "logits/rejected": -1.877160668373108, + "logps/chosen": -68.66942596435547, + "logps/rejected": -217.07583618164062, + "loss": 0.4309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5147720575332642, + "rewards/margins": 3.93989896774292, + "rewards/rejected": -4.4546709060668945, + "step": 8192 + }, + { + "epoch": 0.48, + "learning_rate": 5.617577200418492e-08, + "logits/chosen": -1.951130986213684, + "logits/rejected": -1.9552216529846191, + "logps/chosen": -0.03487636148929596, + "logps/rejected": -225.2854461669922, + "loss": 0.4045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009080268791876733, + "rewards/margins": 1.6999404430389404, + "rewards/rejected": -1.7008484601974487, + "step": 8193 + }, + { + "epoch": 0.48, + "learning_rate": 5.616642003355011e-08, + "logits/chosen": -1.8728777170181274, + "logits/rejected": -1.8566879034042358, + "logps/chosen": -319.48193359375, + "logps/rejected": -450.819580078125, + "loss": 0.0524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2072571516036987, + "rewards/margins": 3.452279567718506, + "rewards/rejected": -2.2450225353240967, + "step": 8194 + }, + { + "epoch": 0.48, + "learning_rate": 5.615706784385389e-08, + "logits/chosen": -1.8332974910736084, + "logits/rejected": -1.8407907485961914, + "logps/chosen": -170.45431518554688, + "logps/rejected": -307.12603759765625, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.653691053390503, + "rewards/margins": 2.5343124866485596, + "rewards/rejected": 0.11937866359949112, + "step": 8195 + }, + { + "epoch": 0.48, + "learning_rate": 5.614771543542851e-08, + "logits/chosen": -1.9885035753250122, + "logits/rejected": -2.0379583835601807, + "logps/chosen": -202.4932861328125, + "logps/rejected": -275.9554443359375, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0710830688476562, + "rewards/margins": 0.5257126092910767, + "rewards/rejected": 1.5453704595565796, + "step": 8196 + }, + { + "epoch": 0.48, + "learning_rate": 5.61383628086062e-08, + "logits/chosen": -1.8642245531082153, + "logits/rejected": -1.9080662727355957, + "logps/chosen": -196.27276611328125, + "logps/rejected": -380.63507080078125, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.903006076812744, + "rewards/margins": 4.231912612915039, + "rewards/rejected": -1.3289062976837158, + "step": 8197 + }, + { + "epoch": 0.48, + "learning_rate": 5.612900996371923e-08, + "logits/chosen": -2.00642728805542, + "logits/rejected": -1.9965624809265137, + "logps/chosen": -76.80793762207031, + "logps/rejected": -338.7934265136719, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4263428449630737, + "rewards/margins": 4.7834930419921875, + "rewards/rejected": -3.3571503162384033, + "step": 8198 + }, + { + "epoch": 0.48, + "learning_rate": 5.611965690109983e-08, + "logits/chosen": -2.017927408218384, + "logits/rejected": -2.0169625282287598, + "logps/chosen": -0.002897393424063921, + "logps/rejected": -237.10963439941406, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00014513493806589395, + "rewards/margins": 6.106285095214844, + "rewards/rejected": -6.10614013671875, + "step": 8199 + }, + { + "epoch": 0.48, + "learning_rate": 5.61103036210803e-08, + "logits/chosen": -1.98835289478302, + "logits/rejected": -1.9910030364990234, + "logps/chosen": -15.988800048828125, + "logps/rejected": -131.838623046875, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13434390723705292, + "rewards/margins": 2.326857089996338, + "rewards/rejected": -2.1925132274627686, + "step": 8200 + }, + { + "epoch": 0.48, + "learning_rate": 5.610095012399288e-08, + "logits/chosen": -2.0398802757263184, + "logits/rejected": -2.0268077850341797, + "logps/chosen": -15.326593399047852, + "logps/rejected": -267.5107421875, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21864643692970276, + "rewards/margins": 5.275753974914551, + "rewards/rejected": -5.057107448577881, + "step": 8201 + }, + { + "epoch": 0.48, + "learning_rate": 5.609159641016988e-08, + "logits/chosen": -1.983951210975647, + "logits/rejected": -1.962299108505249, + "logps/chosen": -64.43988800048828, + "logps/rejected": -269.85528564453125, + "loss": 0.155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7542320489883423, + "rewards/margins": 4.298917293548584, + "rewards/rejected": -3.5446853637695312, + "step": 8202 + }, + { + "epoch": 0.48, + "learning_rate": 5.608224247994356e-08, + "logits/chosen": -2.070286512374878, + "logits/rejected": -2.0358641147613525, + "logps/chosen": -149.93643188476562, + "logps/rejected": -356.437255859375, + "loss": 0.5662, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.23177491128444672, + "rewards/margins": -0.4485992193222046, + "rewards/rejected": 0.6803741455078125, + "step": 8203 + }, + { + "epoch": 0.48, + "learning_rate": 5.6072888333646264e-08, + "logits/chosen": -2.008897066116333, + "logits/rejected": -1.9226771593093872, + "logps/chosen": -277.3551940917969, + "logps/rejected": -455.02862548828125, + "loss": 0.2412, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3472412824630737, + "rewards/margins": 1.1301636695861816, + "rewards/rejected": 0.21707764267921448, + "step": 8204 + }, + { + "epoch": 0.48, + "learning_rate": 5.6063533971610244e-08, + "logits/chosen": -1.9291054010391235, + "logits/rejected": -1.9215960502624512, + "logps/chosen": -29.50526237487793, + "logps/rejected": -260.66705322265625, + "loss": 0.2124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47730502486228943, + "rewards/margins": 3.047435998916626, + "rewards/rejected": -2.5701310634613037, + "step": 8205 + }, + { + "epoch": 0.48, + "learning_rate": 5.605417939416784e-08, + "logits/chosen": -1.7188509702682495, + "logits/rejected": -1.730011224746704, + "logps/chosen": -162.14083862304688, + "logps/rejected": -436.4801940917969, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3262817859649658, + "rewards/margins": 2.470700263977051, + "rewards/rejected": -1.1444183588027954, + "step": 8206 + }, + { + "epoch": 0.48, + "learning_rate": 5.604482460165139e-08, + "logits/chosen": -1.8832122087478638, + "logits/rejected": -1.8714194297790527, + "logps/chosen": -10.700286865234375, + "logps/rejected": -274.84393310546875, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1423112004995346, + "rewards/margins": 6.234078884124756, + "rewards/rejected": -6.09176778793335, + "step": 8207 + }, + { + "epoch": 0.48, + "learning_rate": 5.603546959439318e-08, + "logits/chosen": -1.9904303550720215, + "logits/rejected": -1.9903359413146973, + "logps/chosen": -35.861331939697266, + "logps/rejected": -109.32179260253906, + "loss": 0.3985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3135314881801605, + "rewards/margins": 0.9691680669784546, + "rewards/rejected": -0.6556366086006165, + "step": 8208 + }, + { + "epoch": 0.48, + "learning_rate": 5.6026114372725584e-08, + "logits/chosen": -2.0400569438934326, + "logits/rejected": -2.046621322631836, + "logps/chosen": -0.6971763968467712, + "logps/rejected": -283.5414123535156, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03434806689620018, + "rewards/margins": 2.8609020709991455, + "rewards/rejected": -2.895250082015991, + "step": 8209 + }, + { + "epoch": 0.48, + "learning_rate": 5.601675893698091e-08, + "logits/chosen": -1.986020565032959, + "logits/rejected": -1.9673922061920166, + "logps/chosen": -48.74211502075195, + "logps/rejected": -258.3289489746094, + "loss": 0.128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0707584619522095, + "rewards/margins": 3.7477102279663086, + "rewards/rejected": -2.6769516468048096, + "step": 8210 + }, + { + "epoch": 0.48, + "learning_rate": 5.600740328749154e-08, + "logits/chosen": -1.990045189857483, + "logits/rejected": -1.9908902645111084, + "logps/chosen": -240.8175811767578, + "logps/rejected": -389.0544738769531, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0111496448516846, + "rewards/margins": 3.1559343338012695, + "rewards/rejected": -1.1447845697402954, + "step": 8211 + }, + { + "epoch": 0.48, + "learning_rate": 5.599804742458981e-08, + "logits/chosen": -1.8909509181976318, + "logits/rejected": -1.8791345357894897, + "logps/chosen": -62.62487030029297, + "logps/rejected": -164.2223663330078, + "loss": 0.2044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.224982500076294, + "rewards/margins": 1.243660807609558, + "rewards/rejected": -0.01867828331887722, + "step": 8212 + }, + { + "epoch": 0.48, + "learning_rate": 5.5988691348608106e-08, + "logits/chosen": -1.9631446599960327, + "logits/rejected": -1.9600938558578491, + "logps/chosen": -94.77510833740234, + "logps/rejected": -369.8742980957031, + "loss": 0.2468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5663002133369446, + "rewards/margins": 3.189167022705078, + "rewards/rejected": -2.6228668689727783, + "step": 8213 + }, + { + "epoch": 0.48, + "learning_rate": 5.597933505987878e-08, + "logits/chosen": -1.7467530965805054, + "logits/rejected": -1.7445555925369263, + "logps/chosen": -0.27502912282943726, + "logps/rejected": -145.99795532226562, + "loss": 0.3805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01176440715789795, + "rewards/margins": 3.002437114715576, + "rewards/rejected": -3.0142014026641846, + "step": 8214 + }, + { + "epoch": 0.48, + "learning_rate": 5.596997855873424e-08, + "logits/chosen": -1.914951205253601, + "logits/rejected": -1.887674331665039, + "logps/chosen": -237.873779296875, + "logps/rejected": -333.8048095703125, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.368243455886841, + "rewards/margins": 1.8364441394805908, + "rewards/rejected": 0.53179931640625, + "step": 8215 + }, + { + "epoch": 0.48, + "learning_rate": 5.596062184550685e-08, + "logits/chosen": -1.8862292766571045, + "logits/rejected": -1.895854115486145, + "logps/chosen": -294.4017639160156, + "logps/rejected": -351.45904541015625, + "loss": 0.1107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.764520287513733, + "rewards/margins": 1.807196021080017, + "rewards/rejected": -0.04267578199505806, + "step": 8216 + }, + { + "epoch": 0.48, + "learning_rate": 5.595126492052903e-08, + "logits/chosen": -1.9721169471740723, + "logits/rejected": -1.968711495399475, + "logps/chosen": -13.143280029296875, + "logps/rejected": -107.05331420898438, + "loss": 0.5321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16179589927196503, + "rewards/margins": 0.5772911906242371, + "rewards/rejected": -0.4154953062534332, + "step": 8217 + }, + { + "epoch": 0.48, + "learning_rate": 5.5941907784133147e-08, + "logits/chosen": -1.9762758016586304, + "logits/rejected": -2.013428211212158, + "logps/chosen": -230.64991760253906, + "logps/rejected": -329.4117736816406, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.445631504058838, + "rewards/margins": 2.5322771072387695, + "rewards/rejected": -0.08664550632238388, + "step": 8218 + }, + { + "epoch": 0.48, + "learning_rate": 5.593255043665166e-08, + "logits/chosen": -2.072782516479492, + "logits/rejected": -2.078383445739746, + "logps/chosen": -30.455211639404297, + "logps/rejected": -195.59075927734375, + "loss": 0.4626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06592941284179688, + "rewards/margins": 1.115088701248169, + "rewards/rejected": -1.049159288406372, + "step": 8219 + }, + { + "epoch": 0.48, + "learning_rate": 5.592319287841693e-08, + "logits/chosen": -1.9223947525024414, + "logits/rejected": -1.8841019868850708, + "logps/chosen": -207.870849609375, + "logps/rejected": -424.2266845703125, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7514801025390625, + "rewards/margins": 2.1464691162109375, + "rewards/rejected": -0.394989013671875, + "step": 8220 + }, + { + "epoch": 0.48, + "learning_rate": 5.5913835109761445e-08, + "logits/chosen": -1.9354050159454346, + "logits/rejected": -1.940940022468567, + "logps/chosen": -209.7328338623047, + "logps/rejected": -292.32183837890625, + "loss": 0.0778, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.116764783859253, + "rewards/margins": 2.2097275257110596, + "rewards/rejected": -0.09296264499425888, + "step": 8221 + }, + { + "epoch": 0.48, + "learning_rate": 5.590447713101759e-08, + "logits/chosen": -2.072202682495117, + "logits/rejected": -2.0711286067962646, + "logps/chosen": -24.137950897216797, + "logps/rejected": -152.07302856445312, + "loss": 0.4737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3523612916469574, + "rewards/margins": 0.7322395443916321, + "rewards/rejected": -0.3798782527446747, + "step": 8222 + }, + { + "epoch": 0.48, + "learning_rate": 5.5895118942517836e-08, + "logits/chosen": -1.911329984664917, + "logits/rejected": -1.9073916673660278, + "logps/chosen": -16.68982696533203, + "logps/rejected": -117.42552185058594, + "loss": 0.5613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5130864977836609, + "rewards/margins": 1.717862844467163, + "rewards/rejected": -2.2309494018554688, + "step": 8223 + }, + { + "epoch": 0.48, + "learning_rate": 5.588576054459462e-08, + "logits/chosen": -2.0244827270507812, + "logits/rejected": -2.0229201316833496, + "logps/chosen": -0.00019060268823523074, + "logps/rejected": -175.28286743164062, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.469866780709708e-06, + "rewards/margins": 2.936440944671631, + "rewards/rejected": -2.936436414718628, + "step": 8224 + }, + { + "epoch": 0.48, + "learning_rate": 5.5876401937580394e-08, + "logits/chosen": -1.8907097578048706, + "logits/rejected": -1.8902308940887451, + "logps/chosen": -0.000473202409921214, + "logps/rejected": -107.5042495727539, + "loss": 0.4196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3121075426170137e-05, + "rewards/margins": 1.9234602451324463, + "rewards/rejected": -1.9234733581542969, + "step": 8225 + }, + { + "epoch": 0.48, + "learning_rate": 5.5867043121807636e-08, + "logits/chosen": -1.9044960737228394, + "logits/rejected": -1.916918158531189, + "logps/chosen": -0.06966964900493622, + "logps/rejected": -264.429443359375, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038748644292354584, + "rewards/margins": 5.96550989151001, + "rewards/rejected": -5.969384670257568, + "step": 8226 + }, + { + "epoch": 0.48, + "learning_rate": 5.585768409760879e-08, + "logits/chosen": -1.8659225702285767, + "logits/rejected": -1.8706374168395996, + "logps/chosen": -20.12398910522461, + "logps/rejected": -119.18569946289062, + "loss": 0.5181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.214070126414299, + "rewards/margins": 0.37121811509132385, + "rewards/rejected": -0.15714798867702484, + "step": 8227 + }, + { + "epoch": 0.48, + "learning_rate": 5.584832486531637e-08, + "logits/chosen": -2.115058422088623, + "logits/rejected": -2.104865789413452, + "logps/chosen": -51.0395393371582, + "logps/rejected": -263.65826416015625, + "loss": 0.4092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11489524692296982, + "rewards/margins": 2.1426877975463867, + "rewards/rejected": -2.027792453765869, + "step": 8228 + }, + { + "epoch": 0.48, + "learning_rate": 5.583896542526283e-08, + "logits/chosen": -1.9441463947296143, + "logits/rejected": -1.9608155488967896, + "logps/chosen": -291.7231140136719, + "logps/rejected": -528.2779541015625, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.664816379547119, + "rewards/margins": 4.918509006500244, + "rewards/rejected": -2.253692626953125, + "step": 8229 + }, + { + "epoch": 0.48, + "learning_rate": 5.5829605777780686e-08, + "logits/chosen": -1.8374234437942505, + "logits/rejected": -1.8360434770584106, + "logps/chosen": -0.8280571699142456, + "logps/rejected": -21.309772491455078, + "loss": 0.7135, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04676901549100876, + "rewards/margins": -0.0009323768317699432, + "rewards/rejected": -0.045836638659238815, + "step": 8230 + }, + { + "epoch": 0.48, + "learning_rate": 5.5820245923202426e-08, + "logits/chosen": -1.9386652708053589, + "logits/rejected": -1.9186331033706665, + "logps/chosen": -237.63616943359375, + "logps/rejected": -370.4505310058594, + "loss": 0.0576, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4805543422698975, + "rewards/margins": 2.4900879859924316, + "rewards/rejected": -0.009533691219985485, + "step": 8231 + }, + { + "epoch": 0.48, + "learning_rate": 5.5810885861860546e-08, + "logits/chosen": -1.8068510293960571, + "logits/rejected": -1.8072706460952759, + "logps/chosen": -65.69700622558594, + "logps/rejected": -251.9476776123047, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3618637025356293, + "rewards/margins": 2.563647508621216, + "rewards/rejected": -2.2017838954925537, + "step": 8232 + }, + { + "epoch": 0.48, + "learning_rate": 5.58015255940876e-08, + "logits/chosen": -1.8465369939804077, + "logits/rejected": -1.8535912036895752, + "logps/chosen": -182.45291137695312, + "logps/rejected": -270.93878173828125, + "loss": 0.5041, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2289704084396362, + "rewards/margins": -0.1587066650390625, + "rewards/rejected": 1.3876770734786987, + "step": 8233 + }, + { + "epoch": 0.48, + "learning_rate": 5.5792165120216075e-08, + "logits/chosen": -2.135627269744873, + "logits/rejected": -2.1214020252227783, + "logps/chosen": -7.175539016723633, + "logps/rejected": -113.63422393798828, + "loss": 0.5041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03016657941043377, + "rewards/margins": 0.9172347784042358, + "rewards/rejected": -0.8870682120323181, + "step": 8234 + }, + { + "epoch": 0.48, + "learning_rate": 5.578280444057851e-08, + "logits/chosen": -1.8519233465194702, + "logits/rejected": -1.9066576957702637, + "logps/chosen": -312.1915283203125, + "logps/rejected": -397.6637268066406, + "loss": 0.1099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5415191650390625, + "rewards/margins": 1.9957153797149658, + "rewards/rejected": -0.45419618487358093, + "step": 8235 + }, + { + "epoch": 0.48, + "learning_rate": 5.577344355550745e-08, + "logits/chosen": -1.8752782344818115, + "logits/rejected": -1.8636994361877441, + "logps/chosen": -245.5742645263672, + "logps/rejected": -332.488525390625, + "loss": 0.2153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.896998643875122, + "rewards/margins": 0.7751724720001221, + "rewards/rejected": 1.121826171875, + "step": 8236 + }, + { + "epoch": 0.48, + "learning_rate": 5.5764082465335425e-08, + "logits/chosen": -2.045048475265503, + "logits/rejected": -2.0353617668151855, + "logps/chosen": -0.00013851786206942052, + "logps/rejected": -219.58908081054688, + "loss": 0.3443, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.602050805464387e-07, + "rewards/margins": 5.141592979431152, + "rewards/rejected": -5.141592502593994, + "step": 8237 + }, + { + "epoch": 0.48, + "learning_rate": 5.5754721170395016e-08, + "logits/chosen": -1.6333459615707397, + "logits/rejected": -1.6053483486175537, + "logps/chosen": -157.71316528320312, + "logps/rejected": -318.10980224609375, + "loss": 0.2639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5302765369415283, + "rewards/margins": 0.8718536496162415, + "rewards/rejected": 0.6584228873252869, + "step": 8238 + }, + { + "epoch": 0.48, + "learning_rate": 5.5745359671018746e-08, + "logits/chosen": -1.9409093856811523, + "logits/rejected": -1.9418222904205322, + "logps/chosen": -58.56566619873047, + "logps/rejected": -230.65744018554688, + "loss": 0.1599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0379890203475952, + "rewards/margins": 2.9104042053222656, + "rewards/rejected": -1.8724151849746704, + "step": 8239 + }, + { + "epoch": 0.48, + "learning_rate": 5.5735997967539206e-08, + "logits/chosen": -1.9663382768630981, + "logits/rejected": -1.9645863771438599, + "logps/chosen": -0.05154800042510033, + "logps/rejected": -277.1854248046875, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015007853507995605, + "rewards/margins": 5.787209510803223, + "rewards/rejected": -5.788710117340088, + "step": 8240 + }, + { + "epoch": 0.48, + "learning_rate": 5.572663606028894e-08, + "logits/chosen": -1.9187138080596924, + "logits/rejected": -1.922583818435669, + "logps/chosen": -209.5425567626953, + "logps/rejected": -302.0706787109375, + "loss": 0.1311, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.351484775543213, + "rewards/margins": 1.3995132446289062, + "rewards/rejected": 0.9519714713096619, + "step": 8241 + }, + { + "epoch": 0.48, + "learning_rate": 5.5717273949600577e-08, + "logits/chosen": -1.9966719150543213, + "logits/rejected": -1.9092812538146973, + "logps/chosen": -205.71646118164062, + "logps/rejected": -599.8572998046875, + "loss": 0.0581, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.653782606124878, + "rewards/margins": 2.313310146331787, + "rewards/rejected": 0.34047242999076843, + "step": 8242 + }, + { + "epoch": 0.48, + "learning_rate": 5.570791163580666e-08, + "logits/chosen": -1.7513439655303955, + "logits/rejected": -1.7452030181884766, + "logps/chosen": -288.9793701171875, + "logps/rejected": -447.54669189453125, + "loss": 0.216, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.564416527748108, + "rewards/margins": 0.7715057730674744, + "rewards/rejected": 0.7929107546806335, + "step": 8243 + }, + { + "epoch": 0.48, + "learning_rate": 5.569854911923981e-08, + "logits/chosen": -2.0199999809265137, + "logits/rejected": -2.052799701690674, + "logps/chosen": -130.491943359375, + "logps/rejected": -297.6139831542969, + "loss": 0.4784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9863266348838806, + "rewards/margins": 0.06846165657043457, + "rewards/rejected": 0.917864978313446, + "step": 8244 + }, + { + "epoch": 0.48, + "learning_rate": 5.568918640023261e-08, + "logits/chosen": -1.8490080833435059, + "logits/rejected": -1.845853328704834, + "logps/chosen": -14.539948463439941, + "logps/rejected": -130.42633056640625, + "loss": 0.5379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3678254187107086, + "rewards/margins": 0.21836252510547638, + "rewards/rejected": 0.14946289360523224, + "step": 8245 + }, + { + "epoch": 0.48, + "learning_rate": 5.5679823479117694e-08, + "logits/chosen": -2.044116973876953, + "logits/rejected": -2.037224531173706, + "logps/chosen": -79.66168212890625, + "logps/rejected": -356.48016357421875, + "loss": 0.511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21331405639648438, + "rewards/margins": 0.8492835760116577, + "rewards/rejected": -1.062597632408142, + "step": 8246 + }, + { + "epoch": 0.48, + "learning_rate": 5.567046035622764e-08, + "logits/chosen": -1.9227635860443115, + "logits/rejected": -1.9186196327209473, + "logps/chosen": -25.92617416381836, + "logps/rejected": -172.7509307861328, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4221786558628082, + "rewards/margins": 2.698727607727051, + "rewards/rejected": -2.2765488624572754, + "step": 8247 + }, + { + "epoch": 0.48, + "learning_rate": 5.5661097031895124e-08, + "logits/chosen": -1.9250164031982422, + "logits/rejected": -1.905086874961853, + "logps/chosen": -172.77096557617188, + "logps/rejected": -377.2077941894531, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3154237270355225, + "rewards/margins": 3.5053133964538574, + "rewards/rejected": -1.1898895502090454, + "step": 8248 + }, + { + "epoch": 0.48, + "learning_rate": 5.565173350645274e-08, + "logits/chosen": -2.01597261428833, + "logits/rejected": -2.015673875808716, + "logps/chosen": -159.36746215820312, + "logps/rejected": -202.52186584472656, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4591903686523438, + "rewards/margins": 0.1902313232421875, + "rewards/rejected": 2.2689590454101562, + "step": 8249 + }, + { + "epoch": 0.48, + "learning_rate": 5.5642369780233135e-08, + "logits/chosen": -1.8013018369674683, + "logits/rejected": -1.782188892364502, + "logps/chosen": -204.18453979492188, + "logps/rejected": -219.1287841796875, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.593891978263855, + "rewards/margins": 0.17571568489074707, + "rewards/rejected": 1.418176293373108, + "step": 8250 + }, + { + "epoch": 0.48, + "learning_rate": 5.563300585356895e-08, + "logits/chosen": -1.9299067258834839, + "logits/rejected": -1.9320322275161743, + "logps/chosen": -7.736525003565475e-05, + "logps/rejected": -287.7010498046875, + "loss": 0.3397, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8145662983879447e-07, + "rewards/margins": 5.442426681518555, + "rewards/rejected": -5.442427158355713, + "step": 8251 + }, + { + "epoch": 0.48, + "learning_rate": 5.562364172679286e-08, + "logits/chosen": -2.1409730911254883, + "logits/rejected": -2.117323160171509, + "logps/chosen": -137.8953399658203, + "logps/rejected": -243.3312225341797, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10602875053882599, + "rewards/margins": 0.9967026114463806, + "rewards/rejected": -1.1027313470840454, + "step": 8252 + }, + { + "epoch": 0.48, + "learning_rate": 5.5614277400237484e-08, + "logits/chosen": -1.9023516178131104, + "logits/rejected": -1.9093878269195557, + "logps/chosen": -21.43859100341797, + "logps/rejected": -122.25920867919922, + "loss": 0.5844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0388551726937294, + "rewards/margins": 0.4229150712490082, + "rewards/rejected": -0.3840599060058594, + "step": 8253 + }, + { + "epoch": 0.48, + "learning_rate": 5.560491287423552e-08, + "logits/chosen": -1.981242060661316, + "logits/rejected": -1.9746160507202148, + "logps/chosen": -13.972761154174805, + "logps/rejected": -174.384521484375, + "loss": 0.4539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19077721238136292, + "rewards/margins": 0.7662487030029297, + "rewards/rejected": -0.5754715204238892, + "step": 8254 + }, + { + "epoch": 0.48, + "learning_rate": 5.559554814911965e-08, + "logits/chosen": -1.8984262943267822, + "logits/rejected": -1.8982449769973755, + "logps/chosen": -19.11767578125, + "logps/rejected": -234.35133361816406, + "loss": 0.2474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5295839309692383, + "rewards/margins": 3.785557746887207, + "rewards/rejected": -3.2559738159179688, + "step": 8255 + }, + { + "epoch": 0.48, + "learning_rate": 5.558618322522253e-08, + "logits/chosen": -1.9465140104293823, + "logits/rejected": -1.9479714632034302, + "logps/chosen": -0.1431087851524353, + "logps/rejected": -159.45425415039062, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006350114941596985, + "rewards/margins": 3.970242977142334, + "rewards/rejected": -3.976593017578125, + "step": 8256 + }, + { + "epoch": 0.48, + "learning_rate": 5.5576818102876853e-08, + "logits/chosen": -1.835985541343689, + "logits/rejected": -1.8426133394241333, + "logps/chosen": -182.6376953125, + "logps/rejected": -208.52838134765625, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.191366672515869, + "rewards/margins": 2.643441915512085, + "rewards/rejected": 0.547924816608429, + "step": 8257 + }, + { + "epoch": 0.48, + "learning_rate": 5.5567452782415314e-08, + "logits/chosen": -2.0160372257232666, + "logits/rejected": -2.0109329223632812, + "logps/chosen": -47.5001106262207, + "logps/rejected": -239.51222229003906, + "loss": 0.2578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3748413026332855, + "rewards/margins": 4.49429178237915, + "rewards/rejected": -4.119450569152832, + "step": 8258 + }, + { + "epoch": 0.48, + "learning_rate": 5.555808726417064e-08, + "logits/chosen": -1.9458497762680054, + "logits/rejected": -2.0051159858703613, + "logps/chosen": -133.52313232421875, + "logps/rejected": -216.14637756347656, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.016815185546875, + "rewards/margins": 0.4159637689590454, + "rewards/rejected": 1.6008514165878296, + "step": 8259 + }, + { + "epoch": 0.48, + "learning_rate": 5.5548721548475496e-08, + "logits/chosen": -1.9683072566986084, + "logits/rejected": -2.0200679302215576, + "logps/chosen": -376.6524353027344, + "logps/rejected": -421.3004455566406, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.583731174468994, + "rewards/margins": 4.266672134399414, + "rewards/rejected": -1.6829407215118408, + "step": 8260 + }, + { + "epoch": 0.48, + "learning_rate": 5.5539355635662634e-08, + "logits/chosen": -2.0193116664886475, + "logits/rejected": -2.0111074447631836, + "logps/chosen": -91.60440063476562, + "logps/rejected": -224.7669219970703, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3925079107284546, + "rewards/margins": 3.1585373878479004, + "rewards/rejected": -1.7660293579101562, + "step": 8261 + }, + { + "epoch": 0.48, + "learning_rate": 5.5529989526064755e-08, + "logits/chosen": -1.6602240800857544, + "logits/rejected": -1.6583991050720215, + "logps/chosen": -39.9886360168457, + "logps/rejected": -139.95663452148438, + "loss": 0.4107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4897274076938629, + "rewards/margins": 1.1583797931671143, + "rewards/rejected": -0.668652355670929, + "step": 8262 + }, + { + "epoch": 0.48, + "learning_rate": 5.5520623220014605e-08, + "logits/chosen": -1.9470272064208984, + "logits/rejected": -1.9276657104492188, + "logps/chosen": -172.47021484375, + "logps/rejected": -415.4215087890625, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7519943714141846, + "rewards/margins": 6.704680442810059, + "rewards/rejected": -4.952685832977295, + "step": 8263 + }, + { + "epoch": 0.48, + "learning_rate": 5.55112567178449e-08, + "logits/chosen": -1.9230347871780396, + "logits/rejected": -1.907688021659851, + "logps/chosen": -29.62009620666504, + "logps/rejected": -325.9377136230469, + "loss": 0.2152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3417719006538391, + "rewards/margins": 4.739184379577637, + "rewards/rejected": -4.397412300109863, + "step": 8264 + }, + { + "epoch": 0.48, + "learning_rate": 5.550189001988842e-08, + "logits/chosen": -1.943487286567688, + "logits/rejected": -1.9347347021102905, + "logps/chosen": -27.020587921142578, + "logps/rejected": -166.577392578125, + "loss": 0.68, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9747028350830078, + "rewards/margins": 1.7518200874328613, + "rewards/rejected": -2.726522922515869, + "step": 8265 + }, + { + "epoch": 0.48, + "learning_rate": 5.549252312647788e-08, + "logits/chosen": -2.080305337905884, + "logits/rejected": -2.0743231773376465, + "logps/chosen": -110.11306762695312, + "logps/rejected": -306.48583984375, + "loss": 0.1363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8630607724189758, + "rewards/margins": 4.2693352699279785, + "rewards/rejected": -3.4062745571136475, + "step": 8266 + }, + { + "epoch": 0.48, + "learning_rate": 5.548315603794604e-08, + "logits/chosen": -2.0047926902770996, + "logits/rejected": -1.99850594997406, + "logps/chosen": -4.518169403076172, + "logps/rejected": -102.31251525878906, + "loss": 0.6092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035300444811582565, + "rewards/margins": 0.37235337495803833, + "rewards/rejected": -0.40765380859375, + "step": 8267 + }, + { + "epoch": 0.48, + "learning_rate": 5.5473788754625705e-08, + "logits/chosen": -1.9093434810638428, + "logits/rejected": -1.9042925834655762, + "logps/chosen": -25.485952377319336, + "logps/rejected": -202.55776977539062, + "loss": 0.2594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4056144654750824, + "rewards/margins": 3.528684139251709, + "rewards/rejected": -3.1230697631835938, + "step": 8268 + }, + { + "epoch": 0.48, + "learning_rate": 5.5464421276849593e-08, + "logits/chosen": -2.0358004570007324, + "logits/rejected": -2.0306637287139893, + "logps/chosen": -1.8867053985595703, + "logps/rejected": -114.50733184814453, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03863641247153282, + "rewards/margins": 1.649112343788147, + "rewards/rejected": -1.6877487897872925, + "step": 8269 + }, + { + "epoch": 0.48, + "learning_rate": 5.545505360495053e-08, + "logits/chosen": -1.7927395105361938, + "logits/rejected": -1.7930351495742798, + "logps/chosen": -9.071666863746941e-05, + "logps/rejected": -29.416399002075195, + "loss": 0.631, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.430472252650361e-06, + "rewards/margins": 0.2643565237522125, + "rewards/rejected": -0.26435795426368713, + "step": 8270 + }, + { + "epoch": 0.48, + "learning_rate": 5.544568573926126e-08, + "logits/chosen": -1.9466954469680786, + "logits/rejected": -1.9783095121383667, + "logps/chosen": -185.25698852539062, + "logps/rejected": -380.6564025878906, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.28849196434021, + "rewards/margins": 5.590069770812988, + "rewards/rejected": -3.3015778064727783, + "step": 8271 + }, + { + "epoch": 0.48, + "learning_rate": 5.5436317680114596e-08, + "logits/chosen": -2.0196456909179688, + "logits/rejected": -2.03706431388855, + "logps/chosen": -234.76751708984375, + "logps/rejected": -397.62579345703125, + "loss": 0.1067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.807934582233429, + "rewards/margins": 2.3703980445861816, + "rewards/rejected": -1.562463402748108, + "step": 8272 + }, + { + "epoch": 0.48, + "learning_rate": 5.542694942784334e-08, + "logits/chosen": -2.015896797180176, + "logits/rejected": -2.0089240074157715, + "logps/chosen": -4.7444787924177945e-05, + "logps/rejected": -272.5291442871094, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960027849027938e-08, + "rewards/margins": 4.126516819000244, + "rewards/rejected": -4.126516819000244, + "step": 8273 + }, + { + "epoch": 0.48, + "learning_rate": 5.541758098278031e-08, + "logits/chosen": -2.0299384593963623, + "logits/rejected": -2.0290706157684326, + "logps/chosen": -56.627464294433594, + "logps/rejected": -207.19778442382812, + "loss": 0.1014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3739006519317627, + "rewards/margins": 4.680703163146973, + "rewards/rejected": -3.30680251121521, + "step": 8274 + }, + { + "epoch": 0.48, + "learning_rate": 5.540821234525829e-08, + "logits/chosen": -1.9536890983581543, + "logits/rejected": -1.9563791751861572, + "logps/chosen": -13.279378890991211, + "logps/rejected": -116.27009582519531, + "loss": 0.486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21683979034423828, + "rewards/margins": 0.6452562808990479, + "rewards/rejected": -0.4284164607524872, + "step": 8275 + }, + { + "epoch": 0.48, + "learning_rate": 5.5398843515610116e-08, + "logits/chosen": -1.7187572717666626, + "logits/rejected": -1.8611990213394165, + "logps/chosen": -269.6260070800781, + "logps/rejected": -428.42352294921875, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8132447004318237, + "rewards/margins": 4.19392728805542, + "rewards/rejected": -2.3806824684143066, + "step": 8276 + }, + { + "epoch": 0.48, + "learning_rate": 5.5389474494168596e-08, + "logits/chosen": -1.932120680809021, + "logits/rejected": -1.9281091690063477, + "logps/chosen": -18.401784896850586, + "logps/rejected": -264.1820068359375, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5194503664970398, + "rewards/margins": 2.362867832183838, + "rewards/rejected": -1.8434174060821533, + "step": 8277 + }, + { + "epoch": 0.48, + "learning_rate": 5.5380105281266595e-08, + "logits/chosen": -2.051704168319702, + "logits/rejected": -2.0402941703796387, + "logps/chosen": -68.8077392578125, + "logps/rejected": -346.78076171875, + "loss": 0.2505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40081787109375, + "rewards/margins": 6.524853706359863, + "rewards/rejected": -6.124035835266113, + "step": 8278 + }, + { + "epoch": 0.48, + "learning_rate": 5.537073587723694e-08, + "logits/chosen": -2.045395851135254, + "logits/rejected": -2.0652120113372803, + "logps/chosen": -277.1669921875, + "logps/rejected": -421.69744873046875, + "loss": 0.0582, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.447247266769409, + "rewards/margins": 2.314929246902466, + "rewards/rejected": 0.13231812417507172, + "step": 8279 + }, + { + "epoch": 0.48, + "learning_rate": 5.5361366282412484e-08, + "logits/chosen": -2.0324532985687256, + "logits/rejected": -2.040313720703125, + "logps/chosen": -0.04496299475431442, + "logps/rejected": -213.59002685546875, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004382573999464512, + "rewards/margins": 4.739269733428955, + "rewards/rejected": -4.74365234375, + "step": 8280 + }, + { + "epoch": 0.48, + "learning_rate": 5.535199649712604e-08, + "logits/chosen": -1.9243426322937012, + "logits/rejected": -1.9077600240707397, + "logps/chosen": -170.73666381835938, + "logps/rejected": -344.81915283203125, + "loss": 0.0477, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.467431664466858, + "rewards/margins": 4.441516399383545, + "rewards/rejected": -2.9740846157073975, + "step": 8281 + }, + { + "epoch": 0.48, + "learning_rate": 5.5342626521710536e-08, + "logits/chosen": -1.8616986274719238, + "logits/rejected": -1.8465900421142578, + "logps/chosen": -56.52082061767578, + "logps/rejected": -262.390869140625, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9390358328819275, + "rewards/margins": 1.3887062072753906, + "rewards/rejected": -0.4496704041957855, + "step": 8282 + }, + { + "epoch": 0.48, + "learning_rate": 5.533325635649878e-08, + "logits/chosen": -1.9351387023925781, + "logits/rejected": -1.9266691207885742, + "logps/chosen": -115.21754455566406, + "logps/rejected": -262.53875732421875, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049346923828125, + "rewards/margins": 3.284924268722534, + "rewards/rejected": -3.235577344894409, + "step": 8283 + }, + { + "epoch": 0.48, + "learning_rate": 5.5323886001823685e-08, + "logits/chosen": -2.034754753112793, + "logits/rejected": -2.0339744091033936, + "logps/chosen": -71.35216522216797, + "logps/rejected": -220.62530517578125, + "loss": 0.5861, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.47123414278030396, + "rewards/margins": -0.10561978816986084, + "rewards/rejected": 0.5768539309501648, + "step": 8284 + }, + { + "epoch": 0.48, + "learning_rate": 5.531451545801811e-08, + "logits/chosen": -2.059250593185425, + "logits/rejected": -2.0532026290893555, + "logps/chosen": -1.9000576734542847, + "logps/rejected": -74.21368408203125, + "loss": 0.3915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07453157007694244, + "rewards/margins": 1.9336254596710205, + "rewards/rejected": -1.8590939044952393, + "step": 8285 + }, + { + "epoch": 0.48, + "learning_rate": 5.5305144725414955e-08, + "logits/chosen": -1.989157795906067, + "logits/rejected": -1.994676947593689, + "logps/chosen": -144.14605712890625, + "logps/rejected": -289.58062744140625, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.025686740875244, + "rewards/margins": 3.330310344696045, + "rewards/rejected": -1.3046234846115112, + "step": 8286 + }, + { + "epoch": 0.48, + "learning_rate": 5.5295773804347104e-08, + "logits/chosen": -1.960204839706421, + "logits/rejected": -1.9572840929031372, + "logps/chosen": -29.350915908813477, + "logps/rejected": -226.8008270263672, + "loss": 0.4022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17909221351146698, + "rewards/margins": 1.6753771305084229, + "rewards/rejected": -1.8544692993164062, + "step": 8287 + }, + { + "epoch": 0.48, + "learning_rate": 5.528640269514745e-08, + "logits/chosen": -2.041036605834961, + "logits/rejected": -2.025191307067871, + "logps/chosen": -32.36485290527344, + "logps/rejected": -127.2468490600586, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3507217466831207, + "rewards/margins": 1.659101128578186, + "rewards/rejected": -1.3083794116973877, + "step": 8288 + }, + { + "epoch": 0.48, + "learning_rate": 5.5277031398148935e-08, + "logits/chosen": -1.9086275100708008, + "logits/rejected": -1.8997013568878174, + "logps/chosen": -188.41970825195312, + "logps/rejected": -403.71942138671875, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.416546583175659, + "rewards/margins": 3.131732225418091, + "rewards/rejected": -0.7151855826377869, + "step": 8289 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765991368444e-08, + "logits/chosen": -1.9029706716537476, + "logits/rejected": -1.925001621246338, + "logps/chosen": -181.38584899902344, + "logps/rejected": -257.4226379394531, + "loss": 0.0862, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7528793811798096, + "rewards/margins": 1.914686679840088, + "rewards/rejected": 0.8381927609443665, + "step": 8290 + }, + { + "epoch": 0.48, + "learning_rate": 5.5258288242086903e-08, + "logits/chosen": -1.9913830757141113, + "logits/rejected": -1.9751825332641602, + "logps/chosen": -57.521461486816406, + "logps/rejected": -307.3161315917969, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01749420166015625, + "rewards/margins": 4.481083869934082, + "rewards/rejected": -4.463589668273926, + "step": 8291 + }, + { + "epoch": 0.48, + "learning_rate": 5.524891638368925e-08, + "logits/chosen": -1.8597005605697632, + "logits/rejected": -1.859466791152954, + "logps/chosen": -2.89585542678833, + "logps/rejected": -71.38578033447266, + "loss": 0.6017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1799726039171219, + "rewards/margins": 0.3000478148460388, + "rewards/rejected": -0.12007522583007812, + "step": 8292 + }, + { + "epoch": 0.48, + "learning_rate": 5.523954433882439e-08, + "logits/chosen": -1.7104774713516235, + "logits/rejected": -1.708559513092041, + "logps/chosen": -99.68142700195312, + "logps/rejected": -228.67726135253906, + "loss": 0.3946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4027114808559418, + "rewards/margins": 1.0532500743865967, + "rewards/rejected": -0.6505386233329773, + "step": 8293 + }, + { + "epoch": 0.48, + "learning_rate": 5.523017210782531e-08, + "logits/chosen": -1.696192979812622, + "logits/rejected": -1.6917725801467896, + "logps/chosen": -24.948394775390625, + "logps/rejected": -106.38929748535156, + "loss": 0.6909, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7967666983604431, + "rewards/margins": 1.2585539817810059, + "rewards/rejected": -2.0553207397460938, + "step": 8294 + }, + { + "epoch": 0.48, + "learning_rate": 5.522079969102493e-08, + "logits/chosen": -1.630976676940918, + "logits/rejected": -1.5969246625900269, + "logps/chosen": -141.50369262695312, + "logps/rejected": -301.6258544921875, + "loss": 0.1762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7794143557548523, + "rewards/margins": 2.817073106765747, + "rewards/rejected": -2.03765869140625, + "step": 8295 + }, + { + "epoch": 0.48, + "learning_rate": 5.52114270887562e-08, + "logits/chosen": -1.9015170335769653, + "logits/rejected": -1.899685263633728, + "logps/chosen": -212.27963256835938, + "logps/rejected": -260.8084716796875, + "loss": 0.4218, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.327780246734619, + "rewards/margins": -0.21167302131652832, + "rewards/rejected": 2.5394532680511475, + "step": 8296 + }, + { + "epoch": 0.48, + "learning_rate": 5.52020543013521e-08, + "logits/chosen": -1.9827394485473633, + "logits/rejected": -1.965378761291504, + "logps/chosen": -9.841500282287598, + "logps/rejected": -263.3096618652344, + "loss": 0.198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6164280772209167, + "rewards/margins": 4.32826566696167, + "rewards/rejected": -3.7118377685546875, + "step": 8297 + }, + { + "epoch": 0.48, + "learning_rate": 5.519268132914557e-08, + "logits/chosen": -2.0234429836273193, + "logits/rejected": -2.017336130142212, + "logps/chosen": -23.13409423828125, + "logps/rejected": -44.82317352294922, + "loss": 0.6411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18042679131031036, + "rewards/margins": 0.004489138722419739, + "rewards/rejected": 0.17593765258789062, + "step": 8298 + }, + { + "epoch": 0.48, + "learning_rate": 5.518330817246961e-08, + "logits/chosen": -1.9840320348739624, + "logits/rejected": -1.9643956422805786, + "logps/chosen": -12.587409019470215, + "logps/rejected": -324.85467529296875, + "loss": 0.2252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34652701020240784, + "rewards/margins": 4.902594089508057, + "rewards/rejected": -4.556066989898682, + "step": 8299 + }, + { + "epoch": 0.48, + "learning_rate": 5.517393483165719e-08, + "logits/chosen": -1.8279740810394287, + "logits/rejected": -1.830559253692627, + "logps/chosen": -5.1208882331848145, + "logps/rejected": -464.14312744140625, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12587347626686096, + "rewards/margins": 12.783353805541992, + "rewards/rejected": -12.657480239868164, + "step": 8300 + }, + { + "epoch": 0.48, + "learning_rate": 5.516456130704129e-08, + "logits/chosen": -2.1820290088653564, + "logits/rejected": -2.169922351837158, + "logps/chosen": -50.27437973022461, + "logps/rejected": -277.90594482421875, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47985345125198364, + "rewards/margins": 1.6384289264678955, + "rewards/rejected": -1.158575415611267, + "step": 8301 + }, + { + "epoch": 0.48, + "learning_rate": 5.5155187598954914e-08, + "logits/chosen": -2.130066394805908, + "logits/rejected": -2.130622625350952, + "logps/chosen": -0.05869917571544647, + "logps/rejected": -224.60736083984375, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023218251299113035, + "rewards/margins": 2.1557228565216064, + "rewards/rejected": -2.1580445766448975, + "step": 8302 + }, + { + "epoch": 0.48, + "learning_rate": 5.514581370773106e-08, + "logits/chosen": -2.1566128730773926, + "logits/rejected": -2.1495847702026367, + "logps/chosen": -4.904085159301758, + "logps/rejected": -249.349609375, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04651837423443794, + "rewards/margins": 4.600528240203857, + "rewards/rejected": -4.554009914398193, + "step": 8303 + }, + { + "epoch": 0.48, + "learning_rate": 5.5136439633702726e-08, + "logits/chosen": -1.8605940341949463, + "logits/rejected": -1.8654617071151733, + "logps/chosen": -16.485658645629883, + "logps/rejected": -95.8928451538086, + "loss": 0.4511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10210762172937393, + "rewards/margins": 1.1916134357452393, + "rewards/rejected": -1.0895057916641235, + "step": 8304 + }, + { + "epoch": 0.48, + "learning_rate": 5.512706537720293e-08, + "logits/chosen": -2.016774892807007, + "logits/rejected": -2.018805980682373, + "logps/chosen": -249.80502319335938, + "logps/rejected": -538.2088623046875, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.389883518218994, + "rewards/margins": 3.953488349914551, + "rewards/rejected": -1.563604712486267, + "step": 8305 + }, + { + "epoch": 0.48, + "learning_rate": 5.5117690938564695e-08, + "logits/chosen": -2.1003942489624023, + "logits/rejected": -2.093515634536743, + "logps/chosen": -181.89065551757812, + "logps/rejected": -325.2336120605469, + "loss": 0.2372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.621374487876892, + "rewards/margins": 0.7089385390281677, + "rewards/rejected": 0.9124359488487244, + "step": 8306 + }, + { + "epoch": 0.48, + "learning_rate": 5.510831631812105e-08, + "logits/chosen": -1.9316611289978027, + "logits/rejected": -1.9241968393325806, + "logps/chosen": -250.30267333984375, + "logps/rejected": -321.5606994628906, + "loss": 0.1341, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.576324462890625, + "rewards/margins": 1.3321441411972046, + "rewards/rejected": 1.2441803216934204, + "step": 8307 + }, + { + "epoch": 0.48, + "learning_rate": 5.509894151620501e-08, + "logits/chosen": -1.945456862449646, + "logits/rejected": -1.926332712173462, + "logps/chosen": -4.352487564086914, + "logps/rejected": -202.02517700195312, + "loss": 0.5314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04105806350708008, + "rewards/margins": 0.7740338444709778, + "rewards/rejected": -0.7329757809638977, + "step": 8308 + }, + { + "epoch": 0.48, + "learning_rate": 5.5089566533149635e-08, + "logits/chosen": -1.9951767921447754, + "logits/rejected": -2.0022218227386475, + "logps/chosen": -108.98880767822266, + "logps/rejected": -346.6618957519531, + "loss": 0.5877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9021980166435242, + "rewards/margins": 7.37786865234375, + "rewards/rejected": -8.28006649017334, + "step": 8309 + }, + { + "epoch": 0.48, + "learning_rate": 5.508019136928797e-08, + "logits/chosen": -2.027677059173584, + "logits/rejected": -2.027883529663086, + "logps/chosen": -8.018234252929688, + "logps/rejected": -86.78500366210938, + "loss": 0.5384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18264198303222656, + "rewards/margins": 0.30389291048049927, + "rewards/rejected": -0.12125091999769211, + "step": 8310 + }, + { + "epoch": 0.48, + "learning_rate": 5.5070816024953026e-08, + "logits/chosen": -1.942238211631775, + "logits/rejected": -1.8773874044418335, + "logps/chosen": -308.54010009765625, + "logps/rejected": -412.7414245605469, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5962769985198975, + "rewards/margins": 2.12503981590271, + "rewards/rejected": 0.4712371826171875, + "step": 8311 + }, + { + "epoch": 0.48, + "learning_rate": 5.506144050047792e-08, + "logits/chosen": -1.9372707605361938, + "logits/rejected": -1.9317877292633057, + "logps/chosen": -44.87505340576172, + "logps/rejected": -150.22808837890625, + "loss": 1.0817, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9008491635322571, + "rewards/margins": -0.4749916195869446, + "rewards/rejected": -0.4258575439453125, + "step": 8312 + }, + { + "epoch": 0.48, + "learning_rate": 5.505206479619569e-08, + "logits/chosen": -1.987525224685669, + "logits/rejected": -1.9847216606140137, + "logps/chosen": -0.0012125512585043907, + "logps/rejected": -229.01373291015625, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.100440492038615e-05, + "rewards/margins": 3.2382218837738037, + "rewards/rejected": -3.238262891769409, + "step": 8313 + }, + { + "epoch": 0.48, + "learning_rate": 5.504268891243938e-08, + "logits/chosen": -1.841461181640625, + "logits/rejected": -1.8333659172058105, + "logps/chosen": -247.9539031982422, + "logps/rejected": -299.2924499511719, + "loss": 0.399, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.3723647594451904, + "rewards/margins": -0.14596104621887207, + "rewards/rejected": 2.5183258056640625, + "step": 8314 + }, + { + "epoch": 0.48, + "learning_rate": 5.503331284954211e-08, + "logits/chosen": -1.872840404510498, + "logits/rejected": -1.8718395233154297, + "logps/chosen": -19.695005416870117, + "logps/rejected": -66.43244934082031, + "loss": 0.8339, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4770174026489258, + "rewards/margins": -0.14287927746772766, + "rewards/rejected": -0.3341381251811981, + "step": 8315 + }, + { + "epoch": 0.48, + "learning_rate": 5.502393660783696e-08, + "logits/chosen": -1.7980209589004517, + "logits/rejected": -1.798290491104126, + "logps/chosen": -245.23489379882812, + "logps/rejected": -258.4945373535156, + "loss": 0.1826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4796600341796875, + "rewards/margins": 1.2522796392440796, + "rewards/rejected": 0.22738038003444672, + "step": 8316 + }, + { + "epoch": 0.48, + "learning_rate": 5.501456018765699e-08, + "logits/chosen": -2.1201064586639404, + "logits/rejected": -2.1141819953918457, + "logps/chosen": -91.3208999633789, + "logps/rejected": -323.65350341796875, + "loss": 0.0881, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6421219110488892, + "rewards/margins": 6.423814296722412, + "rewards/rejected": -4.7816925048828125, + "step": 8317 + }, + { + "epoch": 0.48, + "learning_rate": 5.5005183589335327e-08, + "logits/chosen": -1.7168920040130615, + "logits/rejected": -1.7100367546081543, + "logps/chosen": -11.088698387145996, + "logps/rejected": -67.60891723632812, + "loss": 0.7268, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0674779936671257, + "rewards/margins": -0.14933261275291443, + "rewards/rejected": 0.21681061387062073, + "step": 8318 + }, + { + "epoch": 0.48, + "learning_rate": 5.499580681320505e-08, + "logits/chosen": -1.9716116189956665, + "logits/rejected": -1.9813953638076782, + "logps/chosen": -82.19173431396484, + "logps/rejected": -217.84750366210938, + "loss": 0.176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7839866876602173, + "rewards/margins": 2.985290050506592, + "rewards/rejected": -2.201303243637085, + "step": 8319 + }, + { + "epoch": 0.48, + "learning_rate": 5.4986429859599284e-08, + "logits/chosen": -2.076557159423828, + "logits/rejected": -2.0763278007507324, + "logps/chosen": -0.00034614777541719377, + "logps/rejected": -136.8978271484375, + "loss": 0.3873, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5625850210199133e-05, + "rewards/margins": 2.2005186080932617, + "rewards/rejected": -2.200502872467041, + "step": 8320 + }, + { + "epoch": 0.48, + "learning_rate": 5.4977052728851125e-08, + "logits/chosen": -1.8678417205810547, + "logits/rejected": -1.8619351387023926, + "logps/chosen": -22.26565170288086, + "logps/rejected": -139.18402099609375, + "loss": 0.251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4789329469203949, + "rewards/margins": 2.636929988861084, + "rewards/rejected": -2.1579971313476562, + "step": 8321 + }, + { + "epoch": 0.48, + "learning_rate": 5.4967675421293725e-08, + "logits/chosen": -1.853919506072998, + "logits/rejected": -1.8340622186660767, + "logps/chosen": -224.72012329101562, + "logps/rejected": -438.17071533203125, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4404327869415283, + "rewards/margins": 4.622384548187256, + "rewards/rejected": -1.181951880455017, + "step": 8322 + }, + { + "epoch": 0.48, + "learning_rate": 5.495829793726018e-08, + "logits/chosen": -2.004060745239258, + "logits/rejected": -1.9903297424316406, + "logps/chosen": -116.82032775878906, + "logps/rejected": -328.4925231933594, + "loss": 0.2525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5498611330986023, + "rewards/margins": 2.2805862426757812, + "rewards/rejected": -1.7307251691818237, + "step": 8323 + }, + { + "epoch": 0.48, + "learning_rate": 5.494892027708365e-08, + "logits/chosen": -2.107649326324463, + "logits/rejected": -2.0994560718536377, + "logps/chosen": -0.028703834861516953, + "logps/rejected": -99.8932876586914, + "loss": 0.5657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018291134620085359, + "rewards/margins": 0.592613697052002, + "rewards/rejected": -0.5944427847862244, + "step": 8324 + }, + { + "epoch": 0.48, + "learning_rate": 5.493954244109725e-08, + "logits/chosen": -1.9541655778884888, + "logits/rejected": -1.954305648803711, + "logps/chosen": -22.305570602416992, + "logps/rejected": -189.86856079101562, + "loss": 0.2627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4038930833339691, + "rewards/margins": 2.9007771015167236, + "rewards/rejected": -2.4968841075897217, + "step": 8325 + }, + { + "epoch": 0.48, + "learning_rate": 5.4930164429634165e-08, + "logits/chosen": -1.8317762613296509, + "logits/rejected": -1.8302116394042969, + "logps/chosen": -168.08279418945312, + "logps/rejected": -299.5140380859375, + "loss": 0.1206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4169158935546875, + "rewards/margins": 1.9686188697814941, + "rewards/rejected": -0.5517029166221619, + "step": 8326 + }, + { + "epoch": 0.48, + "learning_rate": 5.4920786243027494e-08, + "logits/chosen": -1.7897628545761108, + "logits/rejected": -1.774006724357605, + "logps/chosen": -240.88665771484375, + "logps/rejected": -364.1247253417969, + "loss": 0.2918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9421051144599915, + "rewards/margins": 1.325433373451233, + "rewards/rejected": -0.38332825899124146, + "step": 8327 + }, + { + "epoch": 0.48, + "learning_rate": 5.491140788161045e-08, + "logits/chosen": -2.1143596172332764, + "logits/rejected": -2.107548952102661, + "logps/chosen": -0.9924444556236267, + "logps/rejected": -159.05950927734375, + "loss": 0.4, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003418308449909091, + "rewards/margins": 1.8846912384033203, + "rewards/rejected": -1.8812729120254517, + "step": 8328 + }, + { + "epoch": 0.48, + "learning_rate": 5.4902029345716163e-08, + "logits/chosen": -2.0835797786712646, + "logits/rejected": -2.045382261276245, + "logps/chosen": -170.94190979003906, + "logps/rejected": -591.1972045898438, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.59405517578125, + "rewards/margins": 3.810699462890625, + "rewards/rejected": -2.216644287109375, + "step": 8329 + }, + { + "epoch": 0.48, + "learning_rate": 5.489265063567781e-08, + "logits/chosen": -1.9933620691299438, + "logits/rejected": -1.984142541885376, + "logps/chosen": -52.03025817871094, + "logps/rejected": -288.79779052734375, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11384735256433487, + "rewards/margins": 5.2071428298950195, + "rewards/rejected": -5.093295574188232, + "step": 8330 + }, + { + "epoch": 0.48, + "learning_rate": 5.4883271751828574e-08, + "logits/chosen": -1.8883469104766846, + "logits/rejected": -1.8907577991485596, + "logps/chosen": -62.099143981933594, + "logps/rejected": -241.05662536621094, + "loss": 0.2753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2558403015136719, + "rewards/margins": 2.201178789138794, + "rewards/rejected": -1.945338487625122, + "step": 8331 + }, + { + "epoch": 0.48, + "learning_rate": 5.487389269450165e-08, + "logits/chosen": -1.9084781408309937, + "logits/rejected": -1.9167795181274414, + "logps/chosen": -183.61172485351562, + "logps/rejected": -242.24386596679688, + "loss": 0.1842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7492538690567017, + "rewards/margins": 1.3766586780548096, + "rewards/rejected": 0.3725952208042145, + "step": 8332 + }, + { + "epoch": 0.48, + "learning_rate": 5.486451346403021e-08, + "logits/chosen": -1.8801960945129395, + "logits/rejected": -1.8886348009109497, + "logps/chosen": -215.276611328125, + "logps/rejected": -459.92620849609375, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0664308071136475, + "rewards/margins": 3.049283027648926, + "rewards/rejected": -0.9828521609306335, + "step": 8333 + }, + { + "epoch": 0.48, + "learning_rate": 5.4855134060747456e-08, + "logits/chosen": -1.7024999856948853, + "logits/rejected": -1.7332724332809448, + "logps/chosen": -206.78964233398438, + "logps/rejected": -324.2325744628906, + "loss": 0.0497, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.726007103919983, + "rewards/margins": 2.861315965652466, + "rewards/rejected": -1.135308861732483, + "step": 8334 + }, + { + "epoch": 0.49, + "learning_rate": 5.484575448498658e-08, + "logits/chosen": -1.872093915939331, + "logits/rejected": -1.864282250404358, + "logps/chosen": -51.028751373291016, + "logps/rejected": -401.4391174316406, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020615005865693092, + "rewards/margins": 2.906749963760376, + "rewards/rejected": -2.9273650646209717, + "step": 8335 + }, + { + "epoch": 0.49, + "learning_rate": 5.4836374737080814e-08, + "logits/chosen": -1.8740357160568237, + "logits/rejected": -1.8654967546463013, + "logps/chosen": -33.89092254638672, + "logps/rejected": -123.03761291503906, + "loss": 0.5575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2703414857387543, + "rewards/margins": 1.1867058277130127, + "rewards/rejected": -1.4570473432540894, + "step": 8336 + }, + { + "epoch": 0.49, + "learning_rate": 5.482699481736337e-08, + "logits/chosen": -1.851457118988037, + "logits/rejected": -1.848683476448059, + "logps/chosen": -16.53290557861328, + "logps/rejected": -91.27229309082031, + "loss": 0.4479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1608285903930664, + "rewards/margins": 1.051867127418518, + "rewards/rejected": -0.8910385370254517, + "step": 8337 + }, + { + "epoch": 0.49, + "learning_rate": 5.481761472616744e-08, + "logits/chosen": -1.8337217569351196, + "logits/rejected": -1.8694268465042114, + "logps/chosen": -295.25799560546875, + "logps/rejected": -407.50872802734375, + "loss": 0.1982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8163421750068665, + "rewards/margins": 1.1940765380859375, + "rewards/rejected": -0.37773439288139343, + "step": 8338 + }, + { + "epoch": 0.49, + "learning_rate": 5.480823446382629e-08, + "logits/chosen": -1.7906380891799927, + "logits/rejected": -1.7951222658157349, + "logps/chosen": -129.51788330078125, + "logps/rejected": -288.1451721191406, + "loss": 0.155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6153732538223267, + "rewards/margins": 1.5696426630020142, + "rewards/rejected": 0.0457305908203125, + "step": 8339 + }, + { + "epoch": 0.49, + "learning_rate": 5.4798854030673114e-08, + "logits/chosen": -2.051758050918579, + "logits/rejected": -2.042654275894165, + "logps/chosen": -8.537149429321289, + "logps/rejected": -175.6072540283203, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1394393891096115, + "rewards/margins": 2.462719678878784, + "rewards/rejected": -2.3232803344726562, + "step": 8340 + }, + { + "epoch": 0.49, + "learning_rate": 5.478947342704118e-08, + "logits/chosen": -1.8979305028915405, + "logits/rejected": -1.9380242824554443, + "logps/chosen": -192.1429443359375, + "logps/rejected": -196.2733612060547, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.767736792564392, + "rewards/margins": 0.5538619756698608, + "rewards/rejected": 1.2138748168945312, + "step": 8341 + }, + { + "epoch": 0.49, + "learning_rate": 5.478009265326371e-08, + "logits/chosen": -1.8234848976135254, + "logits/rejected": -1.8388891220092773, + "logps/chosen": -125.25960540771484, + "logps/rejected": -246.35208129882812, + "loss": 0.177, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4746254682540894, + "rewards/margins": 1.2042168378829956, + "rewards/rejected": 0.27040863037109375, + "step": 8342 + }, + { + "epoch": 0.49, + "learning_rate": 5.477071170967398e-08, + "logits/chosen": -1.991217851638794, + "logits/rejected": -2.00177001953125, + "logps/chosen": -97.7080307006836, + "logps/rejected": -190.3971710205078, + "loss": 0.2504, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1625473499298096, + "rewards/margins": 1.0048432350158691, + "rewards/rejected": 0.157704159617424, + "step": 8343 + }, + { + "epoch": 0.49, + "learning_rate": 5.4761330596605224e-08, + "logits/chosen": -1.890379548072815, + "logits/rejected": -1.8778481483459473, + "logps/chosen": -156.4667510986328, + "logps/rejected": -243.7042694091797, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6603103876113892, + "rewards/margins": 0.5891968011856079, + "rewards/rejected": 1.0711135864257812, + "step": 8344 + }, + { + "epoch": 0.49, + "learning_rate": 5.4751949314390724e-08, + "logits/chosen": -1.9357794523239136, + "logits/rejected": -1.907340407371521, + "logps/chosen": -245.3515625, + "logps/rejected": -419.20330810546875, + "loss": 0.0363, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.781201124191284, + "rewards/margins": 3.020678758621216, + "rewards/rejected": -0.23947754502296448, + "step": 8345 + }, + { + "epoch": 0.49, + "learning_rate": 5.474256786336373e-08, + "logits/chosen": -2.1693103313446045, + "logits/rejected": -2.165782928466797, + "logps/chosen": -0.7664369344711304, + "logps/rejected": -143.46937561035156, + "loss": 0.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02624119631946087, + "rewards/margins": 2.871601104736328, + "rewards/rejected": -2.8453598022460938, + "step": 8346 + }, + { + "epoch": 0.49, + "learning_rate": 5.473318624385754e-08, + "logits/chosen": -1.8686156272888184, + "logits/rejected": -1.858829379081726, + "logps/chosen": -74.41519165039062, + "logps/rejected": -214.8582305908203, + "loss": 0.2642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6750969290733337, + "rewards/margins": 1.7345407009124756, + "rewards/rejected": -1.059443712234497, + "step": 8347 + }, + { + "epoch": 0.49, + "learning_rate": 5.472380445620541e-08, + "logits/chosen": -2.0007739067077637, + "logits/rejected": -1.9335392713546753, + "logps/chosen": -186.54261779785156, + "logps/rejected": -336.44561767578125, + "loss": 0.1698, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3495972156524658, + "rewards/margins": 1.8067078590393066, + "rewards/rejected": -0.45711061358451843, + "step": 8348 + }, + { + "epoch": 0.49, + "learning_rate": 5.471442250074065e-08, + "logits/chosen": -1.8782017230987549, + "logits/rejected": -1.85398530960083, + "logps/chosen": -172.90501403808594, + "logps/rejected": -377.9860534667969, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9575058221817017, + "rewards/margins": 2.883711338043213, + "rewards/rejected": -0.9262054562568665, + "step": 8349 + }, + { + "epoch": 0.49, + "learning_rate": 5.470504037779653e-08, + "logits/chosen": -2.0427558422088623, + "logits/rejected": -2.042311906814575, + "logps/chosen": -1.800315022468567, + "logps/rejected": -180.77110290527344, + "loss": 0.3429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1299455612897873, + "rewards/margins": 2.5844149589538574, + "rewards/rejected": -2.4544694423675537, + "step": 8350 + }, + { + "epoch": 0.49, + "learning_rate": 5.469565808770636e-08, + "logits/chosen": -2.287839889526367, + "logits/rejected": -2.2737948894500732, + "logps/chosen": -90.56144714355469, + "logps/rejected": -327.301025390625, + "loss": 0.2496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2918037474155426, + "rewards/margins": 2.327909231185913, + "rewards/rejected": -2.0361053943634033, + "step": 8351 + }, + { + "epoch": 0.49, + "learning_rate": 5.468627563080346e-08, + "logits/chosen": -2.0350193977355957, + "logits/rejected": -2.028264284133911, + "logps/chosen": -15.120325088500977, + "logps/rejected": -221.74319458007812, + "loss": 0.2484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28481826186180115, + "rewards/margins": 3.621134042739868, + "rewards/rejected": -3.336315870285034, + "step": 8352 + }, + { + "epoch": 0.49, + "learning_rate": 5.467689300742112e-08, + "logits/chosen": -1.8562277555465698, + "logits/rejected": -1.8439769744873047, + "logps/chosen": -34.054344177246094, + "logps/rejected": -148.90560913085938, + "loss": 0.6506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7691850662231445, + "rewards/margins": 1.6743695735931396, + "rewards/rejected": -2.443554639816284, + "step": 8353 + }, + { + "epoch": 0.49, + "learning_rate": 5.4667510217892654e-08, + "logits/chosen": -1.8422574996948242, + "logits/rejected": -1.839224934577942, + "logps/chosen": -3.7312052882043645e-05, + "logps/rejected": -101.39602661132812, + "loss": 0.4617, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.901775577716762e-06, + "rewards/margins": 1.3128937482833862, + "rewards/rejected": -1.3128868341445923, + "step": 8354 + }, + { + "epoch": 0.49, + "learning_rate": 5.465812726255141e-08, + "logits/chosen": -1.8895715475082397, + "logits/rejected": -1.921944260597229, + "logps/chosen": -148.84243774414062, + "logps/rejected": -385.6154479980469, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.881190538406372, + "rewards/margins": 4.6601243019104, + "rewards/rejected": -2.7789337635040283, + "step": 8355 + }, + { + "epoch": 0.49, + "learning_rate": 5.464874414173069e-08, + "logits/chosen": -2.1046016216278076, + "logits/rejected": -2.111067295074463, + "logps/chosen": -27.831886291503906, + "logps/rejected": -159.5867919921875, + "loss": 0.2804, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.053553819656372, + "rewards/margins": 0.9871048331260681, + "rewards/rejected": 0.06644897907972336, + "step": 8356 + }, + { + "epoch": 0.49, + "learning_rate": 5.463936085576384e-08, + "logits/chosen": -1.941980242729187, + "logits/rejected": -1.9375691413879395, + "logps/chosen": -201.48277282714844, + "logps/rejected": -280.1359558105469, + "loss": 0.1247, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0265121459960938, + "rewards/margins": 1.505448818206787, + "rewards/rejected": 0.5210632681846619, + "step": 8357 + }, + { + "epoch": 0.49, + "learning_rate": 5.4629977404984196e-08, + "logits/chosen": -1.8176273107528687, + "logits/rejected": -1.7550818920135498, + "logps/chosen": -304.1782531738281, + "logps/rejected": -510.28826904296875, + "loss": 0.1325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.815155029296875, + "rewards/margins": 1.5020568370819092, + "rewards/rejected": 0.31309816241264343, + "step": 8358 + }, + { + "epoch": 0.49, + "learning_rate": 5.462059378972511e-08, + "logits/chosen": -1.7927091121673584, + "logits/rejected": -1.7701146602630615, + "logps/chosen": -203.85348510742188, + "logps/rejected": -457.0004577636719, + "loss": 0.0804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8351501822471619, + "rewards/margins": 3.4407105445861816, + "rewards/rejected": -2.605560302734375, + "step": 8359 + }, + { + "epoch": 0.49, + "learning_rate": 5.4611210010319944e-08, + "logits/chosen": -1.7710003852844238, + "logits/rejected": -1.8286644220352173, + "logps/chosen": -181.8114776611328, + "logps/rejected": -185.10980224609375, + "loss": 0.1356, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.123309373855591, + "rewards/margins": 1.5875000953674316, + "rewards/rejected": 0.535809338092804, + "step": 8360 + }, + { + "epoch": 0.49, + "learning_rate": 5.460182606710202e-08, + "logits/chosen": -2.0314643383026123, + "logits/rejected": -2.0279204845428467, + "logps/chosen": -162.26416015625, + "logps/rejected": -312.9472351074219, + "loss": 0.2525, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.288447618484497, + "rewards/margins": 0.7271896600723267, + "rewards/rejected": 0.5612579584121704, + "step": 8361 + }, + { + "epoch": 0.49, + "learning_rate": 5.459244196040474e-08, + "logits/chosen": -1.8088637590408325, + "logits/rejected": -1.8032448291778564, + "logps/chosen": -282.97821044921875, + "logps/rejected": -448.57940673828125, + "loss": 0.1418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36920166015625, + "rewards/margins": 4.521801948547363, + "rewards/rejected": -4.152600288391113, + "step": 8362 + }, + { + "epoch": 0.49, + "learning_rate": 5.458305769056145e-08, + "logits/chosen": -2.195197820663452, + "logits/rejected": -2.188532590866089, + "logps/chosen": -39.61131286621094, + "logps/rejected": -220.721923828125, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2543960511684418, + "rewards/margins": 1.88970947265625, + "rewards/rejected": -1.6353133916854858, + "step": 8363 + }, + { + "epoch": 0.49, + "learning_rate": 5.457367325790554e-08, + "logits/chosen": -1.942215085029602, + "logits/rejected": -1.9480794668197632, + "logps/chosen": -32.635047912597656, + "logps/rejected": -175.37281799316406, + "loss": 0.393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7482364773750305, + "rewards/margins": 0.7658085227012634, + "rewards/rejected": -0.01757202111184597, + "step": 8364 + }, + { + "epoch": 0.49, + "learning_rate": 5.4564288662770377e-08, + "logits/chosen": -2.0407795906066895, + "logits/rejected": -2.039921998977661, + "logps/chosen": -3.6081838607788086, + "logps/rejected": -94.88835144042969, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04450342804193497, + "rewards/margins": 1.6897739171981812, + "rewards/rejected": -1.734277367591858, + "step": 8365 + }, + { + "epoch": 0.49, + "learning_rate": 5.455490390548936e-08, + "logits/chosen": -2.015397548675537, + "logits/rejected": -2.014552116394043, + "logps/chosen": -24.497915267944336, + "logps/rejected": -112.41433715820312, + "loss": 0.3671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4771102964878082, + "rewards/margins": 1.148280382156372, + "rewards/rejected": -0.6711700558662415, + "step": 8366 + }, + { + "epoch": 0.49, + "learning_rate": 5.454551898639587e-08, + "logits/chosen": -2.0187370777130127, + "logits/rejected": -1.9967182874679565, + "logps/chosen": -51.988555908203125, + "logps/rejected": -274.4144287109375, + "loss": 0.1944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6360618472099304, + "rewards/margins": 3.10622501373291, + "rewards/rejected": -2.470163106918335, + "step": 8367 + }, + { + "epoch": 0.49, + "learning_rate": 5.453613390582331e-08, + "logits/chosen": -1.937666654586792, + "logits/rejected": -1.9035923480987549, + "logps/chosen": -225.9652099609375, + "logps/rejected": -388.11053466796875, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4721527099609375, + "rewards/margins": 2.6147429943084717, + "rewards/rejected": -0.14259032905101776, + "step": 8368 + }, + { + "epoch": 0.49, + "learning_rate": 5.452674866410508e-08, + "logits/chosen": -1.8291414976119995, + "logits/rejected": -1.7790679931640625, + "logps/chosen": -379.4161071777344, + "logps/rejected": -491.2516174316406, + "loss": 0.3998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9727630615234375, + "rewards/margins": 0.21053463220596313, + "rewards/rejected": 0.7622284293174744, + "step": 8369 + }, + { + "epoch": 0.49, + "learning_rate": 5.45173632615746e-08, + "logits/chosen": -2.07322359085083, + "logits/rejected": -2.0597658157348633, + "logps/chosen": -58.81413269042969, + "logps/rejected": -240.47952270507812, + "loss": 0.6299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1302337646484375, + "rewards/margins": 0.3186965882778168, + "rewards/rejected": -0.4489303529262543, + "step": 8370 + }, + { + "epoch": 0.49, + "learning_rate": 5.450797769856529e-08, + "logits/chosen": -1.9999295473098755, + "logits/rejected": -1.9844939708709717, + "logps/chosen": -208.06565856933594, + "logps/rejected": -329.035888671875, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0384719371795654, + "rewards/margins": 2.339063882827759, + "rewards/rejected": -0.3005920350551605, + "step": 8371 + }, + { + "epoch": 0.49, + "learning_rate": 5.449859197541055e-08, + "logits/chosen": -1.8422740697860718, + "logits/rejected": -1.9031132459640503, + "logps/chosen": -332.7997741699219, + "logps/rejected": -482.81134033203125, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.751272678375244, + "rewards/margins": 5.0800018310546875, + "rewards/rejected": -2.3287293910980225, + "step": 8372 + }, + { + "epoch": 0.49, + "learning_rate": 5.448920609244382e-08, + "logits/chosen": -2.007388114929199, + "logits/rejected": -1.9980127811431885, + "logps/chosen": -0.01268653105944395, + "logps/rejected": -231.24278259277344, + "loss": 0.3539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0021047391928732395, + "rewards/margins": 4.095238208770752, + "rewards/rejected": -4.093133449554443, + "step": 8373 + }, + { + "epoch": 0.49, + "learning_rate": 5.447982004999854e-08, + "logits/chosen": -1.9068602323532104, + "logits/rejected": -2.036903142929077, + "logps/chosen": -210.2718505859375, + "logps/rejected": -352.83544921875, + "loss": 0.1969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.85845947265625, + "rewards/margins": 0.9580047726631165, + "rewards/rejected": 0.9004546999931335, + "step": 8374 + }, + { + "epoch": 0.49, + "learning_rate": 5.447043384840812e-08, + "logits/chosen": -1.760628342628479, + "logits/rejected": -1.7075055837631226, + "logps/chosen": -180.55516052246094, + "logps/rejected": -433.090576171875, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8482651710510254, + "rewards/margins": 2.9958481788635254, + "rewards/rejected": -0.1475830078125, + "step": 8375 + }, + { + "epoch": 0.49, + "learning_rate": 5.446104748800604e-08, + "logits/chosen": -2.0314011573791504, + "logits/rejected": -2.0310933589935303, + "logps/chosen": -0.000537318002898246, + "logps/rejected": -134.88113403320312, + "loss": 0.5702, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.572689305059612e-05, + "rewards/margins": 0.5418682098388672, + "rewards/rejected": -0.5417724847793579, + "step": 8376 + }, + { + "epoch": 0.49, + "learning_rate": 5.4451660969125726e-08, + "logits/chosen": -1.9723491668701172, + "logits/rejected": -1.9434118270874023, + "logps/chosen": -197.17929077148438, + "logps/rejected": -413.895263671875, + "loss": 0.1148, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.438934326171875, + "rewards/margins": 1.9116942882537842, + "rewards/rejected": -0.47276002168655396, + "step": 8377 + }, + { + "epoch": 0.49, + "learning_rate": 5.444227429210062e-08, + "logits/chosen": -1.9644930362701416, + "logits/rejected": -1.9639582633972168, + "logps/chosen": -88.18104553222656, + "logps/rejected": -220.238525390625, + "loss": 1.0764, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6570945978164673, + "rewards/margins": -0.7569206357002258, + "rewards/rejected": 0.09982605278491974, + "step": 8378 + }, + { + "epoch": 0.49, + "learning_rate": 5.4432887457264234e-08, + "logits/chosen": -2.096585512161255, + "logits/rejected": -2.095916271209717, + "logps/chosen": -0.00029395241290330887, + "logps/rejected": -148.2427978515625, + "loss": 0.3578, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.328109637048328e-06, + "rewards/margins": 2.887960195541382, + "rewards/rejected": -2.887965440750122, + "step": 8379 + }, + { + "epoch": 0.49, + "learning_rate": 5.442350046494998e-08, + "logits/chosen": -1.9641550779342651, + "logits/rejected": -1.946420431137085, + "logps/chosen": -166.54428100585938, + "logps/rejected": -353.363525390625, + "loss": 0.05, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.508775472640991, + "rewards/margins": 2.724046468734741, + "rewards/rejected": -0.21527099609375, + "step": 8380 + }, + { + "epoch": 0.49, + "learning_rate": 5.4414113315491364e-08, + "logits/chosen": -1.8202770948410034, + "logits/rejected": -1.81450617313385, + "logps/chosen": -90.71530151367188, + "logps/rejected": -350.02105712890625, + "loss": 0.175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7025527954101562, + "rewards/margins": 6.125529766082764, + "rewards/rejected": -5.422976970672607, + "step": 8381 + }, + { + "epoch": 0.49, + "learning_rate": 5.440472600922185e-08, + "logits/chosen": -1.97784423828125, + "logits/rejected": -1.9716044664382935, + "logps/chosen": -386.6266784667969, + "logps/rejected": -423.2497253417969, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.01141357421875, + "rewards/margins": 0.29053032398223877, + "rewards/rejected": 1.7208832502365112, + "step": 8382 + }, + { + "epoch": 0.49, + "learning_rate": 5.439533854647492e-08, + "logits/chosen": -1.96249258518219, + "logits/rejected": -1.8971035480499268, + "logps/chosen": -115.43510437011719, + "logps/rejected": -278.2659912109375, + "loss": 0.2335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3291336297988892, + "rewards/margins": 1.1838425397872925, + "rewards/rejected": 0.14529113471508026, + "step": 8383 + }, + { + "epoch": 0.49, + "learning_rate": 5.438595092758406e-08, + "logits/chosen": -2.002239227294922, + "logits/rejected": -2.0380167961120605, + "logps/chosen": -187.04823303222656, + "logps/rejected": -380.80633544921875, + "loss": 0.0557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.870143175125122, + "rewards/margins": 3.1357192993164062, + "rewards/rejected": -1.2655762434005737, + "step": 8384 + }, + { + "epoch": 0.49, + "learning_rate": 5.4376563152882784e-08, + "logits/chosen": -1.8505748510360718, + "logits/rejected": -1.8477402925491333, + "logps/chosen": -0.4944928288459778, + "logps/rejected": -84.85912322998047, + "loss": 0.5793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023832321166992188, + "rewards/margins": 0.3651147782802582, + "rewards/rejected": -0.3674980103969574, + "step": 8385 + }, + { + "epoch": 0.49, + "learning_rate": 5.4367175222704555e-08, + "logits/chosen": -1.9134840965270996, + "logits/rejected": -1.9200915098190308, + "logps/chosen": -167.78936767578125, + "logps/rejected": -357.8394775390625, + "loss": 0.0836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5264190435409546, + "rewards/margins": 2.8464598655700684, + "rewards/rejected": -1.3200409412384033, + "step": 8386 + }, + { + "epoch": 0.49, + "learning_rate": 5.435778713738292e-08, + "logits/chosen": -1.8571897745132446, + "logits/rejected": -1.8902987241744995, + "logps/chosen": -164.71612548828125, + "logps/rejected": -329.1036071777344, + "loss": 0.1636, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5491760969161987, + "rewards/margins": 1.2877472639083862, + "rewards/rejected": 0.2614288330078125, + "step": 8387 + }, + { + "epoch": 0.49, + "learning_rate": 5.4348398897251346e-08, + "logits/chosen": -2.1066014766693115, + "logits/rejected": -2.1076855659484863, + "logps/chosen": -0.0036840394604951143, + "logps/rejected": -245.86509704589844, + "loss": 0.3523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019645162392407656, + "rewards/margins": 4.357712745666504, + "rewards/rejected": -4.355748176574707, + "step": 8388 + }, + { + "epoch": 0.49, + "learning_rate": 5.4339010502643404e-08, + "logits/chosen": -2.0723466873168945, + "logits/rejected": -2.0762476921081543, + "logps/chosen": -0.8001048564910889, + "logps/rejected": -57.454368591308594, + "loss": 0.5319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27605921030044556, + "rewards/margins": 0.43503785133361816, + "rewards/rejected": -0.1589786559343338, + "step": 8389 + }, + { + "epoch": 0.49, + "learning_rate": 5.432962195389256e-08, + "logits/chosen": -1.978092074394226, + "logits/rejected": -1.9701130390167236, + "logps/chosen": -144.9850616455078, + "logps/rejected": -224.72442626953125, + "loss": 0.2051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2311325073242188, + "rewards/margins": 1.4891220331192017, + "rewards/rejected": -0.2579894959926605, + "step": 8390 + }, + { + "epoch": 0.49, + "learning_rate": 5.432023325133237e-08, + "logits/chosen": -1.7514753341674805, + "logits/rejected": -1.7562965154647827, + "logps/chosen": -19.842395782470703, + "logps/rejected": -109.23625183105469, + "loss": 0.6187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3287346065044403, + "rewards/margins": 0.04199448227882385, + "rewards/rejected": 0.28674012422561646, + "step": 8391 + }, + { + "epoch": 0.49, + "learning_rate": 5.4310844395296366e-08, + "logits/chosen": -1.7868589162826538, + "logits/rejected": -1.7599138021469116, + "logps/chosen": -189.8580322265625, + "logps/rejected": -298.98895263671875, + "loss": 0.2588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2021026611328125, + "rewards/margins": 0.5242278575897217, + "rewards/rejected": 1.6778748035430908, + "step": 8392 + }, + { + "epoch": 0.49, + "learning_rate": 5.430145538611808e-08, + "logits/chosen": -1.916722059249878, + "logits/rejected": -1.9208157062530518, + "logps/chosen": -21.79353904724121, + "logps/rejected": -117.42234802246094, + "loss": 0.594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008499718271195889, + "rewards/margins": 0.4546911120414734, + "rewards/rejected": -0.4461914002895355, + "step": 8393 + }, + { + "epoch": 0.49, + "learning_rate": 5.429206622413105e-08, + "logits/chosen": -1.9731745719909668, + "logits/rejected": -1.9509717226028442, + "logps/chosen": -282.83782958984375, + "logps/rejected": -457.9706726074219, + "loss": 0.0445, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0109803676605225, + "rewards/margins": 4.007577896118164, + "rewards/rejected": -1.9965972900390625, + "step": 8394 + }, + { + "epoch": 0.49, + "learning_rate": 5.4282676909668856e-08, + "logits/chosen": -1.8534033298492432, + "logits/rejected": -1.8992093801498413, + "logps/chosen": -239.0858917236328, + "logps/rejected": -359.5055236816406, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.565638780593872, + "rewards/margins": 2.110704183578491, + "rewards/rejected": 0.454934686422348, + "step": 8395 + }, + { + "epoch": 0.49, + "learning_rate": 5.427328744306501e-08, + "logits/chosen": -1.720917820930481, + "logits/rejected": -1.7175383567810059, + "logps/chosen": -264.41571044921875, + "logps/rejected": -445.4199523925781, + "loss": 0.1041, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.928924560546875, + "rewards/margins": 1.7167693376541138, + "rewards/rejected": 1.2121552228927612, + "step": 8396 + }, + { + "epoch": 0.49, + "learning_rate": 5.426389782465309e-08, + "logits/chosen": -1.9939957857131958, + "logits/rejected": -2.002612829208374, + "logps/chosen": -13.71202564239502, + "logps/rejected": -143.8686981201172, + "loss": 0.9427, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.41431865096092224, + "rewards/margins": -0.5443952679634094, + "rewards/rejected": 0.130076602101326, + "step": 8397 + }, + { + "epoch": 0.49, + "learning_rate": 5.4254508054766676e-08, + "logits/chosen": -1.8480114936828613, + "logits/rejected": -1.8478813171386719, + "logps/chosen": -22.122652053833008, + "logps/rejected": -82.32089233398438, + "loss": 0.4255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4770444929599762, + "rewards/margins": 0.5694761276245117, + "rewards/rejected": -0.09243164211511612, + "step": 8398 + }, + { + "epoch": 0.49, + "learning_rate": 5.424511813373932e-08, + "logits/chosen": -1.7975112199783325, + "logits/rejected": -1.8025611639022827, + "logps/chosen": -212.2431640625, + "logps/rejected": -487.41375732421875, + "loss": 0.0964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5736786127090454, + "rewards/margins": 2.6416258811950684, + "rewards/rejected": -1.0679473876953125, + "step": 8399 + }, + { + "epoch": 0.49, + "learning_rate": 5.4235728061904605e-08, + "logits/chosen": -1.9893081188201904, + "logits/rejected": -1.9797390699386597, + "logps/chosen": -123.18103790283203, + "logps/rejected": -255.5266571044922, + "loss": 0.1769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9769920706748962, + "rewards/margins": 2.514446258544922, + "rewards/rejected": -1.5374542474746704, + "step": 8400 + }, + { + "epoch": 0.49, + "learning_rate": 5.42263378395961e-08, + "logits/chosen": -1.8851107358932495, + "logits/rejected": -1.8824512958526611, + "logps/chosen": -167.6046142578125, + "logps/rejected": -305.7379150390625, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0230743885040283, + "rewards/margins": 1.9708648920059204, + "rewards/rejected": 0.05220947414636612, + "step": 8401 + }, + { + "epoch": 0.49, + "learning_rate": 5.4216947467147413e-08, + "logits/chosen": -1.8844659328460693, + "logits/rejected": -1.8729792833328247, + "logps/chosen": -0.24981701374053955, + "logps/rejected": -150.79672241210938, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0024989009834825993, + "rewards/margins": 2.3469431400299072, + "rewards/rejected": -2.3444442749023438, + "step": 8402 + }, + { + "epoch": 0.49, + "learning_rate": 5.420755694489212e-08, + "logits/chosen": -1.9475010633468628, + "logits/rejected": -1.9446630477905273, + "logps/chosen": -195.5575408935547, + "logps/rejected": -324.1781311035156, + "loss": 0.1844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2563339471817017, + "rewards/margins": 1.533595323562622, + "rewards/rejected": -0.277261346578598, + "step": 8403 + }, + { + "epoch": 0.49, + "learning_rate": 5.4198166273163824e-08, + "logits/chosen": -1.955222249031067, + "logits/rejected": -1.9492478370666504, + "logps/chosen": -0.0006102220504544675, + "logps/rejected": -215.9190673828125, + "loss": 0.3498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005743509973399341, + "rewards/margins": 4.9863810539245605, + "rewards/rejected": -4.985806465148926, + "step": 8404 + }, + { + "epoch": 0.49, + "learning_rate": 5.418877545229612e-08, + "logits/chosen": -1.9765206575393677, + "logits/rejected": -1.9698636531829834, + "logps/chosen": -39.550987243652344, + "logps/rejected": -258.1617126464844, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4859832525253296, + "rewards/margins": 5.541507244110107, + "rewards/rejected": -4.055523872375488, + "step": 8405 + }, + { + "epoch": 0.49, + "learning_rate": 5.417938448262264e-08, + "logits/chosen": -2.0800604820251465, + "logits/rejected": -2.083066463470459, + "logps/chosen": -8.76213264465332, + "logps/rejected": -75.17548370361328, + "loss": 0.4154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2823346257209778, + "rewards/margins": 1.2166416645050049, + "rewards/rejected": -0.9343070983886719, + "step": 8406 + }, + { + "epoch": 0.49, + "learning_rate": 5.416999336447696e-08, + "logits/chosen": -1.967458963394165, + "logits/rejected": -1.9614242315292358, + "logps/chosen": -33.183902740478516, + "logps/rejected": -241.46463012695312, + "loss": 0.1963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5984855890274048, + "rewards/margins": 3.466308116912842, + "rewards/rejected": -2.8678224086761475, + "step": 8407 + }, + { + "epoch": 0.49, + "learning_rate": 5.4160602098192734e-08, + "logits/chosen": -1.9329861402511597, + "logits/rejected": -1.9175477027893066, + "logps/chosen": -24.902748107910156, + "logps/rejected": -339.7962341308594, + "loss": 0.2679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2318742722272873, + "rewards/margins": 7.832359790802002, + "rewards/rejected": -7.600485324859619, + "step": 8408 + }, + { + "epoch": 0.49, + "learning_rate": 5.415121068410355e-08, + "logits/chosen": -2.007073402404785, + "logits/rejected": -2.0038530826568604, + "logps/chosen": -0.0005941305425949395, + "logps/rejected": -198.2232666015625, + "loss": 0.3647, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7731080990633927e-05, + "rewards/margins": 3.4180281162261963, + "rewards/rejected": -3.418055772781372, + "step": 8409 + }, + { + "epoch": 0.49, + "learning_rate": 5.414181912254308e-08, + "logits/chosen": -1.9768699407577515, + "logits/rejected": -1.9653621912002563, + "logps/chosen": -6.631375312805176, + "logps/rejected": -143.67446899414062, + "loss": 0.4564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.099875308573246, + "rewards/margins": 1.1516010761260986, + "rewards/rejected": -1.0517257452011108, + "step": 8410 + }, + { + "epoch": 0.49, + "learning_rate": 5.413242741384492e-08, + "logits/chosen": -1.7372994422912598, + "logits/rejected": -1.7753864526748657, + "logps/chosen": -311.2679443359375, + "logps/rejected": -452.3019104003906, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.303335666656494, + "rewards/margins": 7.232617378234863, + "rewards/rejected": -3.929281711578369, + "step": 8411 + }, + { + "epoch": 0.49, + "learning_rate": 5.4123035558342714e-08, + "logits/chosen": -2.054277181625366, + "logits/rejected": -2.0423712730407715, + "logps/chosen": -2.465801239013672, + "logps/rejected": -89.40917205810547, + "loss": 0.4738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12001758068799973, + "rewards/margins": 0.9967418313026428, + "rewards/rejected": -0.8767242431640625, + "step": 8412 + }, + { + "epoch": 0.49, + "learning_rate": 5.411364355637012e-08, + "logits/chosen": -2.0638675689697266, + "logits/rejected": -2.0446534156799316, + "logps/chosen": -26.110050201416016, + "logps/rejected": -157.83218383789062, + "loss": 0.4406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5026111602783203, + "rewards/margins": 0.3355029821395874, + "rewards/rejected": 0.16710816323757172, + "step": 8413 + }, + { + "epoch": 0.49, + "learning_rate": 5.4104251408260805e-08, + "logits/chosen": -2.1807682514190674, + "logits/rejected": -2.177192211151123, + "logps/chosen": -6.759056122973561e-05, + "logps/rejected": -210.38873291015625, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.588102799767512e-06, + "rewards/margins": 6.342453956604004, + "rewards/rejected": -6.3424577713012695, + "step": 8414 + }, + { + "epoch": 0.49, + "learning_rate": 5.409485911434839e-08, + "logits/chosen": -2.0037219524383545, + "logits/rejected": -1.9468779563903809, + "logps/chosen": -430.62445068359375, + "logps/rejected": -559.976806640625, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.197561740875244, + "rewards/margins": 4.6123809814453125, + "rewards/rejected": -0.4148193299770355, + "step": 8415 + }, + { + "epoch": 0.49, + "learning_rate": 5.408546667496656e-08, + "logits/chosen": -2.0548293590545654, + "logits/rejected": -2.014752149581909, + "logps/chosen": -259.0548400878906, + "logps/rejected": -369.626953125, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5041961669921875, + "rewards/margins": 0.04739689826965332, + "rewards/rejected": 2.456799268722534, + "step": 8416 + }, + { + "epoch": 0.49, + "learning_rate": 5.407607409044896e-08, + "logits/chosen": -1.8934935331344604, + "logits/rejected": -1.8922977447509766, + "logps/chosen": -199.4532012939453, + "logps/rejected": -468.1591491699219, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.067858934402466, + "rewards/margins": 7.875128746032715, + "rewards/rejected": -4.80726957321167, + "step": 8417 + }, + { + "epoch": 0.49, + "learning_rate": 5.4066681361129266e-08, + "logits/chosen": -1.927388310432434, + "logits/rejected": -1.925452470779419, + "logps/chosen": -6.38617467880249, + "logps/rejected": -120.8585205078125, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0004987716674804688, + "rewards/margins": 4.423381805419922, + "rewards/rejected": -4.422883033752441, + "step": 8418 + }, + { + "epoch": 0.49, + "learning_rate": 5.4057288487341165e-08, + "logits/chosen": -1.6691666841506958, + "logits/rejected": -1.6659561395645142, + "logps/chosen": -73.58528137207031, + "logps/rejected": -274.93756103515625, + "loss": 0.1642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9281204342842102, + "rewards/margins": 3.3278794288635254, + "rewards/rejected": -2.39975905418396, + "step": 8419 + }, + { + "epoch": 0.49, + "learning_rate": 5.404789546941834e-08, + "logits/chosen": -1.939351201057434, + "logits/rejected": -1.9938483238220215, + "logps/chosen": -194.36463928222656, + "logps/rejected": -421.87030029296875, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9707382917404175, + "rewards/margins": 3.1104965209960938, + "rewards/rejected": -1.1397583484649658, + "step": 8420 + }, + { + "epoch": 0.49, + "learning_rate": 5.403850230769446e-08, + "logits/chosen": -2.0519826412200928, + "logits/rejected": -2.047783613204956, + "logps/chosen": -0.00014877031208015978, + "logps/rejected": -79.19872283935547, + "loss": 0.7137, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7962276615435258e-05, + "rewards/margins": -0.0808185264468193, + "rewards/rejected": 0.08083648979663849, + "step": 8421 + }, + { + "epoch": 0.49, + "learning_rate": 5.402910900250321e-08, + "logits/chosen": -2.0491206645965576, + "logits/rejected": -2.0411570072174072, + "logps/chosen": -40.31205749511719, + "logps/rejected": -296.1781005859375, + "loss": 0.0861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1788567304611206, + "rewards/margins": 5.010692596435547, + "rewards/rejected": -3.831835985183716, + "step": 8422 + }, + { + "epoch": 0.49, + "learning_rate": 5.401971555417832e-08, + "logits/chosen": -1.9704957008361816, + "logits/rejected": -1.9755818843841553, + "logps/chosen": -182.88162231445312, + "logps/rejected": -305.6015930175781, + "loss": 0.1103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.468695044517517, + "rewards/margins": 2.1809723377227783, + "rewards/rejected": -0.7122772336006165, + "step": 8423 + }, + { + "epoch": 0.49, + "learning_rate": 5.4010321963053464e-08, + "logits/chosen": -1.8244438171386719, + "logits/rejected": -1.7973488569259644, + "logps/chosen": -209.05328369140625, + "logps/rejected": -260.93475341796875, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0586060285568237, + "rewards/margins": 0.9254761338233948, + "rewards/rejected": 0.13312987983226776, + "step": 8424 + }, + { + "epoch": 0.49, + "learning_rate": 5.400092822946235e-08, + "logits/chosen": -1.8992291688919067, + "logits/rejected": -1.9191392660140991, + "logps/chosen": -137.51907348632812, + "logps/rejected": -347.57147216796875, + "loss": 0.1543, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.320355176925659, + "rewards/margins": 1.2380950450897217, + "rewards/rejected": 1.0822601318359375, + "step": 8425 + }, + { + "epoch": 0.49, + "learning_rate": 5.39915343537387e-08, + "logits/chosen": -1.8699774742126465, + "logits/rejected": -1.8616571426391602, + "logps/chosen": -187.78631591796875, + "logps/rejected": -405.1649475097656, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6673524379730225, + "rewards/margins": 3.570993185043335, + "rewards/rejected": -0.9036407470703125, + "step": 8426 + }, + { + "epoch": 0.49, + "learning_rate": 5.398214033621622e-08, + "logits/chosen": -2.087235927581787, + "logits/rejected": -2.0844411849975586, + "logps/chosen": -49.12102508544922, + "logps/rejected": -193.11239624023438, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3695968687534332, + "rewards/margins": 1.6799774169921875, + "rewards/rejected": -1.3103805780410767, + "step": 8427 + }, + { + "epoch": 0.49, + "learning_rate": 5.397274617722864e-08, + "logits/chosen": -2.0121731758117676, + "logits/rejected": -2.0159289836883545, + "logps/chosen": -47.991119384765625, + "logps/rejected": -182.98924255371094, + "loss": 0.4576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0407867431640625, + "rewards/margins": 0.7354934811592102, + "rewards/rejected": -0.7762802243232727, + "step": 8428 + }, + { + "epoch": 0.49, + "learning_rate": 5.3963351877109685e-08, + "logits/chosen": -2.104680299758911, + "logits/rejected": -2.0976967811584473, + "logps/chosen": -4.708733831648715e-05, + "logps/rejected": -167.95892333984375, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6093101748992922e-06, + "rewards/margins": 4.408199310302734, + "rewards/rejected": -4.408200740814209, + "step": 8429 + }, + { + "epoch": 0.49, + "learning_rate": 5.3953957436193074e-08, + "logits/chosen": -2.0833544731140137, + "logits/rejected": -2.0765507221221924, + "logps/chosen": -65.1484375, + "logps/rejected": -265.17584228515625, + "loss": 0.6243, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2208290100097656, + "rewards/margins": 1.9984321594238281, + "rewards/rejected": -3.2192611694335938, + "step": 8430 + }, + { + "epoch": 0.49, + "learning_rate": 5.394456285481257e-08, + "logits/chosen": -1.954829216003418, + "logits/rejected": -1.9460489749908447, + "logps/chosen": -178.76419067382812, + "logps/rejected": -268.4829406738281, + "loss": 0.155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.379974365234375, + "rewards/margins": 2.0747954845428467, + "rewards/rejected": -0.6948211789131165, + "step": 8431 + }, + { + "epoch": 0.49, + "learning_rate": 5.393516813330189e-08, + "logits/chosen": -1.7861745357513428, + "logits/rejected": -1.774787425994873, + "logps/chosen": -273.4212951660156, + "logps/rejected": -431.0179443359375, + "loss": 0.0944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.297204613685608, + "rewards/margins": 4.89984130859375, + "rewards/rejected": -3.6026368141174316, + "step": 8432 + }, + { + "epoch": 0.49, + "learning_rate": 5.392577327199478e-08, + "logits/chosen": -1.8195927143096924, + "logits/rejected": -1.808589220046997, + "logps/chosen": -226.00244140625, + "logps/rejected": -306.27294921875, + "loss": 0.5935, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9002426266670227, + "rewards/margins": -0.404731810092926, + "rewards/rejected": 1.3049744367599487, + "step": 8433 + }, + { + "epoch": 0.49, + "learning_rate": 5.3916378271225005e-08, + "logits/chosen": -2.0844602584838867, + "logits/rejected": -2.079263925552368, + "logps/chosen": -27.28142738342285, + "logps/rejected": -172.46514892578125, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0959373489022255, + "rewards/margins": 2.401465654373169, + "rewards/rejected": -2.305528402328491, + "step": 8434 + }, + { + "epoch": 0.49, + "learning_rate": 5.3906983131326336e-08, + "logits/chosen": -2.151110887527466, + "logits/rejected": -2.165027618408203, + "logps/chosen": -155.37123107910156, + "logps/rejected": -138.91966247558594, + "loss": 0.9317, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6997787356376648, + "rewards/margins": -0.30059659481048584, + "rewards/rejected": -0.39918214082717896, + "step": 8435 + }, + { + "epoch": 0.49, + "learning_rate": 5.3897587852632485e-08, + "logits/chosen": -2.030827045440674, + "logits/rejected": -2.079636812210083, + "logps/chosen": -129.28271484375, + "logps/rejected": -221.89028930664062, + "loss": 0.3566, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.775671362876892, + "rewards/margins": 0.1360992193222046, + "rewards/rejected": 1.6395721435546875, + "step": 8436 + }, + { + "epoch": 0.49, + "learning_rate": 5.3888192435477275e-08, + "logits/chosen": -2.0606374740600586, + "logits/rejected": -2.0578036308288574, + "logps/chosen": -6.395465850830078, + "logps/rejected": -198.0663299560547, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5225297808647156, + "rewards/margins": 1.9522275924682617, + "rewards/rejected": -2.474757432937622, + "step": 8437 + }, + { + "epoch": 0.49, + "learning_rate": 5.387879688019443e-08, + "logits/chosen": -1.9002234935760498, + "logits/rejected": -1.901132345199585, + "logps/chosen": -35.44108963012695, + "logps/rejected": -304.98828125, + "loss": 0.174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7689453363418579, + "rewards/margins": 3.2522826194763184, + "rewards/rejected": -2.48333740234375, + "step": 8438 + }, + { + "epoch": 0.49, + "learning_rate": 5.386940118711776e-08, + "logits/chosen": -1.7490428686141968, + "logits/rejected": -1.7492501735687256, + "logps/chosen": -80.16635131835938, + "logps/rejected": -391.5647277832031, + "loss": 0.1346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7841400504112244, + "rewards/margins": 3.0757081508636475, + "rewards/rejected": -2.2915680408477783, + "step": 8439 + }, + { + "epoch": 0.49, + "learning_rate": 5.386000535658103e-08, + "logits/chosen": -1.8258028030395508, + "logits/rejected": -1.8293135166168213, + "logps/chosen": -10.009239196777344, + "logps/rejected": -171.5973663330078, + "loss": 0.6704, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08734454959630966, + "rewards/margins": -0.07346173375844955, + "rewards/rejected": 0.16080628335475922, + "step": 8440 + }, + { + "epoch": 0.49, + "learning_rate": 5.3850609388918025e-08, + "logits/chosen": -1.9015967845916748, + "logits/rejected": -1.8848460912704468, + "logps/chosen": -249.572509765625, + "logps/rejected": -424.3223571777344, + "loss": 0.163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9735840559005737, + "rewards/margins": 1.4033234119415283, + "rewards/rejected": 0.5702606439590454, + "step": 8441 + }, + { + "epoch": 0.49, + "learning_rate": 5.384121328446254e-08, + "logits/chosen": -1.8663792610168457, + "logits/rejected": -1.8656675815582275, + "logps/chosen": -17.31482696533203, + "logps/rejected": -127.67932891845703, + "loss": 0.4657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20726585388183594, + "rewards/margins": 0.9017101526260376, + "rewards/rejected": -0.6944442987442017, + "step": 8442 + }, + { + "epoch": 0.49, + "learning_rate": 5.3831817043548366e-08, + "logits/chosen": -1.8454378843307495, + "logits/rejected": -1.849118709564209, + "logps/chosen": -198.95217895507812, + "logps/rejected": -424.757080078125, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9268372058868408, + "rewards/margins": 4.291644096374512, + "rewards/rejected": -2.36480712890625, + "step": 8443 + }, + { + "epoch": 0.49, + "learning_rate": 5.3822420666509314e-08, + "logits/chosen": -1.90293550491333, + "logits/rejected": -1.9021960496902466, + "logps/chosen": -48.89712905883789, + "logps/rejected": -107.90867614746094, + "loss": 0.4024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04394874721765518, + "rewards/margins": 1.6692981719970703, + "rewards/rejected": -1.6253494024276733, + "step": 8444 + }, + { + "epoch": 0.49, + "learning_rate": 5.381302415367919e-08, + "logits/chosen": -1.9077314138412476, + "logits/rejected": -1.9001933336257935, + "logps/chosen": -68.45321655273438, + "logps/rejected": -407.6341552734375, + "loss": 0.3972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23142166435718536, + "rewards/margins": 9.010290145874023, + "rewards/rejected": -9.241711616516113, + "step": 8445 + }, + { + "epoch": 0.49, + "learning_rate": 5.380362750539178e-08, + "logits/chosen": -1.7864186763763428, + "logits/rejected": -1.7640286684036255, + "logps/chosen": -220.59730529785156, + "logps/rejected": -412.08740234375, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0118240118026733, + "rewards/margins": 0.7197097539901733, + "rewards/rejected": 0.2921142578125, + "step": 8446 + }, + { + "epoch": 0.49, + "learning_rate": 5.3794230721980926e-08, + "logits/chosen": -1.8093101978302002, + "logits/rejected": -1.8100250959396362, + "logps/chosen": -1.2063915729522705, + "logps/rejected": -205.58938598632812, + "loss": 0.3829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0506049208343029, + "rewards/margins": 2.5348825454711914, + "rewards/rejected": -2.5854873657226562, + "step": 8447 + }, + { + "epoch": 0.49, + "learning_rate": 5.378483380378044e-08, + "logits/chosen": -1.990056037902832, + "logits/rejected": -1.9899531602859497, + "logps/chosen": -62.21459197998047, + "logps/rejected": -240.81668090820312, + "loss": 0.3697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01880645751953125, + "rewards/margins": 3.7107865810394287, + "rewards/rejected": -3.72959303855896, + "step": 8448 + }, + { + "epoch": 0.49, + "learning_rate": 5.3775436751124125e-08, + "logits/chosen": -1.8590550422668457, + "logits/rejected": -1.8695037364959717, + "logps/chosen": -165.8101348876953, + "logps/rejected": -454.6795654296875, + "loss": 0.0797, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.19425368309021, + "rewards/margins": 2.1195313930511475, + "rewards/rejected": 0.0747222900390625, + "step": 8449 + }, + { + "epoch": 0.49, + "learning_rate": 5.376603956434584e-08, + "logits/chosen": -1.8961198329925537, + "logits/rejected": -1.8992630243301392, + "logps/chosen": -21.68999671936035, + "logps/rejected": -173.8168182373047, + "loss": 0.5996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30812588334083557, + "rewards/margins": 1.0524729490280151, + "rewards/rejected": -1.3605988025665283, + "step": 8450 + }, + { + "epoch": 0.49, + "learning_rate": 5.375664224377942e-08, + "logits/chosen": -1.9258946180343628, + "logits/rejected": -1.927289366722107, + "logps/chosen": -18.072765350341797, + "logps/rejected": -58.291847229003906, + "loss": 0.517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02183246612548828, + "rewards/margins": 0.890877366065979, + "rewards/rejected": -0.9127098321914673, + "step": 8451 + }, + { + "epoch": 0.49, + "learning_rate": 5.374724478975868e-08, + "logits/chosen": -1.8673410415649414, + "logits/rejected": -1.7928850650787354, + "logps/chosen": -135.2176513671875, + "logps/rejected": -281.0491943359375, + "loss": 0.5371, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.4581879377365112, + "rewards/margins": -0.2958221435546875, + "rewards/rejected": 1.7540100812911987, + "step": 8452 + }, + { + "epoch": 0.49, + "learning_rate": 5.373784720261747e-08, + "logits/chosen": -2.1346347332000732, + "logits/rejected": -2.122401714324951, + "logps/chosen": -53.64699935913086, + "logps/rejected": -336.9998779296875, + "loss": 0.3878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18365059792995453, + "rewards/margins": 7.119575500488281, + "rewards/rejected": -7.303225994110107, + "step": 8453 + }, + { + "epoch": 0.49, + "learning_rate": 5.3728449482689643e-08, + "logits/chosen": -1.8833945989608765, + "logits/rejected": -1.8680763244628906, + "logps/chosen": -146.59536743164062, + "logps/rejected": -553.4234619140625, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.841516137123108, + "rewards/margins": 5.280261516571045, + "rewards/rejected": -3.4387452602386475, + "step": 8454 + }, + { + "epoch": 0.49, + "learning_rate": 5.371905163030905e-08, + "logits/chosen": -2.0553183555603027, + "logits/rejected": -2.0354933738708496, + "logps/chosen": -117.37443542480469, + "logps/rejected": -336.1198425292969, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0831695795059204, + "rewards/margins": 3.5475192070007324, + "rewards/rejected": -2.4643495082855225, + "step": 8455 + }, + { + "epoch": 0.49, + "learning_rate": 5.370965364580957e-08, + "logits/chosen": -1.9202792644500732, + "logits/rejected": -1.9119590520858765, + "logps/chosen": -21.724693298339844, + "logps/rejected": -175.0182647705078, + "loss": 0.4412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13387051224708557, + "rewards/margins": 2.209310293197632, + "rewards/rejected": -2.3431808948516846, + "step": 8456 + }, + { + "epoch": 0.49, + "learning_rate": 5.370025552952504e-08, + "logits/chosen": -1.919873595237732, + "logits/rejected": -1.8955626487731934, + "logps/chosen": -179.87950134277344, + "logps/rejected": -239.9463653564453, + "loss": 0.1346, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.903265357017517, + "rewards/margins": 1.8091049194335938, + "rewards/rejected": 0.09416045993566513, + "step": 8457 + }, + { + "epoch": 0.49, + "learning_rate": 5.369085728178934e-08, + "logits/chosen": -2.0242855548858643, + "logits/rejected": -2.0411853790283203, + "logps/chosen": -221.23458862304688, + "logps/rejected": -365.69842529296875, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.991624593734741, + "rewards/margins": 2.8111650943756104, + "rewards/rejected": 0.18045960366725922, + "step": 8458 + }, + { + "epoch": 0.49, + "learning_rate": 5.368145890293632e-08, + "logits/chosen": -1.7988120317459106, + "logits/rejected": -1.8029948472976685, + "logps/chosen": -267.3890686035156, + "logps/rejected": -348.123291015625, + "loss": 0.2308, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.194171190261841, + "rewards/margins": 0.8069397211074829, + "rewards/rejected": 1.387231469154358, + "step": 8459 + }, + { + "epoch": 0.49, + "learning_rate": 5.367206039329987e-08, + "logits/chosen": -1.9213461875915527, + "logits/rejected": -1.9205574989318848, + "logps/chosen": -197.88551330566406, + "logps/rejected": -385.7358703613281, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.284916639328003, + "rewards/margins": 3.4168899059295654, + "rewards/rejected": -1.1319732666015625, + "step": 8460 + }, + { + "epoch": 0.49, + "learning_rate": 5.366266175321388e-08, + "logits/chosen": -2.0271339416503906, + "logits/rejected": -2.025447368621826, + "logps/chosen": -0.15858490765094757, + "logps/rejected": -107.86470794677734, + "loss": 0.4358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005425174720585346, + "rewards/margins": 1.4957786798477173, + "rewards/rejected": -1.4903534650802612, + "step": 8461 + }, + { + "epoch": 0.49, + "learning_rate": 5.3653262983012223e-08, + "logits/chosen": -1.9058634042739868, + "logits/rejected": -1.9132235050201416, + "logps/chosen": -61.71603012084961, + "logps/rejected": -163.66366577148438, + "loss": 0.4319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41865044832229614, + "rewards/margins": 0.6182445883750916, + "rewards/rejected": -0.19959412515163422, + "step": 8462 + }, + { + "epoch": 0.49, + "learning_rate": 5.364386408302881e-08, + "logits/chosen": -2.1791932582855225, + "logits/rejected": -2.1635794639587402, + "logps/chosen": -86.2542953491211, + "logps/rejected": -321.505859375, + "loss": 0.1103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2336418628692627, + "rewards/margins": 6.969063758850098, + "rewards/rejected": -5.735421657562256, + "step": 8463 + }, + { + "epoch": 0.49, + "learning_rate": 5.3634465053597514e-08, + "logits/chosen": -1.8432058095932007, + "logits/rejected": -1.8486913442611694, + "logps/chosen": -261.8314514160156, + "logps/rejected": -328.7819519042969, + "loss": 0.2332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5619598627090454, + "rewards/margins": 0.6675812005996704, + "rewards/rejected": 0.894378662109375, + "step": 8464 + }, + { + "epoch": 0.49, + "learning_rate": 5.362506589505225e-08, + "logits/chosen": -1.9391900300979614, + "logits/rejected": -1.9394830465316772, + "logps/chosen": -0.00011574797099456191, + "logps/rejected": -272.61212158203125, + "loss": 0.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3363215859717457e-06, + "rewards/margins": 3.821326494216919, + "rewards/rejected": -3.82132887840271, + "step": 8465 + }, + { + "epoch": 0.49, + "learning_rate": 5.3615666607726884e-08, + "logits/chosen": -1.8895350694656372, + "logits/rejected": -1.8877933025360107, + "logps/chosen": -0.05668230354785919, + "logps/rejected": -196.58303833007812, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009221130982041359, + "rewards/margins": 3.0123414993286133, + "rewards/rejected": -3.0031204223632812, + "step": 8466 + }, + { + "epoch": 0.49, + "learning_rate": 5.360626719195538e-08, + "logits/chosen": -2.087460517883301, + "logits/rejected": -2.0858919620513916, + "logps/chosen": -4.61361837387085, + "logps/rejected": -181.26853942871094, + "loss": 0.4168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04007992893457413, + "rewards/margins": 1.8835324048995972, + "rewards/rejected": -1.8434524536132812, + "step": 8467 + }, + { + "epoch": 0.49, + "learning_rate": 5.359686764807162e-08, + "logits/chosen": -2.030306816101074, + "logits/rejected": -2.029017448425293, + "logps/chosen": -27.646846771240234, + "logps/rejected": -153.992431640625, + "loss": 0.4203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21186943352222443, + "rewards/margins": 1.228159785270691, + "rewards/rejected": -1.016290307044983, + "step": 8468 + }, + { + "epoch": 0.49, + "learning_rate": 5.358746797640953e-08, + "logits/chosen": -1.5957751274108887, + "logits/rejected": -1.5867927074432373, + "logps/chosen": -189.9569854736328, + "logps/rejected": -386.1173095703125, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2059768438339233, + "rewards/margins": 0.4545455574989319, + "rewards/rejected": 0.7514312863349915, + "step": 8469 + }, + { + "epoch": 0.49, + "learning_rate": 5.3578068177303015e-08, + "logits/chosen": -2.1084084510803223, + "logits/rejected": -2.1069388389587402, + "logps/chosen": -16.875659942626953, + "logps/rejected": -170.0692596435547, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.062493324279785156, + "rewards/margins": 2.5838449001312256, + "rewards/rejected": -2.5213515758514404, + "step": 8470 + }, + { + "epoch": 0.49, + "learning_rate": 5.356866825108604e-08, + "logits/chosen": -1.858451247215271, + "logits/rejected": -1.8477264642715454, + "logps/chosen": -67.82228088378906, + "logps/rejected": -213.37330627441406, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33504486083984375, + "rewards/margins": 1.8042877912521362, + "rewards/rejected": -1.4692429304122925, + "step": 8471 + }, + { + "epoch": 0.49, + "learning_rate": 5.3559268198092475e-08, + "logits/chosen": -1.7956255674362183, + "logits/rejected": -1.7844151258468628, + "logps/chosen": -0.00021039461717009544, + "logps/rejected": -222.66978454589844, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3587763305622502e-06, + "rewards/margins": 4.26176643371582, + "rewards/rejected": -4.261767864227295, + "step": 8472 + }, + { + "epoch": 0.49, + "learning_rate": 5.354986801865632e-08, + "logits/chosen": -1.9458239078521729, + "logits/rejected": -1.9493846893310547, + "logps/chosen": -171.50381469726562, + "logps/rejected": -297.7393798828125, + "loss": 0.1341, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1376160383224487, + "rewards/margins": 1.9845246076583862, + "rewards/rejected": -0.8469085693359375, + "step": 8473 + }, + { + "epoch": 0.49, + "learning_rate": 5.354046771311148e-08, + "logits/chosen": -1.8838621377944946, + "logits/rejected": -1.8702749013900757, + "logps/chosen": -67.67724609375, + "logps/rejected": -194.0538330078125, + "loss": 0.1559, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.300482153892517, + "rewards/margins": 2.3448028564453125, + "rewards/rejected": -1.0443207025527954, + "step": 8474 + }, + { + "epoch": 0.49, + "learning_rate": 5.3531067281791916e-08, + "logits/chosen": -2.0233213901519775, + "logits/rejected": -2.0164566040039062, + "logps/chosen": -143.92767333984375, + "logps/rejected": -308.1380615234375, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.052978515625, + "rewards/margins": 0.7921935319900513, + "rewards/rejected": 1.2607849836349487, + "step": 8475 + }, + { + "epoch": 0.49, + "learning_rate": 5.352166672503157e-08, + "logits/chosen": -1.9750179052352905, + "logits/rejected": -1.9730534553527832, + "logps/chosen": -3.8261830806732178, + "logps/rejected": -202.89236450195312, + "loss": 0.2605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3404320478439331, + "rewards/margins": 4.2383599281311035, + "rewards/rejected": -3.89792799949646, + "step": 8476 + }, + { + "epoch": 0.49, + "learning_rate": 5.351226604316439e-08, + "logits/chosen": -1.9264367818832397, + "logits/rejected": -1.9256621599197388, + "logps/chosen": -259.89556884765625, + "logps/rejected": -394.7481689453125, + "loss": 0.0951, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0118134021759033, + "rewards/margins": 2.118151903152466, + "rewards/rejected": -0.1063385009765625, + "step": 8477 + }, + { + "epoch": 0.49, + "learning_rate": 5.350286523652434e-08, + "logits/chosen": -2.1049790382385254, + "logits/rejected": -2.088928699493408, + "logps/chosen": -46.28947067260742, + "logps/rejected": -110.73601531982422, + "loss": 0.6707, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21083641052246094, + "rewards/margins": -0.032571032643318176, + "rewards/rejected": 0.2434074431657791, + "step": 8478 + }, + { + "epoch": 0.49, + "learning_rate": 5.349346430544539e-08, + "logits/chosen": -2.095479965209961, + "logits/rejected": -2.0965678691864014, + "logps/chosen": -0.01661091111600399, + "logps/rejected": -70.78821563720703, + "loss": 0.3804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016567254438996315, + "rewards/margins": 2.538990020751953, + "rewards/rejected": -2.5224227905273438, + "step": 8479 + }, + { + "epoch": 0.49, + "learning_rate": 5.348406325026149e-08, + "logits/chosen": -2.0456342697143555, + "logits/rejected": -2.0519895553588867, + "logps/chosen": -88.35711669921875, + "logps/rejected": -233.08978271484375, + "loss": 0.3651, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1365066766738892, + "rewards/margins": 0.34205931425094604, + "rewards/rejected": 0.7944473624229431, + "step": 8480 + }, + { + "epoch": 0.49, + "learning_rate": 5.347466207130662e-08, + "logits/chosen": -2.0642895698547363, + "logits/rejected": -2.0697147846221924, + "logps/chosen": -76.72309112548828, + "logps/rejected": -133.1192169189453, + "loss": 0.5716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004855346865952015, + "rewards/margins": 0.34189149737358093, + "rewards/rejected": -0.3467468321323395, + "step": 8481 + }, + { + "epoch": 0.49, + "learning_rate": 5.3465260768914764e-08, + "logits/chosen": -1.9665443897247314, + "logits/rejected": -1.962976336479187, + "logps/chosen": -22.726417541503906, + "logps/rejected": -131.94052124023438, + "loss": 0.5785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3005758225917816, + "rewards/margins": 0.041304588317871094, + "rewards/rejected": 0.2592712342739105, + "step": 8482 + }, + { + "epoch": 0.49, + "learning_rate": 5.3455859343419884e-08, + "logits/chosen": -2.0205557346343994, + "logits/rejected": -2.001391649246216, + "logps/chosen": -112.48893737792969, + "logps/rejected": -447.5206604003906, + "loss": 0.0821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4831573963165283, + "rewards/margins": 5.528546333312988, + "rewards/rejected": -4.045388698577881, + "step": 8483 + }, + { + "epoch": 0.49, + "learning_rate": 5.3446457795155995e-08, + "logits/chosen": -2.0061991214752197, + "logits/rejected": -2.010591506958008, + "logps/chosen": -5.548244953155518, + "logps/rejected": -140.87062072753906, + "loss": 0.3318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10997571796178818, + "rewards/margins": 2.758040189743042, + "rewards/rejected": -2.648064374923706, + "step": 8484 + }, + { + "epoch": 0.49, + "learning_rate": 5.343705612445705e-08, + "logits/chosen": -1.7740546464920044, + "logits/rejected": -1.7649449110031128, + "logps/chosen": -289.8659362792969, + "logps/rejected": -517.1123046875, + "loss": 0.0725, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.589630126953125, + "rewards/margins": 2.376678466796875, + "rewards/rejected": 0.21295166015625, + "step": 8485 + }, + { + "epoch": 0.49, + "learning_rate": 5.342765433165707e-08, + "logits/chosen": -2.043424606323242, + "logits/rejected": -2.071869134902954, + "logps/chosen": -180.55712890625, + "logps/rejected": -359.5670471191406, + "loss": 0.1356, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.310742139816284, + "rewards/margins": 1.3277678489685059, + "rewards/rejected": 0.9829742312431335, + "step": 8486 + }, + { + "epoch": 0.49, + "learning_rate": 5.341825241709004e-08, + "logits/chosen": -2.137211561203003, + "logits/rejected": -2.1245341300964355, + "logps/chosen": -176.63116455078125, + "logps/rejected": -405.29278564453125, + "loss": 0.7309, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.115193247795105, + "rewards/margins": 4.77139139175415, + "rewards/rejected": -5.886584758758545, + "step": 8487 + }, + { + "epoch": 0.49, + "learning_rate": 5.340885038108997e-08, + "logits/chosen": -1.9129290580749512, + "logits/rejected": -1.8996288776397705, + "logps/chosen": -30.861347198486328, + "logps/rejected": -295.30218505859375, + "loss": 0.3999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3792251646518707, + "rewards/margins": 1.0563491582870483, + "rewards/rejected": -0.6771240234375, + "step": 8488 + }, + { + "epoch": 0.49, + "learning_rate": 5.339944822399084e-08, + "logits/chosen": -1.9252636432647705, + "logits/rejected": -1.8703638315200806, + "logps/chosen": -176.27685546875, + "logps/rejected": -312.5283203125, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4386794567108154, + "rewards/margins": 3.4179580211639404, + "rewards/rejected": -0.979278564453125, + "step": 8489 + }, + { + "epoch": 0.49, + "learning_rate": 5.339004594612669e-08, + "logits/chosen": -1.923610806465149, + "logits/rejected": -2.028508186340332, + "logps/chosen": -245.5728302001953, + "logps/rejected": -299.0667724609375, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6854141354560852, + "rewards/margins": 0.6527267694473267, + "rewards/rejected": 0.03268737718462944, + "step": 8490 + }, + { + "epoch": 0.49, + "learning_rate": 5.338064354783153e-08, + "logits/chosen": -2.0204102993011475, + "logits/rejected": -2.0161209106445312, + "logps/chosen": -50.13136672973633, + "logps/rejected": -124.13248443603516, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9216136932373047, + "rewards/margins": 1.1229000091552734, + "rewards/rejected": -0.20128631591796875, + "step": 8491 + }, + { + "epoch": 0.49, + "learning_rate": 5.3371241029439374e-08, + "logits/chosen": -1.9810972213745117, + "logits/rejected": -1.9699095487594604, + "logps/chosen": -84.43753051757812, + "logps/rejected": -388.39349365234375, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.594287097454071, + "rewards/margins": 3.9913694858551025, + "rewards/rejected": -4.585656642913818, + "step": 8492 + }, + { + "epoch": 0.49, + "learning_rate": 5.336183839128424e-08, + "logits/chosen": -1.7917646169662476, + "logits/rejected": -1.7895532846450806, + "logps/chosen": -0.0003246879787184298, + "logps/rejected": -174.576416015625, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.939940130403556e-07, + "rewards/margins": 2.964287519454956, + "rewards/rejected": -2.9642884731292725, + "step": 8493 + }, + { + "epoch": 0.49, + "learning_rate": 5.335243563370015e-08, + "logits/chosen": -1.7280292510986328, + "logits/rejected": -1.737284541130066, + "logps/chosen": -198.09878540039062, + "logps/rejected": -306.6562194824219, + "loss": 0.2382, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9151763916015625, + "rewards/margins": 0.7130066156387329, + "rewards/rejected": 1.2021697759628296, + "step": 8494 + }, + { + "epoch": 0.49, + "learning_rate": 5.3343032757021165e-08, + "logits/chosen": -2.030972957611084, + "logits/rejected": -2.028707981109619, + "logps/chosen": -159.9155731201172, + "logps/rejected": -163.2000732421875, + "loss": 0.4255, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2766541242599487, + "rewards/margins": -0.0446014404296875, + "rewards/rejected": 1.3212555646896362, + "step": 8495 + }, + { + "epoch": 0.49, + "learning_rate": 5.333362976158131e-08, + "logits/chosen": -1.9860918521881104, + "logits/rejected": -1.989122748374939, + "logps/chosen": -190.82667541503906, + "logps/rejected": -285.5560302734375, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.395703077316284, + "rewards/margins": 2.512286424636841, + "rewards/rejected": -0.11658325046300888, + "step": 8496 + }, + { + "epoch": 0.49, + "learning_rate": 5.332422664771461e-08, + "logits/chosen": -1.9776742458343506, + "logits/rejected": -1.9759235382080078, + "logps/chosen": -20.300809860229492, + "logps/rejected": -158.61827087402344, + "loss": 0.6523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1413906067609787, + "rewards/margins": 0.44153326749801636, + "rewards/rejected": -0.5829238891601562, + "step": 8497 + }, + { + "epoch": 0.49, + "learning_rate": 5.3314823415755115e-08, + "logits/chosen": -1.8680146932601929, + "logits/rejected": -1.8735761642456055, + "logps/chosen": -25.27393913269043, + "logps/rejected": -194.15121459960938, + "loss": 0.4687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2144603729248047, + "rewards/margins": 0.7876049280166626, + "rewards/rejected": -0.5731445550918579, + "step": 8498 + }, + { + "epoch": 0.49, + "learning_rate": 5.330542006603689e-08, + "logits/chosen": -2.0664846897125244, + "logits/rejected": -2.0618388652801514, + "logps/chosen": -0.00040607666596770287, + "logps/rejected": -63.274593353271484, + "loss": 0.4166, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8722850654739887e-05, + "rewards/margins": 1.7103595733642578, + "rewards/rejected": -1.7103782892227173, + "step": 8499 + }, + { + "epoch": 0.49, + "learning_rate": 5.329601659889396e-08, + "logits/chosen": -2.045701026916504, + "logits/rejected": -2.0374720096588135, + "logps/chosen": -49.14974594116211, + "logps/rejected": -235.55227661132812, + "loss": 0.482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38010063767433167, + "rewards/margins": 0.3307720124721527, + "rewards/rejected": 0.04932861402630806, + "step": 8500 + }, + { + "epoch": 0.49, + "learning_rate": 5.328661301466041e-08, + "logits/chosen": -1.7944310903549194, + "logits/rejected": -1.8167680501937866, + "logps/chosen": -219.15830993652344, + "logps/rejected": -352.10125732421875, + "loss": 0.0623, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.626124620437622, + "rewards/margins": 3.1689653396606445, + "rewards/rejected": -1.542840600013733, + "step": 8501 + }, + { + "epoch": 0.49, + "learning_rate": 5.327720931367028e-08, + "logits/chosen": -1.8748315572738647, + "logits/rejected": -1.8448151350021362, + "logps/chosen": -145.10189819335938, + "logps/rejected": -299.9937744140625, + "loss": 0.1828, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.163996934890747, + "rewards/margins": 1.1845566034317017, + "rewards/rejected": 0.9794403314590454, + "step": 8502 + }, + { + "epoch": 0.49, + "learning_rate": 5.3267805496257656e-08, + "logits/chosen": -1.9077423810958862, + "logits/rejected": -1.9014266729354858, + "logps/chosen": -105.46784210205078, + "logps/rejected": -414.83392333984375, + "loss": 0.3516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15523147583007812, + "rewards/margins": 7.817371368408203, + "rewards/rejected": -7.662139892578125, + "step": 8503 + }, + { + "epoch": 0.49, + "learning_rate": 5.3258401562756585e-08, + "logits/chosen": -1.912280797958374, + "logits/rejected": -1.9058361053466797, + "logps/chosen": -159.96871948242188, + "logps/rejected": -195.68228149414062, + "loss": 0.1346, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.264666795730591, + "rewards/margins": 1.5357940196990967, + "rewards/rejected": 0.7288727164268494, + "step": 8504 + }, + { + "epoch": 0.49, + "learning_rate": 5.3248997513501156e-08, + "logits/chosen": -1.8436721563339233, + "logits/rejected": -1.8401849269866943, + "logps/chosen": -11.92049789428711, + "logps/rejected": -159.68667602539062, + "loss": 0.502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12406893074512482, + "rewards/margins": 0.8486355543136597, + "rewards/rejected": -0.724566638469696, + "step": 8505 + }, + { + "epoch": 0.49, + "learning_rate": 5.323959334882545e-08, + "logits/chosen": -2.0638866424560547, + "logits/rejected": -2.0586390495300293, + "logps/chosen": -77.8450698852539, + "logps/rejected": -245.33335876464844, + "loss": 0.1178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4541114568710327, + "rewards/margins": 2.7655539512634277, + "rewards/rejected": -1.3114426136016846, + "step": 8506 + }, + { + "epoch": 0.5, + "learning_rate": 5.323018906906354e-08, + "logits/chosen": -1.8893892765045166, + "logits/rejected": -1.889031171798706, + "logps/chosen": -41.13874816894531, + "logps/rejected": -99.6176528930664, + "loss": 0.5379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22425499558448792, + "rewards/margins": 0.3244182765483856, + "rewards/rejected": -0.10016327351331711, + "step": 8507 + }, + { + "epoch": 0.5, + "learning_rate": 5.32207846745495e-08, + "logits/chosen": -2.040581226348877, + "logits/rejected": -2.0337390899658203, + "logps/chosen": -57.02075958251953, + "logps/rejected": -174.18304443359375, + "loss": 0.5649, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012067794799804688, + "rewards/margins": 0.43870049715042114, + "rewards/rejected": -0.42663270235061646, + "step": 8508 + }, + { + "epoch": 0.5, + "learning_rate": 5.321138016561745e-08, + "logits/chosen": -1.8066834211349487, + "logits/rejected": -1.81757652759552, + "logps/chosen": -177.11524963378906, + "logps/rejected": -289.90484619140625, + "loss": 0.0868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8268111944198608, + "rewards/margins": 2.9366073608398438, + "rewards/rejected": -1.109796166419983, + "step": 8509 + }, + { + "epoch": 0.5, + "learning_rate": 5.320197554260146e-08, + "logits/chosen": -1.9046183824539185, + "logits/rejected": -1.884575605392456, + "logps/chosen": -291.3119201660156, + "logps/rejected": -447.4660949707031, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.994744896888733, + "rewards/margins": 4.96150541305542, + "rewards/rejected": -2.9667603969573975, + "step": 8510 + }, + { + "epoch": 0.5, + "learning_rate": 5.319257080583565e-08, + "logits/chosen": -1.9531266689300537, + "logits/rejected": -1.9463460445404053, + "logps/chosen": -190.550537109375, + "logps/rejected": -334.0012512207031, + "loss": 0.1965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1482025384902954, + "rewards/margins": 1.6040741205215454, + "rewards/rejected": -0.45587158203125, + "step": 8511 + }, + { + "epoch": 0.5, + "learning_rate": 5.318316595565409e-08, + "logits/chosen": -2.079040288925171, + "logits/rejected": -2.0847890377044678, + "logps/chosen": -14.996888160705566, + "logps/rejected": -189.19525146484375, + "loss": 0.3543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14055167138576508, + "rewards/margins": 2.0270872116088867, + "rewards/rejected": -1.88653564453125, + "step": 8512 + }, + { + "epoch": 0.5, + "learning_rate": 5.3173760992390915e-08, + "logits/chosen": -1.9899513721466064, + "logits/rejected": -1.992271900177002, + "logps/chosen": -26.121793746948242, + "logps/rejected": -198.98699951171875, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7362818121910095, + "rewards/margins": 0.879607617855072, + "rewards/rejected": -0.1433258056640625, + "step": 8513 + }, + { + "epoch": 0.5, + "learning_rate": 5.316435591638021e-08, + "logits/chosen": -1.9344509840011597, + "logits/rejected": -1.9371278285980225, + "logps/chosen": -63.9251594543457, + "logps/rejected": -234.35452270507812, + "loss": 0.1609, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1028308868408203, + "rewards/margins": 3.000075340270996, + "rewards/rejected": -1.8972443342208862, + "step": 8514 + }, + { + "epoch": 0.5, + "learning_rate": 5.31549507279561e-08, + "logits/chosen": -1.8750451803207397, + "logits/rejected": -1.8813303709030151, + "logps/chosen": -3.2410826683044434, + "logps/rejected": -195.07012939453125, + "loss": 0.3734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024267887696623802, + "rewards/margins": 3.202047348022461, + "rewards/rejected": -3.2263152599334717, + "step": 8515 + }, + { + "epoch": 0.5, + "learning_rate": 5.314554542745273e-08, + "logits/chosen": -1.8801709413528442, + "logits/rejected": -1.8942017555236816, + "logps/chosen": -238.0338134765625, + "logps/rejected": -478.649169921875, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1613311767578125, + "rewards/margins": 4.3914947509765625, + "rewards/rejected": -2.23016357421875, + "step": 8516 + }, + { + "epoch": 0.5, + "learning_rate": 5.31361400152042e-08, + "logits/chosen": -1.8184465169906616, + "logits/rejected": -1.8175114393234253, + "logps/chosen": -6.672443389892578, + "logps/rejected": -34.784732818603516, + "loss": 0.585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24044542014598846, + "rewards/margins": 0.13782700896263123, + "rewards/rejected": 0.10261841118335724, + "step": 8517 + }, + { + "epoch": 0.5, + "learning_rate": 5.3126734491544626e-08, + "logits/chosen": -1.8443416357040405, + "logits/rejected": -1.8806836605072021, + "logps/chosen": -185.47862243652344, + "logps/rejected": -219.50425720214844, + "loss": 0.1251, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.091172933578491, + "rewards/margins": 1.410446286201477, + "rewards/rejected": 0.6807266473770142, + "step": 8518 + }, + { + "epoch": 0.5, + "learning_rate": 5.3117328856808155e-08, + "logits/chosen": -2.056103467941284, + "logits/rejected": -2.0528314113616943, + "logps/chosen": -9.432568550109863, + "logps/rejected": -92.57685089111328, + "loss": 0.4526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09450855106115341, + "rewards/margins": 0.9089754223823547, + "rewards/rejected": -0.8144668936729431, + "step": 8519 + }, + { + "epoch": 0.5, + "learning_rate": 5.31079231113289e-08, + "logits/chosen": -2.0216174125671387, + "logits/rejected": -2.011411666870117, + "logps/chosen": -0.004848233424127102, + "logps/rejected": -300.41650390625, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029302691109478474, + "rewards/margins": 3.436053514480591, + "rewards/rejected": -3.4363465309143066, + "step": 8520 + }, + { + "epoch": 0.5, + "learning_rate": 5.309851725544102e-08, + "logits/chosen": -1.997202754020691, + "logits/rejected": -1.983586072921753, + "logps/chosen": -88.0271224975586, + "logps/rejected": -368.57843017578125, + "loss": 0.1457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8734931945800781, + "rewards/margins": 3.0325448513031006, + "rewards/rejected": -2.1590516567230225, + "step": 8521 + }, + { + "epoch": 0.5, + "learning_rate": 5.3089111289478663e-08, + "logits/chosen": -1.9825117588043213, + "logits/rejected": -1.9689093828201294, + "logps/chosen": -199.2928466796875, + "logps/rejected": -294.5999755859375, + "loss": 0.3598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31032562255859375, + "rewards/margins": 0.5565353631973267, + "rewards/rejected": -0.24620972573757172, + "step": 8522 + }, + { + "epoch": 0.5, + "learning_rate": 5.307970521377595e-08, + "logits/chosen": -1.9455232620239258, + "logits/rejected": -1.9272669553756714, + "logps/chosen": -259.16217041015625, + "logps/rejected": -436.42608642578125, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2627503871917725, + "rewards/margins": 3.09566068649292, + "rewards/rejected": -0.8329101800918579, + "step": 8523 + }, + { + "epoch": 0.5, + "learning_rate": 5.3070299028667053e-08, + "logits/chosen": -1.5142381191253662, + "logits/rejected": -1.5111584663391113, + "logps/chosen": -0.021437451243400574, + "logps/rejected": -183.17955017089844, + "loss": 0.3539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013092991430312395, + "rewards/margins": 3.2369062900543213, + "rewards/rejected": -3.238215684890747, + "step": 8524 + }, + { + "epoch": 0.5, + "learning_rate": 5.3060892734486104e-08, + "logits/chosen": -2.0024163722991943, + "logits/rejected": -2.0006608963012695, + "logps/chosen": -51.8254280090332, + "logps/rejected": -216.25332641601562, + "loss": 0.2602, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0039509534835815, + "rewards/margins": 1.3115284442901611, + "rewards/rejected": -0.307577520608902, + "step": 8525 + }, + { + "epoch": 0.5, + "learning_rate": 5.305148633156729e-08, + "logits/chosen": -2.1075661182403564, + "logits/rejected": -2.1108081340789795, + "logps/chosen": -133.63246154785156, + "logps/rejected": -214.30313110351562, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11162872612476349, + "rewards/margins": 1.1764724254608154, + "rewards/rejected": -1.2881011962890625, + "step": 8526 + }, + { + "epoch": 0.5, + "learning_rate": 5.304207982024473e-08, + "logits/chosen": -1.887728214263916, + "logits/rejected": -1.8836530447006226, + "logps/chosen": -161.4120330810547, + "logps/rejected": -393.7107849121094, + "loss": 0.2194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5263931155204773, + "rewards/margins": 1.7179367542266846, + "rewards/rejected": -1.1915435791015625, + "step": 8527 + }, + { + "epoch": 0.5, + "learning_rate": 5.303267320085264e-08, + "logits/chosen": -2.0424535274505615, + "logits/rejected": -2.0350921154022217, + "logps/chosen": -12.646981239318848, + "logps/rejected": -112.61372375488281, + "loss": 0.3969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42390814423561096, + "rewards/margins": 0.8787940144538879, + "rewards/rejected": -0.454885870218277, + "step": 8528 + }, + { + "epoch": 0.5, + "learning_rate": 5.302326647372515e-08, + "logits/chosen": -1.7787530422210693, + "logits/rejected": -1.7805567979812622, + "logps/chosen": -305.6272277832031, + "logps/rejected": -399.95562744140625, + "loss": 0.1329, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.605734348297119, + "rewards/margins": 1.5966004133224487, + "rewards/rejected": 1.0091339349746704, + "step": 8529 + }, + { + "epoch": 0.5, + "learning_rate": 5.3013859639196445e-08, + "logits/chosen": -1.9112789630889893, + "logits/rejected": -1.898949384689331, + "logps/chosen": -22.36642837524414, + "logps/rejected": -288.02056884765625, + "loss": 0.173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6852657198905945, + "rewards/margins": 4.313216686248779, + "rewards/rejected": -3.627951145172119, + "step": 8530 + }, + { + "epoch": 0.5, + "learning_rate": 5.300445269760071e-08, + "logits/chosen": -1.8237913846969604, + "logits/rejected": -1.807507038116455, + "logps/chosen": -0.00016998568025883287, + "logps/rejected": -181.7355499267578, + "loss": 0.3775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00010105121327796951, + "rewards/margins": 2.7459218502044678, + "rewards/rejected": -2.7458207607269287, + "step": 8531 + }, + { + "epoch": 0.5, + "learning_rate": 5.299504564927212e-08, + "logits/chosen": -2.1065821647644043, + "logits/rejected": -2.0749523639678955, + "logps/chosen": -48.150840759277344, + "logps/rejected": -392.628173828125, + "loss": 0.1503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8961227536201477, + "rewards/margins": 5.987974643707275, + "rewards/rejected": -5.091851711273193, + "step": 8532 + }, + { + "epoch": 0.5, + "learning_rate": 5.298563849454485e-08, + "logits/chosen": -1.844308853149414, + "logits/rejected": -1.836399793624878, + "logps/chosen": -219.6893768310547, + "logps/rejected": -418.4086608886719, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6785507202148438, + "rewards/margins": 3.5496628284454346, + "rewards/rejected": -1.8711121082305908, + "step": 8533 + }, + { + "epoch": 0.5, + "learning_rate": 5.2976231233753097e-08, + "logits/chosen": -1.937214732170105, + "logits/rejected": -1.926016926765442, + "logps/chosen": -20.777597427368164, + "logps/rejected": -150.3258056640625, + "loss": 0.2612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4732856750488281, + "rewards/margins": 2.8094940185546875, + "rewards/rejected": -2.3362083435058594, + "step": 8534 + }, + { + "epoch": 0.5, + "learning_rate": 5.296682386723105e-08, + "logits/chosen": -1.8264771699905396, + "logits/rejected": -1.720223307609558, + "logps/chosen": -325.44940185546875, + "logps/rejected": -742.1357421875, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1351776123046875, + "rewards/margins": 5.027560234069824, + "rewards/rejected": -3.892382860183716, + "step": 8535 + }, + { + "epoch": 0.5, + "learning_rate": 5.295741639531291e-08, + "logits/chosen": -1.8572813272476196, + "logits/rejected": -1.8580405712127686, + "logps/chosen": -140.3717498779297, + "logps/rejected": -218.3946075439453, + "loss": 0.269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2505050897598267, + "rewards/margins": 0.8583648800849915, + "rewards/rejected": 0.3921402096748352, + "step": 8536 + }, + { + "epoch": 0.5, + "learning_rate": 5.2948008818332865e-08, + "logits/chosen": -1.9940801858901978, + "logits/rejected": -2.013051748275757, + "logps/chosen": -260.1283264160156, + "logps/rejected": -532.1419067382812, + "loss": 0.1068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4541138410568237, + "rewards/margins": 2.0164246559143066, + "rewards/rejected": -0.5623108148574829, + "step": 8537 + }, + { + "epoch": 0.5, + "learning_rate": 5.293860113662515e-08, + "logits/chosen": -2.0301506519317627, + "logits/rejected": -2.006727933883667, + "logps/chosen": -156.5091552734375, + "logps/rejected": -324.54766845703125, + "loss": 0.1835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.809722900390625, + "rewards/margins": 1.2732818126678467, + "rewards/rejected": 0.5364410281181335, + "step": 8538 + }, + { + "epoch": 0.5, + "learning_rate": 5.292919335052393e-08, + "logits/chosen": -2.1180145740509033, + "logits/rejected": -2.1115634441375732, + "logps/chosen": -23.562620162963867, + "logps/rejected": -137.02706909179688, + "loss": 0.2717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.962776780128479, + "rewards/margins": 1.2657980918884277, + "rewards/rejected": -0.30302125215530396, + "step": 8539 + }, + { + "epoch": 0.5, + "learning_rate": 5.291978546036344e-08, + "logits/chosen": -1.7245241403579712, + "logits/rejected": -1.7381768226623535, + "logps/chosen": -198.2386474609375, + "logps/rejected": -465.98663330078125, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9335052967071533, + "rewards/margins": 4.768246650695801, + "rewards/rejected": -2.8347413539886475, + "step": 8540 + }, + { + "epoch": 0.5, + "learning_rate": 5.2910377466477885e-08, + "logits/chosen": -2.0205912590026855, + "logits/rejected": -2.084557294845581, + "logps/chosen": -179.47012329101562, + "logps/rejected": -166.80291748046875, + "loss": 0.189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5410308837890625, + "rewards/margins": 1.080804467201233, + "rewards/rejected": 0.460226446390152, + "step": 8541 + }, + { + "epoch": 0.5, + "learning_rate": 5.290096936920149e-08, + "logits/chosen": -1.8807618618011475, + "logits/rejected": -1.8745335340499878, + "logps/chosen": -68.61071014404297, + "logps/rejected": -272.49407958984375, + "loss": 0.0778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5095818042755127, + "rewards/margins": 3.576239824295044, + "rewards/rejected": -2.0666580200195312, + "step": 8542 + }, + { + "epoch": 0.5, + "learning_rate": 5.289156116886847e-08, + "logits/chosen": -1.8746784925460815, + "logits/rejected": -1.8663758039474487, + "logps/chosen": -20.947521209716797, + "logps/rejected": -177.14447021484375, + "loss": 0.1921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9699417352676392, + "rewards/margins": 2.2669572830200195, + "rewards/rejected": -1.2970154285430908, + "step": 8543 + }, + { + "epoch": 0.5, + "learning_rate": 5.2882152865813056e-08, + "logits/chosen": -1.8746775388717651, + "logits/rejected": -1.867270827293396, + "logps/chosen": -0.05569595843553543, + "logps/rejected": -199.37451171875, + "loss": 0.3829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0046888720244169235, + "rewards/margins": 2.6199357509613037, + "rewards/rejected": -2.624624729156494, + "step": 8544 + }, + { + "epoch": 0.5, + "learning_rate": 5.287274446036948e-08, + "logits/chosen": -1.8556047677993774, + "logits/rejected": -1.8647726774215698, + "logps/chosen": -0.00478546554222703, + "logps/rejected": -176.8105010986328, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011698459275066853, + "rewards/margins": 2.8547475337982178, + "rewards/rejected": -2.8548645973205566, + "step": 8545 + }, + { + "epoch": 0.5, + "learning_rate": 5.2863335952871957e-08, + "logits/chosen": -1.881237506866455, + "logits/rejected": -1.8608254194259644, + "logps/chosen": -255.1490936279297, + "logps/rejected": -372.96600341796875, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6838181018829346, + "rewards/margins": 3.442317247390747, + "rewards/rejected": -0.7584991455078125, + "step": 8546 + }, + { + "epoch": 0.5, + "learning_rate": 5.2853927343654744e-08, + "logits/chosen": -2.1298251152038574, + "logits/rejected": -2.115034580230713, + "logps/chosen": -0.08898717164993286, + "logps/rejected": -190.96432495117188, + "loss": 0.4097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006006177514791489, + "rewards/margins": 1.8054425716400146, + "rewards/rejected": -1.8114486932754517, + "step": 8547 + }, + { + "epoch": 0.5, + "learning_rate": 5.284451863305206e-08, + "logits/chosen": -1.921388864517212, + "logits/rejected": -1.9301215410232544, + "logps/chosen": -21.584888458251953, + "logps/rejected": -189.86456298828125, + "loss": 0.2097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.736758828163147, + "rewards/margins": 2.55368709564209, + "rewards/rejected": -1.8169281482696533, + "step": 8548 + }, + { + "epoch": 0.5, + "learning_rate": 5.283510982139817e-08, + "logits/chosen": -1.9261654615402222, + "logits/rejected": -1.924796462059021, + "logps/chosen": -34.478519439697266, + "logps/rejected": -136.2799072265625, + "loss": 0.5916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10053634643554688, + "rewards/margins": 0.3469444513320923, + "rewards/rejected": -0.24640808999538422, + "step": 8549 + }, + { + "epoch": 0.5, + "learning_rate": 5.2825700909027305e-08, + "logits/chosen": -2.1210803985595703, + "logits/rejected": -2.1349339485168457, + "logps/chosen": -38.55004119873047, + "logps/rejected": -116.0097427368164, + "loss": 0.3071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6942020654678345, + "rewards/margins": 1.423288345336914, + "rewards/rejected": -0.7290863394737244, + "step": 8550 + }, + { + "epoch": 0.5, + "learning_rate": 5.2816291896273737e-08, + "logits/chosen": -1.9915690422058105, + "logits/rejected": -1.9775025844573975, + "logps/chosen": -245.18765258789062, + "logps/rejected": -488.9556884765625, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8586701154708862, + "rewards/margins": 4.807565212249756, + "rewards/rejected": -2.948895215988159, + "step": 8551 + }, + { + "epoch": 0.5, + "learning_rate": 5.280688278347169e-08, + "logits/chosen": -1.7852120399475098, + "logits/rejected": -1.7780773639678955, + "logps/chosen": -0.02246270887553692, + "logps/rejected": -180.57949829101562, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019184490665793419, + "rewards/margins": 2.9920358657836914, + "rewards/rejected": -2.9939544200897217, + "step": 8552 + }, + { + "epoch": 0.5, + "learning_rate": 5.2797473570955444e-08, + "logits/chosen": -2.0059163570404053, + "logits/rejected": -1.9993904829025269, + "logps/chosen": -7.001517295837402, + "logps/rejected": -119.28985595703125, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16048108041286469, + "rewards/margins": 2.008666753768921, + "rewards/rejected": -1.8481857776641846, + "step": 8553 + }, + { + "epoch": 0.5, + "learning_rate": 5.278806425905924e-08, + "logits/chosen": -2.067359685897827, + "logits/rejected": -2.0543577671051025, + "logps/chosen": -24.952327728271484, + "logps/rejected": -276.45489501953125, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07404518127441406, + "rewards/margins": 4.011987209320068, + "rewards/rejected": -4.086032390594482, + "step": 8554 + }, + { + "epoch": 0.5, + "learning_rate": 5.277865484811736e-08, + "logits/chosen": -1.9686367511749268, + "logits/rejected": -1.9624992609024048, + "logps/chosen": -43.96365737915039, + "logps/rejected": -218.36117553710938, + "loss": 0.4239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5444408655166626, + "rewards/margins": 0.5891475677490234, + "rewards/rejected": -0.04470672830939293, + "step": 8555 + }, + { + "epoch": 0.5, + "learning_rate": 5.2769245338464065e-08, + "logits/chosen": -2.1691603660583496, + "logits/rejected": -2.1804094314575195, + "logps/chosen": -35.752593994140625, + "logps/rejected": -90.19882202148438, + "loss": 0.5075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6104580163955688, + "rewards/margins": 0.22364923357963562, + "rewards/rejected": 0.3868087828159332, + "step": 8556 + }, + { + "epoch": 0.5, + "learning_rate": 5.275983573043364e-08, + "logits/chosen": -1.6699442863464355, + "logits/rejected": -1.6852178573608398, + "logps/chosen": -270.23602294921875, + "logps/rejected": -309.3270568847656, + "loss": 0.1371, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.197564721107483, + "rewards/margins": 1.7529754638671875, + "rewards/rejected": -0.5554108023643494, + "step": 8557 + }, + { + "epoch": 0.5, + "learning_rate": 5.275042602436034e-08, + "logits/chosen": -1.954930305480957, + "logits/rejected": -1.9552040100097656, + "logps/chosen": -118.38800048828125, + "logps/rejected": -245.41171264648438, + "loss": 0.5526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30310288071632385, + "rewards/margins": 0.1957801878452301, + "rewards/rejected": 0.10732269287109375, + "step": 8558 + }, + { + "epoch": 0.5, + "learning_rate": 5.274101622057847e-08, + "logits/chosen": -1.912972092628479, + "logits/rejected": -1.9165796041488647, + "logps/chosen": -2.480621814727783, + "logps/rejected": -111.65189361572266, + "loss": 0.4118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04575028643012047, + "rewards/margins": 1.6119314432144165, + "rewards/rejected": -1.5661811828613281, + "step": 8559 + }, + { + "epoch": 0.5, + "learning_rate": 5.2731606319422284e-08, + "logits/chosen": -1.9530285596847534, + "logits/rejected": -1.9580401182174683, + "logps/chosen": -0.052292659878730774, + "logps/rejected": -155.7785186767578, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00478404713794589, + "rewards/margins": 2.4709408283233643, + "rewards/rejected": -2.475724935531616, + "step": 8560 + }, + { + "epoch": 0.5, + "learning_rate": 5.2722196321226085e-08, + "logits/chosen": -1.5887781381607056, + "logits/rejected": -1.5896638631820679, + "logps/chosen": -0.043458353728055954, + "logps/rejected": -101.2000732421875, + "loss": 0.5141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0041497196070849895, + "rewards/margins": 1.0685157775878906, + "rewards/rejected": -1.0726654529571533, + "step": 8561 + }, + { + "epoch": 0.5, + "learning_rate": 5.271278622632415e-08, + "logits/chosen": -1.7493934631347656, + "logits/rejected": -1.7344021797180176, + "logps/chosen": -269.36334228515625, + "logps/rejected": -417.5374450683594, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.442639112472534, + "rewards/margins": 1.7958709001541138, + "rewards/rejected": 0.6467682123184204, + "step": 8562 + }, + { + "epoch": 0.5, + "learning_rate": 5.2703376035050785e-08, + "logits/chosen": -1.9025657176971436, + "logits/rejected": -1.9020644426345825, + "logps/chosen": -0.00017153281078208238, + "logps/rejected": -201.49180603027344, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2134611097280867e-05, + "rewards/margins": 3.0667266845703125, + "rewards/rejected": -3.0667388439178467, + "step": 8563 + }, + { + "epoch": 0.5, + "learning_rate": 5.269396574774028e-08, + "logits/chosen": -2.0829031467437744, + "logits/rejected": -2.081244707107544, + "logps/chosen": -10.202702522277832, + "logps/rejected": -116.95809173583984, + "loss": 0.5614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02837543562054634, + "rewards/margins": 0.5304266214370728, + "rewards/rejected": -0.55880206823349, + "step": 8564 + }, + { + "epoch": 0.5, + "learning_rate": 5.2684555364726926e-08, + "logits/chosen": -2.1530954837799072, + "logits/rejected": -2.152111530303955, + "logps/chosen": -6.809726715087891, + "logps/rejected": -103.7256088256836, + "loss": 0.7619, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14442558586597443, + "rewards/margins": -0.34020501375198364, + "rewards/rejected": 0.4846305847167969, + "step": 8565 + }, + { + "epoch": 0.5, + "learning_rate": 5.267514488634505e-08, + "logits/chosen": -2.1307456493377686, + "logits/rejected": -2.1244847774505615, + "logps/chosen": -67.80025482177734, + "logps/rejected": -141.71087646484375, + "loss": 0.4166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6527298092842102, + "rewards/margins": 0.5183883905410767, + "rewards/rejected": 0.13434143364429474, + "step": 8566 + }, + { + "epoch": 0.5, + "learning_rate": 5.266573431292892e-08, + "logits/chosen": -1.790014624595642, + "logits/rejected": -1.7940940856933594, + "logps/chosen": -15.963149070739746, + "logps/rejected": -155.70361328125, + "loss": 0.5628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34180840849876404, + "rewards/margins": 0.07309350371360779, + "rewards/rejected": 0.26871490478515625, + "step": 8567 + }, + { + "epoch": 0.5, + "learning_rate": 5.265632364481288e-08, + "logits/chosen": -2.0537354946136475, + "logits/rejected": -2.049731731414795, + "logps/chosen": -73.09940338134766, + "logps/rejected": -247.33274841308594, + "loss": 0.1845, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.147016167640686, + "rewards/margins": 1.8160606622695923, + "rewards/rejected": -0.6690444946289062, + "step": 8568 + }, + { + "epoch": 0.5, + "learning_rate": 5.264691288233123e-08, + "logits/chosen": -1.7788798809051514, + "logits/rejected": -1.8137327432632446, + "logps/chosen": -291.25347900390625, + "logps/rejected": -291.4773864746094, + "loss": 0.428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.732696533203125, + "rewards/margins": 0.010220348834991455, + "rewards/rejected": 0.7224761843681335, + "step": 8569 + }, + { + "epoch": 0.5, + "learning_rate": 5.263750202581829e-08, + "logits/chosen": -2.03482723236084, + "logits/rejected": -2.058993339538574, + "logps/chosen": -249.76803588867188, + "logps/rejected": -562.4998779296875, + "loss": 0.1347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.640777587890625, + "rewards/margins": 1.6656005382537842, + "rewards/rejected": -0.02482299879193306, + "step": 8570 + }, + { + "epoch": 0.5, + "learning_rate": 5.262809107560837e-08, + "logits/chosen": -1.9601304531097412, + "logits/rejected": -1.9594026803970337, + "logps/chosen": -76.86796569824219, + "logps/rejected": -400.8360595703125, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.130950927734375, + "rewards/margins": 6.425793647766113, + "rewards/rejected": -4.294842720031738, + "step": 8571 + }, + { + "epoch": 0.5, + "learning_rate": 5.261868003203581e-08, + "logits/chosen": -1.7994953393936157, + "logits/rejected": -1.792738914489746, + "logps/chosen": -150.70010375976562, + "logps/rejected": -393.53955078125, + "loss": 0.1165, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.039067029953003, + "rewards/margins": 1.738713026046753, + "rewards/rejected": 0.30035400390625, + "step": 8572 + }, + { + "epoch": 0.5, + "learning_rate": 5.260926889543491e-08, + "logits/chosen": -1.9004887342453003, + "logits/rejected": -1.9038519859313965, + "logps/chosen": -2.661222219467163, + "logps/rejected": -107.43804168701172, + "loss": 0.484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09689052402973175, + "rewards/margins": 1.518370509147644, + "rewards/rejected": -1.6152610778808594, + "step": 8573 + }, + { + "epoch": 0.5, + "learning_rate": 5.2599857666140034e-08, + "logits/chosen": -1.9915001392364502, + "logits/rejected": -1.9866160154342651, + "logps/chosen": -92.70341491699219, + "logps/rejected": -257.5008239746094, + "loss": 0.2077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7893158197402954, + "rewards/margins": 2.4387969970703125, + "rewards/rejected": -1.649481177330017, + "step": 8574 + }, + { + "epoch": 0.5, + "learning_rate": 5.259044634448549e-08, + "logits/chosen": -1.8978832960128784, + "logits/rejected": -1.902346134185791, + "logps/chosen": -185.2004852294922, + "logps/rejected": -265.54119873046875, + "loss": 0.7168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23115234076976776, + "rewards/margins": 0.2712615728378296, + "rewards/rejected": -0.5024139285087585, + "step": 8575 + }, + { + "epoch": 0.5, + "learning_rate": 5.25810349308056e-08, + "logits/chosen": -1.9861853122711182, + "logits/rejected": -2.008126974105835, + "logps/chosen": -182.59039306640625, + "logps/rejected": -416.3169250488281, + "loss": 0.0641, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2858794927597046, + "rewards/margins": 5.516781806945801, + "rewards/rejected": -4.230902194976807, + "step": 8576 + }, + { + "epoch": 0.5, + "learning_rate": 5.257162342543474e-08, + "logits/chosen": -1.994417428970337, + "logits/rejected": -1.9847571849822998, + "logps/chosen": -55.938568115234375, + "logps/rejected": -127.28736114501953, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25094300508499146, + "rewards/margins": 1.1585907936096191, + "rewards/rejected": -0.9076477289199829, + "step": 8577 + }, + { + "epoch": 0.5, + "learning_rate": 5.2562211828707246e-08, + "logits/chosen": -1.9253298044204712, + "logits/rejected": -1.9158748388290405, + "logps/chosen": -45.887054443359375, + "logps/rejected": -224.81568908691406, + "loss": 0.0729, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.851643443107605, + "rewards/margins": 3.738555908203125, + "rewards/rejected": -1.8869125843048096, + "step": 8578 + }, + { + "epoch": 0.5, + "learning_rate": 5.255280014095744e-08, + "logits/chosen": -1.821387767791748, + "logits/rejected": -1.828952431678772, + "logps/chosen": -48.17610549926758, + "logps/rejected": -147.97601318359375, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.526929497718811, + "rewards/margins": 1.3903450965881348, + "rewards/rejected": -0.863415539264679, + "step": 8579 + }, + { + "epoch": 0.5, + "learning_rate": 5.25433883625197e-08, + "logits/chosen": -1.9281320571899414, + "logits/rejected": -1.9046658277511597, + "logps/chosen": -190.29730224609375, + "logps/rejected": -436.8160095214844, + "loss": 0.1097, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9481308460235596, + "rewards/margins": 1.4912430047988892, + "rewards/rejected": 1.4568878412246704, + "step": 8580 + }, + { + "epoch": 0.5, + "learning_rate": 5.253397649372835e-08, + "logits/chosen": -2.070145845413208, + "logits/rejected": -2.0479891300201416, + "logps/chosen": -9.700510025024414, + "logps/rejected": -303.26556396484375, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19184894859790802, + "rewards/margins": 3.6034395694732666, + "rewards/rejected": -3.411590576171875, + "step": 8581 + }, + { + "epoch": 0.5, + "learning_rate": 5.252456453491778e-08, + "logits/chosen": -2.07368803024292, + "logits/rejected": -2.0623326301574707, + "logps/chosen": -17.123779296875, + "logps/rejected": -140.20977783203125, + "loss": 0.4379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1961357146501541, + "rewards/margins": 0.5121879577636719, + "rewards/rejected": -0.31605225801467896, + "step": 8582 + }, + { + "epoch": 0.5, + "learning_rate": 5.251515248642231e-08, + "logits/chosen": -2.013781785964966, + "logits/rejected": -1.9853984117507935, + "logps/chosen": -265.114990234375, + "logps/rejected": -401.2559814453125, + "loss": 0.234, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.160040259361267, + "rewards/margins": 0.8707488775253296, + "rewards/rejected": 0.2892913818359375, + "step": 8583 + }, + { + "epoch": 0.5, + "learning_rate": 5.250574034857632e-08, + "logits/chosen": -1.9609413146972656, + "logits/rejected": -1.953904390335083, + "logps/chosen": -55.404930114746094, + "logps/rejected": -206.6770782470703, + "loss": 0.1835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.994097888469696, + "rewards/margins": 2.270625352859497, + "rewards/rejected": -1.2765274047851562, + "step": 8584 + }, + { + "epoch": 0.5, + "learning_rate": 5.249632812171418e-08, + "logits/chosen": -2.098414897918701, + "logits/rejected": -2.089425563812256, + "logps/chosen": -43.00298309326172, + "logps/rejected": -265.92791748046875, + "loss": 0.5004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37698861956596375, + "rewards/margins": 1.7727596759796143, + "rewards/rejected": -2.1497483253479004, + "step": 8585 + }, + { + "epoch": 0.5, + "learning_rate": 5.248691580617026e-08, + "logits/chosen": -1.8112270832061768, + "logits/rejected": -1.7959375381469727, + "logps/chosen": -215.30593872070312, + "logps/rejected": -342.0262451171875, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.777600109577179, + "rewards/margins": 1.098779320716858, + "rewards/rejected": -0.32117921113967896, + "step": 8586 + }, + { + "epoch": 0.5, + "learning_rate": 5.247750340227892e-08, + "logits/chosen": -1.6991698741912842, + "logits/rejected": -1.7057521343231201, + "logps/chosen": -183.85980224609375, + "logps/rejected": -224.3734893798828, + "loss": 0.3735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5719269514083862, + "rewards/margins": 0.05379486083984375, + "rewards/rejected": 1.5181320905685425, + "step": 8587 + }, + { + "epoch": 0.5, + "learning_rate": 5.246809091037454e-08, + "logits/chosen": -1.8878953456878662, + "logits/rejected": -1.8936702013015747, + "logps/chosen": -12.405311584472656, + "logps/rejected": -216.63047790527344, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07233858108520508, + "rewards/margins": 2.5520780086517334, + "rewards/rejected": -2.4797394275665283, + "step": 8588 + }, + { + "epoch": 0.5, + "learning_rate": 5.2458678330791516e-08, + "logits/chosen": -1.9888566732406616, + "logits/rejected": -1.9952472448349, + "logps/chosen": -168.79330444335938, + "logps/rejected": -227.04583740234375, + "loss": 0.2346, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3524551391601562, + "rewards/margins": 0.8613876104354858, + "rewards/rejected": 0.491067498922348, + "step": 8589 + }, + { + "epoch": 0.5, + "learning_rate": 5.244926566386419e-08, + "logits/chosen": -2.084263324737549, + "logits/rejected": -2.0731983184814453, + "logps/chosen": -0.00021623563952744007, + "logps/rejected": -141.92333984375, + "loss": 0.4197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00011484999413369223, + "rewards/margins": 1.6710941791534424, + "rewards/rejected": -1.670979380607605, + "step": 8590 + }, + { + "epoch": 0.5, + "learning_rate": 5.2439852909926974e-08, + "logits/chosen": -2.040163993835449, + "logits/rejected": -2.044745683670044, + "logps/chosen": -200.1744384765625, + "logps/rejected": -247.94854736328125, + "loss": 0.2411, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1107666492462158, + "rewards/margins": 1.2064026594161987, + "rewards/rejected": -0.09563598781824112, + "step": 8591 + }, + { + "epoch": 0.5, + "learning_rate": 5.243044006931424e-08, + "logits/chosen": -1.9351017475128174, + "logits/rejected": -1.9356331825256348, + "logps/chosen": -63.767948150634766, + "logps/rejected": -132.61544799804688, + "loss": 0.2804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49186667799949646, + "rewards/margins": 1.5999592542648315, + "rewards/rejected": -1.1080925464630127, + "step": 8592 + }, + { + "epoch": 0.5, + "learning_rate": 5.2421027142360406e-08, + "logits/chosen": -1.7867331504821777, + "logits/rejected": -1.7835569381713867, + "logps/chosen": -197.0074462890625, + "logps/rejected": -225.16744995117188, + "loss": 0.5538, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6617340445518494, + "rewards/margins": -0.47626644372940063, + "rewards/rejected": 1.13800048828125, + "step": 8593 + }, + { + "epoch": 0.5, + "learning_rate": 5.241161412939983e-08, + "logits/chosen": -1.8817306756973267, + "logits/rejected": -1.8600444793701172, + "logps/chosen": -115.0157699584961, + "logps/rejected": -220.84664916992188, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8890892267227173, + "rewards/margins": 0.7914482355117798, + "rewards/rejected": 0.0976409912109375, + "step": 8594 + }, + { + "epoch": 0.5, + "learning_rate": 5.2402201030766926e-08, + "logits/chosen": -2.0275964736938477, + "logits/rejected": -2.017862558364868, + "logps/chosen": -0.008513371460139751, + "logps/rejected": -255.26443481445312, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.085372696252307e-05, + "rewards/margins": 4.105494499206543, + "rewards/rejected": -4.105505466461182, + "step": 8595 + }, + { + "epoch": 0.5, + "learning_rate": 5.239278784679607e-08, + "logits/chosen": -1.780312418937683, + "logits/rejected": -1.7839632034301758, + "logps/chosen": -40.26229476928711, + "logps/rejected": -184.39305114746094, + "loss": 0.4585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07096672058105469, + "rewards/margins": 0.7605892419815063, + "rewards/rejected": -0.6896225214004517, + "step": 8596 + }, + { + "epoch": 0.5, + "learning_rate": 5.2383374577821716e-08, + "logits/chosen": -1.9323091506958008, + "logits/rejected": -1.9340898990631104, + "logps/chosen": -0.005036443006247282, + "logps/rejected": -67.99498748779297, + "loss": 0.4334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001292298111366108, + "rewards/margins": 1.58089017868042, + "rewards/rejected": -1.5807609558105469, + "step": 8597 + }, + { + "epoch": 0.5, + "learning_rate": 5.237396122417823e-08, + "logits/chosen": -1.8839999437332153, + "logits/rejected": -1.8770692348480225, + "logps/chosen": -153.55613708496094, + "logps/rejected": -389.1219177246094, + "loss": 0.0841, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8892929553985596, + "rewards/margins": 2.149449110031128, + "rewards/rejected": 0.7398437857627869, + "step": 8598 + }, + { + "epoch": 0.5, + "learning_rate": 5.236454778620003e-08, + "logits/chosen": -2.0815320014953613, + "logits/rejected": -2.0631816387176514, + "logps/chosen": -237.31649780273438, + "logps/rejected": -309.132080078125, + "loss": 0.1865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7448028922080994, + "rewards/margins": 1.5498932600021362, + "rewards/rejected": -0.8050903677940369, + "step": 8599 + }, + { + "epoch": 0.5, + "learning_rate": 5.235513426422152e-08, + "logits/chosen": -1.8635203838348389, + "logits/rejected": -1.8584705591201782, + "logps/chosen": -169.48114013671875, + "logps/rejected": -322.33740234375, + "loss": 0.0728, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4427247047424316, + "rewards/margins": 2.1537537574768066, + "rewards/rejected": 0.288970947265625, + "step": 8600 + }, + { + "epoch": 0.5, + "learning_rate": 5.234572065857713e-08, + "logits/chosen": -2.04494309425354, + "logits/rejected": -2.0299434661865234, + "logps/chosen": -42.99648666381836, + "logps/rejected": -304.35736083984375, + "loss": 0.2135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48847389221191406, + "rewards/margins": 7.361955642700195, + "rewards/rejected": -6.873481750488281, + "step": 8601 + }, + { + "epoch": 0.5, + "learning_rate": 5.233630696960126e-08, + "logits/chosen": -1.8610508441925049, + "logits/rejected": -1.8535399436950684, + "logps/chosen": -156.74099731445312, + "logps/rejected": -296.26531982421875, + "loss": 0.123, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1424347162246704, + "rewards/margins": 2.81378173828125, + "rewards/rejected": -1.6713470220565796, + "step": 8602 + }, + { + "epoch": 0.5, + "learning_rate": 5.232689319762834e-08, + "logits/chosen": -1.6707017421722412, + "logits/rejected": -1.6757527589797974, + "logps/chosen": -16.83603286743164, + "logps/rejected": -63.35415267944336, + "loss": 0.7234, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.004245185758918524, + "rewards/margins": -0.2117074877023697, + "rewards/rejected": 0.21595267951488495, + "step": 8603 + }, + { + "epoch": 0.5, + "learning_rate": 5.23174793429928e-08, + "logits/chosen": -1.9227945804595947, + "logits/rejected": -2.0637080669403076, + "logps/chosen": -333.63665771484375, + "logps/rejected": -458.2613220214844, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4408081769943237, + "rewards/margins": 6.482211589813232, + "rewards/rejected": -5.041403293609619, + "step": 8604 + }, + { + "epoch": 0.5, + "learning_rate": 5.230806540602905e-08, + "logits/chosen": -1.6157095432281494, + "logits/rejected": -1.6081576347351074, + "logps/chosen": -94.92620849609375, + "logps/rejected": -320.7842712402344, + "loss": 0.3281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041259001940488815, + "rewards/margins": 2.7980828285217285, + "rewards/rejected": -2.756823778152466, + "step": 8605 + }, + { + "epoch": 0.5, + "learning_rate": 5.2298651387071526e-08, + "logits/chosen": -1.8060334920883179, + "logits/rejected": -1.712386131286621, + "logps/chosen": -268.26446533203125, + "logps/rejected": -633.1289672851562, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8318054676055908, + "rewards/margins": 3.857208251953125, + "rewards/rejected": -2.025402784347534, + "step": 8606 + }, + { + "epoch": 0.5, + "learning_rate": 5.2289237286454657e-08, + "logits/chosen": -1.9893068075180054, + "logits/rejected": -1.9924237728118896, + "logps/chosen": -1.6450832845293917e-05, + "logps/rejected": -250.8968505859375, + "loss": 0.3355, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6225953320135886e-07, + "rewards/margins": 6.161768913269043, + "rewards/rejected": -6.161769390106201, + "step": 8607 + }, + { + "epoch": 0.5, + "learning_rate": 5.227982310451289e-08, + "logits/chosen": -1.7493982315063477, + "logits/rejected": -1.7146260738372803, + "logps/chosen": -229.38941955566406, + "logps/rejected": -340.2254943847656, + "loss": 0.0838, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7350144386291504, + "rewards/margins": 2.2991838455200195, + "rewards/rejected": 0.435830682516098, + "step": 8608 + }, + { + "epoch": 0.5, + "learning_rate": 5.227040884158065e-08, + "logits/chosen": -1.8745617866516113, + "logits/rejected": -1.871528148651123, + "logps/chosen": -39.361549377441406, + "logps/rejected": -115.03372955322266, + "loss": 0.5417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11309204250574112, + "rewards/margins": 0.4297523498535156, + "rewards/rejected": -0.3166603147983551, + "step": 8609 + }, + { + "epoch": 0.5, + "learning_rate": 5.2260994497992393e-08, + "logits/chosen": -1.88030207157135, + "logits/rejected": -1.8790123462677002, + "logps/chosen": -222.1414794921875, + "logps/rejected": -322.05780029296875, + "loss": 0.0633, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2583558559417725, + "rewards/margins": 2.3518433570861816, + "rewards/rejected": -0.09348755329847336, + "step": 8610 + }, + { + "epoch": 0.5, + "learning_rate": 5.2251580074082536e-08, + "logits/chosen": -1.9473350048065186, + "logits/rejected": -1.9423959255218506, + "logps/chosen": -26.5410099029541, + "logps/rejected": -161.4183349609375, + "loss": 0.2537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4521234631538391, + "rewards/margins": 2.351958751678467, + "rewards/rejected": -1.899835228919983, + "step": 8611 + }, + { + "epoch": 0.5, + "learning_rate": 5.224216557018555e-08, + "logits/chosen": -1.9473137855529785, + "logits/rejected": -1.951758861541748, + "logps/chosen": -13.1257963180542, + "logps/rejected": -30.479106903076172, + "loss": 0.5911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09997148811817169, + "rewards/margins": 0.33849209547042847, + "rewards/rejected": -0.23852062225341797, + "step": 8612 + }, + { + "epoch": 0.5, + "learning_rate": 5.2232750986635864e-08, + "logits/chosen": -2.025942087173462, + "logits/rejected": -2.0285356044769287, + "logps/chosen": -0.00032661674777045846, + "logps/rejected": -124.29625701904297, + "loss": 0.7071, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.019060608901782e-06, + "rewards/margins": -0.09493376314640045, + "rewards/rejected": 0.09492874145507812, + "step": 8613 + }, + { + "epoch": 0.5, + "learning_rate": 5.2223336323767966e-08, + "logits/chosen": -2.068378448486328, + "logits/rejected": -2.060831308364868, + "logps/chosen": -8.002167701721191, + "logps/rejected": -136.08963012695312, + "loss": 0.3822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07021694630384445, + "rewards/margins": 2.227264881134033, + "rewards/rejected": -2.157047986984253, + "step": 8614 + }, + { + "epoch": 0.5, + "learning_rate": 5.2213921581916265e-08, + "logits/chosen": -2.123648166656494, + "logits/rejected": -2.1160662174224854, + "logps/chosen": -145.5720672607422, + "logps/rejected": -443.98321533203125, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2882461547851562, + "rewards/margins": 6.79940938949585, + "rewards/rejected": -5.511163234710693, + "step": 8615 + }, + { + "epoch": 0.5, + "learning_rate": 5.2204506761415245e-08, + "logits/chosen": -2.031318187713623, + "logits/rejected": -2.032231092453003, + "logps/chosen": -59.59777069091797, + "logps/rejected": -170.13026428222656, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6411728262901306, + "rewards/margins": 1.28448486328125, + "rewards/rejected": -0.6433120965957642, + "step": 8616 + }, + { + "epoch": 0.5, + "learning_rate": 5.219509186259935e-08, + "logits/chosen": -1.8382214307785034, + "logits/rejected": -1.8307335376739502, + "logps/chosen": -19.60370635986328, + "logps/rejected": -137.7689666748047, + "loss": 0.5349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3333488404750824, + "rewards/margins": 1.6397346258163452, + "rewards/rejected": -1.97308349609375, + "step": 8617 + }, + { + "epoch": 0.5, + "learning_rate": 5.218567688580308e-08, + "logits/chosen": -1.951015591621399, + "logits/rejected": -1.9541735649108887, + "logps/chosen": -30.32549476623535, + "logps/rejected": -219.41598510742188, + "loss": 0.5043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30815067887306213, + "rewards/margins": 0.2206171154975891, + "rewards/rejected": 0.08753357082605362, + "step": 8618 + }, + { + "epoch": 0.5, + "learning_rate": 5.217626183136084e-08, + "logits/chosen": -1.979607105255127, + "logits/rejected": -1.9839452505111694, + "logps/chosen": -219.5930938720703, + "logps/rejected": -303.3829040527344, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5598373413085938, + "rewards/margins": 3.9180617332458496, + "rewards/rejected": -1.3582245111465454, + "step": 8619 + }, + { + "epoch": 0.5, + "learning_rate": 5.216684669960717e-08, + "logits/chosen": -1.7839241027832031, + "logits/rejected": -1.7971444129943848, + "logps/chosen": -13.381858825683594, + "logps/rejected": -195.2230987548828, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11227808147668839, + "rewards/margins": 3.435100555419922, + "rewards/rejected": -3.3228225708007812, + "step": 8620 + }, + { + "epoch": 0.5, + "learning_rate": 5.2157431490876494e-08, + "logits/chosen": -1.8835145235061646, + "logits/rejected": -1.9295337200164795, + "logps/chosen": -159.11117553710938, + "logps/rejected": -635.108642578125, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.682830810546875, + "rewards/margins": 5.847625732421875, + "rewards/rejected": -5.164794921875, + "step": 8621 + }, + { + "epoch": 0.5, + "learning_rate": 5.21480162055033e-08, + "logits/chosen": -1.7352933883666992, + "logits/rejected": -1.7549934387207031, + "logps/chosen": -202.97958374023438, + "logps/rejected": -460.4547119140625, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9211441278457642, + "rewards/margins": 6.107182502746582, + "rewards/rejected": -4.186038494110107, + "step": 8622 + }, + { + "epoch": 0.5, + "learning_rate": 5.2138600843822057e-08, + "logits/chosen": -1.7522022724151611, + "logits/rejected": -1.7227537631988525, + "logps/chosen": -188.91879272460938, + "logps/rejected": -327.1744079589844, + "loss": 0.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.53770911693573, + "rewards/margins": 2.6168413162231445, + "rewards/rejected": -1.079132080078125, + "step": 8623 + }, + { + "epoch": 0.5, + "learning_rate": 5.212918540616725e-08, + "logits/chosen": -1.8807830810546875, + "logits/rejected": -1.856406569480896, + "logps/chosen": -187.8245391845703, + "logps/rejected": -253.0692901611328, + "loss": 0.4213, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3588074445724487, + "rewards/margins": 0.09574127197265625, + "rewards/rejected": 1.2630661725997925, + "step": 8624 + }, + { + "epoch": 0.5, + "learning_rate": 5.2119769892873366e-08, + "logits/chosen": -2.037259101867676, + "logits/rejected": -2.076497793197632, + "logps/chosen": -214.3446807861328, + "logps/rejected": -251.2284698486328, + "loss": 0.1189, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.333583116531372, + "rewards/margins": 1.6567718982696533, + "rewards/rejected": 0.6768112182617188, + "step": 8625 + }, + { + "epoch": 0.5, + "learning_rate": 5.211035430427488e-08, + "logits/chosen": -2.0737969875335693, + "logits/rejected": -2.007761001586914, + "logps/chosen": -167.84539794921875, + "logps/rejected": -528.5458984375, + "loss": 0.1128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.148028612136841, + "rewards/margins": 1.853607177734375, + "rewards/rejected": 0.29442140460014343, + "step": 8626 + }, + { + "epoch": 0.5, + "learning_rate": 5.210093864070631e-08, + "logits/chosen": -2.097158908843994, + "logits/rejected": -2.099478006362915, + "logps/chosen": -0.16452811658382416, + "logps/rejected": -154.65350341796875, + "loss": 0.3904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014896847307682037, + "rewards/margins": 2.285733461380005, + "rewards/rejected": -2.3006303310394287, + "step": 8627 + }, + { + "epoch": 0.5, + "learning_rate": 5.209152290250209e-08, + "logits/chosen": -1.8857396841049194, + "logits/rejected": -1.8738362789154053, + "logps/chosen": -146.303466796875, + "logps/rejected": -265.340576171875, + "loss": 0.2309, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.27789306640625, + "rewards/margins": 1.0094391107559204, + "rewards/rejected": 0.268453985452652, + "step": 8628 + }, + { + "epoch": 0.5, + "learning_rate": 5.2082107089996775e-08, + "logits/chosen": -1.8071720600128174, + "logits/rejected": -1.8109982013702393, + "logps/chosen": -62.738525390625, + "logps/rejected": -308.2220153808594, + "loss": 0.223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6613655090332031, + "rewards/margins": 2.707904815673828, + "rewards/rejected": -2.046539306640625, + "step": 8629 + }, + { + "epoch": 0.5, + "learning_rate": 5.207269120352481e-08, + "logits/chosen": -1.9498021602630615, + "logits/rejected": -1.937186360359192, + "logps/chosen": -211.77630615234375, + "logps/rejected": -486.41363525390625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7304933071136475, + "rewards/margins": 7.60595703125, + "rewards/rejected": -4.875463962554932, + "step": 8630 + }, + { + "epoch": 0.5, + "learning_rate": 5.206327524342073e-08, + "logits/chosen": -1.842380404472351, + "logits/rejected": -1.8496017456054688, + "logps/chosen": -6.222573574632406e-05, + "logps/rejected": -142.8135986328125, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.241085212517646e-06, + "rewards/margins": 3.0063605308532715, + "rewards/rejected": -3.0063583850860596, + "step": 8631 + }, + { + "epoch": 0.5, + "learning_rate": 5.2053859210019016e-08, + "logits/chosen": -1.883879542350769, + "logits/rejected": -1.8812891244888306, + "logps/chosen": -336.0391845703125, + "logps/rejected": -472.881591796875, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9445130825042725, + "rewards/margins": 3.7737550735473633, + "rewards/rejected": -0.829241931438446, + "step": 8632 + }, + { + "epoch": 0.5, + "learning_rate": 5.204444310365418e-08, + "logits/chosen": -2.1195034980773926, + "logits/rejected": -2.1207871437072754, + "logps/chosen": -3.340420722961426, + "logps/rejected": -243.988037109375, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12869977951049805, + "rewards/margins": 2.844846725463867, + "rewards/rejected": -2.716146945953369, + "step": 8633 + }, + { + "epoch": 0.5, + "learning_rate": 5.203502692466071e-08, + "logits/chosen": -1.9200478792190552, + "logits/rejected": -1.930822491645813, + "logps/chosen": -192.58721923828125, + "logps/rejected": -334.44073486328125, + "loss": 0.1085, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.029315233230591, + "rewards/margins": 1.6985352039337158, + "rewards/rejected": 0.330780029296875, + "step": 8634 + }, + { + "epoch": 0.5, + "learning_rate": 5.202561067337314e-08, + "logits/chosen": -1.986752986907959, + "logits/rejected": -1.9810535907745361, + "logps/chosen": -20.566736221313477, + "logps/rejected": -285.17431640625, + "loss": 0.2742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.933563232421875, + "rewards/margins": 0.988812267780304, + "rewards/rejected": -0.05524902418255806, + "step": 8635 + }, + { + "epoch": 0.5, + "learning_rate": 5.2016194350125975e-08, + "logits/chosen": -1.9049532413482666, + "logits/rejected": -1.902449607849121, + "logps/chosen": -173.29940795898438, + "logps/rejected": -340.14599609375, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4451935291290283, + "rewards/margins": 0.45650941133499146, + "rewards/rejected": 0.9886841177940369, + "step": 8636 + }, + { + "epoch": 0.5, + "learning_rate": 5.200677795525371e-08, + "logits/chosen": -1.7811299562454224, + "logits/rejected": -1.797349214553833, + "logps/chosen": -164.88873291015625, + "logps/rejected": -393.10870361328125, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1601502895355225, + "rewards/margins": 3.67779541015625, + "rewards/rejected": -1.517645239830017, + "step": 8637 + }, + { + "epoch": 0.5, + "learning_rate": 5.199736148909087e-08, + "logits/chosen": -2.023470640182495, + "logits/rejected": -2.0126514434814453, + "logps/chosen": -20.65644073486328, + "logps/rejected": -230.01803588867188, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09501514583826065, + "rewards/margins": 2.5670273303985596, + "rewards/rejected": -2.4720122814178467, + "step": 8638 + }, + { + "epoch": 0.5, + "learning_rate": 5.198794495197201e-08, + "logits/chosen": -1.9864251613616943, + "logits/rejected": -2.020209789276123, + "logps/chosen": -254.38690185546875, + "logps/rejected": -315.04058837890625, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.024456739425659, + "rewards/margins": 2.1201536655426025, + "rewards/rejected": 0.9043030142784119, + "step": 8639 + }, + { + "epoch": 0.5, + "learning_rate": 5.1978528344231596e-08, + "logits/chosen": -1.9477834701538086, + "logits/rejected": -1.9538640975952148, + "logps/chosen": -25.108781814575195, + "logps/rejected": -191.64918518066406, + "loss": 0.4605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14789791405200958, + "rewards/margins": 0.8019827008247375, + "rewards/rejected": -0.6540848016738892, + "step": 8640 + }, + { + "epoch": 0.5, + "learning_rate": 5.196911166620419e-08, + "logits/chosen": -2.014331340789795, + "logits/rejected": -2.0104613304138184, + "logps/chosen": -84.97479248046875, + "logps/rejected": -179.5682373046875, + "loss": 0.121, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7687500715255737, + "rewards/margins": 2.2162904739379883, + "rewards/rejected": -0.447540283203125, + "step": 8641 + }, + { + "epoch": 0.5, + "learning_rate": 5.1959694918224305e-08, + "logits/chosen": -1.8813080787658691, + "logits/rejected": -1.8827648162841797, + "logps/chosen": -175.2749786376953, + "logps/rejected": -351.07781982421875, + "loss": 0.1498, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0144394636154175, + "rewards/margins": 1.802137851715088, + "rewards/rejected": -0.7876983880996704, + "step": 8642 + }, + { + "epoch": 0.5, + "learning_rate": 5.195027810062646e-08, + "logits/chosen": -1.927369236946106, + "logits/rejected": -1.930921196937561, + "logps/chosen": -30.33510971069336, + "logps/rejected": -167.35260009765625, + "loss": 0.5224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22162513434886932, + "rewards/margins": 0.19976386427879333, + "rewards/rejected": 0.02186126820743084, + "step": 8643 + }, + { + "epoch": 0.5, + "learning_rate": 5.194086121374521e-08, + "logits/chosen": -1.6517099142074585, + "logits/rejected": -1.6925512552261353, + "logps/chosen": -193.9163055419922, + "logps/rejected": -233.7001953125, + "loss": 0.2608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7981979846954346, + "rewards/margins": 0.5225967168807983, + "rewards/rejected": 1.2756012678146362, + "step": 8644 + }, + { + "epoch": 0.5, + "learning_rate": 5.193144425791507e-08, + "logits/chosen": -1.81730055809021, + "logits/rejected": -1.819274663925171, + "logps/chosen": -20.342769622802734, + "logps/rejected": -122.30403900146484, + "loss": 0.5624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5207007527351379, + "rewards/margins": 0.9928956627845764, + "rewards/rejected": -1.5135964155197144, + "step": 8645 + }, + { + "epoch": 0.5, + "learning_rate": 5.192202723347059e-08, + "logits/chosen": -1.826256513595581, + "logits/rejected": -1.808135747909546, + "logps/chosen": -26.6897029876709, + "logps/rejected": -43.23337936401367, + "loss": 0.4867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5065038800239563, + "rewards/margins": 0.35130634903907776, + "rewards/rejected": 0.15519753098487854, + "step": 8646 + }, + { + "epoch": 0.5, + "learning_rate": 5.191261014074628e-08, + "logits/chosen": -1.9660484790802002, + "logits/rejected": -1.9672446250915527, + "logps/chosen": -1.9301520586013794, + "logps/rejected": -126.89794921875, + "loss": 0.4828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11290212720632553, + "rewards/margins": 0.8477272391319275, + "rewards/rejected": -0.7348251342773438, + "step": 8647 + }, + { + "epoch": 0.5, + "learning_rate": 5.190319298007672e-08, + "logits/chosen": -1.7553913593292236, + "logits/rejected": -1.7605386972427368, + "logps/chosen": -0.2831535339355469, + "logps/rejected": -103.67339324951172, + "loss": 0.5396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025905264541506767, + "rewards/margins": 0.8511181473731995, + "rewards/rejected": -0.8252128958702087, + "step": 8648 + }, + { + "epoch": 0.5, + "learning_rate": 5.189377575179643e-08, + "logits/chosen": -2.1218323707580566, + "logits/rejected": -2.0868728160858154, + "logps/chosen": -177.50624084472656, + "logps/rejected": -403.6264953613281, + "loss": 0.2268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7266815900802612, + "rewards/margins": 0.7819458842277527, + "rewards/rejected": 0.9447357058525085, + "step": 8649 + }, + { + "epoch": 0.5, + "learning_rate": 5.1884358456239975e-08, + "logits/chosen": -1.8810830116271973, + "logits/rejected": -1.889682412147522, + "logps/chosen": -214.76231384277344, + "logps/rejected": -417.6290283203125, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.645832896232605, + "rewards/margins": 2.7661638259887695, + "rewards/rejected": -1.120330810546875, + "step": 8650 + }, + { + "epoch": 0.5, + "learning_rate": 5.1874941093741875e-08, + "logits/chosen": -2.108743190765381, + "logits/rejected": -2.1071512699127197, + "logps/chosen": -48.22630310058594, + "logps/rejected": -145.34365844726562, + "loss": 0.3212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4899139404296875, + "rewards/margins": 1.4321930408477783, + "rewards/rejected": -0.942279040813446, + "step": 8651 + }, + { + "epoch": 0.5, + "learning_rate": 5.1865523664636703e-08, + "logits/chosen": -1.7624170780181885, + "logits/rejected": -1.7642651796340942, + "logps/chosen": -17.37650489807129, + "logps/rejected": -283.8373718261719, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05007362365722656, + "rewards/margins": 7.0308966636657715, + "rewards/rejected": -6.980823040008545, + "step": 8652 + }, + { + "epoch": 0.5, + "learning_rate": 5.185610616925899e-08, + "logits/chosen": -2.113386392593384, + "logits/rejected": -2.106564521789551, + "logps/chosen": -0.005299534648656845, + "logps/rejected": -169.8114471435547, + "loss": 0.4533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003326701116748154, + "rewards/margins": 1.3933089971542358, + "rewards/rejected": -1.393641710281372, + "step": 8653 + }, + { + "epoch": 0.5, + "learning_rate": 5.1846688607943334e-08, + "logits/chosen": -2.0208914279937744, + "logits/rejected": -2.0049288272857666, + "logps/chosen": -157.85484313964844, + "logps/rejected": -246.40159606933594, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8973770141601562, + "rewards/margins": 1.822442650794983, + "rewards/rejected": 1.0749343633651733, + "step": 8654 + }, + { + "epoch": 0.5, + "learning_rate": 5.183727098102424e-08, + "logits/chosen": -1.977274775505066, + "logits/rejected": -1.9863755702972412, + "logps/chosen": -5.165831565856934, + "logps/rejected": -142.29490661621094, + "loss": 0.488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02499842643737793, + "rewards/margins": 1.1060303449630737, + "rewards/rejected": -1.1310287714004517, + "step": 8655 + }, + { + "epoch": 0.5, + "learning_rate": 5.182785328883631e-08, + "logits/chosen": -1.9381299018859863, + "logits/rejected": -1.9348000288009644, + "logps/chosen": -31.794761657714844, + "logps/rejected": -137.68502807617188, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.626102864742279, + "rewards/margins": 1.2985577583312988, + "rewards/rejected": -0.672454833984375, + "step": 8656 + }, + { + "epoch": 0.5, + "learning_rate": 5.1818435531714064e-08, + "logits/chosen": -2.2179222106933594, + "logits/rejected": -2.2157115936279297, + "logps/chosen": -0.023854680359363556, + "logps/rejected": -165.457763671875, + "loss": 0.4528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014090332842897624, + "rewards/margins": 1.3689295053482056, + "rewards/rejected": -1.3690704107284546, + "step": 8657 + }, + { + "epoch": 0.5, + "learning_rate": 5.180901770999212e-08, + "logits/chosen": -1.9371975660324097, + "logits/rejected": -1.9326971769332886, + "logps/chosen": -4.070727825164795, + "logps/rejected": -86.5609359741211, + "loss": 0.4831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03987779840826988, + "rewards/margins": 1.2038707733154297, + "rewards/rejected": -1.2437485456466675, + "step": 8658 + }, + { + "epoch": 0.5, + "learning_rate": 5.1799599824005015e-08, + "logits/chosen": -2.0290067195892334, + "logits/rejected": -2.0283737182617188, + "logps/chosen": -10.943172454833984, + "logps/rejected": -353.50897216796875, + "loss": 0.437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3916432857513428, + "rewards/margins": 2.643043041229248, + "rewards/rejected": -3.034686326980591, + "step": 8659 + }, + { + "epoch": 0.5, + "learning_rate": 5.179018187408732e-08, + "logits/chosen": -1.8901748657226562, + "logits/rejected": -1.8915280103683472, + "logps/chosen": -19.267210006713867, + "logps/rejected": -138.99887084960938, + "loss": 0.5088, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.152557373046875e-05, + "rewards/margins": 0.9167570471763611, + "rewards/rejected": -0.9166855216026306, + "step": 8660 + }, + { + "epoch": 0.5, + "learning_rate": 5.1780763860573596e-08, + "logits/chosen": -1.8436424732208252, + "logits/rejected": -1.8387349843978882, + "logps/chosen": -43.98179626464844, + "logps/rejected": -128.8447265625, + "loss": 0.4377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33943748474121094, + "rewards/margins": 0.8971409201622009, + "rewards/rejected": -0.55770343542099, + "step": 8661 + }, + { + "epoch": 0.5, + "learning_rate": 5.1771345783798445e-08, + "logits/chosen": -1.9833135604858398, + "logits/rejected": -1.981722354888916, + "logps/chosen": -63.627960205078125, + "logps/rejected": -203.7808837890625, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4029247462749481, + "rewards/margins": 1.5823230743408203, + "rewards/rejected": -1.9852478504180908, + "step": 8662 + }, + { + "epoch": 0.5, + "learning_rate": 5.176192764409642e-08, + "logits/chosen": -1.929509162902832, + "logits/rejected": -1.9144999980926514, + "logps/chosen": -159.49269104003906, + "logps/rejected": -282.3034362792969, + "loss": 0.2443, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4112000465393066, + "rewards/margins": 0.569229245185852, + "rewards/rejected": 1.8419708013534546, + "step": 8663 + }, + { + "epoch": 0.5, + "learning_rate": 5.1752509441802114e-08, + "logits/chosen": -1.9455832242965698, + "logits/rejected": -1.9448738098144531, + "logps/chosen": -184.52667236328125, + "logps/rejected": -342.2464599609375, + "loss": 0.0891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6362122297286987, + "rewards/margins": 2.36553955078125, + "rewards/rejected": -0.729327380657196, + "step": 8664 + }, + { + "epoch": 0.5, + "learning_rate": 5.174309117725009e-08, + "logits/chosen": -1.70429527759552, + "logits/rejected": -1.6833837032318115, + "logps/chosen": -321.01397705078125, + "logps/rejected": -472.318115234375, + "loss": 0.1819, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0531647205352783, + "rewards/margins": 0.9450317621231079, + "rewards/rejected": 1.1081329584121704, + "step": 8665 + }, + { + "epoch": 0.5, + "learning_rate": 5.1733672850774934e-08, + "logits/chosen": -2.0802624225616455, + "logits/rejected": -2.0740911960601807, + "logps/chosen": -7.227529048919678, + "logps/rejected": -144.6324005126953, + "loss": 0.4149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22769828140735626, + "rewards/margins": 1.2633718252182007, + "rewards/rejected": -1.0356734991073608, + "step": 8666 + }, + { + "epoch": 0.5, + "learning_rate": 5.172425446271126e-08, + "logits/chosen": -2.0427587032318115, + "logits/rejected": -2.0428504943847656, + "logps/chosen": -20.646757125854492, + "logps/rejected": -168.5584716796875, + "loss": 0.4167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4080650508403778, + "rewards/margins": 0.7655540704727173, + "rewards/rejected": -0.3574890196323395, + "step": 8667 + }, + { + "epoch": 0.5, + "learning_rate": 5.1714836013393613e-08, + "logits/chosen": -1.8936878442764282, + "logits/rejected": -1.8918579816818237, + "logps/chosen": -33.92134475708008, + "logps/rejected": -203.50311279296875, + "loss": 0.1783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9500507712364197, + "rewards/margins": 2.509627103805542, + "rewards/rejected": -1.559576392173767, + "step": 8668 + }, + { + "epoch": 0.5, + "learning_rate": 5.1705417503156625e-08, + "logits/chosen": -2.1349658966064453, + "logits/rejected": -2.13641357421875, + "logps/chosen": -85.44548034667969, + "logps/rejected": -236.62924194335938, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3327339291572571, + "rewards/margins": 1.9818153381347656, + "rewards/rejected": -1.6490814685821533, + "step": 8669 + }, + { + "epoch": 0.5, + "learning_rate": 5.169599893233484e-08, + "logits/chosen": -1.9736788272857666, + "logits/rejected": -1.9706530570983887, + "logps/chosen": -0.1605377346277237, + "logps/rejected": -70.00889587402344, + "loss": 0.486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006008535623550415, + "rewards/margins": 1.051496982574463, + "rewards/rejected": -1.057505488395691, + "step": 8670 + }, + { + "epoch": 0.5, + "learning_rate": 5.1686580301262895e-08, + "logits/chosen": -1.9285924434661865, + "logits/rejected": -1.9148832559585571, + "logps/chosen": -267.52276611328125, + "logps/rejected": -376.37628173828125, + "loss": 0.1848, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2758331298828125, + "rewards/margins": 1.1561156511306763, + "rewards/rejected": 1.1197174787521362, + "step": 8671 + }, + { + "epoch": 0.5, + "learning_rate": 5.1677161610275355e-08, + "logits/chosen": -1.8564097881317139, + "logits/rejected": -1.804785132408142, + "logps/chosen": -245.19509887695312, + "logps/rejected": -545.9177856445312, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.630929708480835, + "rewards/margins": 5.396475315093994, + "rewards/rejected": -1.7655457258224487, + "step": 8672 + }, + { + "epoch": 0.5, + "learning_rate": 5.1667742859706843e-08, + "logits/chosen": -1.7648805379867554, + "logits/rejected": -1.7618646621704102, + "logps/chosen": -203.5284423828125, + "logps/rejected": -278.417724609375, + "loss": 0.1239, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9101274013519287, + "rewards/margins": 1.4363359212875366, + "rewards/rejected": 1.473791480064392, + "step": 8673 + }, + { + "epoch": 0.5, + "learning_rate": 5.1658324049891944e-08, + "logits/chosen": -1.8246831893920898, + "logits/rejected": -1.8299446105957031, + "logps/chosen": -19.449792861938477, + "logps/rejected": -125.02326965332031, + "loss": 0.7676, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04605674743652344, + "rewards/margins": -0.4248233735561371, + "rewards/rejected": 0.4708801209926605, + "step": 8674 + }, + { + "epoch": 0.5, + "learning_rate": 5.164890518116526e-08, + "logits/chosen": -1.9600934982299805, + "logits/rejected": -1.9555503129959106, + "logps/chosen": -31.177536010742188, + "logps/rejected": -117.06095123291016, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8470256924629211, + "rewards/margins": 0.7693767547607422, + "rewards/rejected": 0.07764893025159836, + "step": 8675 + }, + { + "epoch": 0.5, + "learning_rate": 5.16394862538614e-08, + "logits/chosen": -1.8215467929840088, + "logits/rejected": -1.8049752712249756, + "logps/chosen": -17.87903594970703, + "logps/rejected": -289.91522216796875, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06597480922937393, + "rewards/margins": 3.8491015434265137, + "rewards/rejected": -3.7831268310546875, + "step": 8676 + }, + { + "epoch": 0.5, + "learning_rate": 5.163006726831497e-08, + "logits/chosen": -2.1089160442352295, + "logits/rejected": -2.101001739501953, + "logps/chosen": -0.0027798409573733807, + "logps/rejected": -254.61781311035156, + "loss": 0.3511, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.018134783720598e-05, + "rewards/margins": 4.802531719207764, + "rewards/rejected": -4.8026018142700195, + "step": 8677 + }, + { + "epoch": 0.51, + "learning_rate": 5.162064822486055e-08, + "logits/chosen": -2.0349924564361572, + "logits/rejected": -2.0388741493225098, + "logps/chosen": -4.883763790130615, + "logps/rejected": -102.38139343261719, + "loss": 0.673, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1369006186723709, + "rewards/margins": -0.060513779520988464, + "rewards/rejected": 0.19741439819335938, + "step": 8678 + }, + { + "epoch": 0.51, + "learning_rate": 5.161122912383281e-08, + "logits/chosen": -1.9473930597305298, + "logits/rejected": -1.9181008338928223, + "logps/chosen": -4.9128193855285645, + "logps/rejected": -424.9215087890625, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035517122596502304, + "rewards/margins": 6.8168816566467285, + "rewards/rejected": -6.852398872375488, + "step": 8679 + }, + { + "epoch": 0.51, + "learning_rate": 5.1601809965566324e-08, + "logits/chosen": -1.8763251304626465, + "logits/rejected": -1.8449941873550415, + "logps/chosen": -108.87753295898438, + "logps/rejected": -461.099365234375, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.044386386871338, + "rewards/margins": 7.563093662261963, + "rewards/rejected": -5.518707275390625, + "step": 8680 + }, + { + "epoch": 0.51, + "learning_rate": 5.159239075039571e-08, + "logits/chosen": -1.944840908050537, + "logits/rejected": -1.9979636669158936, + "logps/chosen": -257.97119140625, + "logps/rejected": -250.73936462402344, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4934754371643066, + "rewards/margins": 4.022276401519775, + "rewards/rejected": -1.5288009643554688, + "step": 8681 + }, + { + "epoch": 0.51, + "learning_rate": 5.158297147865558e-08, + "logits/chosen": -1.9546456336975098, + "logits/rejected": -1.9662727117538452, + "logps/chosen": -233.66641235351562, + "logps/rejected": -379.34967041015625, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6703553199768066, + "rewards/margins": 2.748919725418091, + "rewards/rejected": -0.07856445759534836, + "step": 8682 + }, + { + "epoch": 0.51, + "learning_rate": 5.1573552150680576e-08, + "logits/chosen": -2.025745153427124, + "logits/rejected": -2.022538900375366, + "logps/chosen": -49.155296325683594, + "logps/rejected": -235.5511474609375, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18057136237621307, + "rewards/margins": 3.796898365020752, + "rewards/rejected": -3.6163270473480225, + "step": 8683 + }, + { + "epoch": 0.51, + "learning_rate": 5.156413276680529e-08, + "logits/chosen": -2.0431532859802246, + "logits/rejected": -2.033306360244751, + "logps/chosen": -55.89183044433594, + "logps/rejected": -210.6949462890625, + "loss": 0.1909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8246536254882812, + "rewards/margins": 3.218339681625366, + "rewards/rejected": -2.393686056137085, + "step": 8684 + }, + { + "epoch": 0.51, + "learning_rate": 5.1554713327364376e-08, + "logits/chosen": -2.0121357440948486, + "logits/rejected": -1.9831050634384155, + "logps/chosen": -201.41563415527344, + "logps/rejected": -389.96966552734375, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1032302379608154, + "rewards/margins": 3.885105848312378, + "rewards/rejected": -0.7818756103515625, + "step": 8685 + }, + { + "epoch": 0.51, + "learning_rate": 5.1545293832692415e-08, + "logits/chosen": -1.8179025650024414, + "logits/rejected": -1.8492860794067383, + "logps/chosen": -182.4990234375, + "logps/rejected": -417.82379150390625, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.918621838092804, + "rewards/margins": 3.5650787353515625, + "rewards/rejected": -2.6464569568634033, + "step": 8686 + }, + { + "epoch": 0.51, + "learning_rate": 5.153587428312406e-08, + "logits/chosen": -2.1212990283966064, + "logits/rejected": -2.114379644393921, + "logps/chosen": -37.910301208496094, + "logps/rejected": -162.96511840820312, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6526695489883423, + "rewards/margins": 0.789771318435669, + "rewards/rejected": -0.13710175454616547, + "step": 8687 + }, + { + "epoch": 0.51, + "learning_rate": 5.152645467899396e-08, + "logits/chosen": -1.9586642980575562, + "logits/rejected": -1.9635286331176758, + "logps/chosen": -168.52090454101562, + "logps/rejected": -259.61907958984375, + "loss": 0.1031, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.873193383216858, + "rewards/margins": 1.624090552330017, + "rewards/rejected": 0.24910278618335724, + "step": 8688 + }, + { + "epoch": 0.51, + "learning_rate": 5.15170350206367e-08, + "logits/chosen": -1.9211909770965576, + "logits/rejected": -1.9850603342056274, + "logps/chosen": -240.65943908691406, + "logps/rejected": -369.8072204589844, + "loss": 0.044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6203049421310425, + "rewards/margins": 3.661665439605713, + "rewards/rejected": -2.04136061668396, + "step": 8689 + }, + { + "epoch": 0.51, + "learning_rate": 5.150761530838694e-08, + "logits/chosen": -1.8209278583526611, + "logits/rejected": -1.8620051145553589, + "logps/chosen": -176.1184844970703, + "logps/rejected": -427.2843017578125, + "loss": 0.0415, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4421310424804688, + "rewards/margins": 2.9110915660858154, + "rewards/rejected": -0.46896058320999146, + "step": 8690 + }, + { + "epoch": 0.51, + "learning_rate": 5.149819554257932e-08, + "logits/chosen": -1.7835838794708252, + "logits/rejected": -1.7853960990905762, + "logps/chosen": -38.16267776489258, + "logps/rejected": -132.98086547851562, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23053932189941406, + "rewards/margins": 0.5436680316925049, + "rewards/rejected": -0.31312867999076843, + "step": 8691 + }, + { + "epoch": 0.51, + "learning_rate": 5.1488775723548464e-08, + "logits/chosen": -1.8234105110168457, + "logits/rejected": -1.819206953048706, + "logps/chosen": -19.200462341308594, + "logps/rejected": -113.50093078613281, + "loss": 0.2412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8200527429580688, + "rewards/margins": 1.718428373336792, + "rewards/rejected": -0.8983756899833679, + "step": 8692 + }, + { + "epoch": 0.51, + "learning_rate": 5.1479355851629006e-08, + "logits/chosen": -1.8268985748291016, + "logits/rejected": -1.7940478324890137, + "logps/chosen": -125.67216491699219, + "logps/rejected": -407.82763671875, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9384446144104004, + "rewards/margins": 3.0934953689575195, + "rewards/rejected": -0.15505066514015198, + "step": 8693 + }, + { + "epoch": 0.51, + "learning_rate": 5.146993592715559e-08, + "logits/chosen": -2.151463508605957, + "logits/rejected": -2.1420764923095703, + "logps/chosen": -38.197086334228516, + "logps/rejected": -304.757568359375, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44409486651420593, + "rewards/margins": 2.1214873790740967, + "rewards/rejected": -1.677392601966858, + "step": 8694 + }, + { + "epoch": 0.51, + "learning_rate": 5.146051595046287e-08, + "logits/chosen": -1.9546527862548828, + "logits/rejected": -1.9297162294387817, + "logps/chosen": -216.92408752441406, + "logps/rejected": -413.577880859375, + "loss": 0.0678, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5380935668945312, + "rewards/margins": 2.2250962257385254, + "rewards/rejected": 0.312997430562973, + "step": 8695 + }, + { + "epoch": 0.51, + "learning_rate": 5.1451095921885476e-08, + "logits/chosen": -1.7551612854003906, + "logits/rejected": -1.6732361316680908, + "logps/chosen": -206.204833984375, + "logps/rejected": -496.30914306640625, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5666122436523438, + "rewards/margins": 3.4541244506835938, + "rewards/rejected": -0.88751220703125, + "step": 8696 + }, + { + "epoch": 0.51, + "learning_rate": 5.144167584175805e-08, + "logits/chosen": -2.0261569023132324, + "logits/rejected": -2.016101360321045, + "logps/chosen": -0.009022864513099194, + "logps/rejected": -156.9654541015625, + "loss": 0.3569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004203194403089583, + "rewards/margins": 2.761716604232788, + "rewards/rejected": -2.762136936187744, + "step": 8697 + }, + { + "epoch": 0.51, + "learning_rate": 5.143225571041524e-08, + "logits/chosen": -1.8791003227233887, + "logits/rejected": -1.841103196144104, + "logps/chosen": -297.47705078125, + "logps/rejected": -461.9310302734375, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4939545392990112, + "rewards/margins": 4.084393501281738, + "rewards/rejected": -2.5904388427734375, + "step": 8698 + }, + { + "epoch": 0.51, + "learning_rate": 5.142283552819171e-08, + "logits/chosen": -1.650184154510498, + "logits/rejected": -1.70686674118042, + "logps/chosen": -189.5205841064453, + "logps/rejected": -356.3101806640625, + "loss": 0.0347, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2902145385742188, + "rewards/margins": 4.0825910568237305, + "rewards/rejected": -1.7923767566680908, + "step": 8699 + }, + { + "epoch": 0.51, + "learning_rate": 5.1413415295422105e-08, + "logits/chosen": -2.016378402709961, + "logits/rejected": -1.9984549283981323, + "logps/chosen": -101.6393814086914, + "logps/rejected": -343.699462890625, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05428314208984375, + "rewards/margins": 1.0744339227676392, + "rewards/rejected": -1.0201507806777954, + "step": 8700 + }, + { + "epoch": 0.51, + "learning_rate": 5.140399501244107e-08, + "logits/chosen": -1.834883213043213, + "logits/rejected": -1.8317701816558838, + "logps/chosen": -0.016096552833914757, + "logps/rejected": -352.6336975097656, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006947656511329114, + "rewards/margins": 7.4966654777526855, + "rewards/rejected": -7.4973602294921875, + "step": 8701 + }, + { + "epoch": 0.51, + "learning_rate": 5.139457467958327e-08, + "logits/chosen": -1.9148892164230347, + "logits/rejected": -1.9122072458267212, + "logps/chosen": -0.008869301527738571, + "logps/rejected": -44.624542236328125, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014891762984916568, + "rewards/margins": 0.6423163414001465, + "rewards/rejected": -0.6408271789550781, + "step": 8702 + }, + { + "epoch": 0.51, + "learning_rate": 5.138515429718335e-08, + "logits/chosen": -1.943879246711731, + "logits/rejected": -1.9385125637054443, + "logps/chosen": -0.0003348208556417376, + "logps/rejected": -179.32705688476562, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.138297521829372e-06, + "rewards/margins": 3.6206283569335938, + "rewards/rejected": -3.6206345558166504, + "step": 8703 + }, + { + "epoch": 0.51, + "learning_rate": 5.137573386557599e-08, + "logits/chosen": -2.147484064102173, + "logits/rejected": -2.1487369537353516, + "logps/chosen": -55.534423828125, + "logps/rejected": -178.80511474609375, + "loss": 0.6487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9222316741943359, + "rewards/margins": 1.9405598640441895, + "rewards/rejected": -2.8627915382385254, + "step": 8704 + }, + { + "epoch": 0.51, + "learning_rate": 5.13663133850958e-08, + "logits/chosen": -1.885474681854248, + "logits/rejected": -1.8783332109451294, + "logps/chosen": -0.033807337284088135, + "logps/rejected": -62.141502380371094, + "loss": 0.538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009838053956627846, + "rewards/margins": 0.6550090909004211, + "rewards/rejected": -0.6559929251670837, + "step": 8705 + }, + { + "epoch": 0.51, + "learning_rate": 5.1356892856077505e-08, + "logits/chosen": -1.9862531423568726, + "logits/rejected": -2.0336661338806152, + "logps/chosen": -342.2724609375, + "logps/rejected": -523.75390625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.672869920730591, + "rewards/margins": 5.6181640625, + "rewards/rejected": -2.945294141769409, + "step": 8706 + }, + { + "epoch": 0.51, + "learning_rate": 5.134747227885573e-08, + "logits/chosen": -1.8715442419052124, + "logits/rejected": -1.8629733324050903, + "logps/chosen": -158.38992309570312, + "logps/rejected": -224.98777770996094, + "loss": 0.6242, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.546606421470642, + "rewards/margins": -0.8141740560531616, + "rewards/rejected": 2.3607804775238037, + "step": 8707 + }, + { + "epoch": 0.51, + "learning_rate": 5.133805165376515e-08, + "logits/chosen": -1.9771782159805298, + "logits/rejected": -1.969594955444336, + "logps/chosen": -260.5621337890625, + "logps/rejected": -457.3001708984375, + "loss": 0.0765, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6218719482421875, + "rewards/margins": 2.1842377185821533, + "rewards/rejected": 0.43763428926467896, + "step": 8708 + }, + { + "epoch": 0.51, + "learning_rate": 5.132863098114043e-08, + "logits/chosen": -2.0786800384521484, + "logits/rejected": -2.1096456050872803, + "logps/chosen": -213.197265625, + "logps/rejected": -441.6923828125, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8868988752365112, + "rewards/margins": 6.086716175079346, + "rewards/rejected": -4.199817180633545, + "step": 8709 + }, + { + "epoch": 0.51, + "learning_rate": 5.131921026131623e-08, + "logits/chosen": -1.6686192750930786, + "logits/rejected": -1.6512727737426758, + "logps/chosen": -231.51058959960938, + "logps/rejected": -322.519775390625, + "loss": 0.3848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6152618527412415, + "rewards/margins": 0.262533575296402, + "rewards/rejected": 0.3527282774448395, + "step": 8710 + }, + { + "epoch": 0.51, + "learning_rate": 5.130978949462724e-08, + "logits/chosen": -2.0293753147125244, + "logits/rejected": -2.0191080570220947, + "logps/chosen": -25.859390258789062, + "logps/rejected": -240.92410278320312, + "loss": 0.384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05289135128259659, + "rewards/margins": 2.9491076469421387, + "rewards/rejected": -3.0019989013671875, + "step": 8711 + }, + { + "epoch": 0.51, + "learning_rate": 5.130036868140811e-08, + "logits/chosen": -1.9253519773483276, + "logits/rejected": -1.9223535060882568, + "logps/chosen": -51.21706008911133, + "logps/rejected": -105.51038360595703, + "loss": 0.5865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05729103088378906, + "rewards/margins": 0.5437389612197876, + "rewards/rejected": -0.6010299921035767, + "step": 8712 + }, + { + "epoch": 0.51, + "learning_rate": 5.1290947821993534e-08, + "logits/chosen": -1.9830570220947266, + "logits/rejected": -1.9807546138763428, + "logps/chosen": -29.720245361328125, + "logps/rejected": -152.2828369140625, + "loss": 0.4955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0983760878443718, + "rewards/margins": 1.0112401247024536, + "rewards/rejected": -1.1096161603927612, + "step": 8713 + }, + { + "epoch": 0.51, + "learning_rate": 5.1281526916718166e-08, + "logits/chosen": -1.9562864303588867, + "logits/rejected": -1.945177435874939, + "logps/chosen": -82.37127685546875, + "logps/rejected": -321.947998046875, + "loss": 0.241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42958831787109375, + "rewards/margins": 4.351860046386719, + "rewards/rejected": -3.922271728515625, + "step": 8714 + }, + { + "epoch": 0.51, + "learning_rate": 5.127210596591671e-08, + "logits/chosen": -1.9974933862686157, + "logits/rejected": -1.919910192489624, + "logps/chosen": -227.04347229003906, + "logps/rejected": -446.3041076660156, + "loss": 0.7785, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8949859738349915, + "rewards/margins": -0.9592528939247131, + "rewards/rejected": 1.8542388677597046, + "step": 8715 + }, + { + "epoch": 0.51, + "learning_rate": 5.126268496992381e-08, + "logits/chosen": -2.1794188022613525, + "logits/rejected": -2.1659739017486572, + "logps/chosen": -59.67533493041992, + "logps/rejected": -182.64297485351562, + "loss": 0.6239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7466266751289368, + "rewards/margins": 1.3884046077728271, + "rewards/rejected": -2.135031223297119, + "step": 8716 + }, + { + "epoch": 0.51, + "learning_rate": 5.125326392907417e-08, + "logits/chosen": -1.8975005149841309, + "logits/rejected": -1.8990622758865356, + "logps/chosen": -174.564208984375, + "logps/rejected": -314.45367431640625, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0902650356292725, + "rewards/margins": 1.9453492164611816, + "rewards/rejected": 0.14491577446460724, + "step": 8717 + }, + { + "epoch": 0.51, + "learning_rate": 5.1243842843702455e-08, + "logits/chosen": -2.0318663120269775, + "logits/rejected": -2.0234522819519043, + "logps/chosen": -288.8475341796875, + "logps/rejected": -378.9720153808594, + "loss": 0.0415, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.15775465965271, + "rewards/margins": 2.884979486465454, + "rewards/rejected": 0.272775262594223, + "step": 8718 + }, + { + "epoch": 0.51, + "learning_rate": 5.123442171414337e-08, + "logits/chosen": -1.794216513633728, + "logits/rejected": -1.7892638444900513, + "logps/chosen": -149.8881072998047, + "logps/rejected": -283.0025634765625, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7930084466934204, + "rewards/margins": 0.3294951915740967, + "rewards/rejected": 1.4635132551193237, + "step": 8719 + }, + { + "epoch": 0.51, + "learning_rate": 5.1225000540731577e-08, + "logits/chosen": -1.7966957092285156, + "logits/rejected": -1.7861803770065308, + "logps/chosen": -293.9943542480469, + "logps/rejected": -327.9635009765625, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.373577833175659, + "rewards/margins": 3.399078369140625, + "rewards/rejected": -0.02550048939883709, + "step": 8720 + }, + { + "epoch": 0.51, + "learning_rate": 5.121557932380179e-08, + "logits/chosen": -1.8591420650482178, + "logits/rejected": -1.8538058996200562, + "logps/chosen": -38.358985900878906, + "logps/rejected": -278.17425537109375, + "loss": 0.492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021433640271425247, + "rewards/margins": 0.5406818389892578, + "rewards/rejected": -0.562115490436554, + "step": 8721 + }, + { + "epoch": 0.51, + "learning_rate": 5.1206158063688665e-08, + "logits/chosen": -1.9456636905670166, + "logits/rejected": -1.9899954795837402, + "logps/chosen": -142.3595733642578, + "logps/rejected": -375.7666320800781, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.702490210533142, + "rewards/margins": 4.033267021179199, + "rewards/rejected": -2.3307769298553467, + "step": 8722 + }, + { + "epoch": 0.51, + "learning_rate": 5.119673676072692e-08, + "logits/chosen": -2.023536443710327, + "logits/rejected": -2.018951654434204, + "logps/chosen": -55.618412017822266, + "logps/rejected": -248.5789031982422, + "loss": 0.5227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5160778164863586, + "rewards/margins": 1.8021976947784424, + "rewards/rejected": -2.3182754516601562, + "step": 8723 + }, + { + "epoch": 0.51, + "learning_rate": 5.118731541525122e-08, + "logits/chosen": -2.0281388759613037, + "logits/rejected": -2.0332815647125244, + "logps/chosen": -92.6215591430664, + "logps/rejected": -241.41677856445312, + "loss": 0.5169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7631393671035767, + "rewards/margins": 0.08918613195419312, + "rewards/rejected": 0.6739532351493835, + "step": 8724 + }, + { + "epoch": 0.51, + "learning_rate": 5.117789402759627e-08, + "logits/chosen": -1.942692756652832, + "logits/rejected": -1.9480282068252563, + "logps/chosen": -2.58174204826355, + "logps/rejected": -91.57363891601562, + "loss": 0.5169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022964073345065117, + "rewards/margins": 0.9417995810508728, + "rewards/rejected": -0.9647636413574219, + "step": 8725 + }, + { + "epoch": 0.51, + "learning_rate": 5.116847259809677e-08, + "logits/chosen": -1.9036436080932617, + "logits/rejected": -1.9306477308273315, + "logps/chosen": -189.64254760742188, + "logps/rejected": -418.23480224609375, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.450048804283142, + "rewards/margins": 3.43505859375, + "rewards/rejected": -1.985009789466858, + "step": 8726 + }, + { + "epoch": 0.51, + "learning_rate": 5.1159051127087395e-08, + "logits/chosen": -2.0024948120117188, + "logits/rejected": -1.9946779012680054, + "logps/chosen": -311.1959228515625, + "logps/rejected": -541.0767211914062, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3626465797424316, + "rewards/margins": 3.9067201614379883, + "rewards/rejected": -1.544073462486267, + "step": 8727 + }, + { + "epoch": 0.51, + "learning_rate": 5.1149629614902856e-08, + "logits/chosen": -1.7072279453277588, + "logits/rejected": -1.7135380506515503, + "logps/chosen": -19.991729736328125, + "logps/rejected": -162.00616455078125, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.414022833108902, + "rewards/margins": 1.3201141357421875, + "rewards/rejected": -0.9060913324356079, + "step": 8728 + }, + { + "epoch": 0.51, + "learning_rate": 5.1140208061877855e-08, + "logits/chosen": -2.0000815391540527, + "logits/rejected": -2.0167508125305176, + "logps/chosen": -127.70040130615234, + "logps/rejected": -277.55072021484375, + "loss": 0.0674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7189491987228394, + "rewards/margins": 4.560556888580322, + "rewards/rejected": -2.8416078090667725, + "step": 8729 + }, + { + "epoch": 0.51, + "learning_rate": 5.113078646834709e-08, + "logits/chosen": -1.9663680791854858, + "logits/rejected": -1.9661246538162231, + "logps/chosen": -21.020191192626953, + "logps/rejected": -156.50347900390625, + "loss": 0.4926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08263397216796875, + "rewards/margins": 0.9301285147666931, + "rewards/rejected": -0.8474945425987244, + "step": 8730 + }, + { + "epoch": 0.51, + "learning_rate": 5.112136483464525e-08, + "logits/chosen": -1.8372663259506226, + "logits/rejected": -1.8451416492462158, + "logps/chosen": -139.15713500976562, + "logps/rejected": -194.05821228027344, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9330810904502869, + "rewards/margins": 0.7245956659317017, + "rewards/rejected": 0.208485409617424, + "step": 8731 + }, + { + "epoch": 0.51, + "learning_rate": 5.111194316110705e-08, + "logits/chosen": -1.862568974494934, + "logits/rejected": -1.8222404718399048, + "logps/chosen": -223.98471069335938, + "logps/rejected": -465.61822509765625, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4784302711486816, + "rewards/margins": 2.81463623046875, + "rewards/rejected": -0.3362060487270355, + "step": 8732 + }, + { + "epoch": 0.51, + "learning_rate": 5.1102521448067195e-08, + "logits/chosen": -2.102954149246216, + "logits/rejected": -2.082871198654175, + "logps/chosen": -0.07814870029687881, + "logps/rejected": -291.0622863769531, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004159748554229736, + "rewards/margins": 4.606782913208008, + "rewards/rejected": -4.602622985839844, + "step": 8733 + }, + { + "epoch": 0.51, + "learning_rate": 5.109309969586038e-08, + "logits/chosen": -2.006925106048584, + "logits/rejected": -2.0022852420806885, + "logps/chosen": -29.246707916259766, + "logps/rejected": -360.3331298828125, + "loss": 0.2044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7342510223388672, + "rewards/margins": 7.622437953948975, + "rewards/rejected": -6.888186931610107, + "step": 8734 + }, + { + "epoch": 0.51, + "learning_rate": 5.108367790482131e-08, + "logits/chosen": -1.9554437398910522, + "logits/rejected": -1.9934253692626953, + "logps/chosen": -293.641845703125, + "logps/rejected": -601.42333984375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4196412563323975, + "rewards/margins": 7.747583389282227, + "rewards/rejected": -4.32794189453125, + "step": 8735 + }, + { + "epoch": 0.51, + "learning_rate": 5.107425607528472e-08, + "logits/chosen": -1.936667561531067, + "logits/rejected": -1.9298784732818604, + "logps/chosen": -53.29444885253906, + "logps/rejected": -290.30462646484375, + "loss": 0.2445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7312160730361938, + "rewards/margins": 1.9414700269699097, + "rewards/rejected": -1.2102539539337158, + "step": 8736 + }, + { + "epoch": 0.51, + "learning_rate": 5.106483420758527e-08, + "logits/chosen": -1.5737204551696777, + "logits/rejected": -1.5656726360321045, + "logps/chosen": -200.17050170898438, + "logps/rejected": -245.50628662109375, + "loss": 0.093, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.029388427734375, + "rewards/margins": 2.119899034500122, + "rewards/rejected": -0.09051056206226349, + "step": 8737 + }, + { + "epoch": 0.51, + "learning_rate": 5.1055412302057735e-08, + "logits/chosen": -1.9915019273757935, + "logits/rejected": -1.9861565828323364, + "logps/chosen": -311.60321044921875, + "logps/rejected": -457.6146240234375, + "loss": 0.1456, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7222535610198975, + "rewards/margins": 1.1171021461486816, + "rewards/rejected": 2.605151414871216, + "step": 8738 + }, + { + "epoch": 0.51, + "learning_rate": 5.104599035903675e-08, + "logits/chosen": -1.9725861549377441, + "logits/rejected": -1.9718430042266846, + "logps/chosen": -1.6470959186553955, + "logps/rejected": -45.6937255859375, + "loss": 0.6061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10360320657491684, + "rewards/margins": 0.42170390486717224, + "rewards/rejected": -0.5253071188926697, + "step": 8739 + }, + { + "epoch": 0.51, + "learning_rate": 5.10365683788571e-08, + "logits/chosen": -1.8874017000198364, + "logits/rejected": -1.887152910232544, + "logps/chosen": -153.81146240234375, + "logps/rejected": -184.17123413085938, + "loss": 0.5468, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6169677972793579, + "rewards/margins": -0.3915649652481079, + "rewards/rejected": 1.0085327625274658, + "step": 8740 + }, + { + "epoch": 0.51, + "learning_rate": 5.102714636185346e-08, + "logits/chosen": -1.9626071453094482, + "logits/rejected": -1.9593251943588257, + "logps/chosen": -3.4451117244316265e-05, + "logps/rejected": -75.71688842773438, + "loss": 0.6415, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.053012219173979e-07, + "rewards/margins": 0.17958104610443115, + "rewards/rejected": -0.1795814484357834, + "step": 8741 + }, + { + "epoch": 0.51, + "learning_rate": 5.101772430836056e-08, + "logits/chosen": -1.9148999452590942, + "logits/rejected": -1.901690125465393, + "logps/chosen": -51.04295349121094, + "logps/rejected": -180.8687744140625, + "loss": 0.621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18719978630542755, + "rewards/margins": 0.5232982635498047, + "rewards/rejected": -0.710498034954071, + "step": 8742 + }, + { + "epoch": 0.51, + "learning_rate": 5.100830221871312e-08, + "logits/chosen": -1.8272035121917725, + "logits/rejected": -1.8188977241516113, + "logps/chosen": -7.598105430603027, + "logps/rejected": -61.42572021484375, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01880049705505371, + "rewards/margins": 0.015316247940063477, + "rewards/rejected": -0.03411674499511719, + "step": 8743 + }, + { + "epoch": 0.51, + "learning_rate": 5.0998880093245854e-08, + "logits/chosen": -1.8720097541809082, + "logits/rejected": -1.8733657598495483, + "logps/chosen": -175.65536499023438, + "logps/rejected": -459.7279357910156, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.736114501953125, + "rewards/margins": 6.131735324859619, + "rewards/rejected": -4.395620822906494, + "step": 8744 + }, + { + "epoch": 0.51, + "learning_rate": 5.098945793229348e-08, + "logits/chosen": -1.9255746603012085, + "logits/rejected": -1.9647952318191528, + "logps/chosen": -179.32984924316406, + "logps/rejected": -291.2091369628906, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1138534545898438, + "rewards/margins": 4.198369026184082, + "rewards/rejected": -2.084515333175659, + "step": 8745 + }, + { + "epoch": 0.51, + "learning_rate": 5.098003573619072e-08, + "logits/chosen": -1.8465380668640137, + "logits/rejected": -1.840507984161377, + "logps/chosen": -31.900911331176758, + "logps/rejected": -204.22430419921875, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09264278411865234, + "rewards/margins": 0.7565649151802063, + "rewards/rejected": -0.663922131061554, + "step": 8746 + }, + { + "epoch": 0.51, + "learning_rate": 5.0970613505272285e-08, + "logits/chosen": -1.8632299900054932, + "logits/rejected": -1.853293538093567, + "logps/chosen": -202.0570831298828, + "logps/rejected": -313.1171875, + "loss": 0.1053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8254470825195312, + "rewards/margins": 1.8415664434432983, + "rewards/rejected": -0.01611938513815403, + "step": 8747 + }, + { + "epoch": 0.51, + "learning_rate": 5.0961191239872915e-08, + "logits/chosen": -2.141103744506836, + "logits/rejected": -2.1330552101135254, + "logps/chosen": -12.00434684753418, + "logps/rejected": -318.62017822265625, + "loss": 0.2499, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4753536283969879, + "rewards/margins": 4.393233776092529, + "rewards/rejected": -3.9178802967071533, + "step": 8748 + }, + { + "epoch": 0.51, + "learning_rate": 5.0951768940327344e-08, + "logits/chosen": -1.960849404335022, + "logits/rejected": -1.9589051008224487, + "logps/chosen": -41.193206787109375, + "logps/rejected": -198.26431274414062, + "loss": 0.3708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17327880859375, + "rewards/margins": 3.4646546840667725, + "rewards/rejected": -3.6379334926605225, + "step": 8749 + }, + { + "epoch": 0.51, + "learning_rate": 5.094234660697026e-08, + "logits/chosen": -2.026216983795166, + "logits/rejected": -1.9967893362045288, + "logps/chosen": -167.75830078125, + "logps/rejected": -496.13165283203125, + "loss": 0.1616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7375335693359375, + "rewards/margins": 2.8329498767852783, + "rewards/rejected": -2.095416307449341, + "step": 8750 + }, + { + "epoch": 0.51, + "learning_rate": 5.093292424013644e-08, + "logits/chosen": -2.026461362838745, + "logits/rejected": -2.0332682132720947, + "logps/chosen": -0.004641356877982616, + "logps/rejected": -46.41307067871094, + "loss": 0.6766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000157967209815979, + "rewards/margins": 0.013438376598060131, + "rewards/rejected": -0.01359634380787611, + "step": 8751 + }, + { + "epoch": 0.51, + "learning_rate": 5.0923501840160575e-08, + "logits/chosen": -1.672054409980774, + "logits/rejected": -1.6571744680404663, + "logps/chosen": -54.11690902709961, + "logps/rejected": -246.2023468017578, + "loss": 0.2101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5790691375732422, + "rewards/margins": 2.6621761322021484, + "rewards/rejected": -2.0831069946289062, + "step": 8752 + }, + { + "epoch": 0.51, + "learning_rate": 5.0914079407377416e-08, + "logits/chosen": -1.9096009731292725, + "logits/rejected": -1.9405280351638794, + "logps/chosen": -218.75025939941406, + "logps/rejected": -414.2469787597656, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.431540012359619, + "rewards/margins": 3.6773500442504883, + "rewards/rejected": -1.2458099126815796, + "step": 8753 + }, + { + "epoch": 0.51, + "learning_rate": 5.090465694212167e-08, + "logits/chosen": -1.9488660097122192, + "logits/rejected": -1.9495410919189453, + "logps/chosen": -127.99982452392578, + "logps/rejected": -348.5224304199219, + "loss": 0.2048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5085029602050781, + "rewards/margins": 3.4398598670959473, + "rewards/rejected": -2.931356906890869, + "step": 8754 + }, + { + "epoch": 0.51, + "learning_rate": 5.0895234444728106e-08, + "logits/chosen": -1.9126701354980469, + "logits/rejected": -1.9679514169692993, + "logps/chosen": -192.9298095703125, + "logps/rejected": -246.634033203125, + "loss": 0.1693, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.653570532798767, + "rewards/margins": 1.2473220825195312, + "rewards/rejected": 0.4062484800815582, + "step": 8755 + }, + { + "epoch": 0.51, + "learning_rate": 5.0885811915531416e-08, + "logits/chosen": -1.9027793407440186, + "logits/rejected": -1.8988254070281982, + "logps/chosen": -4.087227821350098, + "logps/rejected": -385.830078125, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07887406647205353, + "rewards/margins": 9.20441722869873, + "rewards/rejected": -9.125543594360352, + "step": 8756 + }, + { + "epoch": 0.51, + "learning_rate": 5.0876389354866366e-08, + "logits/chosen": -1.8860712051391602, + "logits/rejected": -1.8719364404678345, + "logps/chosen": -0.0001100280205719173, + "logps/rejected": -104.53734588623047, + "loss": 0.4516, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.067894067498855e-05, + "rewards/margins": 1.3949792385101318, + "rewards/rejected": -1.3949486017227173, + "step": 8757 + }, + { + "epoch": 0.51, + "learning_rate": 5.0866966763067674e-08, + "logits/chosen": -1.9058996438980103, + "logits/rejected": -1.901340126991272, + "logps/chosen": -8.46365001052618e-05, + "logps/rejected": -211.70567321777344, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.243675448378781e-06, + "rewards/margins": 4.005380630493164, + "rewards/rejected": -4.005384922027588, + "step": 8758 + }, + { + "epoch": 0.51, + "learning_rate": 5.0857544140470076e-08, + "logits/chosen": -2.131343364715576, + "logits/rejected": -2.1121184825897217, + "logps/chosen": -86.99066162109375, + "logps/rejected": -336.3857421875, + "loss": 0.2381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2794693112373352, + "rewards/margins": 6.911217212677002, + "rewards/rejected": -6.631747722625732, + "step": 8759 + }, + { + "epoch": 0.51, + "learning_rate": 5.0848121487408316e-08, + "logits/chosen": -1.9025566577911377, + "logits/rejected": -1.902498483657837, + "logps/chosen": -3.794189214706421, + "logps/rejected": -372.5896301269531, + "loss": 0.4006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11865603923797607, + "rewards/margins": 4.578777313232422, + "rewards/rejected": -4.6974334716796875, + "step": 8760 + }, + { + "epoch": 0.51, + "learning_rate": 5.083869880421714e-08, + "logits/chosen": -1.8321083784103394, + "logits/rejected": -1.8305906057357788, + "logps/chosen": -8.002639770507812, + "logps/rejected": -57.380714416503906, + "loss": 0.6273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23739968240261078, + "rewards/margins": 0.040213778614997864, + "rewards/rejected": 0.19718590378761292, + "step": 8761 + }, + { + "epoch": 0.51, + "learning_rate": 5.082927609123128e-08, + "logits/chosen": -1.9860504865646362, + "logits/rejected": -1.9814364910125732, + "logps/chosen": -7.271648064488545e-05, + "logps/rejected": -322.4169616699219, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.659632739072549e-06, + "rewards/margins": 6.628200531005859, + "rewards/rejected": -6.628204345703125, + "step": 8762 + }, + { + "epoch": 0.51, + "learning_rate": 5.081985334878548e-08, + "logits/chosen": -1.90914785861969, + "logits/rejected": -1.916325330734253, + "logps/chosen": -35.86774826049805, + "logps/rejected": -218.9307403564453, + "loss": 0.2314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1678470373153687, + "rewards/margins": 1.3127124309539795, + "rewards/rejected": -0.14486542344093323, + "step": 8763 + }, + { + "epoch": 0.51, + "learning_rate": 5.081043057721447e-08, + "logits/chosen": -1.9186583757400513, + "logits/rejected": -1.9185527563095093, + "logps/chosen": -12.300322532653809, + "logps/rejected": -76.31161499023438, + "loss": 0.4492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44575414061546326, + "rewards/margins": 0.5931090116500854, + "rewards/rejected": -0.1473548859357834, + "step": 8764 + }, + { + "epoch": 0.51, + "learning_rate": 5.080100777685301e-08, + "logits/chosen": -1.9105230569839478, + "logits/rejected": -1.903167724609375, + "logps/chosen": -7.352927207946777, + "logps/rejected": -152.1291046142578, + "loss": 0.5397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19763222336769104, + "rewards/margins": 0.33379095792770386, + "rewards/rejected": -0.136158749461174, + "step": 8765 + }, + { + "epoch": 0.51, + "learning_rate": 5.0791584948035825e-08, + "logits/chosen": -1.6528133153915405, + "logits/rejected": -1.6469255685806274, + "logps/chosen": -26.928302764892578, + "logps/rejected": -109.29838562011719, + "loss": 0.3951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2750057280063629, + "rewards/margins": 1.0565929412841797, + "rewards/rejected": -0.7815872430801392, + "step": 8766 + }, + { + "epoch": 0.51, + "learning_rate": 5.078216209109768e-08, + "logits/chosen": -1.7741858959197998, + "logits/rejected": -1.771183729171753, + "logps/chosen": -32.437686920166016, + "logps/rejected": -229.67298889160156, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34012946486473083, + "rewards/margins": 2.9096882343292236, + "rewards/rejected": -2.56955885887146, + "step": 8767 + }, + { + "epoch": 0.51, + "learning_rate": 5.0772739206373305e-08, + "logits/chosen": -1.8037909269332886, + "logits/rejected": -1.8027812242507935, + "logps/chosen": -320.2545166015625, + "logps/rejected": -523.40087890625, + "loss": 0.0643, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.404425024986267, + "rewards/margins": 4.131661891937256, + "rewards/rejected": -2.7272369861602783, + "step": 8768 + }, + { + "epoch": 0.51, + "learning_rate": 5.076331629419744e-08, + "logits/chosen": -2.060023069381714, + "logits/rejected": -2.060170888900757, + "logps/chosen": -6.434159278869629, + "logps/rejected": -31.939485549926758, + "loss": 0.6474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09295034408569336, + "rewards/margins": 0.14224442839622498, + "rewards/rejected": -0.04929409176111221, + "step": 8769 + }, + { + "epoch": 0.51, + "learning_rate": 5.0753893354904874e-08, + "logits/chosen": -1.9931707382202148, + "logits/rejected": -1.9854716062545776, + "logps/chosen": -14.90080451965332, + "logps/rejected": -206.9258575439453, + "loss": 0.2384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4367710053920746, + "rewards/margins": 2.566398859024048, + "rewards/rejected": -2.1296279430389404, + "step": 8770 + }, + { + "epoch": 0.51, + "learning_rate": 5.074447038883031e-08, + "logits/chosen": -2.054551839828491, + "logits/rejected": -2.017634630203247, + "logps/chosen": -195.0157470703125, + "logps/rejected": -352.3810729980469, + "loss": 0.214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.252156138420105, + "rewards/margins": 1.4407150745391846, + "rewards/rejected": -0.18855896592140198, + "step": 8771 + }, + { + "epoch": 0.51, + "learning_rate": 5.073504739630853e-08, + "logits/chosen": -1.8819169998168945, + "logits/rejected": -1.8779877424240112, + "logps/chosen": -7.76005220413208, + "logps/rejected": -158.5122528076172, + "loss": 0.5041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16233468055725098, + "rewards/margins": 0.7256419062614441, + "rewards/rejected": -0.5633072257041931, + "step": 8772 + }, + { + "epoch": 0.51, + "learning_rate": 5.0725624377674256e-08, + "logits/chosen": -2.06628155708313, + "logits/rejected": -2.073425769805908, + "logps/chosen": -240.85006713867188, + "logps/rejected": -412.5122375488281, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6615631580352783, + "rewards/margins": 1.7868409156799316, + "rewards/rejected": 0.8747223019599915, + "step": 8773 + }, + { + "epoch": 0.51, + "learning_rate": 5.071620133326228e-08, + "logits/chosen": -1.7944434881210327, + "logits/rejected": -1.7922933101654053, + "logps/chosen": -73.90479278564453, + "logps/rejected": -220.3202362060547, + "loss": 0.1254, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7870155572891235, + "rewards/margins": 2.152973175048828, + "rewards/rejected": -0.365957647562027, + "step": 8774 + }, + { + "epoch": 0.51, + "learning_rate": 5.07067782634073e-08, + "logits/chosen": -1.879624366760254, + "logits/rejected": -1.8628135919570923, + "logps/chosen": -205.24514770507812, + "logps/rejected": -460.5714111328125, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.506854295730591, + "rewards/margins": 3.8350954055786133, + "rewards/rejected": -1.328240990638733, + "step": 8775 + }, + { + "epoch": 0.51, + "learning_rate": 5.069735516844412e-08, + "logits/chosen": -1.868916392326355, + "logits/rejected": -1.8765695095062256, + "logps/chosen": -218.87979125976562, + "logps/rejected": -424.3536682128906, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.716975450515747, + "rewards/margins": 2.8150558471679688, + "rewards/rejected": -0.09808044880628586, + "step": 8776 + }, + { + "epoch": 0.51, + "learning_rate": 5.068793204870746e-08, + "logits/chosen": -1.8310068845748901, + "logits/rejected": -1.8321545124053955, + "logps/chosen": -45.80994415283203, + "logps/rejected": -216.48263549804688, + "loss": 0.2618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5348235964775085, + "rewards/margins": 3.288540840148926, + "rewards/rejected": -2.7537171840667725, + "step": 8777 + }, + { + "epoch": 0.51, + "learning_rate": 5.0678508904532093e-08, + "logits/chosen": -2.228637456893921, + "logits/rejected": -2.221168041229248, + "logps/chosen": -16.782249450683594, + "logps/rejected": -118.6685791015625, + "loss": 0.5787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2715616226196289, + "rewards/margins": 0.15931949019432068, + "rewards/rejected": 0.11224212497472763, + "step": 8778 + }, + { + "epoch": 0.51, + "learning_rate": 5.0669085736252757e-08, + "logits/chosen": -1.7983317375183105, + "logits/rejected": -1.793806552886963, + "logps/chosen": -19.071495056152344, + "logps/rejected": -133.13987731933594, + "loss": 0.3902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5360546112060547, + "rewards/margins": 0.7537547945976257, + "rewards/rejected": -0.21770019829273224, + "step": 8779 + }, + { + "epoch": 0.51, + "learning_rate": 5.065966254420424e-08, + "logits/chosen": -1.9033578634262085, + "logits/rejected": -1.9428898096084595, + "logps/chosen": -190.860595703125, + "logps/rejected": -258.5704345703125, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7389129400253296, + "rewards/margins": 2.6607863903045654, + "rewards/rejected": -0.9218735098838806, + "step": 8780 + }, + { + "epoch": 0.51, + "learning_rate": 5.065023932872128e-08, + "logits/chosen": -2.0862889289855957, + "logits/rejected": -2.0846691131591797, + "logps/chosen": -22.341148376464844, + "logps/rejected": -231.84132385253906, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4546993374824524, + "rewards/margins": 2.398427963256836, + "rewards/rejected": -1.9437286853790283, + "step": 8781 + }, + { + "epoch": 0.51, + "learning_rate": 5.0640816090138637e-08, + "logits/chosen": -2.0563032627105713, + "logits/rejected": -2.034390687942505, + "logps/chosen": -19.160831451416016, + "logps/rejected": -428.25311279296875, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10901489108800888, + "rewards/margins": 9.82627010345459, + "rewards/rejected": -9.935284614562988, + "step": 8782 + }, + { + "epoch": 0.51, + "learning_rate": 5.0631392828791065e-08, + "logits/chosen": -2.0697097778320312, + "logits/rejected": -2.0672757625579834, + "logps/chosen": -5.72584867477417, + "logps/rejected": -89.43081665039062, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2433236688375473, + "rewards/margins": 1.6070597171783447, + "rewards/rejected": -1.3637360334396362, + "step": 8783 + }, + { + "epoch": 0.51, + "learning_rate": 5.0621969545013346e-08, + "logits/chosen": -1.947943925857544, + "logits/rejected": -1.946116328239441, + "logps/chosen": -0.5770701766014099, + "logps/rejected": -179.37232971191406, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056062906980514526, + "rewards/margins": 4.9972004890441895, + "rewards/rejected": -4.941137790679932, + "step": 8784 + }, + { + "epoch": 0.51, + "learning_rate": 5.061254623914022e-08, + "logits/chosen": -1.8708332777023315, + "logits/rejected": -1.8063546419143677, + "logps/chosen": -271.16912841796875, + "logps/rejected": -484.4341125488281, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.022418260574341, + "rewards/margins": 2.4620392322540283, + "rewards/rejected": -0.4396209716796875, + "step": 8785 + }, + { + "epoch": 0.51, + "learning_rate": 5.060312291150646e-08, + "logits/chosen": -1.9003691673278809, + "logits/rejected": -1.9007630348205566, + "logps/chosen": -17.35928726196289, + "logps/rejected": -150.89053344726562, + "loss": 0.3643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01760711707174778, + "rewards/margins": 2.897326707839966, + "rewards/rejected": -2.8797194957733154, + "step": 8786 + }, + { + "epoch": 0.51, + "learning_rate": 5.0593699562446814e-08, + "logits/chosen": -2.0182299613952637, + "logits/rejected": -2.0109975337982178, + "logps/chosen": -4.5418066292768344e-05, + "logps/rejected": -187.86666870117188, + "loss": 0.3523, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.344446769115166e-07, + "rewards/margins": 3.875502824783325, + "rewards/rejected": -3.8755035400390625, + "step": 8787 + }, + { + "epoch": 0.51, + "learning_rate": 5.058427619229606e-08, + "logits/chosen": -2.020756959915161, + "logits/rejected": -2.0168228149414062, + "logps/chosen": -14.769025802612305, + "logps/rejected": -120.5189437866211, + "loss": 0.4899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03829231485724449, + "rewards/margins": 1.1758419275283813, + "rewards/rejected": -1.137549638748169, + "step": 8788 + }, + { + "epoch": 0.51, + "learning_rate": 5.0574852801388956e-08, + "logits/chosen": -2.0077877044677734, + "logits/rejected": -2.004183292388916, + "logps/chosen": -0.00017868608119897544, + "logps/rejected": -166.76531982421875, + "loss": 0.4301, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.76471900078468e-05, + "rewards/margins": 1.6466679573059082, + "rewards/rejected": -1.646600365638733, + "step": 8789 + }, + { + "epoch": 0.51, + "learning_rate": 5.0565429390060265e-08, + "logits/chosen": -1.8818624019622803, + "logits/rejected": -1.8766719102859497, + "logps/chosen": -58.010231018066406, + "logps/rejected": -233.10775756835938, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12524032592773438, + "rewards/margins": 1.9924339056015015, + "rewards/rejected": -1.867193579673767, + "step": 8790 + }, + { + "epoch": 0.51, + "learning_rate": 5.055600595864476e-08, + "logits/chosen": -2.1031250953674316, + "logits/rejected": -2.09997296333313, + "logps/chosen": -47.030391693115234, + "logps/rejected": -176.79270935058594, + "loss": 0.1401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1912411451339722, + "rewards/margins": 3.016125202178955, + "rewards/rejected": -1.824884057044983, + "step": 8791 + }, + { + "epoch": 0.51, + "learning_rate": 5.0546582507477207e-08, + "logits/chosen": -1.940216302871704, + "logits/rejected": -1.9329522848129272, + "logps/chosen": -200.4377899169922, + "logps/rejected": -385.13568115234375, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.322161912918091, + "rewards/margins": 4.230920314788818, + "rewards/rejected": -1.908758521080017, + "step": 8792 + }, + { + "epoch": 0.51, + "learning_rate": 5.0537159036892373e-08, + "logits/chosen": -1.9642754793167114, + "logits/rejected": -1.9530695676803589, + "logps/chosen": -224.26760864257812, + "logps/rejected": -360.4893798828125, + "loss": 0.1451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48084259033203125, + "rewards/margins": 2.1624956130981445, + "rewards/rejected": -1.6816529035568237, + "step": 8793 + }, + { + "epoch": 0.51, + "learning_rate": 5.052773554722501e-08, + "logits/chosen": -2.037748098373413, + "logits/rejected": -2.0406832695007324, + "logps/chosen": -49.77632141113281, + "logps/rejected": -189.25030517578125, + "loss": 0.2993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2457984983921051, + "rewards/margins": 3.0309700965881348, + "rewards/rejected": -2.7851715087890625, + "step": 8794 + }, + { + "epoch": 0.51, + "learning_rate": 5.051831203880991e-08, + "logits/chosen": -1.9674068689346313, + "logits/rejected": -1.9647026062011719, + "logps/chosen": -176.3428192138672, + "logps/rejected": -372.3321228027344, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.636358618736267, + "rewards/margins": 4.531930446624756, + "rewards/rejected": -2.8955719470977783, + "step": 8795 + }, + { + "epoch": 0.51, + "learning_rate": 5.050888851198183e-08, + "logits/chosen": -2.0021398067474365, + "logits/rejected": -1.9950666427612305, + "logps/chosen": -38.283966064453125, + "logps/rejected": -155.25457763671875, + "loss": 0.2998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33539125323295593, + "rewards/margins": 2.115917921066284, + "rewards/rejected": -1.7805267572402954, + "step": 8796 + }, + { + "epoch": 0.51, + "learning_rate": 5.049946496707553e-08, + "logits/chosen": -1.9769690036773682, + "logits/rejected": -1.9810177087783813, + "logps/chosen": -75.82643127441406, + "logps/rejected": -244.13238525390625, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7134445309638977, + "rewards/margins": 0.6421249508857727, + "rewards/rejected": 0.071319580078125, + "step": 8797 + }, + { + "epoch": 0.51, + "learning_rate": 5.04900414044258e-08, + "logits/chosen": -1.8851113319396973, + "logits/rejected": -1.8703917264938354, + "logps/chosen": -14.134481430053711, + "logps/rejected": -335.662353515625, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07551689445972443, + "rewards/margins": 9.405019760131836, + "rewards/rejected": -9.329503059387207, + "step": 8798 + }, + { + "epoch": 0.51, + "learning_rate": 5.048061782436741e-08, + "logits/chosen": -1.9306726455688477, + "logits/rejected": -1.921891450881958, + "logps/chosen": -87.49321746826172, + "logps/rejected": -213.0908660888672, + "loss": 0.4579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.043412018567323685, + "rewards/margins": 1.099907636642456, + "rewards/rejected": -1.0564956665039062, + "step": 8799 + }, + { + "epoch": 0.51, + "learning_rate": 5.0471194227235095e-08, + "logits/chosen": -1.8788182735443115, + "logits/rejected": -1.887134075164795, + "logps/chosen": -0.0002813177998177707, + "logps/rejected": -368.5426025390625, + "loss": 0.3365, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4792598449275829e-05, + "rewards/margins": 6.988461971282959, + "rewards/rejected": -6.988476753234863, + "step": 8800 + }, + { + "epoch": 0.51, + "learning_rate": 5.046177061336369e-08, + "logits/chosen": -1.8990663290023804, + "logits/rejected": -1.911065936088562, + "logps/chosen": -279.1312561035156, + "logps/rejected": -440.48553466796875, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.048391819000244, + "rewards/margins": 4.243499755859375, + "rewards/rejected": -2.19510817527771, + "step": 8801 + }, + { + "epoch": 0.51, + "learning_rate": 5.045234698308791e-08, + "logits/chosen": -1.9161643981933594, + "logits/rejected": -2.007636308670044, + "logps/chosen": -243.86111450195312, + "logps/rejected": -318.98748779296875, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.918692111968994, + "rewards/margins": 4.607181072235107, + "rewards/rejected": -1.6884888410568237, + "step": 8802 + }, + { + "epoch": 0.51, + "learning_rate": 5.044292333674257e-08, + "logits/chosen": -1.7922180891036987, + "logits/rejected": -1.788269281387329, + "logps/chosen": -71.79600524902344, + "logps/rejected": -227.2991943359375, + "loss": 0.1508, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1105858087539673, + "rewards/margins": 3.356334686279297, + "rewards/rejected": -2.245748996734619, + "step": 8803 + }, + { + "epoch": 0.51, + "learning_rate": 5.0433499674662413e-08, + "logits/chosen": -1.7823435068130493, + "logits/rejected": -1.8106216192245483, + "logps/chosen": -222.7313232421875, + "logps/rejected": -485.711181640625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3004119396209717, + "rewards/margins": 5.3098297119140625, + "rewards/rejected": -3.009417772293091, + "step": 8804 + }, + { + "epoch": 0.51, + "learning_rate": 5.042407599718224e-08, + "logits/chosen": -1.8091493844985962, + "logits/rejected": -1.829788327217102, + "logps/chosen": -189.58511352539062, + "logps/rejected": -215.76589965820312, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9990768432617188, + "rewards/margins": 1.822357177734375, + "rewards/rejected": 0.17671966552734375, + "step": 8805 + }, + { + "epoch": 0.51, + "learning_rate": 5.0414652304636796e-08, + "logits/chosen": -1.794651985168457, + "logits/rejected": -1.7921576499938965, + "logps/chosen": -275.3326416015625, + "logps/rejected": -364.9666748046875, + "loss": 0.132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5265686511993408, + "rewards/margins": 1.9006012678146362, + "rewards/rejected": -0.374032586812973, + "step": 8806 + }, + { + "epoch": 0.51, + "learning_rate": 5.0405228597360894e-08, + "logits/chosen": -1.925835371017456, + "logits/rejected": -1.8969849348068237, + "logps/chosen": -242.2407684326172, + "logps/rejected": -406.45135498046875, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4336624145507812, + "rewards/margins": 4.327995300292969, + "rewards/rejected": -1.8943328857421875, + "step": 8807 + }, + { + "epoch": 0.51, + "learning_rate": 5.039580487568928e-08, + "logits/chosen": -2.031176805496216, + "logits/rejected": -2.036566972732544, + "logps/chosen": -243.17425537109375, + "logps/rejected": -385.71832275390625, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.028918504714966, + "rewards/margins": 3.609149217605591, + "rewards/rejected": -0.580230712890625, + "step": 8808 + }, + { + "epoch": 0.51, + "learning_rate": 5.0386381139956734e-08, + "logits/chosen": -1.7517026662826538, + "logits/rejected": -1.7446653842926025, + "logps/chosen": -4.882740497589111, + "logps/rejected": -258.023193359375, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09951648861169815, + "rewards/margins": 3.8224549293518066, + "rewards/rejected": -3.7229385375976562, + "step": 8809 + }, + { + "epoch": 0.51, + "learning_rate": 5.0376957390498055e-08, + "logits/chosen": -1.9351242780685425, + "logits/rejected": -1.9160569906234741, + "logps/chosen": -193.5374298095703, + "logps/rejected": -281.70684814453125, + "loss": 0.1359, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6989076137542725, + "rewards/margins": 1.385025143623352, + "rewards/rejected": 1.3138824701309204, + "step": 8810 + }, + { + "epoch": 0.51, + "learning_rate": 5.0367533627647984e-08, + "logits/chosen": -1.9557472467422485, + "logits/rejected": -1.962181806564331, + "logps/chosen": -3.739912271499634, + "logps/rejected": -94.79882049560547, + "loss": 0.3901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0015650511486455798, + "rewards/margins": 2.2153451442718506, + "rewards/rejected": -2.213780164718628, + "step": 8811 + }, + { + "epoch": 0.51, + "learning_rate": 5.035810985174135e-08, + "logits/chosen": -1.9972187280654907, + "logits/rejected": -1.9921298027038574, + "logps/chosen": -12.500176429748535, + "logps/rejected": -142.1749267578125, + "loss": 0.3285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09999704360961914, + "rewards/margins": 3.0695040225982666, + "rewards/rejected": -2.9695069789886475, + "step": 8812 + }, + { + "epoch": 0.51, + "learning_rate": 5.034868606311288e-08, + "logits/chosen": -2.017343759536743, + "logits/rejected": -1.9921830892562866, + "logps/chosen": -68.18606567382812, + "logps/rejected": -234.50424194335938, + "loss": 0.1501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6713111996650696, + "rewards/margins": 5.174023151397705, + "rewards/rejected": -4.502711772918701, + "step": 8813 + }, + { + "epoch": 0.51, + "learning_rate": 5.033926226209738e-08, + "logits/chosen": -1.9809479713439941, + "logits/rejected": -1.9640308618545532, + "logps/chosen": -63.63137435913086, + "logps/rejected": -573.81005859375, + "loss": 0.1025, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4721263647079468, + "rewards/margins": 7.320796012878418, + "rewards/rejected": -5.848669528961182, + "step": 8814 + }, + { + "epoch": 0.51, + "learning_rate": 5.0329838449029625e-08, + "logits/chosen": -1.9489086866378784, + "logits/rejected": -1.9358643293380737, + "logps/chosen": -3.964432716369629, + "logps/rejected": -187.0870819091797, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.057265784591436386, + "rewards/margins": 2.3402180671691895, + "rewards/rejected": -2.3974838256835938, + "step": 8815 + }, + { + "epoch": 0.51, + "learning_rate": 5.032041462424441e-08, + "logits/chosen": -1.941215991973877, + "logits/rejected": -1.9428563117980957, + "logps/chosen": -170.12603759765625, + "logps/rejected": -256.4737854003906, + "loss": 0.1622, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.344928026199341, + "rewards/margins": 1.0929596424102783, + "rewards/rejected": 1.2519683837890625, + "step": 8816 + }, + { + "epoch": 0.51, + "learning_rate": 5.031099078807648e-08, + "logits/chosen": -2.0409793853759766, + "logits/rejected": -2.0337164402008057, + "logps/chosen": -30.279273986816406, + "logps/rejected": -130.81373596191406, + "loss": 0.5218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02948150597512722, + "rewards/margins": 0.6232040524482727, + "rewards/rejected": -0.6526855826377869, + "step": 8817 + }, + { + "epoch": 0.51, + "learning_rate": 5.030156694086065e-08, + "logits/chosen": -2.097059488296509, + "logits/rejected": -2.101926326751709, + "logps/chosen": -3.048711061477661, + "logps/rejected": -147.2725830078125, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13388803601264954, + "rewards/margins": 3.4744200706481934, + "rewards/rejected": -3.340532064437866, + "step": 8818 + }, + { + "epoch": 0.51, + "learning_rate": 5.029214308293167e-08, + "logits/chosen": -2.023576498031616, + "logits/rejected": -2.0254788398742676, + "logps/chosen": -33.89193344116211, + "logps/rejected": -245.78414916992188, + "loss": 0.2568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3877330720424652, + "rewards/margins": 4.76051139831543, + "rewards/rejected": -4.372778415679932, + "step": 8819 + }, + { + "epoch": 0.51, + "learning_rate": 5.0282719214624346e-08, + "logits/chosen": -1.8359047174453735, + "logits/rejected": -1.7814795970916748, + "logps/chosen": -302.59820556640625, + "logps/rejected": -541.2507934570312, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.514392137527466, + "rewards/margins": 3.7117247581481934, + "rewards/rejected": -1.197332739830017, + "step": 8820 + }, + { + "epoch": 0.51, + "learning_rate": 5.0273295336273444e-08, + "logits/chosen": -2.1587159633636475, + "logits/rejected": -2.1402766704559326, + "logps/chosen": -120.98197937011719, + "logps/rejected": -506.48321533203125, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4699219465255737, + "rewards/margins": 5.735095500946045, + "rewards/rejected": -4.265173435211182, + "step": 8821 + }, + { + "epoch": 0.51, + "learning_rate": 5.026387144821378e-08, + "logits/chosen": -1.9183762073516846, + "logits/rejected": -1.9144697189331055, + "logps/chosen": -6.072498321533203, + "logps/rejected": -139.32298278808594, + "loss": 0.2971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20069389045238495, + "rewards/margins": 2.552119016647339, + "rewards/rejected": -2.3514251708984375, + "step": 8822 + }, + { + "epoch": 0.51, + "learning_rate": 5.02544475507801e-08, + "logits/chosen": -2.005795478820801, + "logits/rejected": -2.0062026977539062, + "logps/chosen": -1.0647597312927246, + "logps/rejected": -208.1732177734375, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08951237052679062, + "rewards/margins": 3.2648212909698486, + "rewards/rejected": -3.3543336391448975, + "step": 8823 + }, + { + "epoch": 0.51, + "learning_rate": 5.0245023644307193e-08, + "logits/chosen": -2.1654903888702393, + "logits/rejected": -2.1646647453308105, + "logps/chosen": -1.5514256954193115, + "logps/rejected": -22.125795364379883, + "loss": 0.6991, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.034083496779203415, + "rewards/margins": -0.11587530374526978, + "rewards/rejected": 0.1499588042497635, + "step": 8824 + }, + { + "epoch": 0.51, + "learning_rate": 5.0235599729129854e-08, + "logits/chosen": -1.7636433839797974, + "logits/rejected": -1.7564549446105957, + "logps/chosen": -178.27444458007812, + "logps/rejected": -251.96290588378906, + "loss": 0.3316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.492950439453125, + "rewards/margins": 0.32901763916015625, + "rewards/rejected": 1.1639328002929688, + "step": 8825 + }, + { + "epoch": 0.51, + "learning_rate": 5.022617580558286e-08, + "logits/chosen": -1.753743052482605, + "logits/rejected": -1.7447471618652344, + "logps/chosen": -49.09421920776367, + "logps/rejected": -282.7676696777344, + "loss": 0.1916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5988052487373352, + "rewards/margins": 6.316189765930176, + "rewards/rejected": -5.717384338378906, + "step": 8826 + }, + { + "epoch": 0.51, + "learning_rate": 5.021675187400098e-08, + "logits/chosen": -1.93618905544281, + "logits/rejected": -1.9209105968475342, + "logps/chosen": -0.00011038421507691965, + "logps/rejected": -317.1329345703125, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5364325008704327e-05, + "rewards/margins": 5.950829982757568, + "rewards/rejected": -5.950814723968506, + "step": 8827 + }, + { + "epoch": 0.51, + "learning_rate": 5.020732793471904e-08, + "logits/chosen": -1.9557850360870361, + "logits/rejected": -1.9490150213241577, + "logps/chosen": -43.412681579589844, + "logps/rejected": -321.37841796875, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16875076293945312, + "rewards/margins": 1.6197456121444702, + "rewards/rejected": -1.450994849205017, + "step": 8828 + }, + { + "epoch": 0.51, + "learning_rate": 5.019790398807178e-08, + "logits/chosen": -1.8638253211975098, + "logits/rejected": -1.852906346321106, + "logps/chosen": -225.83816528320312, + "logps/rejected": -598.6077880859375, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.238865613937378, + "rewards/margins": 5.504197597503662, + "rewards/rejected": -3.265331983566284, + "step": 8829 + }, + { + "epoch": 0.51, + "learning_rate": 5.0188480034394e-08, + "logits/chosen": -2.0422987937927246, + "logits/rejected": -1.9919919967651367, + "logps/chosen": -291.69732666015625, + "logps/rejected": -610.4918823242188, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3856842517852783, + "rewards/margins": 6.000528335571289, + "rewards/rejected": -3.6148438453674316, + "step": 8830 + }, + { + "epoch": 0.51, + "learning_rate": 5.0179056074020496e-08, + "logits/chosen": -1.9939368963241577, + "logits/rejected": -1.9973560571670532, + "logps/chosen": -33.1151237487793, + "logps/rejected": -153.6456298828125, + "loss": 0.5291, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8652530908584595, + "rewards/margins": -0.17014729976654053, + "rewards/rejected": 1.035400390625, + "step": 8831 + }, + { + "epoch": 0.51, + "learning_rate": 5.0169632107286034e-08, + "logits/chosen": -1.7538236379623413, + "logits/rejected": -1.7533828020095825, + "logps/chosen": -0.00021027869661338627, + "logps/rejected": -297.9853515625, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0225250409566797e-05, + "rewards/margins": 6.993370532989502, + "rewards/rejected": -6.993350505828857, + "step": 8832 + }, + { + "epoch": 0.51, + "learning_rate": 5.0160208134525416e-08, + "logits/chosen": -1.6381072998046875, + "logits/rejected": -1.6184719800949097, + "logps/chosen": -358.21484375, + "logps/rejected": -532.193359375, + "loss": 0.2738, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.102941989898682, + "rewards/margins": 0.31864023208618164, + "rewards/rejected": 4.7843017578125, + "step": 8833 + }, + { + "epoch": 0.51, + "learning_rate": 5.0150784156073425e-08, + "logits/chosen": -1.741288185119629, + "logits/rejected": -1.741461992263794, + "logps/chosen": -11.946234703063965, + "logps/rejected": -204.99887084960938, + "loss": 0.3811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2601911723613739, + "rewards/margins": 1.4664900302886963, + "rewards/rejected": -1.206298828125, + "step": 8834 + }, + { + "epoch": 0.51, + "learning_rate": 5.0141360172264844e-08, + "logits/chosen": -1.7827181816101074, + "logits/rejected": -1.760398030281067, + "logps/chosen": -140.07369995117188, + "logps/rejected": -295.0355224609375, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.738073706626892, + "rewards/margins": 3.422320604324341, + "rewards/rejected": -1.6842468976974487, + "step": 8835 + }, + { + "epoch": 0.51, + "learning_rate": 5.0131936183434456e-08, + "logits/chosen": -1.8242461681365967, + "logits/rejected": -1.8169145584106445, + "logps/chosen": -21.715335845947266, + "logps/rejected": -162.2932586669922, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3968540132045746, + "rewards/margins": 2.9201221466064453, + "rewards/rejected": -2.523268222808838, + "step": 8836 + }, + { + "epoch": 0.51, + "learning_rate": 5.0122512189917044e-08, + "logits/chosen": -1.8047298192977905, + "logits/rejected": -1.8064428567886353, + "logps/chosen": -48.69629669189453, + "logps/rejected": -110.94242095947266, + "loss": 0.22, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.631173312664032, + "rewards/margins": 2.601746082305908, + "rewards/rejected": -1.9705727100372314, + "step": 8837 + }, + { + "epoch": 0.51, + "learning_rate": 5.0113088192047405e-08, + "logits/chosen": -1.984758973121643, + "logits/rejected": -1.9806357622146606, + "logps/chosen": -14.5004301071167, + "logps/rejected": -171.45809936523438, + "loss": 0.3542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.074987031519413, + "rewards/margins": 2.9281485080718994, + "rewards/rejected": -2.853161573410034, + "step": 8838 + }, + { + "epoch": 0.51, + "learning_rate": 5.010366419016032e-08, + "logits/chosen": -1.8015960454940796, + "logits/rejected": -1.801809310913086, + "logps/chosen": -114.86532592773438, + "logps/rejected": -249.809326171875, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5888000726699829, + "rewards/margins": 0.8820709586143494, + "rewards/rejected": -0.29327088594436646, + "step": 8839 + }, + { + "epoch": 0.51, + "learning_rate": 5.0094240184590564e-08, + "logits/chosen": -1.915665626525879, + "logits/rejected": -1.9117863178253174, + "logps/chosen": -259.802978515625, + "logps/rejected": -403.695068359375, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9676759243011475, + "rewards/margins": 3.1171693801879883, + "rewards/rejected": -0.14949341118335724, + "step": 8840 + }, + { + "epoch": 0.51, + "learning_rate": 5.008481617567295e-08, + "logits/chosen": -1.7281053066253662, + "logits/rejected": -1.727379560470581, + "logps/chosen": -287.5130615234375, + "logps/rejected": -348.5058288574219, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.841906785964966, + "rewards/margins": 2.497833251953125, + "rewards/rejected": 0.34407350420951843, + "step": 8841 + }, + { + "epoch": 0.51, + "learning_rate": 5.007539216374224e-08, + "logits/chosen": -1.9459333419799805, + "logits/rejected": -1.971460223197937, + "logps/chosen": -196.74993896484375, + "logps/rejected": -466.90496826171875, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.216461181640625, + "rewards/margins": 5.398333549499512, + "rewards/rejected": -3.181872606277466, + "step": 8842 + }, + { + "epoch": 0.51, + "learning_rate": 5.006596814913324e-08, + "logits/chosen": -2.0255045890808105, + "logits/rejected": -2.0236001014709473, + "logps/chosen": -6.834253311157227, + "logps/rejected": -83.47998809814453, + "loss": 0.4829, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018665505573153496, + "rewards/margins": 0.9810686111450195, + "rewards/rejected": -0.9624031186103821, + "step": 8843 + }, + { + "epoch": 0.51, + "learning_rate": 5.0056544132180725e-08, + "logits/chosen": -1.9439855813980103, + "logits/rejected": -1.9473330974578857, + "logps/chosen": -5.610235214233398, + "logps/rejected": -112.50142669677734, + "loss": 0.5923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08760251849889755, + "rewards/margins": 0.33078184723854065, + "rewards/rejected": -0.2431793212890625, + "step": 8844 + }, + { + "epoch": 0.51, + "learning_rate": 5.0047120113219496e-08, + "logits/chosen": -1.8178009986877441, + "logits/rejected": -1.820975661277771, + "logps/chosen": -4.8698272705078125, + "logps/rejected": -113.92145538330078, + "loss": 0.4591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07621093094348907, + "rewards/margins": 1.1630266904830933, + "rewards/rejected": -1.0868157148361206, + "step": 8845 + }, + { + "epoch": 0.51, + "learning_rate": 5.003769609258431e-08, + "logits/chosen": -2.0438551902770996, + "logits/rejected": -2.0464389324188232, + "logps/chosen": -26.149927139282227, + "logps/rejected": -122.04367065429688, + "loss": 0.3965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29598236083984375, + "rewards/margins": 1.169036865234375, + "rewards/rejected": -0.8730545043945312, + "step": 8846 + }, + { + "epoch": 0.51, + "learning_rate": 5.002827207061e-08, + "logits/chosen": -2.0040030479431152, + "logits/rejected": -2.0086123943328857, + "logps/chosen": -13.872560501098633, + "logps/rejected": -86.05779266357422, + "loss": 0.4115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030048180371522903, + "rewards/margins": 1.1720548868179321, + "rewards/rejected": -1.142006754875183, + "step": 8847 + }, + { + "epoch": 0.51, + "learning_rate": 5.0018848047631314e-08, + "logits/chosen": -1.9360895156860352, + "logits/rejected": -1.9312686920166016, + "logps/chosen": -6.625640392303467, + "logps/rejected": -207.88784790039062, + "loss": 0.3263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09910183399915695, + "rewards/margins": 2.736133337020874, + "rewards/rejected": -2.6370315551757812, + "step": 8848 + }, + { + "epoch": 0.51, + "learning_rate": 5.000942402398305e-08, + "logits/chosen": -1.8115683794021606, + "logits/rejected": -1.8192222118377686, + "logps/chosen": -11.008949279785156, + "logps/rejected": -242.36300659179688, + "loss": 0.2547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43425217270851135, + "rewards/margins": 3.588756799697876, + "rewards/rejected": -3.1545045375823975, + "step": 8849 + }, + { + "epoch": 0.52, + "learning_rate": 5e-08, + "logits/chosen": -1.7552053928375244, + "logits/rejected": -1.7602492570877075, + "logps/chosen": -42.574588775634766, + "logps/rejected": -119.91036224365234, + "loss": 0.1476, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7314579486846924, + "rewards/margins": 1.568510890007019, + "rewards/rejected": 0.16294708847999573, + "step": 8850 + }, + { + "epoch": 0.52, + "learning_rate": 4.999057597601695e-08, + "logits/chosen": -2.0218465328216553, + "logits/rejected": -1.9831242561340332, + "logps/chosen": -218.6167449951172, + "logps/rejected": -310.546142578125, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.801988363265991, + "rewards/margins": 2.9447038173675537, + "rewards/rejected": -0.1427154541015625, + "step": 8851 + }, + { + "epoch": 0.52, + "learning_rate": 4.9981151952368695e-08, + "logits/chosen": -1.9318724870681763, + "logits/rejected": -1.9204809665679932, + "logps/chosen": -168.46426391601562, + "logps/rejected": -285.7340087890625, + "loss": 0.0794, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.01047682762146, + "rewards/margins": 1.889755368232727, + "rewards/rejected": 1.120721459388733, + "step": 8852 + }, + { + "epoch": 0.52, + "learning_rate": 4.997172792939e-08, + "logits/chosen": -1.8750009536743164, + "logits/rejected": -1.8962312936782837, + "logps/chosen": -130.20089721679688, + "logps/rejected": -270.1490173339844, + "loss": 0.232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5216064453125, + "rewards/margins": 0.8141937255859375, + "rewards/rejected": 0.7074127197265625, + "step": 8853 + }, + { + "epoch": 0.52, + "learning_rate": 4.9962303907415684e-08, + "logits/chosen": -2.0832622051239014, + "logits/rejected": -2.07790207862854, + "logps/chosen": -0.07354915142059326, + "logps/rejected": -251.22039794921875, + "loss": 0.3617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013581186532974243, + "rewards/margins": 4.92494010925293, + "rewards/rejected": -4.911358833312988, + "step": 8854 + }, + { + "epoch": 0.52, + "learning_rate": 4.9952879886780506e-08, + "logits/chosen": -1.8378409147262573, + "logits/rejected": -1.8239871263504028, + "logps/chosen": -151.8103790283203, + "logps/rejected": -427.4671936035156, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5458786487579346, + "rewards/margins": 3.6105942726135254, + "rewards/rejected": -1.0647156238555908, + "step": 8855 + }, + { + "epoch": 0.52, + "learning_rate": 4.994345586781927e-08, + "logits/chosen": -2.0417981147766113, + "logits/rejected": -2.0457634925842285, + "logps/chosen": -27.62995719909668, + "logps/rejected": -169.66885375976562, + "loss": 0.4201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31147289276123047, + "rewards/margins": 0.757166862487793, + "rewards/rejected": -0.4456939697265625, + "step": 8856 + }, + { + "epoch": 0.52, + "learning_rate": 4.9934031850866746e-08, + "logits/chosen": -1.7862026691436768, + "logits/rejected": -1.7635622024536133, + "logps/chosen": -201.37734985351562, + "logps/rejected": -396.64508056640625, + "loss": 0.168, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0914582014083862, + "rewards/margins": 2.025805711746216, + "rewards/rejected": -0.9343475699424744, + "step": 8857 + }, + { + "epoch": 0.52, + "learning_rate": 4.992460783625776e-08, + "logits/chosen": -1.9643088579177856, + "logits/rejected": -1.9522331953048706, + "logps/chosen": -198.60833740234375, + "logps/rejected": -364.86474609375, + "loss": 0.222, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7857117652893066, + "rewards/margins": 0.62835693359375, + "rewards/rejected": 2.1573548316955566, + "step": 8858 + }, + { + "epoch": 0.52, + "learning_rate": 4.991518382432704e-08, + "logits/chosen": -2.001187562942505, + "logits/rejected": -1.9862571954727173, + "logps/chosen": -38.3146858215332, + "logps/rejected": -282.8933410644531, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4310321807861328, + "rewards/margins": 4.447646141052246, + "rewards/rejected": -4.016613960266113, + "step": 8859 + }, + { + "epoch": 0.52, + "learning_rate": 4.9905759815409445e-08, + "logits/chosen": -1.7542212009429932, + "logits/rejected": -1.7323217391967773, + "logps/chosen": -147.85528564453125, + "logps/rejected": -174.365966796875, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2938050031661987, + "rewards/margins": 0.5889953970909119, + "rewards/rejected": 0.7048096060752869, + "step": 8860 + }, + { + "epoch": 0.52, + "learning_rate": 4.989633580983968e-08, + "logits/chosen": -1.9847944974899292, + "logits/rejected": -1.9764785766601562, + "logps/chosen": -217.61294555664062, + "logps/rejected": -335.79656982421875, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5161300897598267, + "rewards/margins": 0.09401094913482666, + "rewards/rejected": 1.422119140625, + "step": 8861 + }, + { + "epoch": 0.52, + "learning_rate": 4.988691180795261e-08, + "logits/chosen": -1.9230647087097168, + "logits/rejected": -1.9290474653244019, + "logps/chosen": -50.61485290527344, + "logps/rejected": -185.68540954589844, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4868179261684418, + "rewards/margins": 1.6281830072402954, + "rewards/rejected": -1.1413650512695312, + "step": 8862 + }, + { + "epoch": 0.52, + "learning_rate": 4.987748781008295e-08, + "logits/chosen": -2.0385639667510986, + "logits/rejected": -2.0334603786468506, + "logps/chosen": -10.729555130004883, + "logps/rejected": -249.67825317382812, + "loss": 0.6707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4283527433872223, + "rewards/margins": 0.6967816352844238, + "rewards/rejected": -1.1251343488693237, + "step": 8863 + }, + { + "epoch": 0.52, + "learning_rate": 4.9868063816565566e-08, + "logits/chosen": -1.8203364610671997, + "logits/rejected": -1.8325347900390625, + "logps/chosen": -124.9803237915039, + "logps/rejected": -398.4267883300781, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6100562810897827, + "rewards/margins": 6.188556671142578, + "rewards/rejected": -4.578500270843506, + "step": 8864 + }, + { + "epoch": 0.52, + "learning_rate": 4.985863982773516e-08, + "logits/chosen": -1.9222337007522583, + "logits/rejected": -1.9012552499771118, + "logps/chosen": -207.83596801757812, + "logps/rejected": -322.6136474609375, + "loss": 0.263, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1576569080352783, + "rewards/margins": 0.47527778148651123, + "rewards/rejected": 1.682379126548767, + "step": 8865 + }, + { + "epoch": 0.52, + "learning_rate": 4.984921584392658e-08, + "logits/chosen": -1.8827799558639526, + "logits/rejected": -1.8781834840774536, + "logps/chosen": -60.76506805419922, + "logps/rejected": -355.5142822265625, + "loss": 0.1795, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1796379089355469, + "rewards/margins": 2.0202536582946777, + "rewards/rejected": -0.8406158685684204, + "step": 8866 + }, + { + "epoch": 0.52, + "learning_rate": 4.983979186547458e-08, + "logits/chosen": -2.1715879440307617, + "logits/rejected": -2.1631886959075928, + "logps/chosen": -40.36648941040039, + "logps/rejected": -169.87611389160156, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17622680962085724, + "rewards/margins": 2.584913730621338, + "rewards/rejected": -2.408686876296997, + "step": 8867 + }, + { + "epoch": 0.52, + "learning_rate": 4.983036789271397e-08, + "logits/chosen": -1.9481052160263062, + "logits/rejected": -1.9443823099136353, + "logps/chosen": -10.751449584960938, + "logps/rejected": -154.3553466796875, + "loss": 0.3689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03743715211749077, + "rewards/margins": 2.6905531883239746, + "rewards/rejected": -2.65311598777771, + "step": 8868 + }, + { + "epoch": 0.52, + "learning_rate": 4.9820943925979507e-08, + "logits/chosen": -1.8879611492156982, + "logits/rejected": -1.8412193059921265, + "logps/chosen": -147.32913208007812, + "logps/rejected": -390.87579345703125, + "loss": 0.1754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3390716314315796, + "rewards/margins": 1.834863305091858, + "rewards/rejected": -0.49579164385795593, + "step": 8869 + }, + { + "epoch": 0.52, + "learning_rate": 4.981151996560601e-08, + "logits/chosen": -1.6549148559570312, + "logits/rejected": -1.6266822814941406, + "logps/chosen": -276.7021179199219, + "logps/rejected": -414.3812561035156, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3533356189727783, + "rewards/margins": 1.6944031715393066, + "rewards/rejected": -0.34106752276420593, + "step": 8870 + }, + { + "epoch": 0.52, + "learning_rate": 4.980209601192823e-08, + "logits/chosen": -1.9352405071258545, + "logits/rejected": -1.9265891313552856, + "logps/chosen": -66.45489501953125, + "logps/rejected": -167.10784912109375, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3073898255825043, + "rewards/margins": 1.6686463356018066, + "rewards/rejected": -1.36125648021698, + "step": 8871 + }, + { + "epoch": 0.52, + "learning_rate": 4.9792672065280964e-08, + "logits/chosen": -1.9211291074752808, + "logits/rejected": -1.9221405982971191, + "logps/chosen": -184.15037536621094, + "logps/rejected": -210.8809814453125, + "loss": 0.5069, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3009292781352997, + "rewards/margins": -0.20520171523094177, + "rewards/rejected": 0.5061309933662415, + "step": 8872 + }, + { + "epoch": 0.52, + "learning_rate": 4.978324812599902e-08, + "logits/chosen": -2.002898931503296, + "logits/rejected": -1.9976372718811035, + "logps/chosen": -98.71156311035156, + "logps/rejected": -197.24264526367188, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8494110107421875, + "rewards/margins": 1.1365035772323608, + "rewards/rejected": -0.2870925962924957, + "step": 8873 + }, + { + "epoch": 0.52, + "learning_rate": 4.977382419441714e-08, + "logits/chosen": -1.7943636178970337, + "logits/rejected": -1.7994986772537231, + "logps/chosen": -1.245365858078003, + "logps/rejected": -156.41285705566406, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05449967458844185, + "rewards/margins": 2.9094557762145996, + "rewards/rejected": -2.8549561500549316, + "step": 8874 + }, + { + "epoch": 0.52, + "learning_rate": 4.9764400270870155e-08, + "logits/chosen": -1.9305533170700073, + "logits/rejected": -1.9336018562316895, + "logps/chosen": -0.2763099670410156, + "logps/rejected": -220.6295623779297, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004548743367195129, + "rewards/margins": 3.924879550933838, + "rewards/rejected": -3.920330762863159, + "step": 8875 + }, + { + "epoch": 0.52, + "learning_rate": 4.975497635569281e-08, + "logits/chosen": -2.054100751876831, + "logits/rejected": -2.0462424755096436, + "logps/chosen": -35.13328170776367, + "logps/rejected": -142.4179229736328, + "loss": 0.3842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35562896728515625, + "rewards/margins": 1.075099229812622, + "rewards/rejected": -0.719470202922821, + "step": 8876 + }, + { + "epoch": 0.52, + "learning_rate": 4.974555244921991e-08, + "logits/chosen": -2.006840467453003, + "logits/rejected": -2.0017683506011963, + "logps/chosen": -8.103706359863281, + "logps/rejected": -147.98660278320312, + "loss": 0.5955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3834443986415863, + "rewards/margins": 1.0108473300933838, + "rewards/rejected": -1.3942917585372925, + "step": 8877 + }, + { + "epoch": 0.52, + "learning_rate": 4.973612855178621e-08, + "logits/chosen": -2.0523910522460938, + "logits/rejected": -2.050048351287842, + "logps/chosen": -19.31857681274414, + "logps/rejected": -264.734619140625, + "loss": 0.2519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42459240555763245, + "rewards/margins": 3.7335188388824463, + "rewards/rejected": -3.3089263439178467, + "step": 8878 + }, + { + "epoch": 0.52, + "learning_rate": 4.972670466372655e-08, + "logits/chosen": -1.9642325639724731, + "logits/rejected": -1.965993881225586, + "logps/chosen": -37.548492431640625, + "logps/rejected": -232.85699462890625, + "loss": 0.5369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11229515075683594, + "rewards/margins": 0.7587329745292664, + "rewards/rejected": -0.8710281252861023, + "step": 8879 + }, + { + "epoch": 0.52, + "learning_rate": 4.971728078537564e-08, + "logits/chosen": -1.7744323015213013, + "logits/rejected": -1.7679245471954346, + "logps/chosen": -204.93331909179688, + "logps/rejected": -483.4648132324219, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0633896589279175, + "rewards/margins": 5.198512554168701, + "rewards/rejected": -4.135122776031494, + "step": 8880 + }, + { + "epoch": 0.52, + "learning_rate": 4.970785691706834e-08, + "logits/chosen": -1.8833518028259277, + "logits/rejected": -1.8883594274520874, + "logps/chosen": -176.6083526611328, + "logps/rejected": -138.58224487304688, + "loss": 0.2051, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.347825765609741, + "rewards/margins": 0.9946732521057129, + "rewards/rejected": 1.3531525135040283, + "step": 8881 + }, + { + "epoch": 0.52, + "learning_rate": 4.969843305913935e-08, + "logits/chosen": -1.9804872274398804, + "logits/rejected": -1.976683497428894, + "logps/chosen": -63.54758834838867, + "logps/rejected": -169.6599578857422, + "loss": 0.525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17868080735206604, + "rewards/margins": 0.8713498115539551, + "rewards/rejected": -1.0500305891036987, + "step": 8882 + }, + { + "epoch": 0.52, + "learning_rate": 4.9689009211923536e-08, + "logits/chosen": -1.8231229782104492, + "logits/rejected": -1.7993606328964233, + "logps/chosen": -160.75997924804688, + "logps/rejected": -435.94561767578125, + "loss": 0.1376, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0603851079940796, + "rewards/margins": 1.7807037830352783, + "rewards/rejected": -0.720318615436554, + "step": 8883 + }, + { + "epoch": 0.52, + "learning_rate": 4.9679585375755597e-08, + "logits/chosen": -2.100576877593994, + "logits/rejected": -2.0995497703552246, + "logps/chosen": -172.6798553466797, + "logps/rejected": -468.00018310546875, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7337265014648438, + "rewards/margins": 7.700749397277832, + "rewards/rejected": -4.967022895812988, + "step": 8884 + }, + { + "epoch": 0.52, + "learning_rate": 4.9670161550970384e-08, + "logits/chosen": -1.985028624534607, + "logits/rejected": -1.944908857345581, + "logps/chosen": -214.68386840820312, + "logps/rejected": -648.046142578125, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6086792945861816, + "rewards/margins": 9.79608154296875, + "rewards/rejected": -7.187402248382568, + "step": 8885 + }, + { + "epoch": 0.52, + "learning_rate": 4.9660737737902615e-08, + "logits/chosen": -1.9389384984970093, + "logits/rejected": -1.939142107963562, + "logps/chosen": -17.275901794433594, + "logps/rejected": -141.5406494140625, + "loss": 0.465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4869144558906555, + "rewards/margins": 0.4637821316719055, + "rewards/rejected": 0.02313232421875, + "step": 8886 + }, + { + "epoch": 0.52, + "learning_rate": 4.9651313936887125e-08, + "logits/chosen": -1.8947875499725342, + "logits/rejected": -1.874916672706604, + "logps/chosen": -133.99838256835938, + "logps/rejected": -318.3127136230469, + "loss": 0.1431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6259933710098267, + "rewards/margins": 1.9463486671447754, + "rewards/rejected": -0.32035523653030396, + "step": 8887 + }, + { + "epoch": 0.52, + "learning_rate": 4.964189014825865e-08, + "logits/chosen": -1.9321893453598022, + "logits/rejected": -1.9316184520721436, + "logps/chosen": -200.10394287109375, + "logps/rejected": -302.5024719238281, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9249206781387329, + "rewards/margins": 0.29141539335250854, + "rewards/rejected": 0.6335052847862244, + "step": 8888 + }, + { + "epoch": 0.52, + "learning_rate": 4.963246637235201e-08, + "logits/chosen": -1.9707823991775513, + "logits/rejected": -1.9544286727905273, + "logps/chosen": -0.012810205109417439, + "logps/rejected": -236.08840942382812, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005418930086307228, + "rewards/margins": 5.554931640625, + "rewards/rejected": -5.555473327636719, + "step": 8889 + }, + { + "epoch": 0.52, + "learning_rate": 4.962304260950195e-08, + "logits/chosen": -1.804145336151123, + "logits/rejected": -1.8170217275619507, + "logps/chosen": -229.54287719726562, + "logps/rejected": -232.30772399902344, + "loss": 0.4146, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7264190912246704, + "rewards/margins": -0.11760103702545166, + "rewards/rejected": 1.844020128250122, + "step": 8890 + }, + { + "epoch": 0.52, + "learning_rate": 4.9613618860043255e-08, + "logits/chosen": -1.7223230600357056, + "logits/rejected": -1.72234308719635, + "logps/chosen": -35.16960144042969, + "logps/rejected": -281.913330078125, + "loss": 0.1472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.817901611328125, + "rewards/margins": 4.465883255004883, + "rewards/rejected": -3.6479814052581787, + "step": 8891 + }, + { + "epoch": 0.52, + "learning_rate": 4.9604195124310725e-08, + "logits/chosen": -1.913589358329773, + "logits/rejected": -1.9736418724060059, + "logps/chosen": -261.9159851074219, + "logps/rejected": -330.2643127441406, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9747132062911987, + "rewards/margins": 3.817007541656494, + "rewards/rejected": -1.8422943353652954, + "step": 8892 + }, + { + "epoch": 0.52, + "learning_rate": 4.959477140263911e-08, + "logits/chosen": -2.0593173503875732, + "logits/rejected": -2.0517704486846924, + "logps/chosen": -2.8729122277582064e-05, + "logps/rejected": -108.3589859008789, + "loss": 0.4881, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9338121382570534e-07, + "rewards/margins": 1.079019546508789, + "rewards/rejected": -1.0790199041366577, + "step": 8893 + }, + { + "epoch": 0.52, + "learning_rate": 4.95853476953632e-08, + "logits/chosen": -1.9778122901916504, + "logits/rejected": -1.9729033708572388, + "logps/chosen": -4.914206027984619, + "logps/rejected": -116.07300567626953, + "loss": 0.6835, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.030693674460053444, + "rewards/margins": -0.05735611915588379, + "rewards/rejected": 0.026662444695830345, + "step": 8894 + }, + { + "epoch": 0.52, + "learning_rate": 4.957592400281776e-08, + "logits/chosen": -1.8961305618286133, + "logits/rejected": -1.933303713798523, + "logps/chosen": -249.1620330810547, + "logps/rejected": -276.3675231933594, + "loss": 0.1961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6419693231582642, + "rewards/margins": 1.3122848272323608, + "rewards/rejected": 0.32968446612358093, + "step": 8895 + }, + { + "epoch": 0.52, + "learning_rate": 4.956650032533759e-08, + "logits/chosen": -1.9561032056808472, + "logits/rejected": -1.9630239009857178, + "logps/chosen": -175.22317504882812, + "logps/rejected": -171.0614013671875, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7339600324630737, + "rewards/margins": 0.43856513500213623, + "rewards/rejected": 1.2953948974609375, + "step": 8896 + }, + { + "epoch": 0.52, + "learning_rate": 4.955707666325743e-08, + "logits/chosen": -2.050739049911499, + "logits/rejected": -2.050652503967285, + "logps/chosen": -0.7979699969291687, + "logps/rejected": -127.68688201904297, + "loss": 0.4307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002570897340774536, + "rewards/margins": 1.5334162712097168, + "rewards/rejected": -1.535987138748169, + "step": 8897 + }, + { + "epoch": 0.52, + "learning_rate": 4.954765301691209e-08, + "logits/chosen": -2.0164289474487305, + "logits/rejected": -1.9900484085083008, + "logps/chosen": -27.594093322753906, + "logps/rejected": -235.42111206054688, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35715293884277344, + "rewards/margins": 2.4909939765930176, + "rewards/rejected": -2.133841037750244, + "step": 8898 + }, + { + "epoch": 0.52, + "learning_rate": 4.95382293866363e-08, + "logits/chosen": -1.9826829433441162, + "logits/rejected": -1.9776005744934082, + "logps/chosen": -88.63395690917969, + "logps/rejected": -238.9695281982422, + "loss": 0.2793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.687817394733429, + "rewards/margins": 1.5742905139923096, + "rewards/rejected": -0.8864731192588806, + "step": 8899 + }, + { + "epoch": 0.52, + "learning_rate": 4.95288057727649e-08, + "logits/chosen": -1.990475058555603, + "logits/rejected": -2.0150394439697266, + "logps/chosen": -198.00791931152344, + "logps/rejected": -356.5803527832031, + "loss": 0.0867, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.34783935546875, + "rewards/margins": 1.8359649181365967, + "rewards/rejected": 1.5118744373321533, + "step": 8900 + }, + { + "epoch": 0.52, + "learning_rate": 4.95193821756326e-08, + "logits/chosen": -1.9875239133834839, + "logits/rejected": -1.9869654178619385, + "logps/chosen": -17.995574951171875, + "logps/rejected": -110.22660827636719, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23778095841407776, + "rewards/margins": 2.8996715545654297, + "rewards/rejected": -2.6618905067443848, + "step": 8901 + }, + { + "epoch": 0.52, + "learning_rate": 4.9509958595574214e-08, + "logits/chosen": -1.9284965991973877, + "logits/rejected": -1.9324069023132324, + "logps/chosen": -60.93254089355469, + "logps/rejected": -277.78704833984375, + "loss": 0.2487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.989398181438446, + "rewards/margins": 1.447229027748108, + "rewards/rejected": -0.4578308165073395, + "step": 8902 + }, + { + "epoch": 0.52, + "learning_rate": 4.9500535032924464e-08, + "logits/chosen": -1.8013100624084473, + "logits/rejected": -1.8028826713562012, + "logps/chosen": -7.628593921661377, + "logps/rejected": -149.94503784179688, + "loss": 0.3946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27161088585853577, + "rewards/margins": 1.4044874906539917, + "rewards/rejected": -1.1328766345977783, + "step": 8903 + }, + { + "epoch": 0.52, + "learning_rate": 4.9491111488018187e-08, + "logits/chosen": -1.7472631931304932, + "logits/rejected": -1.783211350440979, + "logps/chosen": -195.57498168945312, + "logps/rejected": -338.4400329589844, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.24700927734375, + "rewards/margins": 2.830548048019409, + "rewards/rejected": -0.583538830280304, + "step": 8904 + }, + { + "epoch": 0.52, + "learning_rate": 4.948168796119009e-08, + "logits/chosen": -2.0564088821411133, + "logits/rejected": -2.052471399307251, + "logps/chosen": -32.640289306640625, + "logps/rejected": -164.89979553222656, + "loss": 0.4908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15099601447582245, + "rewards/margins": 0.9333858489990234, + "rewards/rejected": -0.7823898196220398, + "step": 8905 + }, + { + "epoch": 0.52, + "learning_rate": 4.947226445277499e-08, + "logits/chosen": -2.0549585819244385, + "logits/rejected": -2.0495431423187256, + "logps/chosen": -4.018184185028076, + "logps/rejected": -282.25311279296875, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12370848655700684, + "rewards/margins": 6.496539115905762, + "rewards/rejected": -6.372830390930176, + "step": 8906 + }, + { + "epoch": 0.52, + "learning_rate": 4.9462840963107635e-08, + "logits/chosen": -1.8778645992279053, + "logits/rejected": -1.8747482299804688, + "logps/chosen": -5.199609279632568, + "logps/rejected": -34.716209411621094, + "loss": 0.6662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018646717071533203, + "rewards/margins": 0.06229390949010849, + "rewards/rejected": -0.0809406265616417, + "step": 8907 + }, + { + "epoch": 0.52, + "learning_rate": 4.9453417492522795e-08, + "logits/chosen": -2.019270658493042, + "logits/rejected": -2.0195915699005127, + "logps/chosen": -1.8147587776184082, + "logps/rejected": -76.76473999023438, + "loss": 0.711, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02326204814016819, + "rewards/margins": -0.08931425213813782, + "rewards/rejected": 0.11257629841566086, + "step": 8908 + }, + { + "epoch": 0.52, + "learning_rate": 4.944399404135523e-08, + "logits/chosen": -1.8120903968811035, + "logits/rejected": -1.8183019161224365, + "logps/chosen": -230.48779296875, + "logps/rejected": -462.5184326171875, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.089865207672119, + "rewards/margins": 3.916677951812744, + "rewards/rejected": -1.826812744140625, + "step": 8909 + }, + { + "epoch": 0.52, + "learning_rate": 4.9434570609939744e-08, + "logits/chosen": -1.6729224920272827, + "logits/rejected": -1.6567922830581665, + "logps/chosen": -36.6519889831543, + "logps/rejected": -264.6053466796875, + "loss": 0.4047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22049713134765625, + "rewards/margins": 1.0616683959960938, + "rewards/rejected": -0.8411712646484375, + "step": 8910 + }, + { + "epoch": 0.52, + "learning_rate": 4.942514719861105e-08, + "logits/chosen": -1.644115686416626, + "logits/rejected": -1.5972099304199219, + "logps/chosen": -205.67648315429688, + "logps/rejected": -334.37506103515625, + "loss": 0.0625, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2821381092071533, + "rewards/margins": 2.847979784011841, + "rewards/rejected": -0.5658416748046875, + "step": 8911 + }, + { + "epoch": 0.52, + "learning_rate": 4.941572380770394e-08, + "logits/chosen": -1.8888566493988037, + "logits/rejected": -1.8765227794647217, + "logps/chosen": -26.915340423583984, + "logps/rejected": -289.9101867675781, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04499511793255806, + "rewards/margins": 5.253988742828369, + "rewards/rejected": -5.208993434906006, + "step": 8912 + }, + { + "epoch": 0.52, + "learning_rate": 4.9406300437553195e-08, + "logits/chosen": -2.0256733894348145, + "logits/rejected": -2.0181310176849365, + "logps/chosen": -1.6503762006759644, + "logps/rejected": -154.7044677734375, + "loss": 0.4681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08221365511417389, + "rewards/margins": 1.0711205005645752, + "rewards/rejected": -0.9889068603515625, + "step": 8913 + }, + { + "epoch": 0.52, + "learning_rate": 4.939687708849354e-08, + "logits/chosen": -2.0806896686553955, + "logits/rejected": -2.084073066711426, + "logps/chosen": -17.503049850463867, + "logps/rejected": -166.4073486328125, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24893474578857422, + "rewards/margins": 1.5066908597946167, + "rewards/rejected": -1.2577561140060425, + "step": 8914 + }, + { + "epoch": 0.52, + "learning_rate": 4.9387453760859785e-08, + "logits/chosen": -1.828160047531128, + "logits/rejected": -1.8323659896850586, + "logps/chosen": -0.00011229166557313874, + "logps/rejected": -79.642333984375, + "loss": 0.4088, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.336140096304007e-05, + "rewards/margins": 1.8909478187561035, + "rewards/rejected": -1.8909244537353516, + "step": 8915 + }, + { + "epoch": 0.52, + "learning_rate": 4.937803045498665e-08, + "logits/chosen": -1.9403698444366455, + "logits/rejected": -1.926864504814148, + "logps/chosen": -209.58847045898438, + "logps/rejected": -349.98614501953125, + "loss": 0.1702, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0843108892440796, + "rewards/margins": 1.5312896966934204, + "rewards/rejected": -0.44697877764701843, + "step": 8916 + }, + { + "epoch": 0.52, + "learning_rate": 4.936860717120893e-08, + "logits/chosen": -1.837372064590454, + "logits/rejected": -1.8408560752868652, + "logps/chosen": -224.84347534179688, + "logps/rejected": -190.0595703125, + "loss": 0.4849, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.587139904499054, + "rewards/margins": -0.18919527530670166, + "rewards/rejected": 0.7763351798057556, + "step": 8917 + }, + { + "epoch": 0.52, + "learning_rate": 4.9359183909861366e-08, + "logits/chosen": -2.113157033920288, + "logits/rejected": -2.1121408939361572, + "logps/chosen": -76.72532653808594, + "logps/rejected": -298.5555114746094, + "loss": 0.4065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21022950112819672, + "rewards/margins": 1.536383032798767, + "rewards/rejected": -1.746612548828125, + "step": 8918 + }, + { + "epoch": 0.52, + "learning_rate": 4.934976067127873e-08, + "logits/chosen": -2.0491018295288086, + "logits/rejected": -2.0461907386779785, + "logps/chosen": -0.017175551503896713, + "logps/rejected": -82.50509643554688, + "loss": 0.522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006261641392484307, + "rewards/margins": 0.7690343856811523, + "rewards/rejected": -0.7684082388877869, + "step": 8919 + }, + { + "epoch": 0.52, + "learning_rate": 4.934033745579575e-08, + "logits/chosen": -2.078733205795288, + "logits/rejected": -2.045496940612793, + "logps/chosen": -157.7322540283203, + "logps/rejected": -405.457275390625, + "loss": 0.0776, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0844788551330566, + "rewards/margins": 3.3994386196136475, + "rewards/rejected": -1.3149597644805908, + "step": 8920 + }, + { + "epoch": 0.52, + "learning_rate": 4.9330914263747246e-08, + "logits/chosen": -1.9085590839385986, + "logits/rejected": -1.9528459310531616, + "logps/chosen": -205.69659423828125, + "logps/rejected": -330.0259094238281, + "loss": 0.1069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9316574335098267, + "rewards/margins": 1.7117050886154175, + "rewards/rejected": 0.21995238959789276, + "step": 8921 + }, + { + "epoch": 0.52, + "learning_rate": 4.93214910954679e-08, + "logits/chosen": -2.0628414154052734, + "logits/rejected": -2.044715404510498, + "logps/chosen": -31.97021484375, + "logps/rejected": -135.06707763671875, + "loss": 0.506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030585480853915215, + "rewards/margins": 0.7251914739608765, + "rewards/rejected": -0.6946060061454773, + "step": 8922 + }, + { + "epoch": 0.52, + "learning_rate": 4.9312067951292555e-08, + "logits/chosen": -1.997145414352417, + "logits/rejected": -2.004929542541504, + "logps/chosen": -48.29670333862305, + "logps/rejected": -71.6434326171875, + "loss": 0.6404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03399009630084038, + "rewards/margins": 0.03430595248937607, + "rewards/rejected": -0.0003158569452352822, + "step": 8923 + }, + { + "epoch": 0.52, + "learning_rate": 4.930264483155588e-08, + "logits/chosen": -1.9464436769485474, + "logits/rejected": -1.9483617544174194, + "logps/chosen": -43.55681228637695, + "logps/rejected": -214.12022399902344, + "loss": 0.3713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15331916511058807, + "rewards/margins": 1.7731976509094238, + "rewards/rejected": -1.9265167713165283, + "step": 8924 + }, + { + "epoch": 0.52, + "learning_rate": 4.929322173659271e-08, + "logits/chosen": -2.1822688579559326, + "logits/rejected": -2.1783511638641357, + "logps/chosen": -14.750075340270996, + "logps/rejected": -244.36253356933594, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5258789289873675e-06, + "rewards/margins": 5.259422302246094, + "rewards/rejected": -5.259420871734619, + "step": 8925 + }, + { + "epoch": 0.52, + "learning_rate": 4.928379866673773e-08, + "logits/chosen": -2.022155284881592, + "logits/rejected": -2.0056726932525635, + "logps/chosen": -72.33821868896484, + "logps/rejected": -293.7775573730469, + "loss": 0.1975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7957481741905212, + "rewards/margins": 2.5385308265686035, + "rewards/rejected": -1.7427825927734375, + "step": 8926 + }, + { + "epoch": 0.52, + "learning_rate": 4.927437562232574e-08, + "logits/chosen": -1.7748386859893799, + "logits/rejected": -1.7838979959487915, + "logps/chosen": -176.17922973632812, + "logps/rejected": -344.55059814453125, + "loss": 0.0947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8246551752090454, + "rewards/margins": 1.920904517173767, + "rewards/rejected": -0.09624939411878586, + "step": 8927 + }, + { + "epoch": 0.52, + "learning_rate": 4.926495260369147e-08, + "logits/chosen": -1.851728081703186, + "logits/rejected": -1.9276397228240967, + "logps/chosen": -285.80419921875, + "logps/rejected": -274.4983825683594, + "loss": 0.4506, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.862200915813446, + "rewards/margins": -0.033447265625, + "rewards/rejected": 0.895648181438446, + "step": 8928 + }, + { + "epoch": 0.52, + "learning_rate": 4.925552961116969e-08, + "logits/chosen": -1.7807977199554443, + "logits/rejected": -1.7728646993637085, + "logps/chosen": -30.897083282470703, + "logps/rejected": -189.69680786132812, + "loss": 0.2356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7576183676719666, + "rewards/margins": 1.959336519241333, + "rewards/rejected": -1.2017182111740112, + "step": 8929 + }, + { + "epoch": 0.52, + "learning_rate": 4.924610664509513e-08, + "logits/chosen": -1.8076244592666626, + "logits/rejected": -1.8053557872772217, + "logps/chosen": -27.645217895507812, + "logps/rejected": -84.68852233886719, + "loss": 0.3183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38457947969436646, + "rewards/margins": 1.773634433746338, + "rewards/rejected": -1.3890548944473267, + "step": 8930 + }, + { + "epoch": 0.52, + "learning_rate": 4.923668370580256e-08, + "logits/chosen": -1.8553526401519775, + "logits/rejected": -1.838965654373169, + "logps/chosen": -3.1892573833465576, + "logps/rejected": -145.21554565429688, + "loss": 0.4212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13275741040706635, + "rewards/margins": 1.4322950839996338, + "rewards/rejected": -1.2995376586914062, + "step": 8931 + }, + { + "epoch": 0.52, + "learning_rate": 4.9227260793626704e-08, + "logits/chosen": -2.0534114837646484, + "logits/rejected": -2.0443129539489746, + "logps/chosen": -5.197429709369317e-05, + "logps/rejected": -189.6722869873047, + "loss": 0.3803, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2874127151007997e-06, + "rewards/margins": 2.65813946723938, + "rewards/rejected": -2.6581406593322754, + "step": 8932 + }, + { + "epoch": 0.52, + "learning_rate": 4.921783790890232e-08, + "logits/chosen": -1.9866034984588623, + "logits/rejected": -1.980821132659912, + "logps/chosen": -0.000117656440124847, + "logps/rejected": -66.90505981445312, + "loss": 0.5959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00011718829773599282, + "rewards/margins": 0.42315301299095154, + "rewards/rejected": -0.4230358302593231, + "step": 8933 + }, + { + "epoch": 0.52, + "learning_rate": 4.9208415051964184e-08, + "logits/chosen": -2.0616888999938965, + "logits/rejected": -2.0644214153289795, + "logps/chosen": -202.3359375, + "logps/rejected": -286.2581787109375, + "loss": 0.3599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3592865467071533, + "rewards/margins": 0.3798248767852783, + "rewards/rejected": 0.979461669921875, + "step": 8934 + }, + { + "epoch": 0.52, + "learning_rate": 4.9198992223147e-08, + "logits/chosen": -1.8582839965820312, + "logits/rejected": -1.860034465789795, + "logps/chosen": -4.2318810301367193e-05, + "logps/rejected": -330.3180847167969, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.144384782492125e-06, + "rewards/margins": 9.467716217041016, + "rewards/rejected": -9.4677152633667, + "step": 8935 + }, + { + "epoch": 0.52, + "learning_rate": 4.9189569422785534e-08, + "logits/chosen": -1.9938522577285767, + "logits/rejected": -1.988257884979248, + "logps/chosen": -0.001182899228297174, + "logps/rejected": -87.81634521484375, + "loss": 0.6496, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.8463776042572135e-08, + "rewards/margins": 0.18198400735855103, + "rewards/rejected": -0.18198394775390625, + "step": 8936 + }, + { + "epoch": 0.52, + "learning_rate": 4.918014665121453e-08, + "logits/chosen": -1.8484758138656616, + "logits/rejected": -1.8376697301864624, + "logps/chosen": -84.68380737304688, + "logps/rejected": -316.3909606933594, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1874840259552002, + "rewards/margins": 3.0443625450134277, + "rewards/rejected": -1.856878638267517, + "step": 8937 + }, + { + "epoch": 0.52, + "learning_rate": 4.9170723908768724e-08, + "logits/chosen": -1.5831066370010376, + "logits/rejected": -1.5793906450271606, + "logps/chosen": -1.4198209047317505, + "logps/rejected": -33.61984634399414, + "loss": 0.7146, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.008414709940552711, + "rewards/margins": -0.07590623944997787, + "rewards/rejected": 0.06749153137207031, + "step": 8938 + }, + { + "epoch": 0.52, + "learning_rate": 4.9161301195782844e-08, + "logits/chosen": -1.8827710151672363, + "logits/rejected": -1.8835384845733643, + "logps/chosen": -163.83363342285156, + "logps/rejected": -369.9024658203125, + "loss": 0.1996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8670455813407898, + "rewards/margins": 1.3506790399551392, + "rewards/rejected": -0.483633428812027, + "step": 8939 + }, + { + "epoch": 0.52, + "learning_rate": 4.9151878512591686e-08, + "logits/chosen": -1.9049183130264282, + "logits/rejected": -1.9130045175552368, + "logps/chosen": -37.89463424682617, + "logps/rejected": -270.618408203125, + "loss": 0.2232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5791530609130859, + "rewards/margins": 4.948408126831055, + "rewards/rejected": -4.369255065917969, + "step": 8940 + }, + { + "epoch": 0.52, + "learning_rate": 4.914245585952991e-08, + "logits/chosen": -2.122330665588379, + "logits/rejected": -2.1217453479766846, + "logps/chosen": -5.654529571533203, + "logps/rejected": -95.26139068603516, + "loss": 0.4748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061896372586488724, + "rewards/margins": 1.0896120071411133, + "rewards/rejected": -1.1515083312988281, + "step": 8941 + }, + { + "epoch": 0.52, + "learning_rate": 4.913303323693234e-08, + "logits/chosen": -1.918703317642212, + "logits/rejected": -1.899834156036377, + "logps/chosen": -298.36456298828125, + "logps/rejected": -514.2492065429688, + "loss": 0.0561, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5091828107833862, + "rewards/margins": 3.5186004638671875, + "rewards/rejected": -2.009417772293091, + "step": 8942 + }, + { + "epoch": 0.52, + "learning_rate": 4.9123610645133636e-08, + "logits/chosen": -1.7707477807998657, + "logits/rejected": -1.7863948345184326, + "logps/chosen": -224.1621856689453, + "logps/rejected": -470.7099304199219, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9635909795761108, + "rewards/margins": 2.536555528640747, + "rewards/rejected": -0.5729644894599915, + "step": 8943 + }, + { + "epoch": 0.52, + "learning_rate": 4.911418808446859e-08, + "logits/chosen": -1.7950387001037598, + "logits/rejected": -1.781826376914978, + "logps/chosen": -204.17149353027344, + "logps/rejected": -227.65438842773438, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8447707891464233, + "rewards/margins": 0.2409728765487671, + "rewards/rejected": 1.6037979125976562, + "step": 8944 + }, + { + "epoch": 0.52, + "learning_rate": 4.9104765555271896e-08, + "logits/chosen": -1.9497889280319214, + "logits/rejected": -1.9497835636138916, + "logps/chosen": -43.70539855957031, + "logps/rejected": -258.74212646484375, + "loss": 0.2293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4117393493652344, + "rewards/margins": 2.0467324256896973, + "rewards/rejected": -1.6349929571151733, + "step": 8945 + }, + { + "epoch": 0.52, + "learning_rate": 4.9095343057878334e-08, + "logits/chosen": -1.9158259630203247, + "logits/rejected": -1.9079755544662476, + "logps/chosen": -62.73456573486328, + "logps/rejected": -254.70054626464844, + "loss": 0.4283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26504746079444885, + "rewards/margins": 2.5713934898376465, + "rewards/rejected": -2.8364410400390625, + "step": 8946 + }, + { + "epoch": 0.52, + "learning_rate": 4.908592059262258e-08, + "logits/chosen": -1.9967321157455444, + "logits/rejected": -1.9952424764633179, + "logps/chosen": -46.254207611083984, + "logps/rejected": -126.1656494140625, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16300621628761292, + "rewards/margins": 0.807461142539978, + "rewards/rejected": -0.6444549560546875, + "step": 8947 + }, + { + "epoch": 0.52, + "learning_rate": 4.907649815983943e-08, + "logits/chosen": -2.0281264781951904, + "logits/rejected": -2.021299123764038, + "logps/chosen": -8.539685249328613, + "logps/rejected": -176.54263305664062, + "loss": 0.3729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.342139333486557, + "rewards/margins": 1.572013020515442, + "rewards/rejected": -1.2298736572265625, + "step": 8948 + }, + { + "epoch": 0.52, + "learning_rate": 4.9067075759863555e-08, + "logits/chosen": -2.0324177742004395, + "logits/rejected": -2.029634714126587, + "logps/chosen": -199.30926513671875, + "logps/rejected": -291.80255126953125, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0230530500411987, + "rewards/margins": 0.6207916736602783, + "rewards/rejected": 0.402261346578598, + "step": 8949 + }, + { + "epoch": 0.52, + "learning_rate": 4.905765339302973e-08, + "logits/chosen": -2.0249431133270264, + "logits/rejected": -2.008368730545044, + "logps/chosen": -41.774879455566406, + "logps/rejected": -316.3238525390625, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06303443759679794, + "rewards/margins": 4.653055191040039, + "rewards/rejected": -4.590020656585693, + "step": 8950 + }, + { + "epoch": 0.52, + "learning_rate": 4.9048231059672665e-08, + "logits/chosen": -1.97517728805542, + "logits/rejected": -1.970285177230835, + "logps/chosen": -47.881858825683594, + "logps/rejected": -233.49349975585938, + "loss": 0.4246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2729026973247528, + "rewards/margins": 3.053215742111206, + "rewards/rejected": -3.3261184692382812, + "step": 8951 + }, + { + "epoch": 0.52, + "learning_rate": 4.9038808760127074e-08, + "logits/chosen": -1.9710966348648071, + "logits/rejected": -1.973483681678772, + "logps/chosen": -2.8718490600585938, + "logps/rejected": -165.722412109375, + "loss": 0.4749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03818254545331001, + "rewards/margins": 1.2154414653778076, + "rewards/rejected": -1.2536239624023438, + "step": 8952 + }, + { + "epoch": 0.52, + "learning_rate": 4.902938649472772e-08, + "logits/chosen": -1.8203786611557007, + "logits/rejected": -1.8270622491836548, + "logps/chosen": -2.1099938749102876e-05, + "logps/rejected": -128.76522827148438, + "loss": 0.3703, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.794874138904561e-07, + "rewards/margins": 2.8992271423339844, + "rewards/rejected": -2.8992278575897217, + "step": 8953 + }, + { + "epoch": 0.52, + "learning_rate": 4.9019964263809285e-08, + "logits/chosen": -1.974683403968811, + "logits/rejected": -1.9762225151062012, + "logps/chosen": -4.1721673011779785, + "logps/rejected": -44.2306022644043, + "loss": 0.5886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2633328437805176, + "rewards/margins": 0.19709938764572144, + "rewards/rejected": 0.06623344868421555, + "step": 8954 + }, + { + "epoch": 0.52, + "learning_rate": 4.901054206770653e-08, + "logits/chosen": -1.8490873575210571, + "logits/rejected": -1.854528784751892, + "logps/chosen": -15.377774238586426, + "logps/rejected": -143.05889892578125, + "loss": 0.5773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019437313079833984, + "rewards/margins": 0.5451346635818481, + "rewards/rejected": -0.5256973505020142, + "step": 8955 + }, + { + "epoch": 0.52, + "learning_rate": 4.900111990675415e-08, + "logits/chosen": -2.0251190662384033, + "logits/rejected": -2.014221668243408, + "logps/chosen": -70.79964447021484, + "logps/rejected": -316.50341796875, + "loss": 0.4198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39085084199905396, + "rewards/margins": 0.6670593023300171, + "rewards/rejected": -0.2762084901332855, + "step": 8956 + }, + { + "epoch": 0.52, + "learning_rate": 4.8991697781286884e-08, + "logits/chosen": -1.8064606189727783, + "logits/rejected": -1.8181945085525513, + "logps/chosen": -292.68829345703125, + "logps/rejected": -395.3970031738281, + "loss": 0.0682, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.087493896484375, + "rewards/margins": 2.213949680328369, + "rewards/rejected": -0.12645569443702698, + "step": 8957 + }, + { + "epoch": 0.52, + "learning_rate": 4.898227569163943e-08, + "logits/chosen": -2.1772141456604004, + "logits/rejected": -2.1755783557891846, + "logps/chosen": -15.234710693359375, + "logps/rejected": -152.82400512695312, + "loss": 0.5351, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08686075359582901, + "rewards/margins": 0.9381896257400513, + "rewards/rejected": -1.025050401687622, + "step": 8958 + }, + { + "epoch": 0.52, + "learning_rate": 4.8972853638146537e-08, + "logits/chosen": -1.787739872932434, + "logits/rejected": -1.7703566551208496, + "logps/chosen": -182.01303100585938, + "logps/rejected": -464.26129150390625, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3706040382385254, + "rewards/margins": 3.747300863265991, + "rewards/rejected": -1.3766968250274658, + "step": 8959 + }, + { + "epoch": 0.52, + "learning_rate": 4.8963431621142894e-08, + "logits/chosen": -1.9802666902542114, + "logits/rejected": -1.9907130002975464, + "logps/chosen": -170.8433837890625, + "logps/rejected": -296.25592041015625, + "loss": 0.077, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1396896839141846, + "rewards/margins": 2.047929525375366, + "rewards/rejected": 0.09176025539636612, + "step": 8960 + }, + { + "epoch": 0.52, + "learning_rate": 4.8954009640963254e-08, + "logits/chosen": -2.0068514347076416, + "logits/rejected": -2.003220796585083, + "logps/chosen": -75.61811065673828, + "logps/rejected": -329.2632751464844, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07695847004652023, + "rewards/margins": 2.9539084434509277, + "rewards/rejected": -2.8769500255584717, + "step": 8961 + }, + { + "epoch": 0.52, + "learning_rate": 4.8944587697942274e-08, + "logits/chosen": -1.8710637092590332, + "logits/rejected": -1.8771302700042725, + "logps/chosen": -216.29376220703125, + "logps/rejected": -489.46356201171875, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1037261486053467, + "rewards/margins": 3.665078639984131, + "rewards/rejected": -0.561352550983429, + "step": 8962 + }, + { + "epoch": 0.52, + "learning_rate": 4.893516579241474e-08, + "logits/chosen": -1.9065104722976685, + "logits/rejected": -1.9025596380233765, + "logps/chosen": -8.324633598327637, + "logps/rejected": -84.82784271240234, + "loss": 0.6979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013057613745331764, + "rewards/margins": 0.006562899798154831, + "rewards/rejected": -0.019620513543486595, + "step": 8963 + }, + { + "epoch": 0.52, + "learning_rate": 4.892574392471529e-08, + "logits/chosen": -2.057328701019287, + "logits/rejected": -2.055981159210205, + "logps/chosen": -74.21528625488281, + "logps/rejected": -125.7677230834961, + "loss": 0.6494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22582092881202698, + "rewards/margins": 0.1252952516078949, + "rewards/rejected": -0.3511161804199219, + "step": 8964 + }, + { + "epoch": 0.52, + "learning_rate": 4.89163220951787e-08, + "logits/chosen": -1.7176434993743896, + "logits/rejected": -1.7290297746658325, + "logps/chosen": -0.0006719013908877969, + "logps/rejected": -172.4464111328125, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8276403352501802e-05, + "rewards/margins": 4.746487140655518, + "rewards/rejected": -4.746505260467529, + "step": 8965 + }, + { + "epoch": 0.52, + "learning_rate": 4.890690030413962e-08, + "logits/chosen": -1.9671719074249268, + "logits/rejected": -1.9785653352737427, + "logps/chosen": -57.51089859008789, + "logps/rejected": -141.8441619873047, + "loss": 0.4151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03816261515021324, + "rewards/margins": 1.7504383325576782, + "rewards/rejected": -1.712275743484497, + "step": 8966 + }, + { + "epoch": 0.52, + "learning_rate": 4.889747855193283e-08, + "logits/chosen": -1.9237420558929443, + "logits/rejected": -1.9039852619171143, + "logps/chosen": -66.8515853881836, + "logps/rejected": -244.86581420898438, + "loss": 0.0927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5685081481933594, + "rewards/margins": 3.751201629638672, + "rewards/rejected": -2.1826934814453125, + "step": 8967 + }, + { + "epoch": 0.52, + "learning_rate": 4.8888056838892946e-08, + "logits/chosen": -1.9211479425430298, + "logits/rejected": -1.9306813478469849, + "logps/chosen": -246.3685302734375, + "logps/rejected": -566.0086669921875, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.17376708984375, + "rewards/margins": 8.64749526977539, + "rewards/rejected": -6.473727703094482, + "step": 8968 + }, + { + "epoch": 0.52, + "learning_rate": 4.887863516535475e-08, + "logits/chosen": -1.9955099821090698, + "logits/rejected": -1.9966819286346436, + "logps/chosen": -2.8252339689061046e-05, + "logps/rejected": -183.10238647460938, + "loss": 0.3816, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0264997147023678e-07, + "rewards/margins": 2.427947759628296, + "rewards/rejected": -2.427947998046875, + "step": 8969 + }, + { + "epoch": 0.52, + "learning_rate": 4.886921353165291e-08, + "logits/chosen": -2.078768253326416, + "logits/rejected": -2.0809409618377686, + "logps/chosen": -0.0024793711490929127, + "logps/rejected": -118.79109191894531, + "loss": 0.4153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015791428450029343, + "rewards/margins": 2.130138635635376, + "rewards/rejected": -2.130296468734741, + "step": 8970 + }, + { + "epoch": 0.52, + "learning_rate": 4.885979193812215e-08, + "logits/chosen": -1.7772818803787231, + "logits/rejected": -1.7993024587631226, + "logps/chosen": -173.01910400390625, + "logps/rejected": -220.77027893066406, + "loss": 0.655, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6036362051963806, + "rewards/margins": -0.7284576296806335, + "rewards/rejected": 1.3320938348770142, + "step": 8971 + }, + { + "epoch": 0.52, + "learning_rate": 4.885037038509714e-08, + "logits/chosen": -1.8970941305160522, + "logits/rejected": -1.8977290391921997, + "logps/chosen": -6.520646275021136e-05, + "logps/rejected": -178.49156188964844, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8715530814006343e-06, + "rewards/margins": 5.313399314880371, + "rewards/rejected": -5.313401222229004, + "step": 8972 + }, + { + "epoch": 0.52, + "learning_rate": 4.88409488729126e-08, + "logits/chosen": -2.058159351348877, + "logits/rejected": -2.0585241317749023, + "logps/chosen": -22.59364128112793, + "logps/rejected": -93.96463012695312, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41944122314453125, + "rewards/margins": 1.8113555908203125, + "rewards/rejected": -1.3919143676757812, + "step": 8973 + }, + { + "epoch": 0.52, + "learning_rate": 4.883152740190324e-08, + "logits/chosen": -1.9328886270523071, + "logits/rejected": -1.916139841079712, + "logps/chosen": -84.92308044433594, + "logps/rejected": -262.3106689453125, + "loss": 0.1149, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2981491088867188, + "rewards/margins": 4.870120048522949, + "rewards/rejected": -3.5719711780548096, + "step": 8974 + }, + { + "epoch": 0.52, + "learning_rate": 4.8822105972403726e-08, + "logits/chosen": -1.8071213960647583, + "logits/rejected": -1.8031091690063477, + "logps/chosen": -190.96478271484375, + "logps/rejected": -271.57073974609375, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.860504150390625, + "rewards/margins": 0.2302001714706421, + "rewards/rejected": 1.630303978919983, + "step": 8975 + }, + { + "epoch": 0.52, + "learning_rate": 4.8812684584748785e-08, + "logits/chosen": -1.8820574283599854, + "logits/rejected": -1.8792492151260376, + "logps/chosen": -0.00591787276789546, + "logps/rejected": -321.8056945800781, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4549815356731415e-06, + "rewards/margins": 2.7868669033050537, + "rewards/rejected": -2.7868714332580566, + "step": 8976 + }, + { + "epoch": 0.52, + "learning_rate": 4.880326323927308e-08, + "logits/chosen": -2.035628080368042, + "logits/rejected": -2.0329103469848633, + "logps/chosen": -51.95878982543945, + "logps/rejected": -149.24038696289062, + "loss": 0.5891, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2859634459018707, + "rewards/margins": 1.2059860229492188, + "rewards/rejected": -1.491949439048767, + "step": 8977 + }, + { + "epoch": 0.52, + "learning_rate": 4.879384193631133e-08, + "logits/chosen": -1.9372745752334595, + "logits/rejected": -1.92791748046875, + "logps/chosen": -212.80059814453125, + "logps/rejected": -532.2493286132812, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5588455200195312, + "rewards/margins": 7.079139709472656, + "rewards/rejected": -4.520294189453125, + "step": 8978 + }, + { + "epoch": 0.52, + "learning_rate": 4.8784420676198204e-08, + "logits/chosen": -1.857350468635559, + "logits/rejected": -1.895660161972046, + "logps/chosen": -190.9827880859375, + "logps/rejected": -491.0631103515625, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.410360813140869, + "rewards/margins": 3.9000062942504883, + "rewards/rejected": -1.4896453619003296, + "step": 8979 + }, + { + "epoch": 0.52, + "learning_rate": 4.877499945926842e-08, + "logits/chosen": -2.032029628753662, + "logits/rejected": -2.0315823554992676, + "logps/chosen": -117.67301940917969, + "logps/rejected": -242.97415161132812, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19230423867702484, + "rewards/margins": 3.181555986404419, + "rewards/rejected": -2.9892518520355225, + "step": 8980 + }, + { + "epoch": 0.52, + "learning_rate": 4.876557828585662e-08, + "logits/chosen": -2.174808979034424, + "logits/rejected": -2.172433376312256, + "logps/chosen": -1.2784634828567505, + "logps/rejected": -39.13932418823242, + "loss": 0.5112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03818993642926216, + "rewards/margins": 0.8195528984069824, + "rewards/rejected": -0.7813629508018494, + "step": 8981 + }, + { + "epoch": 0.52, + "learning_rate": 4.875615715629755e-08, + "logits/chosen": -1.8388086557388306, + "logits/rejected": -1.7706044912338257, + "logps/chosen": -274.0552978515625, + "logps/rejected": -563.210205078125, + "loss": 0.0982, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6773284673690796, + "rewards/margins": 4.517190456390381, + "rewards/rejected": -2.839862108230591, + "step": 8982 + }, + { + "epoch": 0.52, + "learning_rate": 4.874673607092583e-08, + "logits/chosen": -1.9481768608093262, + "logits/rejected": -1.949228286743164, + "logps/chosen": -10.427940368652344, + "logps/rejected": -142.99093627929688, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025599480140954256, + "rewards/margins": 2.8536038398742676, + "rewards/rejected": -2.856163740158081, + "step": 8983 + }, + { + "epoch": 0.52, + "learning_rate": 4.87373150300762e-08, + "logits/chosen": -2.0825531482696533, + "logits/rejected": -2.0768773555755615, + "logps/chosen": -94.54862976074219, + "logps/rejected": -217.715576171875, + "loss": 1.0582, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0281585454940796, + "rewards/margins": -0.2999328374862671, + "rewards/rejected": -0.7282257080078125, + "step": 8984 + }, + { + "epoch": 0.52, + "learning_rate": 4.872789403408329e-08, + "logits/chosen": -1.9504966735839844, + "logits/rejected": -1.9491093158721924, + "logps/chosen": -2.387418508529663, + "logps/rejected": -131.52810668945312, + "loss": 0.5127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020261526107788086, + "rewards/margins": 0.8836801052093506, + "rewards/rejected": -0.8634185791015625, + "step": 8985 + }, + { + "epoch": 0.52, + "learning_rate": 4.871847308328184e-08, + "logits/chosen": -2.141855001449585, + "logits/rejected": -2.1399197578430176, + "logps/chosen": -11.768875122070312, + "logps/rejected": -141.15402221679688, + "loss": 0.6249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6162130832672119, + "rewards/margins": 0.8529977798461914, + "rewards/rejected": -1.4692108631134033, + "step": 8986 + }, + { + "epoch": 0.52, + "learning_rate": 4.870905217800647e-08, + "logits/chosen": -1.9463927745819092, + "logits/rejected": -1.9405900239944458, + "logps/chosen": -74.29427337646484, + "logps/rejected": -257.1903076171875, + "loss": 1.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8565338850021362, + "rewards/margins": 2.25018310546875, + "rewards/rejected": -4.106717109680176, + "step": 8987 + }, + { + "epoch": 0.52, + "learning_rate": 4.86996313185919e-08, + "logits/chosen": -1.9852427244186401, + "logits/rejected": -1.9666496515274048, + "logps/chosen": -8.812285423278809, + "logps/rejected": -300.701171875, + "loss": 0.1757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8192054033279419, + "rewards/margins": 5.514130115509033, + "rewards/rejected": -4.694924831390381, + "step": 8988 + }, + { + "epoch": 0.52, + "learning_rate": 4.869021050537276e-08, + "logits/chosen": -1.731716513633728, + "logits/rejected": -1.723878264427185, + "logps/chosen": -7.916908264160156, + "logps/rejected": -242.04306030273438, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23031654953956604, + "rewards/margins": 3.085785388946533, + "rewards/rejected": -2.85546875, + "step": 8989 + }, + { + "epoch": 0.52, + "learning_rate": 4.868078973868377e-08, + "logits/chosen": -1.9021642208099365, + "logits/rejected": -1.902915596961975, + "logps/chosen": -174.32839965820312, + "logps/rejected": -290.9081115722656, + "loss": 0.1146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9809387922286987, + "rewards/margins": 2.049466133117676, + "rewards/rejected": -0.0685272216796875, + "step": 8990 + }, + { + "epoch": 0.52, + "learning_rate": 4.8671369018859574e-08, + "logits/chosen": -1.8864599466323853, + "logits/rejected": -1.9254260063171387, + "logps/chosen": -166.06527709960938, + "logps/rejected": -257.9692077636719, + "loss": 0.3809, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0904663801193237, + "rewards/margins": 0.15071111917495728, + "rewards/rejected": 0.9397552609443665, + "step": 8991 + }, + { + "epoch": 0.52, + "learning_rate": 4.866194834623486e-08, + "logits/chosen": -2.034808397293091, + "logits/rejected": -1.9705243110656738, + "logps/chosen": -199.00692749023438, + "logps/rejected": -522.2717895507812, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8243775367736816, + "rewards/margins": 2.7244813442230225, + "rewards/rejected": 0.09989624470472336, + "step": 8992 + }, + { + "epoch": 0.52, + "learning_rate": 4.8652527721144274e-08, + "logits/chosen": -1.8337438106536865, + "logits/rejected": -1.8362064361572266, + "logps/chosen": -39.8957405090332, + "logps/rejected": -234.2779541015625, + "loss": 0.2122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6734329462051392, + "rewards/margins": 4.075186252593994, + "rewards/rejected": -3.4017531871795654, + "step": 8993 + }, + { + "epoch": 0.52, + "learning_rate": 4.864310714392249e-08, + "logits/chosen": -1.9109195470809937, + "logits/rejected": -1.9142378568649292, + "logps/chosen": -201.89944458007812, + "logps/rejected": -345.21197509765625, + "loss": 0.0466, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8279541730880737, + "rewards/margins": 3.3425660133361816, + "rewards/rejected": -1.514611840248108, + "step": 8994 + }, + { + "epoch": 0.52, + "learning_rate": 4.863368661490419e-08, + "logits/chosen": -1.926566481590271, + "logits/rejected": -1.920912265777588, + "logps/chosen": -2.7721943855285645, + "logps/rejected": -134.15762329101562, + "loss": 0.656, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.20420198142528534, + "rewards/margins": -0.16121245920658112, + "rewards/rejected": 0.36541444063186646, + "step": 8995 + }, + { + "epoch": 0.52, + "learning_rate": 4.862426613442402e-08, + "logits/chosen": -1.9899576902389526, + "logits/rejected": -1.9781395196914673, + "logps/chosen": -96.87872314453125, + "logps/rejected": -448.1298828125, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.229689121246338, + "rewards/margins": 6.6819353103637695, + "rewards/rejected": -4.452246189117432, + "step": 8996 + }, + { + "epoch": 0.52, + "learning_rate": 4.861484570281666e-08, + "logits/chosen": -2.0547375679016113, + "logits/rejected": -2.0574865341186523, + "logps/chosen": -192.02972412109375, + "logps/rejected": -306.4887390136719, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6020904779434204, + "rewards/margins": 2.953774929046631, + "rewards/rejected": -1.3516845703125, + "step": 8997 + }, + { + "epoch": 0.52, + "learning_rate": 4.860542532041673e-08, + "logits/chosen": -1.9479765892028809, + "logits/rejected": -1.9454580545425415, + "logps/chosen": -0.001598467119038105, + "logps/rejected": -269.5621337890625, + "loss": 0.3502, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.983288716990501e-05, + "rewards/margins": 4.043471336364746, + "rewards/rejected": -4.043560981750488, + "step": 8998 + }, + { + "epoch": 0.52, + "learning_rate": 4.859600498755893e-08, + "logits/chosen": -1.8285820484161377, + "logits/rejected": -1.8359036445617676, + "logps/chosen": -164.75802612304688, + "logps/rejected": -439.7000427246094, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1055877208709717, + "rewards/margins": 4.837018013000488, + "rewards/rejected": -2.7314300537109375, + "step": 8999 + }, + { + "epoch": 0.52, + "learning_rate": 4.85865847045779e-08, + "logits/chosen": -2.0739147663116455, + "logits/rejected": -2.0739998817443848, + "logps/chosen": -0.0008535226806998253, + "logps/rejected": -261.6446533203125, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6327587218256667e-05, + "rewards/margins": 8.040264129638672, + "rewards/rejected": -8.040247917175293, + "step": 9000 + }, + { + "epoch": 0.52, + "learning_rate": 4.8577164471808295e-08, + "logits/chosen": -1.9193460941314697, + "logits/rejected": -1.9000179767608643, + "logps/chosen": -6.043836037861183e-05, + "logps/rejected": -264.7474060058594, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6053370422450826e-05, + "rewards/margins": 3.5567681789398193, + "rewards/rejected": -3.556732177734375, + "step": 9001 + }, + { + "epoch": 0.52, + "learning_rate": 4.856774428958475e-08, + "logits/chosen": -1.8480198383331299, + "logits/rejected": -1.853959560394287, + "logps/chosen": -229.43475341796875, + "logps/rejected": -321.242431640625, + "loss": 0.1648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.347570776939392, + "rewards/margins": 1.343505859375, + "rewards/rejected": 0.0040649413131177425, + "step": 9002 + }, + { + "epoch": 0.52, + "learning_rate": 4.855832415824196e-08, + "logits/chosen": -1.8695666790008545, + "logits/rejected": -1.9809364080429077, + "logps/chosen": -269.8646240234375, + "logps/rejected": -417.5225524902344, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.171130418777466, + "rewards/margins": 3.2620058059692383, + "rewards/rejected": -1.090875267982483, + "step": 9003 + }, + { + "epoch": 0.52, + "learning_rate": 4.8548904078114526e-08, + "logits/chosen": -1.9419528245925903, + "logits/rejected": -1.9426593780517578, + "logps/chosen": -0.1817324012517929, + "logps/rejected": -182.17501831054688, + "loss": 0.4324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007101668510586023, + "rewards/margins": 1.525334358215332, + "rewards/rejected": -1.5182327032089233, + "step": 9004 + }, + { + "epoch": 0.52, + "learning_rate": 4.853948404953715e-08, + "logits/chosen": -2.022578477859497, + "logits/rejected": -2.0134589672088623, + "logps/chosen": -79.28633117675781, + "logps/rejected": -339.7373046875, + "loss": 0.1014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3950668573379517, + "rewards/margins": 4.5213518142700195, + "rewards/rejected": -3.1262848377227783, + "step": 9005 + }, + { + "epoch": 0.52, + "learning_rate": 4.853006407284441e-08, + "logits/chosen": -1.995387315750122, + "logits/rejected": -1.983046293258667, + "logps/chosen": -28.90704345703125, + "logps/rejected": -274.574951171875, + "loss": 0.6168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9001766443252563, + "rewards/margins": 2.050839424133301, + "rewards/rejected": -2.9510161876678467, + "step": 9006 + }, + { + "epoch": 0.52, + "learning_rate": 4.852064414837101e-08, + "logits/chosen": -1.7949317693710327, + "logits/rejected": -1.7825883626937866, + "logps/chosen": -289.71929931640625, + "logps/rejected": -473.67510986328125, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0265930891036987, + "rewards/margins": 0.5955322980880737, + "rewards/rejected": 0.431060791015625, + "step": 9007 + }, + { + "epoch": 0.52, + "learning_rate": 4.851122427645154e-08, + "logits/chosen": -1.9497196674346924, + "logits/rejected": -1.9476279020309448, + "logps/chosen": -3.075586573686451e-05, + "logps/rejected": -85.51333618164062, + "loss": 0.4394, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0967174830511794e-06, + "rewards/margins": 1.5311710834503174, + "rewards/rejected": -1.5311721563339233, + "step": 9008 + }, + { + "epoch": 0.52, + "learning_rate": 4.8501804457420685e-08, + "logits/chosen": -2.0703208446502686, + "logits/rejected": -2.0650548934936523, + "logps/chosen": -25.435562133789062, + "logps/rejected": -176.60342407226562, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3379587233066559, + "rewards/margins": 2.0953471660614014, + "rewards/rejected": -1.7573883533477783, + "step": 9009 + }, + { + "epoch": 0.52, + "learning_rate": 4.8492384691613054e-08, + "logits/chosen": -1.8576852083206177, + "logits/rejected": -1.8632404804229736, + "logps/chosen": -67.18057250976562, + "logps/rejected": -180.1360626220703, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23501205444335938, + "rewards/margins": 1.5472923517227173, + "rewards/rejected": -1.312280297279358, + "step": 9010 + }, + { + "epoch": 0.52, + "learning_rate": 4.84829649793633e-08, + "logits/chosen": -1.927595615386963, + "logits/rejected": -1.9294713735580444, + "logps/chosen": -10.480657577514648, + "logps/rejected": -195.67996215820312, + "loss": 0.4074, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07582016289234161, + "rewards/margins": 1.4618736505508423, + "rewards/rejected": -1.386053442955017, + "step": 9011 + }, + { + "epoch": 0.52, + "learning_rate": 4.847354532100605e-08, + "logits/chosen": -1.822922706604004, + "logits/rejected": -1.8282899856567383, + "logps/chosen": -26.328006744384766, + "logps/rejected": -212.65341186523438, + "loss": 0.3346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9344425201416016, + "rewards/margins": 0.918521523475647, + "rewards/rejected": 0.01592102088034153, + "step": 9012 + }, + { + "epoch": 0.52, + "learning_rate": 4.846412571687593e-08, + "logits/chosen": -1.8772011995315552, + "logits/rejected": -1.8763713836669922, + "logps/chosen": -22.802528381347656, + "logps/rejected": -110.00517272949219, + "loss": 0.5251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1473182737827301, + "rewards/margins": 0.5800803899765015, + "rewards/rejected": -0.43276214599609375, + "step": 9013 + }, + { + "epoch": 0.52, + "learning_rate": 4.8454706167307594e-08, + "logits/chosen": -1.9661628007888794, + "logits/rejected": -1.9593918323516846, + "logps/chosen": -196.5615234375, + "logps/rejected": -279.61822509765625, + "loss": 0.3545, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.397247314453125, + "rewards/margins": 0.19170832633972168, + "rewards/rejected": 1.2055389881134033, + "step": 9014 + }, + { + "epoch": 0.52, + "learning_rate": 4.844528667263563e-08, + "logits/chosen": -1.8293339014053345, + "logits/rejected": -1.8313627243041992, + "logps/chosen": -37.64872741699219, + "logps/rejected": -117.56073760986328, + "loss": 0.9203, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.039021372795105, + "rewards/margins": 0.08579325675964355, + "rewards/rejected": -1.1248146295547485, + "step": 9015 + }, + { + "epoch": 0.52, + "learning_rate": 4.8435867233194713e-08, + "logits/chosen": -1.8142552375793457, + "logits/rejected": -1.81692636013031, + "logps/chosen": -251.94772338867188, + "logps/rejected": -388.1506652832031, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3467986583709717, + "rewards/margins": 2.920758008956909, + "rewards/rejected": -0.5739593505859375, + "step": 9016 + }, + { + "epoch": 0.52, + "learning_rate": 4.8426447849319427e-08, + "logits/chosen": -2.039224863052368, + "logits/rejected": -2.038926124572754, + "logps/chosen": -0.00707932747900486, + "logps/rejected": -224.16281127929688, + "loss": 0.3728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021340149396564811, + "rewards/margins": 2.800166606903076, + "rewards/rejected": -2.800379991531372, + "step": 9017 + }, + { + "epoch": 0.52, + "learning_rate": 4.8417028521344417e-08, + "logits/chosen": -1.8844186067581177, + "logits/rejected": -1.8765718936920166, + "logps/chosen": -33.08380126953125, + "logps/rejected": -199.55133056640625, + "loss": 0.3232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24611397087574005, + "rewards/margins": 2.7439565658569336, + "rewards/rejected": -2.49784255027771, + "step": 9018 + }, + { + "epoch": 0.52, + "learning_rate": 4.8407609249604294e-08, + "logits/chosen": -1.9428144693374634, + "logits/rejected": -1.9345203638076782, + "logps/chosen": -44.40727233886719, + "logps/rejected": -162.2786865234375, + "loss": 0.5935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32451364398002625, + "rewards/margins": 0.04398995637893677, + "rewards/rejected": 0.2805236876010895, + "step": 9019 + }, + { + "epoch": 0.52, + "learning_rate": 4.8398190034433685e-08, + "logits/chosen": -1.889691710472107, + "logits/rejected": -1.8595987558364868, + "logps/chosen": -143.1426239013672, + "logps/rejected": -237.6732940673828, + "loss": 0.3533, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3548675775527954, + "rewards/margins": 0.5220779776573181, + "rewards/rejected": 0.8327895998954773, + "step": 9020 + }, + { + "epoch": 0.52, + "learning_rate": 4.838877087616719e-08, + "logits/chosen": -1.7868188619613647, + "logits/rejected": -1.896148443222046, + "logps/chosen": -308.10394287109375, + "logps/rejected": -298.9143981933594, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.966888427734375, + "rewards/margins": 2.771017551422119, + "rewards/rejected": -0.8041290640830994, + "step": 9021 + }, + { + "epoch": 0.53, + "learning_rate": 4.8379351775139446e-08, + "logits/chosen": -1.7871661186218262, + "logits/rejected": -1.786516785621643, + "logps/chosen": -24.642051696777344, + "logps/rejected": -204.71578979492188, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3686378598213196, + "rewards/margins": 2.6637701988220215, + "rewards/rejected": -2.2951323986053467, + "step": 9022 + }, + { + "epoch": 0.53, + "learning_rate": 4.836993273168504e-08, + "logits/chosen": -1.9778103828430176, + "logits/rejected": -1.95931875705719, + "logps/chosen": -128.0861053466797, + "logps/rejected": -198.09939575195312, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4253342151641846, + "rewards/margins": 0.24702298641204834, + "rewards/rejected": 1.1783112287521362, + "step": 9023 + }, + { + "epoch": 0.53, + "learning_rate": 4.8360513746138623e-08, + "logits/chosen": -1.5729478597640991, + "logits/rejected": -1.5735669136047363, + "logps/chosen": -157.90113830566406, + "logps/rejected": -448.09716796875, + "loss": 0.158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8303726315498352, + "rewards/margins": 3.0205612182617188, + "rewards/rejected": -2.1901886463165283, + "step": 9024 + }, + { + "epoch": 0.53, + "learning_rate": 4.835109481883474e-08, + "logits/chosen": -1.853147029876709, + "logits/rejected": -1.9226421117782593, + "logps/chosen": -237.3950958251953, + "logps/rejected": -119.5849380493164, + "loss": 0.1159, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.739363193511963, + "rewards/margins": 1.4258767366409302, + "rewards/rejected": 1.3134864568710327, + "step": 9025 + }, + { + "epoch": 0.53, + "learning_rate": 4.834167595010807e-08, + "logits/chosen": -1.9429290294647217, + "logits/rejected": -1.9320570230484009, + "logps/chosen": -286.62371826171875, + "logps/rejected": -440.6640625, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.71783447265625, + "rewards/margins": 3.395739793777466, + "rewards/rejected": -1.6779053211212158, + "step": 9026 + }, + { + "epoch": 0.53, + "learning_rate": 4.833225714029315e-08, + "logits/chosen": -2.1231608390808105, + "logits/rejected": -2.1182751655578613, + "logps/chosen": -25.659019470214844, + "logps/rejected": -298.04571533203125, + "loss": 0.2015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4519050717353821, + "rewards/margins": 6.962759017944336, + "rewards/rejected": -6.5108537673950195, + "step": 9027 + }, + { + "epoch": 0.53, + "learning_rate": 4.832283838972466e-08, + "logits/chosen": -2.019444704055786, + "logits/rejected": -2.018853187561035, + "logps/chosen": -15.711938858032227, + "logps/rejected": -46.21499252319336, + "loss": 0.5816, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.336625874042511, + "rewards/margins": 0.09576016664505005, + "rewards/rejected": 0.24086570739746094, + "step": 9028 + }, + { + "epoch": 0.53, + "learning_rate": 4.831341969873711e-08, + "logits/chosen": -1.8939275741577148, + "logits/rejected": -1.8894398212432861, + "logps/chosen": -33.51113510131836, + "logps/rejected": -117.35505676269531, + "loss": 0.3237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6090855002403259, + "rewards/margins": 1.776285171508789, + "rewards/rejected": -1.167199730873108, + "step": 9029 + }, + { + "epoch": 0.53, + "learning_rate": 4.8304001067665165e-08, + "logits/chosen": -1.8213789463043213, + "logits/rejected": -1.8082823753356934, + "logps/chosen": -0.00013410425162874162, + "logps/rejected": -208.05332946777344, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.988158126361668e-06, + "rewards/margins": 6.391504764556885, + "rewards/rejected": -6.3914947509765625, + "step": 9030 + }, + { + "epoch": 0.53, + "learning_rate": 4.8294582496843384e-08, + "logits/chosen": -1.980785608291626, + "logits/rejected": -1.973887324333191, + "logps/chosen": -10.436734199523926, + "logps/rejected": -83.56736755371094, + "loss": 0.5954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13093452155590057, + "rewards/margins": 0.29932212829589844, + "rewards/rejected": -0.16838760673999786, + "step": 9031 + }, + { + "epoch": 0.53, + "learning_rate": 4.828516398660639e-08, + "logits/chosen": -1.822708249092102, + "logits/rejected": -1.8213967084884644, + "logps/chosen": -33.58474349975586, + "logps/rejected": -228.2984619140625, + "loss": 0.2618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4357711970806122, + "rewards/margins": 2.9122209548950195, + "rewards/rejected": -2.476449728012085, + "step": 9032 + }, + { + "epoch": 0.53, + "learning_rate": 4.8275745537288744e-08, + "logits/chosen": -2.0114409923553467, + "logits/rejected": -2.003004550933838, + "logps/chosen": -73.97801208496094, + "logps/rejected": -301.129150390625, + "loss": 0.0852, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4243385791778564, + "rewards/margins": 4.429642677307129, + "rewards/rejected": -3.0053040981292725, + "step": 9033 + }, + { + "epoch": 0.53, + "learning_rate": 4.8266327149225055e-08, + "logits/chosen": -2.0460941791534424, + "logits/rejected": -2.046113967895508, + "logps/chosen": -29.801572799682617, + "logps/rejected": -118.11689758300781, + "loss": 0.5093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0769205093383789, + "rewards/margins": 0.9059866070747375, + "rewards/rejected": -0.9829071164131165, + "step": 9034 + }, + { + "epoch": 0.53, + "learning_rate": 4.825690882274991e-08, + "logits/chosen": -1.8256224393844604, + "logits/rejected": -1.8354837894439697, + "logps/chosen": -2.791721820831299, + "logps/rejected": -177.0880126953125, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1221713051199913, + "rewards/margins": 2.437073230743408, + "rewards/rejected": -2.314901828765869, + "step": 9035 + }, + { + "epoch": 0.53, + "learning_rate": 4.824749055819789e-08, + "logits/chosen": -2.030431032180786, + "logits/rejected": -1.9939818382263184, + "logps/chosen": -142.41094970703125, + "logps/rejected": -376.1121826171875, + "loss": 0.0935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7508270740509033, + "rewards/margins": 2.162768602371216, + "rewards/rejected": -0.4119415283203125, + "step": 9036 + }, + { + "epoch": 0.53, + "learning_rate": 4.823807235590358e-08, + "logits/chosen": -1.7286581993103027, + "logits/rejected": -1.7374948263168335, + "logps/chosen": -19.907428741455078, + "logps/rejected": -223.08270263671875, + "loss": 0.3897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32394906878471375, + "rewards/margins": 1.0640308856964111, + "rewards/rejected": -0.740081787109375, + "step": 9037 + }, + { + "epoch": 0.53, + "learning_rate": 4.822865421620155e-08, + "logits/chosen": -2.0562245845794678, + "logits/rejected": -2.0476491451263428, + "logps/chosen": -42.91899490356445, + "logps/rejected": -145.61325073242188, + "loss": 0.2065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7506992220878601, + "rewards/margins": 3.0716519355773926, + "rewards/rejected": -2.3209526538848877, + "step": 9038 + }, + { + "epoch": 0.53, + "learning_rate": 4.82192361394264e-08, + "logits/chosen": -1.9220205545425415, + "logits/rejected": -1.9254783391952515, + "logps/chosen": -197.8184814453125, + "logps/rejected": -365.65887451171875, + "loss": 0.0653, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6837310791015625, + "rewards/margins": 2.900351047515869, + "rewards/rejected": -1.216619849205017, + "step": 9039 + }, + { + "epoch": 0.53, + "learning_rate": 4.820981812591268e-08, + "logits/chosen": -2.0256400108337402, + "logits/rejected": -2.040609836578369, + "logps/chosen": -149.925048828125, + "logps/rejected": -325.2026672363281, + "loss": 0.1177, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.718170166015625, + "rewards/margins": 1.577752709388733, + "rewards/rejected": 0.14041748642921448, + "step": 9040 + }, + { + "epoch": 0.53, + "learning_rate": 4.8200400175994994e-08, + "logits/chosen": -1.7408086061477661, + "logits/rejected": -1.6894447803497314, + "logps/chosen": -319.109375, + "logps/rejected": -433.4131774902344, + "loss": 0.1348, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.17411208152771, + "rewards/margins": 1.644058346748352, + "rewards/rejected": 0.5300537347793579, + "step": 9041 + }, + { + "epoch": 0.53, + "learning_rate": 4.819098229000787e-08, + "logits/chosen": -2.135261058807373, + "logits/rejected": -2.118894100189209, + "logps/chosen": -0.00028309441404417157, + "logps/rejected": -144.7028045654297, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2299692520609824e-06, + "rewards/margins": 2.9445066452026367, + "rewards/rejected": -2.944509983062744, + "step": 9042 + }, + { + "epoch": 0.53, + "learning_rate": 4.818156446828594e-08, + "logits/chosen": -2.1206841468811035, + "logits/rejected": -2.1131348609924316, + "logps/chosen": -30.19401741027832, + "logps/rejected": -237.0177001953125, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15724240243434906, + "rewards/margins": 3.4487767219543457, + "rewards/rejected": -3.291534423828125, + "step": 9043 + }, + { + "epoch": 0.53, + "learning_rate": 4.817214671116369e-08, + "logits/chosen": -2.133023738861084, + "logits/rejected": -2.081282138824463, + "logps/chosen": -151.1875762939453, + "logps/rejected": -300.11700439453125, + "loss": 0.1873, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5606125593185425, + "rewards/margins": 1.55998694896698, + "rewards/rejected": 0.0006256103515625, + "step": 9044 + }, + { + "epoch": 0.53, + "learning_rate": 4.816272901897577e-08, + "logits/chosen": -2.028428077697754, + "logits/rejected": -2.0135579109191895, + "logps/chosen": -85.32633972167969, + "logps/rejected": -206.07608032226562, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0055679320357739925, + "rewards/margins": 1.4570664167404175, + "rewards/rejected": -1.4626343250274658, + "step": 9045 + }, + { + "epoch": 0.53, + "learning_rate": 4.8153311392056675e-08, + "logits/chosen": -2.2448439598083496, + "logits/rejected": -2.2434515953063965, + "logps/chosen": -20.854394912719727, + "logps/rejected": -69.85243225097656, + "loss": 0.4352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09281330555677414, + "rewards/margins": 1.2013505697250366, + "rewards/rejected": -1.1085373163223267, + "step": 9046 + }, + { + "epoch": 0.53, + "learning_rate": 4.8143893830741016e-08, + "logits/chosen": -1.9544965028762817, + "logits/rejected": -1.9539365768432617, + "logps/chosen": -12.224552154541016, + "logps/rejected": -143.1707763671875, + "loss": 0.3507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22220097482204437, + "rewards/margins": 2.3441171646118164, + "rewards/rejected": -2.1219162940979004, + "step": 9047 + }, + { + "epoch": 0.53, + "learning_rate": 4.81344763353633e-08, + "logits/chosen": -1.8192764520645142, + "logits/rejected": -1.8070313930511475, + "logps/chosen": -180.4080810546875, + "logps/rejected": -405.76080322265625, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9049408435821533, + "rewards/margins": 3.4017090797424316, + "rewards/rejected": -1.4967682361602783, + "step": 9048 + }, + { + "epoch": 0.53, + "learning_rate": 4.812505890625814e-08, + "logits/chosen": -2.017305850982666, + "logits/rejected": -2.0053539276123047, + "logps/chosen": -0.052915140986442566, + "logps/rejected": -123.72145080566406, + "loss": 0.4915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013001605868339539, + "rewards/margins": 0.8827617764472961, + "rewards/rejected": -0.8697601556777954, + "step": 9049 + }, + { + "epoch": 0.53, + "learning_rate": 4.8115641543760034e-08, + "logits/chosen": -1.7450401782989502, + "logits/rejected": -1.725988507270813, + "logps/chosen": -211.26730346679688, + "logps/rejected": -251.87777709960938, + "loss": 0.3909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.282373070716858, + "rewards/margins": 0.17966008186340332, + "rewards/rejected": 1.1027129888534546, + "step": 9050 + }, + { + "epoch": 0.53, + "learning_rate": 4.810622424820357e-08, + "logits/chosen": -1.8835233449935913, + "logits/rejected": -1.8768130540847778, + "logps/chosen": -214.38853454589844, + "logps/rejected": -268.12939453125, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5159012079238892, + "rewards/margins": 0.6270004510879517, + "rewards/rejected": 0.8889007568359375, + "step": 9051 + }, + { + "epoch": 0.53, + "learning_rate": 4.809680701992327e-08, + "logits/chosen": -2.1724863052368164, + "logits/rejected": -2.156639814376831, + "logps/chosen": -52.390159606933594, + "logps/rejected": -322.740478515625, + "loss": 0.1179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1927093267440796, + "rewards/margins": 3.420309543609619, + "rewards/rejected": -2.22760009765625, + "step": 9052 + }, + { + "epoch": 0.53, + "learning_rate": 4.808738985925372e-08, + "logits/chosen": -1.710578203201294, + "logits/rejected": -1.6993155479431152, + "logps/chosen": -17.50042724609375, + "logps/rejected": -108.78813934326172, + "loss": 0.468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22501201927661896, + "rewards/margins": 0.8716863393783569, + "rewards/rejected": -0.6466743350028992, + "step": 9053 + }, + { + "epoch": 0.53, + "learning_rate": 4.807797276652942e-08, + "logits/chosen": -1.7501846551895142, + "logits/rejected": -1.7453665733337402, + "logps/chosen": -100.3553237915039, + "logps/rejected": -350.20452880859375, + "loss": 0.2284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35922929644584656, + "rewards/margins": 4.991928577423096, + "rewards/rejected": -4.632699489593506, + "step": 9054 + }, + { + "epoch": 0.53, + "learning_rate": 4.806855574208493e-08, + "logits/chosen": -1.8427908420562744, + "logits/rejected": -1.8460878133773804, + "logps/chosen": -16.53547477722168, + "logps/rejected": -114.88668060302734, + "loss": 0.4655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3835003077983856, + "rewards/margins": 0.4583393335342407, + "rewards/rejected": -0.0748390182852745, + "step": 9055 + }, + { + "epoch": 0.53, + "learning_rate": 4.805913878625479e-08, + "logits/chosen": -2.0450339317321777, + "logits/rejected": -2.035041093826294, + "logps/chosen": -27.26464080810547, + "logps/rejected": -148.39666748046875, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2889089584350586, + "rewards/margins": 1.5593817234039307, + "rewards/rejected": -1.270472764968872, + "step": 9056 + }, + { + "epoch": 0.53, + "learning_rate": 4.804972189937353e-08, + "logits/chosen": -1.7471650838851929, + "logits/rejected": -1.7657891511917114, + "logps/chosen": -373.1871337890625, + "logps/rejected": -632.23681640625, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4692139625549316, + "rewards/margins": 3.9246950149536133, + "rewards/rejected": -1.455480933189392, + "step": 9057 + }, + { + "epoch": 0.53, + "learning_rate": 4.80403050817757e-08, + "logits/chosen": -2.0825929641723633, + "logits/rejected": -2.0500106811523438, + "logps/chosen": -142.17198181152344, + "logps/rejected": -217.3153533935547, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1462677717208862, + "rewards/margins": 0.9300217628479004, + "rewards/rejected": 0.21624603867530823, + "step": 9058 + }, + { + "epoch": 0.53, + "learning_rate": 4.8030888333795805e-08, + "logits/chosen": -2.053863048553467, + "logits/rejected": -2.048678398132324, + "logps/chosen": -4.970953887095675e-05, + "logps/rejected": -70.15486145019531, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6689176618456258e-06, + "rewards/margins": 2.029756784439087, + "rewards/rejected": -2.029755115509033, + "step": 9059 + }, + { + "epoch": 0.53, + "learning_rate": 4.8021471655768406e-08, + "logits/chosen": -2.023207664489746, + "logits/rejected": -2.015036106109619, + "logps/chosen": -32.38813018798828, + "logps/rejected": -188.251220703125, + "loss": 0.6287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01094741839915514, + "rewards/margins": 0.25755083560943604, + "rewards/rejected": -0.2684982419013977, + "step": 9060 + }, + { + "epoch": 0.53, + "learning_rate": 4.8012055048027993e-08, + "logits/chosen": -1.6178348064422607, + "logits/rejected": -1.6113862991333008, + "logps/chosen": -76.95413208007812, + "logps/rejected": -204.64553833007812, + "loss": 0.1316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1718437671661377, + "rewards/margins": 2.485964298248291, + "rewards/rejected": -1.3141205310821533, + "step": 9061 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002638510909125e-08, + "logits/chosen": -1.8450478315353394, + "logits/rejected": -1.8526945114135742, + "logps/chosen": -162.24667358398438, + "logps/rejected": -287.2715148925781, + "loss": 0.1557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0902312994003296, + "rewards/margins": 1.612030029296875, + "rewards/rejected": -0.5217987298965454, + "step": 9062 + }, + { + "epoch": 0.53, + "learning_rate": 4.799322204474628e-08, + "logits/chosen": -2.024690866470337, + "logits/rejected": -2.020080804824829, + "logps/chosen": -31.5968074798584, + "logps/rejected": -231.75836181640625, + "loss": 0.2009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6123811602592468, + "rewards/margins": 3.3703691959381104, + "rewards/rejected": -2.7579879760742188, + "step": 9063 + }, + { + "epoch": 0.53, + "learning_rate": 4.798380564987404e-08, + "logits/chosen": -1.717921495437622, + "logits/rejected": -1.7185181379318237, + "logps/chosen": -0.29280054569244385, + "logps/rejected": -67.08280944824219, + "loss": 0.7059, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012883462011814117, + "rewards/margins": -0.0937764048576355, + "rewards/rejected": 0.08089294284582138, + "step": 9064 + }, + { + "epoch": 0.53, + "learning_rate": 4.7974389326626855e-08, + "logits/chosen": -1.7974553108215332, + "logits/rejected": -1.804528832435608, + "logps/chosen": -197.2391815185547, + "logps/rejected": -452.4119873046875, + "loss": 0.0683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1361740827560425, + "rewards/margins": 3.6464157104492188, + "rewards/rejected": -2.510241746902466, + "step": 9065 + }, + { + "epoch": 0.53, + "learning_rate": 4.79649730753393e-08, + "logits/chosen": -1.9788628816604614, + "logits/rejected": -1.9737348556518555, + "logps/chosen": -9.29804801940918, + "logps/rejected": -57.07307434082031, + "loss": 0.9399, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.23893523216247559, + "rewards/margins": -0.7127259969711304, + "rewards/rejected": 0.4737907350063324, + "step": 9066 + }, + { + "epoch": 0.53, + "learning_rate": 4.795555689634582e-08, + "logits/chosen": -1.9531400203704834, + "logits/rejected": -1.9560892581939697, + "logps/chosen": -55.41258239746094, + "logps/rejected": -204.58517456054688, + "loss": 0.2172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7304542660713196, + "rewards/margins": 2.429062604904175, + "rewards/rejected": -1.6986083984375, + "step": 9067 + }, + { + "epoch": 0.53, + "learning_rate": 4.7946140789981e-08, + "logits/chosen": -1.860581874847412, + "logits/rejected": -1.8475193977355957, + "logps/chosen": -200.18081665039062, + "logps/rejected": -295.46954345703125, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.072656273841858, + "rewards/margins": 0.7150604724884033, + "rewards/rejected": 0.357595831155777, + "step": 9068 + }, + { + "epoch": 0.53, + "learning_rate": 4.7936724756579264e-08, + "logits/chosen": -2.028698205947876, + "logits/rejected": -2.0173721313476562, + "logps/chosen": -264.310791015625, + "logps/rejected": -537.036865234375, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.647613525390625, + "rewards/margins": 7.150643825531006, + "rewards/rejected": -4.503030300140381, + "step": 9069 + }, + { + "epoch": 0.53, + "learning_rate": 4.7927308796475194e-08, + "logits/chosen": -1.6525224447250366, + "logits/rejected": -1.6781214475631714, + "logps/chosen": -185.80091857910156, + "logps/rejected": -452.9802551269531, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3268661499023438, + "rewards/margins": 6.350139141082764, + "rewards/rejected": -4.02327299118042, + "step": 9070 + }, + { + "epoch": 0.53, + "learning_rate": 4.791789291000323e-08, + "logits/chosen": -1.8732991218566895, + "logits/rejected": -1.869030475616455, + "logps/chosen": -32.53779220581055, + "logps/rejected": -185.2352294921875, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5883216857910156, + "rewards/margins": 0.9212639331817627, + "rewards/rejected": -0.3329422175884247, + "step": 9071 + }, + { + "epoch": 0.53, + "learning_rate": 4.7908477097497904e-08, + "logits/chosen": -1.9426647424697876, + "logits/rejected": -1.9363938570022583, + "logps/chosen": -32.15965270996094, + "logps/rejected": -193.8822784423828, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12789078056812286, + "rewards/margins": 1.469403862953186, + "rewards/rejected": -1.3415130376815796, + "step": 9072 + }, + { + "epoch": 0.53, + "learning_rate": 4.7899061359293695e-08, + "logits/chosen": -1.9574629068374634, + "logits/rejected": -1.9427686929702759, + "logps/chosen": -277.26556396484375, + "logps/rejected": -395.3658447265625, + "loss": 0.102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0706543922424316, + "rewards/margins": 2.243865966796875, + "rewards/rejected": -0.17321167886257172, + "step": 9073 + }, + { + "epoch": 0.53, + "learning_rate": 4.78896456957251e-08, + "logits/chosen": -1.9936915636062622, + "logits/rejected": -1.9741204977035522, + "logps/chosen": -24.82563018798828, + "logps/rejected": -139.95388793945312, + "loss": 0.5301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29639339447021484, + "rewards/margins": 0.3719121813774109, + "rewards/rejected": -0.07551880180835724, + "step": 9074 + }, + { + "epoch": 0.53, + "learning_rate": 4.788023010712663e-08, + "logits/chosen": -1.9483673572540283, + "logits/rejected": -1.9466098546981812, + "logps/chosen": -12.500304222106934, + "logps/rejected": -173.8700408935547, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10001163929700851, + "rewards/margins": 3.1272988319396973, + "rewards/rejected": -3.027287244796753, + "step": 9075 + }, + { + "epoch": 0.53, + "learning_rate": 4.787081459383274e-08, + "logits/chosen": -1.814373254776001, + "logits/rejected": -1.7918891906738281, + "logps/chosen": -181.1114044189453, + "logps/rejected": -304.52099609375, + "loss": 0.1613, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.484106421470642, + "rewards/margins": 1.5743255615234375, + "rewards/rejected": -0.09021911770105362, + "step": 9076 + }, + { + "epoch": 0.53, + "learning_rate": 4.7861399156177946e-08, + "logits/chosen": -1.9505878686904907, + "logits/rejected": -1.9459325075149536, + "logps/chosen": -34.506195068359375, + "logps/rejected": -217.43763732910156, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13259200751781464, + "rewards/margins": 2.4427406787872314, + "rewards/rejected": -2.3101487159729004, + "step": 9077 + }, + { + "epoch": 0.53, + "learning_rate": 4.78519837944967e-08, + "logits/chosen": -1.8030598163604736, + "logits/rejected": -1.8046836853027344, + "logps/chosen": -44.692291259765625, + "logps/rejected": -122.17567443847656, + "loss": 0.5294, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.9061020016670227, + "rewards/margins": -0.08512574434280396, + "rewards/rejected": 0.9912277460098267, + "step": 9078 + }, + { + "epoch": 0.53, + "learning_rate": 4.784256850912351e-08, + "logits/chosen": -1.9421055316925049, + "logits/rejected": -1.8825749158859253, + "logps/chosen": -222.11912536621094, + "logps/rejected": -412.5520935058594, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.323887586593628, + "rewards/margins": 2.0962843894958496, + "rewards/rejected": 0.22760315239429474, + "step": 9079 + }, + { + "epoch": 0.53, + "learning_rate": 4.783315330039283e-08, + "logits/chosen": -1.8575294017791748, + "logits/rejected": -1.911824107170105, + "logps/chosen": -161.16163635253906, + "logps/rejected": -365.43231201171875, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4898667335510254, + "rewards/margins": 2.3431992530822754, + "rewards/rejected": 0.14666748046875, + "step": 9080 + }, + { + "epoch": 0.53, + "learning_rate": 4.782373816863915e-08, + "logits/chosen": -1.9341486692428589, + "logits/rejected": -1.9410954713821411, + "logps/chosen": -7.001587867736816, + "logps/rejected": -147.93563842773438, + "loss": 0.3832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04945249482989311, + "rewards/margins": 3.0031025409698486, + "rewards/rejected": -3.0525550842285156, + "step": 9081 + }, + { + "epoch": 0.53, + "learning_rate": 4.781432311419693e-08, + "logits/chosen": -1.812648057937622, + "logits/rejected": -1.8500401973724365, + "logps/chosen": -139.54248046875, + "logps/rejected": -373.8193359375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7223541736602783, + "rewards/margins": 4.6299285888671875, + "rewards/rejected": -2.907574415206909, + "step": 9082 + }, + { + "epoch": 0.53, + "learning_rate": 4.780490813740065e-08, + "logits/chosen": -1.9431294202804565, + "logits/rejected": -1.9614914655685425, + "logps/chosen": -239.7748565673828, + "logps/rejected": -311.9169006347656, + "loss": 0.0434, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6563096046447754, + "rewards/margins": 3.038701057434082, + "rewards/rejected": -0.3823913633823395, + "step": 9083 + }, + { + "epoch": 0.53, + "learning_rate": 4.779549323858476e-08, + "logits/chosen": -2.0189335346221924, + "logits/rejected": -1.8903058767318726, + "logps/chosen": -187.26382446289062, + "logps/rejected": -541.3902587890625, + "loss": 0.2586, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4439605474472046, + "rewards/margins": 0.7274810671806335, + "rewards/rejected": 0.716479480266571, + "step": 9084 + }, + { + "epoch": 0.53, + "learning_rate": 4.778607841808375e-08, + "logits/chosen": -2.0976710319519043, + "logits/rejected": -2.0929009914398193, + "logps/chosen": -67.64630126953125, + "logps/rejected": -295.9422912597656, + "loss": 0.1051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.000250220298767, + "rewards/margins": 4.6807098388671875, + "rewards/rejected": -3.68045973777771, + "step": 9085 + }, + { + "epoch": 0.53, + "learning_rate": 4.7776663676232036e-08, + "logits/chosen": -1.9164677858352661, + "logits/rejected": -1.915422797203064, + "logps/chosen": -11.434653282165527, + "logps/rejected": -154.76126098632812, + "loss": 0.5528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3265272080898285, + "rewards/margins": 1.5818498134613037, + "rewards/rejected": -1.9083770513534546, + "step": 9086 + }, + { + "epoch": 0.53, + "learning_rate": 4.776724901336414e-08, + "logits/chosen": -1.9078887701034546, + "logits/rejected": -1.9109137058258057, + "logps/chosen": -212.20367431640625, + "logps/rejected": -297.0792236328125, + "loss": 0.0622, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.798370361328125, + "rewards/margins": 2.382251024246216, + "rewards/rejected": -0.583880603313446, + "step": 9087 + }, + { + "epoch": 0.53, + "learning_rate": 4.7757834429814445e-08, + "logits/chosen": -1.891248106956482, + "logits/rejected": -1.8879278898239136, + "logps/chosen": -10.526741027832031, + "logps/rejected": -373.0556945800781, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05085411295294762, + "rewards/margins": 2.6026761531829834, + "rewards/rejected": -2.5518219470977783, + "step": 9088 + }, + { + "epoch": 0.53, + "learning_rate": 4.774841992591748e-08, + "logits/chosen": -1.9235353469848633, + "logits/rejected": -1.9303034543991089, + "logps/chosen": -250.15731811523438, + "logps/rejected": -377.5216064453125, + "loss": 0.0471, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5356018543243408, + "rewards/margins": 3.8596436977386475, + "rewards/rejected": -2.3240418434143066, + "step": 9089 + }, + { + "epoch": 0.53, + "learning_rate": 4.7739005502007615e-08, + "logits/chosen": -1.9707674980163574, + "logits/rejected": -1.975749135017395, + "logps/chosen": -56.683414459228516, + "logps/rejected": -428.40838623046875, + "loss": 0.2157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.299979031085968, + "rewards/margins": 3.51409649848938, + "rewards/rejected": -3.2141175270080566, + "step": 9090 + }, + { + "epoch": 0.53, + "learning_rate": 4.7729591158419364e-08, + "logits/chosen": -1.8760616779327393, + "logits/rejected": -1.884161353111267, + "logps/chosen": -155.11373901367188, + "logps/rejected": -430.7115478515625, + "loss": 0.0601, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.926824927330017, + "rewards/margins": 2.781655788421631, + "rewards/rejected": -0.8548309206962585, + "step": 9091 + }, + { + "epoch": 0.53, + "learning_rate": 4.7720176895487106e-08, + "logits/chosen": -1.8634859323501587, + "logits/rejected": -1.8340508937835693, + "logps/chosen": -320.08563232421875, + "logps/rejected": -675.0513305664062, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4810121059417725, + "rewards/margins": 3.664917230606079, + "rewards/rejected": -0.18390503525733948, + "step": 9092 + }, + { + "epoch": 0.53, + "learning_rate": 4.7710762713545346e-08, + "logits/chosen": -1.8294575214385986, + "logits/rejected": -1.797217845916748, + "logps/chosen": -223.48654174804688, + "logps/rejected": -300.62640380859375, + "loss": 0.0833, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.183048963546753, + "rewards/margins": 2.5721359252929688, + "rewards/rejected": -0.38908693194389343, + "step": 9093 + }, + { + "epoch": 0.53, + "learning_rate": 4.7701348612928476e-08, + "logits/chosen": -2.045027494430542, + "logits/rejected": -2.037078380584717, + "logps/chosen": -7.789897441864014, + "logps/rejected": -197.6605224609375, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11163153499364853, + "rewards/margins": 2.430494546890259, + "rewards/rejected": -2.3188629150390625, + "step": 9094 + }, + { + "epoch": 0.53, + "learning_rate": 4.769193459397095e-08, + "logits/chosen": -1.955610990524292, + "logits/rejected": -1.894404649734497, + "logps/chosen": -260.47760009765625, + "logps/rejected": -469.1313781738281, + "loss": 0.0476, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2392700910568237, + "rewards/margins": 3.605740547180176, + "rewards/rejected": -2.3664703369140625, + "step": 9095 + }, + { + "epoch": 0.53, + "learning_rate": 4.768252065700721e-08, + "logits/chosen": -1.8553848266601562, + "logits/rejected": -1.781285285949707, + "logps/chosen": -234.52902221679688, + "logps/rejected": -432.9096374511719, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.775839328765869, + "rewards/margins": 3.098602294921875, + "rewards/rejected": -0.322763055562973, + "step": 9096 + }, + { + "epoch": 0.53, + "learning_rate": 4.767310680237165e-08, + "logits/chosen": -1.8329997062683105, + "logits/rejected": -1.7740710973739624, + "logps/chosen": -360.99334716796875, + "logps/rejected": -606.8670654296875, + "loss": 0.0764, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1968750953674316, + "rewards/margins": 2.9679200649261475, + "rewards/rejected": -0.771044909954071, + "step": 9097 + }, + { + "epoch": 0.53, + "learning_rate": 4.766369303039874e-08, + "logits/chosen": -1.8136568069458008, + "logits/rejected": -1.7999080419540405, + "logps/chosen": -177.3038330078125, + "logps/rejected": -192.51678466796875, + "loss": 0.3989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7979156374931335, + "rewards/margins": 0.41294553875923157, + "rewards/rejected": 0.384970098733902, + "step": 9098 + }, + { + "epoch": 0.53, + "learning_rate": 4.765427934142287e-08, + "logits/chosen": -1.8765684366226196, + "logits/rejected": -1.8820087909698486, + "logps/chosen": -0.014215699397027493, + "logps/rejected": -113.30018615722656, + "loss": 0.5408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008158289128914475, + "rewards/margins": 0.7147840261459351, + "rewards/rejected": -0.7155998349189758, + "step": 9099 + }, + { + "epoch": 0.53, + "learning_rate": 4.7644865735778486e-08, + "logits/chosen": -1.9256261587142944, + "logits/rejected": -1.9437922239303589, + "logps/chosen": -181.037353515625, + "logps/rejected": -311.51922607421875, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.437638998031616, + "rewards/margins": 2.0521013736724854, + "rewards/rejected": 0.385537713766098, + "step": 9100 + }, + { + "epoch": 0.53, + "learning_rate": 4.763545221379997e-08, + "logits/chosen": -2.0247886180877686, + "logits/rejected": -2.026448965072632, + "logps/chosen": -135.7039794921875, + "logps/rejected": -352.5258483886719, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9597504138946533, + "rewards/margins": 2.2408509254455566, + "rewards/rejected": -0.28110048174858093, + "step": 9101 + }, + { + "epoch": 0.53, + "learning_rate": 4.7626038775821774e-08, + "logits/chosen": -1.9601218700408936, + "logits/rejected": -1.9937684535980225, + "logps/chosen": -164.1182098388672, + "logps/rejected": -179.29220581054688, + "loss": 0.1417, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.268803358078003, + "rewards/margins": 1.511631727218628, + "rewards/rejected": 0.757171630859375, + "step": 9102 + }, + { + "epoch": 0.53, + "learning_rate": 4.7616625422178286e-08, + "logits/chosen": -1.9065989255905151, + "logits/rejected": -1.9594142436981201, + "logps/chosen": -197.1837158203125, + "logps/rejected": -314.0763244628906, + "loss": 0.0706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4929336309432983, + "rewards/margins": 2.4197678565979004, + "rewards/rejected": -0.9268341064453125, + "step": 9103 + }, + { + "epoch": 0.53, + "learning_rate": 4.7607212153203927e-08, + "logits/chosen": -1.8873597383499146, + "logits/rejected": -1.877555251121521, + "logps/chosen": -129.28131103515625, + "logps/rejected": -195.00433349609375, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7927643060684204, + "rewards/margins": 0.6804168224334717, + "rewards/rejected": 1.1123474836349487, + "step": 9104 + }, + { + "epoch": 0.53, + "learning_rate": 4.7597798969233076e-08, + "logits/chosen": -1.786887526512146, + "logits/rejected": -1.8111687898635864, + "logps/chosen": -208.82183837890625, + "logps/rejected": -337.8116455078125, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9585784673690796, + "rewards/margins": 2.770092725753784, + "rewards/rejected": -0.8115143179893494, + "step": 9105 + }, + { + "epoch": 0.53, + "learning_rate": 4.758838587060019e-08, + "logits/chosen": -1.8804315328598022, + "logits/rejected": -1.8825023174285889, + "logps/chosen": -0.0001543724356452003, + "logps/rejected": -197.5467529296875, + "loss": 0.3968, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2767926566302776e-06, + "rewards/margins": 2.217487335205078, + "rewards/rejected": -2.217489719390869, + "step": 9106 + }, + { + "epoch": 0.53, + "learning_rate": 4.7578972857639596e-08, + "logits/chosen": -1.9541115760803223, + "logits/rejected": -1.9458554983139038, + "logps/chosen": -0.03669101372361183, + "logps/rejected": -220.95669555664062, + "loss": 0.3482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026180909480899572, + "rewards/margins": 4.6270751953125, + "rewards/rejected": -4.629693508148193, + "step": 9107 + }, + { + "epoch": 0.53, + "learning_rate": 4.756955993068577e-08, + "logits/chosen": -1.7661157846450806, + "logits/rejected": -1.7697129249572754, + "logps/chosen": -260.136962890625, + "logps/rejected": -421.549560546875, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.62603759765625, + "rewards/margins": 3.874249219894409, + "rewards/rejected": -2.248211622238159, + "step": 9108 + }, + { + "epoch": 0.53, + "learning_rate": 4.756014709007303e-08, + "logits/chosen": -2.015791893005371, + "logits/rejected": -2.0229415893554688, + "logps/chosen": -15.179681777954102, + "logps/rejected": -136.53341674804688, + "loss": 0.4648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04698772355914116, + "rewards/margins": 1.1183950901031494, + "rewards/rejected": -1.0714073181152344, + "step": 9109 + }, + { + "epoch": 0.53, + "learning_rate": 4.755073433613583e-08, + "logits/chosen": -1.9799748659133911, + "logits/rejected": -1.9766640663146973, + "logps/chosen": -25.87940216064453, + "logps/rejected": -194.82705688476562, + "loss": 0.2522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.518436849117279, + "rewards/margins": 2.8162472248077393, + "rewards/rejected": -2.2978103160858154, + "step": 9110 + }, + { + "epoch": 0.53, + "learning_rate": 4.7541321669208487e-08, + "logits/chosen": -1.9795302152633667, + "logits/rejected": -1.981053113937378, + "logps/chosen": -14.261861801147461, + "logps/rejected": -56.504154205322266, + "loss": 0.4819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17141953110694885, + "rewards/margins": 0.6878235340118408, + "rewards/rejected": -0.5164039731025696, + "step": 9111 + }, + { + "epoch": 0.53, + "learning_rate": 4.7531909089625455e-08, + "logits/chosen": -1.960239291191101, + "logits/rejected": -1.9533201456069946, + "logps/chosen": -148.40878295898438, + "logps/rejected": -306.69091796875, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2802292108535767, + "rewards/margins": 2.2124924659729004, + "rewards/rejected": -0.932263195514679, + "step": 9112 + }, + { + "epoch": 0.53, + "learning_rate": 4.7522496597721074e-08, + "logits/chosen": -1.613358497619629, + "logits/rejected": -1.6026054620742798, + "logps/chosen": -0.015306039713323116, + "logps/rejected": -179.2625274658203, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.568141882074997e-05, + "rewards/margins": 1.770918846130371, + "rewards/rejected": -1.7709945440292358, + "step": 9113 + }, + { + "epoch": 0.53, + "learning_rate": 4.751308419382974e-08, + "logits/chosen": -2.046161413192749, + "logits/rejected": -1.9867929220199585, + "logps/chosen": -230.44085693359375, + "logps/rejected": -428.9562072753906, + "loss": 0.145, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8239411115646362, + "rewards/margins": 1.6067872047424316, + "rewards/rejected": 0.21715393662452698, + "step": 9114 + }, + { + "epoch": 0.53, + "learning_rate": 4.750367187828582e-08, + "logits/chosen": -2.0013184547424316, + "logits/rejected": -1.9967061281204224, + "logps/chosen": -76.7593765258789, + "logps/rejected": -175.4237060546875, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6376075744628906, + "rewards/margins": 1.8165276050567627, + "rewards/rejected": -1.178920030593872, + "step": 9115 + }, + { + "epoch": 0.53, + "learning_rate": 4.749425965142367e-08, + "logits/chosen": -1.8457038402557373, + "logits/rejected": -1.8528813123703003, + "logps/chosen": -250.94456481933594, + "logps/rejected": -351.27490234375, + "loss": 0.0536, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.342512607574463, + "rewards/margins": 2.405095100402832, + "rewards/rejected": 0.9374176263809204, + "step": 9116 + }, + { + "epoch": 0.53, + "learning_rate": 4.7484847513577694e-08, + "logits/chosen": -1.944106101989746, + "logits/rejected": -1.938601016998291, + "logps/chosen": -58.68308639526367, + "logps/rejected": -255.13575744628906, + "loss": 0.2314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0804134607315063, + "rewards/margins": 1.4516949653625488, + "rewards/rejected": -0.3712814450263977, + "step": 9117 + }, + { + "epoch": 0.53, + "learning_rate": 4.747543546508222e-08, + "logits/chosen": -2.113966941833496, + "logits/rejected": -2.1147286891937256, + "logps/chosen": -71.10135650634766, + "logps/rejected": -251.36825561523438, + "loss": 0.2415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48881837725639343, + "rewards/margins": 2.500776767730713, + "rewards/rejected": -2.011958360671997, + "step": 9118 + }, + { + "epoch": 0.53, + "learning_rate": 4.7466023506271646e-08, + "logits/chosen": -1.9985092878341675, + "logits/rejected": -1.9856661558151245, + "logps/chosen": -88.90049743652344, + "logps/rejected": -276.54486083984375, + "loss": 0.1107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0293205976486206, + "rewards/margins": 2.3985252380371094, + "rewards/rejected": -1.3692047595977783, + "step": 9119 + }, + { + "epoch": 0.53, + "learning_rate": 4.74566116374803e-08, + "logits/chosen": -2.2216503620147705, + "logits/rejected": -2.192841053009033, + "logps/chosen": -2.0396101474761963, + "logps/rejected": -147.82460021972656, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11074967682361603, + "rewards/margins": 2.729701280593872, + "rewards/rejected": -2.6189515590667725, + "step": 9120 + }, + { + "epoch": 0.53, + "learning_rate": 4.744719985904256e-08, + "logits/chosen": -1.980500340461731, + "logits/rejected": -1.9089736938476562, + "logps/chosen": -286.8915710449219, + "logps/rejected": -487.0477600097656, + "loss": 0.1474, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6806915998458862, + "rewards/margins": 2.347625732421875, + "rewards/rejected": -0.6669341921806335, + "step": 9121 + }, + { + "epoch": 0.53, + "learning_rate": 4.7437788171292756e-08, + "logits/chosen": -2.061483860015869, + "logits/rejected": -2.0687286853790283, + "logps/chosen": -154.91685485839844, + "logps/rejected": -357.79534912109375, + "loss": 0.0978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4385849237442017, + "rewards/margins": 2.2630691528320312, + "rewards/rejected": -0.8244842886924744, + "step": 9122 + }, + { + "epoch": 0.53, + "learning_rate": 4.742837657456526e-08, + "logits/chosen": -2.01686692237854, + "logits/rejected": -2.0104260444641113, + "logps/chosen": -38.149497985839844, + "logps/rejected": -313.13067626953125, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5293774008750916, + "rewards/margins": 2.9349560737609863, + "rewards/rejected": -2.40557861328125, + "step": 9123 + }, + { + "epoch": 0.53, + "learning_rate": 4.7418965069194394e-08, + "logits/chosen": -2.0466718673706055, + "logits/rejected": -2.0697357654571533, + "logps/chosen": -262.1667785644531, + "logps/rejected": -299.1041564941406, + "loss": 0.0517, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4957427978515625, + "rewards/margins": 2.564404249191284, + "rewards/rejected": -0.06866150349378586, + "step": 9124 + }, + { + "epoch": 0.53, + "learning_rate": 4.740955365551453e-08, + "logits/chosen": -1.7635835409164429, + "logits/rejected": -1.7639575004577637, + "logps/chosen": -101.25142669677734, + "logps/rejected": -226.01348876953125, + "loss": 1.7539, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.4168739318847656, + "rewards/margins": -0.5562477111816406, + "rewards/rejected": -1.860626220703125, + "step": 9125 + }, + { + "epoch": 0.53, + "learning_rate": 4.740014233385997e-08, + "logits/chosen": -2.189377784729004, + "logits/rejected": -2.1680908203125, + "logps/chosen": -9.241009712219238, + "logps/rejected": -236.36138916015625, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029268933460116386, + "rewards/margins": 3.7562673091888428, + "rewards/rejected": -3.785536289215088, + "step": 9126 + }, + { + "epoch": 0.53, + "learning_rate": 4.739073110456509e-08, + "logits/chosen": -1.9782203435897827, + "logits/rejected": -1.9727036952972412, + "logps/chosen": -43.402748107910156, + "logps/rejected": -176.38890075683594, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21888427436351776, + "rewards/margins": 2.266404628753662, + "rewards/rejected": -2.047520399093628, + "step": 9127 + }, + { + "epoch": 0.53, + "learning_rate": 4.738131996796419e-08, + "logits/chosen": -1.8742241859436035, + "logits/rejected": -1.8881756067276, + "logps/chosen": -181.48358154296875, + "logps/rejected": -345.81219482421875, + "loss": 0.0459, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9507263898849487, + "rewards/margins": 3.015167236328125, + "rewards/rejected": -1.0644409656524658, + "step": 9128 + }, + { + "epoch": 0.53, + "learning_rate": 4.737190892439164e-08, + "logits/chosen": -1.8126519918441772, + "logits/rejected": -1.8174049854278564, + "logps/chosen": -32.24539566040039, + "logps/rejected": -145.6974639892578, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40404510498046875, + "rewards/margins": 2.7637078762054443, + "rewards/rejected": -2.3596627712249756, + "step": 9129 + }, + { + "epoch": 0.53, + "learning_rate": 4.736249797418171e-08, + "logits/chosen": -2.0428950786590576, + "logits/rejected": -2.0117852687835693, + "logps/chosen": -180.44476318359375, + "logps/rejected": -440.464111328125, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9052445888519287, + "rewards/margins": 6.3504228591918945, + "rewards/rejected": -3.445178270339966, + "step": 9130 + }, + { + "epoch": 0.53, + "learning_rate": 4.735308711766878e-08, + "logits/chosen": -1.8879152536392212, + "logits/rejected": -1.8876054286956787, + "logps/chosen": -17.848405838012695, + "logps/rejected": -91.41891479492188, + "loss": 0.564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20928116142749786, + "rewards/margins": 0.3340919613838196, + "rewards/rejected": -0.12481079250574112, + "step": 9131 + }, + { + "epoch": 0.53, + "learning_rate": 4.734367635518712e-08, + "logits/chosen": -1.911388874053955, + "logits/rejected": -1.924453616142273, + "logps/chosen": -11.829201698303223, + "logps/rejected": -134.91531372070312, + "loss": 0.4907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11658058315515518, + "rewards/margins": 1.4389293193817139, + "rewards/rejected": -1.5555099248886108, + "step": 9132 + }, + { + "epoch": 0.53, + "learning_rate": 4.733426568707109e-08, + "logits/chosen": -1.8843159675598145, + "logits/rejected": -1.8640637397766113, + "logps/chosen": -10.062589645385742, + "logps/rejected": -577.5621337890625, + "loss": 0.2673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13617344200611115, + "rewards/margins": 8.144644737243652, + "rewards/rejected": -8.008471488952637, + "step": 9133 + }, + { + "epoch": 0.53, + "learning_rate": 4.732485511365496e-08, + "logits/chosen": -1.8567839860916138, + "logits/rejected": -1.8413429260253906, + "logps/chosen": -144.34701538085938, + "logps/rejected": -557.8663940429688, + "loss": 0.1938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6242263913154602, + "rewards/margins": 5.779625415802002, + "rewards/rejected": -5.155398845672607, + "step": 9134 + }, + { + "epoch": 0.53, + "learning_rate": 4.731544463527307e-08, + "logits/chosen": -1.8222445249557495, + "logits/rejected": -1.8194571733474731, + "logps/chosen": -9.664213180541992, + "logps/rejected": -126.10730743408203, + "loss": 0.5514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05849943310022354, + "rewards/margins": 0.5825274586677551, + "rewards/rejected": -0.5240280032157898, + "step": 9135 + }, + { + "epoch": 0.53, + "learning_rate": 4.7306034252259724e-08, + "logits/chosen": -1.8057289123535156, + "logits/rejected": -1.8079357147216797, + "logps/chosen": -6.35375763522461e-05, + "logps/rejected": -291.98388671875, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3828179135089158e-06, + "rewards/margins": 6.623988628387451, + "rewards/rejected": -6.623990058898926, + "step": 9136 + }, + { + "epoch": 0.53, + "learning_rate": 4.729662396494921e-08, + "logits/chosen": -1.9336997270584106, + "logits/rejected": -1.9131996631622314, + "logps/chosen": -0.0182997677475214, + "logps/rejected": -272.6351318359375, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012961121974512935, + "rewards/margins": 5.419776439666748, + "rewards/rejected": -5.421072483062744, + "step": 9137 + }, + { + "epoch": 0.53, + "learning_rate": 4.728721377367585e-08, + "logits/chosen": -1.8853050470352173, + "logits/rejected": -1.8679460287094116, + "logps/chosen": -156.75631713867188, + "logps/rejected": -305.3543701171875, + "loss": 0.1772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4764984846115112, + "rewards/margins": 1.1846954822540283, + "rewards/rejected": 0.2918029725551605, + "step": 9138 + }, + { + "epoch": 0.53, + "learning_rate": 4.727780367877392e-08, + "logits/chosen": -2.106628656387329, + "logits/rejected": -2.099754571914673, + "logps/chosen": -29.87773323059082, + "logps/rejected": -122.51249694824219, + "loss": 0.7156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6638903021812439, + "rewards/margins": 0.3747251629829407, + "rewards/rejected": -1.0386154651641846, + "step": 9139 + }, + { + "epoch": 0.53, + "learning_rate": 4.726839368057772e-08, + "logits/chosen": -1.8632683753967285, + "logits/rejected": -1.8649439811706543, + "logps/chosen": -59.08034896850586, + "logps/rejected": -120.61697387695312, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5891521573066711, + "rewards/margins": 1.2997081279754639, + "rewards/rejected": -0.7105560302734375, + "step": 9140 + }, + { + "epoch": 0.53, + "learning_rate": 4.725898377942153e-08, + "logits/chosen": -2.0172677040100098, + "logits/rejected": -2.011211633682251, + "logps/chosen": -49.01841735839844, + "logps/rejected": -188.90821838378906, + "loss": 0.3302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23589172959327698, + "rewards/margins": 1.7823669910430908, + "rewards/rejected": -1.5464752912521362, + "step": 9141 + }, + { + "epoch": 0.53, + "learning_rate": 4.7249573975639656e-08, + "logits/chosen": -1.8356647491455078, + "logits/rejected": -1.8392384052276611, + "logps/chosen": -129.7081298828125, + "logps/rejected": -425.2552795410156, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8043731451034546, + "rewards/margins": 3.9529051780700684, + "rewards/rejected": -2.1485321521759033, + "step": 9142 + }, + { + "epoch": 0.53, + "learning_rate": 4.724016426956636e-08, + "logits/chosen": -2.1128668785095215, + "logits/rejected": -2.114346742630005, + "logps/chosen": -1.411356806755066, + "logps/rejected": -197.32139587402344, + "loss": 0.3861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00919189490377903, + "rewards/margins": 2.8663573265075684, + "rewards/rejected": -2.87554931640625, + "step": 9143 + }, + { + "epoch": 0.53, + "learning_rate": 4.723075466153594e-08, + "logits/chosen": -1.9156990051269531, + "logits/rejected": -1.8563628196716309, + "logps/chosen": -160.21910095214844, + "logps/rejected": -390.4921569824219, + "loss": 0.1208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5733444094657898, + "rewards/margins": 2.267399549484253, + "rewards/rejected": -1.694055199623108, + "step": 9144 + }, + { + "epoch": 0.53, + "learning_rate": 4.722134515188263e-08, + "logits/chosen": -1.8502988815307617, + "logits/rejected": -1.9033212661743164, + "logps/chosen": -206.52792358398438, + "logps/rejected": -424.1606750488281, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.067692518234253, + "rewards/margins": 2.9968674182891846, + "rewards/rejected": 0.07082519680261612, + "step": 9145 + }, + { + "epoch": 0.53, + "learning_rate": 4.7211935740940776e-08, + "logits/chosen": -1.967237114906311, + "logits/rejected": -2.0175113677978516, + "logps/chosen": -207.9382781982422, + "logps/rejected": -392.6446228027344, + "loss": 0.0764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5124863386154175, + "rewards/margins": 2.4048357009887695, + "rewards/rejected": -0.8923492431640625, + "step": 9146 + }, + { + "epoch": 0.53, + "learning_rate": 4.720252642904456e-08, + "logits/chosen": -1.7839465141296387, + "logits/rejected": -1.729799509048462, + "logps/chosen": -148.01748657226562, + "logps/rejected": -311.0980224609375, + "loss": 0.2727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.273707628250122, + "rewards/margins": 0.9561783075332642, + "rewards/rejected": 0.3175292909145355, + "step": 9147 + }, + { + "epoch": 0.53, + "learning_rate": 4.719311721652833e-08, + "logits/chosen": -1.7573530673980713, + "logits/rejected": -1.751792311668396, + "logps/chosen": -65.01245880126953, + "logps/rejected": -262.0142517089844, + "loss": 0.5888, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3736160397529602, + "rewards/margins": -0.27823638916015625, + "rewards/rejected": 0.6518524289131165, + "step": 9148 + }, + { + "epoch": 0.53, + "learning_rate": 4.7183708103726265e-08, + "logits/chosen": -2.0884175300598145, + "logits/rejected": -2.061542510986328, + "logps/chosen": -193.75987243652344, + "logps/rejected": -461.3487243652344, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1113603115081787, + "rewards/margins": 5.201304912567139, + "rewards/rejected": -3.08994460105896, + "step": 9149 + }, + { + "epoch": 0.53, + "learning_rate": 4.7174299090972704e-08, + "logits/chosen": -1.8387426137924194, + "logits/rejected": -1.8274314403533936, + "logps/chosen": -33.07715606689453, + "logps/rejected": -105.05148315429688, + "loss": 1.0361, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7503166198730469, + "rewards/margins": -0.4566177427768707, + "rewards/rejected": -0.29369887709617615, + "step": 9150 + }, + { + "epoch": 0.53, + "learning_rate": 4.716489017860183e-08, + "logits/chosen": -1.9110357761383057, + "logits/rejected": -1.902095913887024, + "logps/chosen": -23.529949188232422, + "logps/rejected": -166.7577667236328, + "loss": 0.3, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23106880486011505, + "rewards/margins": 2.5557591915130615, + "rewards/rejected": -2.324690341949463, + "step": 9151 + }, + { + "epoch": 0.53, + "learning_rate": 4.715548136694795e-08, + "logits/chosen": -1.9488602876663208, + "logits/rejected": -1.9491114616394043, + "logps/chosen": -0.0035160973202437162, + "logps/rejected": -72.88771057128906, + "loss": 0.4746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024411380582023412, + "rewards/margins": 1.1139354705810547, + "rewards/rejected": -1.1141796112060547, + "step": 9152 + }, + { + "epoch": 0.53, + "learning_rate": 4.714607265634526e-08, + "logits/chosen": -2.0594096183776855, + "logits/rejected": -2.0464353561401367, + "logps/chosen": -104.16914367675781, + "logps/rejected": -434.34747314453125, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07777710258960724, + "rewards/margins": 5.561749458312988, + "rewards/rejected": -5.6395263671875, + "step": 9153 + }, + { + "epoch": 0.53, + "learning_rate": 4.713666404712805e-08, + "logits/chosen": -1.92833411693573, + "logits/rejected": -1.9149497747421265, + "logps/chosen": -0.00010478002513991669, + "logps/rejected": -246.32078552246094, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2885631096869474e-06, + "rewards/margins": 4.733025550842285, + "rewards/rejected": -4.733027935028076, + "step": 9154 + }, + { + "epoch": 0.53, + "learning_rate": 4.7127255539630524e-08, + "logits/chosen": -1.960524320602417, + "logits/rejected": -1.9523404836654663, + "logps/chosen": -11.184865951538086, + "logps/rejected": -103.10688781738281, + "loss": 0.404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7807876467704773, + "rewards/margins": 0.5671897530555725, + "rewards/rejected": 0.2135978788137436, + "step": 9155 + }, + { + "epoch": 0.53, + "learning_rate": 4.711784713418693e-08, + "logits/chosen": -1.8614915609359741, + "logits/rejected": -1.857122778892517, + "logps/chosen": -43.148468017578125, + "logps/rejected": -160.24655151367188, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6972389221191406, + "rewards/margins": 1.365236759185791, + "rewards/rejected": -0.6679977774620056, + "step": 9156 + }, + { + "epoch": 0.53, + "learning_rate": 4.7108438831131526e-08, + "logits/chosen": -1.6939648389816284, + "logits/rejected": -1.688987374305725, + "logps/chosen": -22.282596588134766, + "logps/rejected": -124.66633605957031, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5601875185966492, + "rewards/margins": 2.1176040172576904, + "rewards/rejected": -1.557416558265686, + "step": 9157 + }, + { + "epoch": 0.53, + "learning_rate": 4.7099030630798505e-08, + "logits/chosen": -1.807660698890686, + "logits/rejected": -1.8053805828094482, + "logps/chosen": -136.0244140625, + "logps/rejected": -170.1304931640625, + "loss": 0.3533, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5613954067230225, + "rewards/margins": 0.02167677879333496, + "rewards/rejected": 2.5397186279296875, + "step": 9158 + }, + { + "epoch": 0.53, + "learning_rate": 4.708962253352211e-08, + "logits/chosen": -1.703739047050476, + "logits/rejected": -1.6985948085784912, + "logps/chosen": -35.08418655395508, + "logps/rejected": -102.16857147216797, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5563289523124695, + "rewards/margins": 1.6735401153564453, + "rewards/rejected": -1.1172112226486206, + "step": 9159 + }, + { + "epoch": 0.53, + "learning_rate": 4.7080214539636555e-08, + "logits/chosen": -1.8900460004806519, + "logits/rejected": -1.9247000217437744, + "logps/chosen": -295.3868713378906, + "logps/rejected": -425.15087890625, + "loss": 0.0323, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3978943824768066, + "rewards/margins": 3.7330262660980225, + "rewards/rejected": -1.3351318836212158, + "step": 9160 + }, + { + "epoch": 0.53, + "learning_rate": 4.707080664947607e-08, + "logits/chosen": -1.7692475318908691, + "logits/rejected": -1.8076974153518677, + "logps/chosen": -174.2512969970703, + "logps/rejected": -405.238525390625, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7950118780136108, + "rewards/margins": 4.852860927581787, + "rewards/rejected": -3.057849168777466, + "step": 9161 + }, + { + "epoch": 0.53, + "learning_rate": 4.7061398863374855e-08, + "logits/chosen": -1.92877197265625, + "logits/rejected": -1.9357510805130005, + "logps/chosen": -116.74113464355469, + "logps/rejected": -177.56463623046875, + "loss": 0.1977, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2016189098358154, + "rewards/margins": 1.0822890996932983, + "rewards/rejected": 1.119329810142517, + "step": 9162 + }, + { + "epoch": 0.53, + "learning_rate": 4.705199118166713e-08, + "logits/chosen": -1.9827773571014404, + "logits/rejected": -1.9352015256881714, + "logps/chosen": -215.96669006347656, + "logps/rejected": -455.12542724609375, + "loss": 0.1028, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0203659534454346, + "rewards/margins": 3.9848008155822754, + "rewards/rejected": -2.964434862136841, + "step": 9163 + }, + { + "epoch": 0.53, + "learning_rate": 4.704258360468709e-08, + "logits/chosen": -1.776211142539978, + "logits/rejected": -1.7930010557174683, + "logps/chosen": -186.2859344482422, + "logps/rejected": -406.8824462890625, + "loss": 0.0545, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.353572130203247, + "rewards/margins": 4.41844367980957, + "rewards/rejected": -3.064871311187744, + "step": 9164 + }, + { + "epoch": 0.53, + "learning_rate": 4.703317613276896e-08, + "logits/chosen": -1.8389121294021606, + "logits/rejected": -1.832319736480713, + "logps/chosen": -5.208771705627441, + "logps/rejected": -174.89663696289062, + "loss": 0.4275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07037558406591415, + "rewards/margins": 1.4606359004974365, + "rewards/rejected": -1.3902603387832642, + "step": 9165 + }, + { + "epoch": 0.53, + "learning_rate": 4.70237687662469e-08, + "logits/chosen": -1.7410286664962769, + "logits/rejected": -1.7537416219711304, + "logps/chosen": -239.5918731689453, + "logps/rejected": -343.79901123046875, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8504135608673096, + "rewards/margins": 2.298863172531128, + "rewards/rejected": -0.4484497010707855, + "step": 9166 + }, + { + "epoch": 0.53, + "learning_rate": 4.701436150545517e-08, + "logits/chosen": -1.9007066488265991, + "logits/rejected": -1.8975149393081665, + "logps/chosen": -141.61126708984375, + "logps/rejected": -360.04962158203125, + "loss": 0.0863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2995818853378296, + "rewards/margins": 3.554837226867676, + "rewards/rejected": -2.2552552223205566, + "step": 9167 + }, + { + "epoch": 0.53, + "learning_rate": 4.700495435072789e-08, + "logits/chosen": -1.9439839124679565, + "logits/rejected": -1.9748765230178833, + "logps/chosen": -195.72702026367188, + "logps/rejected": -391.07177734375, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.274871826171875, + "rewards/margins": 3.7112550735473633, + "rewards/rejected": -1.4363831281661987, + "step": 9168 + }, + { + "epoch": 0.53, + "learning_rate": 4.699554730239931e-08, + "logits/chosen": -1.739856243133545, + "logits/rejected": -1.7514374256134033, + "logps/chosen": -252.33145141601562, + "logps/rejected": -389.26495361328125, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3189055919647217, + "rewards/margins": 3.9017181396484375, + "rewards/rejected": -1.5828125476837158, + "step": 9169 + }, + { + "epoch": 0.53, + "learning_rate": 4.698614036080356e-08, + "logits/chosen": -1.9367660284042358, + "logits/rejected": -1.9384857416152954, + "logps/chosen": -0.0037047360092401505, + "logps/rejected": -236.79428100585938, + "loss": 0.3782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021260064386297017, + "rewards/margins": 2.4885873794555664, + "rewards/rejected": -2.488800048828125, + "step": 9170 + }, + { + "epoch": 0.53, + "learning_rate": 4.697673352627487e-08, + "logits/chosen": -2.0914218425750732, + "logits/rejected": -2.0977096557617188, + "logps/chosen": -39.828521728515625, + "logps/rejected": -98.15597534179688, + "loss": 0.7031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22650299966335297, + "rewards/margins": 0.08646391332149506, + "rewards/rejected": -0.312966912984848, + "step": 9171 + }, + { + "epoch": 0.53, + "learning_rate": 4.6967326799147367e-08, + "logits/chosen": -1.9986222982406616, + "logits/rejected": -1.9886443614959717, + "logps/chosen": -5.833040237426758, + "logps/rejected": -182.28155517578125, + "loss": 0.3462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24742603302001953, + "rewards/margins": 2.225921869277954, + "rewards/rejected": -1.9784958362579346, + "step": 9172 + }, + { + "epoch": 0.53, + "learning_rate": 4.695792017975528e-08, + "logits/chosen": -1.9087610244750977, + "logits/rejected": -1.923426866531372, + "logps/chosen": -193.8407440185547, + "logps/rejected": -343.1814270019531, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0554778575897217, + "rewards/margins": 2.5949065685272217, + "rewards/rejected": -0.5394287109375, + "step": 9173 + }, + { + "epoch": 0.53, + "learning_rate": 4.694851366843272e-08, + "logits/chosen": -1.7343944311141968, + "logits/rejected": -1.723922610282898, + "logps/chosen": -262.5392761230469, + "logps/rejected": -316.47918701171875, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7410980463027954, + "rewards/margins": 0.2602447271347046, + "rewards/rejected": 1.4808533191680908, + "step": 9174 + }, + { + "epoch": 0.53, + "learning_rate": 4.69391072655139e-08, + "logits/chosen": -1.8997284173965454, + "logits/rejected": -1.898359775543213, + "logps/chosen": -232.5024871826172, + "logps/rejected": -359.194580078125, + "loss": 0.1449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5739242434501648, + "rewards/margins": 2.6559219360351562, + "rewards/rejected": -2.0819976329803467, + "step": 9175 + }, + { + "epoch": 0.53, + "learning_rate": 4.6929700971332955e-08, + "logits/chosen": -2.0104610919952393, + "logits/rejected": -2.0021297931671143, + "logps/chosen": -21.6345272064209, + "logps/rejected": -197.253662109375, + "loss": 0.235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5652055740356445, + "rewards/margins": 2.9589357376098633, + "rewards/rejected": -2.3937301635742188, + "step": 9176 + }, + { + "epoch": 0.53, + "learning_rate": 4.692029478622405e-08, + "logits/chosen": -1.8872753381729126, + "logits/rejected": -1.878395915031433, + "logps/chosen": -293.2192687988281, + "logps/rejected": -511.8291931152344, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0691070556640625, + "rewards/margins": 4.006439208984375, + "rewards/rejected": -1.9373321533203125, + "step": 9177 + }, + { + "epoch": 0.53, + "learning_rate": 4.691088871052134e-08, + "logits/chosen": -1.9571633338928223, + "logits/rejected": -1.9329348802566528, + "logps/chosen": -167.54440307617188, + "logps/rejected": -287.0867004394531, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3692855834960938, + "rewards/margins": 0.21636497974395752, + "rewards/rejected": 1.1529206037521362, + "step": 9178 + }, + { + "epoch": 0.53, + "learning_rate": 4.690148274455897e-08, + "logits/chosen": -1.8719823360443115, + "logits/rejected": -1.8043615818023682, + "logps/chosen": -231.10789489746094, + "logps/rejected": -457.48028564453125, + "loss": 0.1269, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2238693237304688, + "rewards/margins": 1.7885849475860596, + "rewards/rejected": 0.43528443574905396, + "step": 9179 + }, + { + "epoch": 0.53, + "learning_rate": 4.68920768886711e-08, + "logits/chosen": -1.7554035186767578, + "logits/rejected": -1.7584418058395386, + "logps/chosen": -161.27232360839844, + "logps/rejected": -241.69296264648438, + "loss": 0.1198, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2547805309295654, + "rewards/margins": 1.749308705329895, + "rewards/rejected": 0.5054718255996704, + "step": 9180 + }, + { + "epoch": 0.53, + "learning_rate": 4.688267114319185e-08, + "logits/chosen": -1.7220888137817383, + "logits/rejected": -1.7500468492507935, + "logps/chosen": -225.08999633789062, + "logps/rejected": -509.0735168457031, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7207322120666504, + "rewards/margins": 7.0844407081604, + "rewards/rejected": -4.36370849609375, + "step": 9181 + }, + { + "epoch": 0.53, + "learning_rate": 4.687326550845538e-08, + "logits/chosen": -1.949090838432312, + "logits/rejected": -1.930147647857666, + "logps/chosen": -28.37865447998047, + "logps/rejected": -138.06951904296875, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37592241168022156, + "rewards/margins": 2.2243378162384033, + "rewards/rejected": -1.8484153747558594, + "step": 9182 + }, + { + "epoch": 0.53, + "learning_rate": 4.686385998479581e-08, + "logits/chosen": -1.948277473449707, + "logits/rejected": -1.9431010484695435, + "logps/chosen": -0.0002646264620125294, + "logps/rejected": -204.83865356445312, + "loss": 0.3237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0369927622377872e-05, + "rewards/margins": 5.3087358474731445, + "rewards/rejected": -5.308746337890625, + "step": 9183 + }, + { + "epoch": 0.53, + "learning_rate": 4.685445457254728e-08, + "logits/chosen": -2.0751266479492188, + "logits/rejected": -2.07253098487854, + "logps/chosen": -23.01917266845703, + "logps/rejected": -165.8505859375, + "loss": 0.3546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007137298816815019, + "rewards/margins": 3.012355327606201, + "rewards/rejected": -3.0130691528320312, + "step": 9184 + }, + { + "epoch": 0.53, + "learning_rate": 4.6845049272043894e-08, + "logits/chosen": -1.8424222469329834, + "logits/rejected": -1.8357378244400024, + "logps/chosen": -26.789569854736328, + "logps/rejected": -115.15943908691406, + "loss": 0.9196, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.18264083564281464, + "rewards/margins": -0.677630603313446, + "rewards/rejected": 0.4949897825717926, + "step": 9185 + }, + { + "epoch": 0.53, + "learning_rate": 4.68356440836198e-08, + "logits/chosen": -2.0737192630767822, + "logits/rejected": -2.0633106231689453, + "logps/chosen": -0.00022564777464140207, + "logps/rejected": -213.4849853515625, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3290590686665382e-05, + "rewards/margins": 5.083186626434326, + "rewards/rejected": -5.083199977874756, + "step": 9186 + }, + { + "epoch": 0.53, + "learning_rate": 4.6826239007609094e-08, + "logits/chosen": -1.868664026260376, + "logits/rejected": -1.8599903583526611, + "logps/chosen": -310.6398010253906, + "logps/rejected": -537.1767578125, + "loss": 0.1038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0699188709259033, + "rewards/margins": 4.451031684875488, + "rewards/rejected": -3.381112813949585, + "step": 9187 + }, + { + "epoch": 0.53, + "learning_rate": 4.6816834044345935e-08, + "logits/chosen": -1.968579649925232, + "logits/rejected": -1.9603134393692017, + "logps/chosen": -20.0736083984375, + "logps/rejected": -95.51055145263672, + "loss": 0.385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7190386056900024, + "rewards/margins": 0.6573099493980408, + "rewards/rejected": 0.061728667467832565, + "step": 9188 + }, + { + "epoch": 0.53, + "learning_rate": 4.680742919416436e-08, + "logits/chosen": -1.8942943811416626, + "logits/rejected": -1.8941712379455566, + "logps/chosen": -48.56080627441406, + "logps/rejected": -220.22056579589844, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6916385889053345, + "rewards/margins": 2.917952537536621, + "rewards/rejected": -2.226313829421997, + "step": 9189 + }, + { + "epoch": 0.53, + "learning_rate": 4.6798024457398553e-08, + "logits/chosen": -1.8504527807235718, + "logits/rejected": -1.8422001600265503, + "logps/chosen": -184.20635986328125, + "logps/rejected": -283.3436584472656, + "loss": 0.0611, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.72551429271698, + "rewards/margins": 2.8746964931488037, + "rewards/rejected": -1.1491822004318237, + "step": 9190 + }, + { + "epoch": 0.53, + "learning_rate": 4.678861983438255e-08, + "logits/chosen": -1.8203761577606201, + "logits/rejected": -1.8362425565719604, + "logps/chosen": -288.0511169433594, + "logps/rejected": -509.99853515625, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.382162570953369, + "rewards/margins": 3.4414401054382324, + "rewards/rejected": -1.0592774152755737, + "step": 9191 + }, + { + "epoch": 0.53, + "learning_rate": 4.677921532545051e-08, + "logits/chosen": -1.8766213655471802, + "logits/rejected": -1.8769125938415527, + "logps/chosen": -182.31900024414062, + "logps/rejected": -230.8482208251953, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.305523633956909, + "rewards/margins": 0.3260420560836792, + "rewards/rejected": 1.97948157787323, + "step": 9192 + }, + { + "epoch": 0.53, + "learning_rate": 4.676981093093646e-08, + "logits/chosen": -1.6571688652038574, + "logits/rejected": -1.675687551498413, + "logps/chosen": -96.72449493408203, + "logps/rejected": -272.3797607421875, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6083740592002869, + "rewards/margins": 1.4241760969161987, + "rewards/rejected": -0.8158020377159119, + "step": 9193 + }, + { + "epoch": 0.54, + "learning_rate": 4.6760406651174565e-08, + "logits/chosen": -1.9230612516403198, + "logits/rejected": -1.9291577339172363, + "logps/chosen": -0.00010120704246219248, + "logps/rejected": -152.97674560546875, + "loss": 0.5128, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.985028448980302e-06, + "rewards/margins": 0.9159493446350098, + "rewards/rejected": -0.915942370891571, + "step": 9194 + }, + { + "epoch": 0.54, + "learning_rate": 4.675100248649884e-08, + "logits/chosen": -1.8280246257781982, + "logits/rejected": -1.795211911201477, + "logps/chosen": -351.6550598144531, + "logps/rejected": -579.2803344726562, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.073568820953369, + "rewards/margins": 5.7576141357421875, + "rewards/rejected": -3.6840455532073975, + "step": 9195 + }, + { + "epoch": 0.54, + "learning_rate": 4.6741598437243404e-08, + "logits/chosen": -1.8397035598754883, + "logits/rejected": -1.8197704553604126, + "logps/chosen": -314.5045166015625, + "logps/rejected": -445.723876953125, + "loss": 0.1507, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.953857421875, + "rewards/margins": 1.1968291997909546, + "rewards/rejected": 0.7570282220840454, + "step": 9196 + }, + { + "epoch": 0.54, + "learning_rate": 4.6732194503742346e-08, + "logits/chosen": -1.9380521774291992, + "logits/rejected": -1.9217753410339355, + "logps/chosen": -78.34860229492188, + "logps/rejected": -264.4615173339844, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1621963530778885, + "rewards/margins": 4.959803581237793, + "rewards/rejected": -4.797607421875, + "step": 9197 + }, + { + "epoch": 0.54, + "learning_rate": 4.672279068632971e-08, + "logits/chosen": -1.800301194190979, + "logits/rejected": -1.7978893518447876, + "logps/chosen": -16.14795684814453, + "logps/rejected": -329.24017333984375, + "loss": 0.3522, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14732380211353302, + "rewards/margins": 3.9297573566436768, + "rewards/rejected": -4.077081203460693, + "step": 9198 + }, + { + "epoch": 0.54, + "learning_rate": 4.671338698533959e-08, + "logits/chosen": -1.7497698068618774, + "logits/rejected": -1.7364403009414673, + "logps/chosen": -9.166958625428379e-05, + "logps/rejected": -276.75909423828125, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5496625849209522e-07, + "rewards/margins": 4.731713771820068, + "rewards/rejected": -4.731713771820068, + "step": 9199 + }, + { + "epoch": 0.54, + "learning_rate": 4.670398340110604e-08, + "logits/chosen": -1.9416407346725464, + "logits/rejected": -1.9355757236480713, + "logps/chosen": -0.00797360297292471, + "logps/rejected": -218.45071411132812, + "loss": 0.3092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005279492470435798, + "rewards/margins": 6.915075778961182, + "rewards/rejected": -6.9156036376953125, + "step": 9200 + }, + { + "epoch": 0.54, + "learning_rate": 4.669457993396312e-08, + "logits/chosen": -2.078322649002075, + "logits/rejected": -2.1176931858062744, + "logps/chosen": -180.03054809570312, + "logps/rejected": -216.95626831054688, + "loss": 0.2441, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.491241455078125, + "rewards/margins": 0.7307357788085938, + "rewards/rejected": 0.7605056762695312, + "step": 9201 + }, + { + "epoch": 0.54, + "learning_rate": 4.668517658424488e-08, + "logits/chosen": -1.8896894454956055, + "logits/rejected": -1.8873306512832642, + "logps/chosen": -41.808616638183594, + "logps/rejected": -259.0901794433594, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9875702261924744, + "rewards/margins": 1.6138947010040283, + "rewards/rejected": -0.626324474811554, + "step": 9202 + }, + { + "epoch": 0.54, + "learning_rate": 4.6675773352285394e-08, + "logits/chosen": -2.1201083660125732, + "logits/rejected": -2.121060848236084, + "logps/chosen": -37.621543884277344, + "logps/rejected": -120.04460144042969, + "loss": 0.7442, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3198269009590149, + "rewards/margins": -0.300442099571228, + "rewards/rejected": 0.6202690005302429, + "step": 9203 + }, + { + "epoch": 0.54, + "learning_rate": 4.666637023841869e-08, + "logits/chosen": -2.005664110183716, + "logits/rejected": -2.01922607421875, + "logps/chosen": -178.116455078125, + "logps/rejected": -272.4536437988281, + "loss": 0.1611, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7328659296035767, + "rewards/margins": 1.209059238433838, + "rewards/rejected": 0.5238067507743835, + "step": 9204 + }, + { + "epoch": 0.54, + "learning_rate": 4.665696724297884e-08, + "logits/chosen": -2.0282812118530273, + "logits/rejected": -2.01806640625, + "logps/chosen": -49.50468063354492, + "logps/rejected": -243.95443725585938, + "loss": 0.1974, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8842365145683289, + "rewards/margins": 3.5344045162200928, + "rewards/rejected": -2.650167942047119, + "step": 9205 + }, + { + "epoch": 0.54, + "learning_rate": 4.664756436629984e-08, + "logits/chosen": -1.8606141805648804, + "logits/rejected": -1.84770929813385, + "logps/chosen": -204.5297088623047, + "logps/rejected": -247.38351440429688, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7720794677734375, + "rewards/margins": 4.094128608703613, + "rewards/rejected": -0.32204896211624146, + "step": 9206 + }, + { + "epoch": 0.54, + "learning_rate": 4.663816160871577e-08, + "logits/chosen": -1.97348153591156, + "logits/rejected": -1.9764444828033447, + "logps/chosen": -5.967934608459473, + "logps/rejected": -275.2635803222656, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15560899674892426, + "rewards/margins": 5.058171272277832, + "rewards/rejected": -4.902562141418457, + "step": 9207 + }, + { + "epoch": 0.54, + "learning_rate": 4.662875897056063e-08, + "logits/chosen": -1.8661261796951294, + "logits/rejected": -1.8620985746383667, + "logps/chosen": -26.223440170288086, + "logps/rejected": -185.7221221923828, + "loss": 0.4383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03430500254034996, + "rewards/margins": 1.2296251058578491, + "rewards/rejected": -1.1953201293945312, + "step": 9208 + }, + { + "epoch": 0.54, + "learning_rate": 4.661935645216849e-08, + "logits/chosen": -1.6902267932891846, + "logits/rejected": -1.686810851097107, + "logps/chosen": -279.4871520996094, + "logps/rejected": -375.7605895996094, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.112719774246216, + "rewards/margins": 0.5885895490646362, + "rewards/rejected": 1.5241302251815796, + "step": 9209 + }, + { + "epoch": 0.54, + "learning_rate": 4.6609954053873304e-08, + "logits/chosen": -2.0683674812316895, + "logits/rejected": -2.06561279296875, + "logps/chosen": -0.00016819250595290214, + "logps/rejected": -68.32169342041016, + "loss": 0.4322, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5131230106344447e-05, + "rewards/margins": 1.6222825050354004, + "rewards/rejected": -1.622247338294983, + "step": 9210 + }, + { + "epoch": 0.54, + "learning_rate": 4.6600551776009176e-08, + "logits/chosen": -1.950119972229004, + "logits/rejected": -1.9536932706832886, + "logps/chosen": -52.98679733276367, + "logps/rejected": -169.19317626953125, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4645889401435852, + "rewards/margins": 1.3148713111877441, + "rewards/rejected": -0.8502823114395142, + "step": 9211 + }, + { + "epoch": 0.54, + "learning_rate": 4.659114961891004e-08, + "logits/chosen": -1.88267982006073, + "logits/rejected": -1.8502126932144165, + "logps/chosen": -269.56024169921875, + "logps/rejected": -508.0149230957031, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2827515602111816, + "rewards/margins": 6.506869316101074, + "rewards/rejected": -3.2241179943084717, + "step": 9212 + }, + { + "epoch": 0.54, + "learning_rate": 4.658174758290998e-08, + "logits/chosen": -1.7847853899002075, + "logits/rejected": -1.7547593116760254, + "logps/chosen": -206.92507934570312, + "logps/rejected": -393.0651550292969, + "loss": 0.0733, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7172303199768066, + "rewards/margins": 2.361020088195801, + "rewards/rejected": 0.356210321187973, + "step": 9213 + }, + { + "epoch": 0.54, + "learning_rate": 4.6572345668342926e-08, + "logits/chosen": -2.0178987979888916, + "logits/rejected": -1.9953563213348389, + "logps/chosen": -45.32129669189453, + "logps/rejected": -222.53466796875, + "loss": 0.4271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09569282829761505, + "rewards/margins": 1.9775402545928955, + "rewards/rejected": -2.073233127593994, + "step": 9214 + }, + { + "epoch": 0.54, + "learning_rate": 4.656294387554295e-08, + "logits/chosen": -2.002079963684082, + "logits/rejected": -2.013230800628662, + "logps/chosen": -222.855224609375, + "logps/rejected": -458.76055908203125, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8462005853652954, + "rewards/margins": 3.851864814758301, + "rewards/rejected": -3.005664110183716, + "step": 9215 + }, + { + "epoch": 0.54, + "learning_rate": 4.655354220484401e-08, + "logits/chosen": -1.9364036321640015, + "logits/rejected": -1.9765764474868774, + "logps/chosen": -179.21575927734375, + "logps/rejected": -264.0148010253906, + "loss": 0.1404, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8551315069198608, + "rewards/margins": 1.3850051164627075, + "rewards/rejected": 0.47012636065483093, + "step": 9216 + }, + { + "epoch": 0.54, + "learning_rate": 4.65441406565801e-08, + "logits/chosen": -2.1229684352874756, + "logits/rejected": -2.1160647869110107, + "logps/chosen": -10.992316246032715, + "logps/rejected": -203.61912536621094, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23007498681545258, + "rewards/margins": 2.0085525512695312, + "rewards/rejected": -1.7784775495529175, + "step": 9217 + }, + { + "epoch": 0.54, + "learning_rate": 4.653473923108524e-08, + "logits/chosen": -1.9553617238998413, + "logits/rejected": -1.940838098526001, + "logps/chosen": -59.73597717285156, + "logps/rejected": -150.7313232421875, + "loss": 0.517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07851257175207138, + "rewards/margins": 0.7050582766532898, + "rewards/rejected": -0.6265457272529602, + "step": 9218 + }, + { + "epoch": 0.54, + "learning_rate": 4.6525337928693375e-08, + "logits/chosen": -1.763061285018921, + "logits/rejected": -1.7741557359695435, + "logps/chosen": -159.5986328125, + "logps/rejected": -259.1194763183594, + "loss": 0.3204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4070953130722046, + "rewards/margins": 0.5634521245956421, + "rewards/rejected": 0.8436431884765625, + "step": 9219 + }, + { + "epoch": 0.54, + "learning_rate": 4.651593674973852e-08, + "logits/chosen": -1.848314881324768, + "logits/rejected": -1.8255654573440552, + "logps/chosen": -224.43728637695312, + "logps/rejected": -352.9576110839844, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9345200061798096, + "rewards/margins": 3.0012712478637695, + "rewards/rejected": -1.0667511224746704, + "step": 9220 + }, + { + "epoch": 0.54, + "learning_rate": 4.650653569455462e-08, + "logits/chosen": -1.9590774774551392, + "logits/rejected": -1.9756088256835938, + "logps/chosen": -185.20346069335938, + "logps/rejected": -322.32061767578125, + "loss": 0.06, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6582657098770142, + "rewards/margins": 2.5208449363708496, + "rewards/rejected": -0.862579345703125, + "step": 9221 + }, + { + "epoch": 0.54, + "learning_rate": 4.649713476347567e-08, + "logits/chosen": -1.930679440498352, + "logits/rejected": -1.9260300397872925, + "logps/chosen": -197.1846160888672, + "logps/rejected": -293.52386474609375, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.727015733718872, + "rewards/margins": 0.40373075008392334, + "rewards/rejected": 1.3232849836349487, + "step": 9222 + }, + { + "epoch": 0.54, + "learning_rate": 4.648773395683561e-08, + "logits/chosen": -2.1441140174865723, + "logits/rejected": -2.141688585281372, + "logps/chosen": -34.57978057861328, + "logps/rejected": -164.7480010986328, + "loss": 0.5793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7950552105903625, + "rewards/margins": 3.518918752670288, + "rewards/rejected": -4.313973903656006, + "step": 9223 + }, + { + "epoch": 0.54, + "learning_rate": 4.647833327496844e-08, + "logits/chosen": -1.9437576532363892, + "logits/rejected": -1.961167573928833, + "logps/chosen": -245.30096435546875, + "logps/rejected": -404.1927795410156, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.585675001144409, + "rewards/margins": 3.2369933128356934, + "rewards/rejected": 0.34868165850639343, + "step": 9224 + }, + { + "epoch": 0.54, + "learning_rate": 4.6468932718208086e-08, + "logits/chosen": -1.8668103218078613, + "logits/rejected": -1.8670458793640137, + "logps/chosen": -0.005245183128863573, + "logps/rejected": -304.81500244140625, + "loss": 0.3243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040729466127231717, + "rewards/margins": 7.274243354797363, + "rewards/rejected": -7.274650573730469, + "step": 9225 + }, + { + "epoch": 0.54, + "learning_rate": 4.645953228688853e-08, + "logits/chosen": -1.9434598684310913, + "logits/rejected": -1.9068430662155151, + "logps/chosen": -183.9345703125, + "logps/rejected": -563.7039794921875, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.506750464439392, + "rewards/margins": 4.209240913391113, + "rewards/rejected": -2.7024903297424316, + "step": 9226 + }, + { + "epoch": 0.54, + "learning_rate": 4.645013198134368e-08, + "logits/chosen": -1.8527165651321411, + "logits/rejected": -1.873643398284912, + "logps/chosen": -210.09429931640625, + "logps/rejected": -335.5669250488281, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1067445278167725, + "rewards/margins": 2.0106873512268066, + "rewards/rejected": 0.09605713188648224, + "step": 9227 + }, + { + "epoch": 0.54, + "learning_rate": 4.644073180190753e-08, + "logits/chosen": -2.136647939682007, + "logits/rejected": -2.166543483734131, + "logps/chosen": -207.64791870117188, + "logps/rejected": -207.3231964111328, + "loss": 0.3059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6866883039474487, + "rewards/margins": 0.3650696277618408, + "rewards/rejected": 1.321618676185608, + "step": 9228 + }, + { + "epoch": 0.54, + "learning_rate": 4.643133174891397e-08, + "logits/chosen": -1.951489806175232, + "logits/rejected": -1.9185986518859863, + "logps/chosen": -234.7943572998047, + "logps/rejected": -473.9539794921875, + "loss": 0.136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5036468505859375, + "rewards/margins": 1.5612945556640625, + "rewards/rejected": -0.057647705078125, + "step": 9229 + }, + { + "epoch": 0.54, + "learning_rate": 4.6421931822696994e-08, + "logits/chosen": -1.928097128868103, + "logits/rejected": -1.921690583229065, + "logps/chosen": -0.32756662368774414, + "logps/rejected": -174.82379150390625, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011155217653140426, + "rewards/margins": 3.626368522644043, + "rewards/rejected": -3.627484083175659, + "step": 9230 + }, + { + "epoch": 0.54, + "learning_rate": 4.641253202359048e-08, + "logits/chosen": -1.7976094484329224, + "logits/rejected": -1.7988656759262085, + "logps/chosen": -253.45538330078125, + "logps/rejected": -270.71295166015625, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39410400390625, + "rewards/margins": 0.2771667540073395, + "rewards/rejected": 0.11693725734949112, + "step": 9231 + }, + { + "epoch": 0.54, + "learning_rate": 4.6403132351928397e-08, + "logits/chosen": -1.9890251159667969, + "logits/rejected": -1.9816025495529175, + "logps/chosen": -14.947124481201172, + "logps/rejected": -165.26788330078125, + "loss": 0.3767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003983307164162397, + "rewards/margins": 2.360830068588257, + "rewards/rejected": -2.3648133277893066, + "step": 9232 + }, + { + "epoch": 0.54, + "learning_rate": 4.6393732808044615e-08, + "logits/chosen": -1.8908342123031616, + "logits/rejected": -1.8913980722427368, + "logps/chosen": -29.454059600830078, + "logps/rejected": -347.6888122558594, + "loss": 0.1509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43998757004737854, + "rewards/margins": 4.346890449523926, + "rewards/rejected": -3.906903028488159, + "step": 9233 + }, + { + "epoch": 0.54, + "learning_rate": 4.6384333392273125e-08, + "logits/chosen": -1.9758894443511963, + "logits/rejected": -1.95921790599823, + "logps/chosen": -16.785552978515625, + "logps/rejected": -381.93060302734375, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19150219857692719, + "rewards/margins": 8.446117401123047, + "rewards/rejected": -8.25461483001709, + "step": 9234 + }, + { + "epoch": 0.54, + "learning_rate": 4.637493410494776e-08, + "logits/chosen": -1.935559868812561, + "logits/rejected": -1.9318374395370483, + "logps/chosen": -25.12872886657715, + "logps/rejected": -175.23257446289062, + "loss": 0.309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22530345618724823, + "rewards/margins": 3.53438401222229, + "rewards/rejected": -3.3090806007385254, + "step": 9235 + }, + { + "epoch": 0.54, + "learning_rate": 4.636553494640249e-08, + "logits/chosen": -1.9232126474380493, + "logits/rejected": -1.9118447303771973, + "logps/chosen": -87.30752563476562, + "logps/rejected": -209.5972900390625, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3722747564315796, + "rewards/margins": 1.8621947765350342, + "rewards/rejected": -0.489920049905777, + "step": 9236 + }, + { + "epoch": 0.54, + "learning_rate": 4.635613591697119e-08, + "logits/chosen": -2.1262338161468506, + "logits/rejected": -2.122008800506592, + "logps/chosen": -42.79750061035156, + "logps/rejected": -264.49871826171875, + "loss": 0.321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10409927368164062, + "rewards/margins": 3.9594597816467285, + "rewards/rejected": -3.855360507965088, + "step": 9237 + }, + { + "epoch": 0.54, + "learning_rate": 4.6346737016987765e-08, + "logits/chosen": -1.9125404357910156, + "logits/rejected": -1.9168390035629272, + "logps/chosen": -81.73551177978516, + "logps/rejected": -264.4908752441406, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7217849493026733, + "rewards/margins": 1.0660598278045654, + "rewards/rejected": 0.6557251214981079, + "step": 9238 + }, + { + "epoch": 0.54, + "learning_rate": 4.6337338246786115e-08, + "logits/chosen": -1.926496148109436, + "logits/rejected": -2.000483274459839, + "logps/chosen": -288.4775085449219, + "logps/rejected": -440.10992431640625, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6280701160430908, + "rewards/margins": 4.018231391906738, + "rewards/rejected": -2.3901612758636475, + "step": 9239 + }, + { + "epoch": 0.54, + "learning_rate": 4.632793960670012e-08, + "logits/chosen": -1.702501654624939, + "logits/rejected": -1.698337197303772, + "logps/chosen": -163.3158416748047, + "logps/rejected": -297.58843994140625, + "loss": 0.3939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.157893419265747, + "rewards/margins": 0.02773892879486084, + "rewards/rejected": 1.1301544904708862, + "step": 9240 + }, + { + "epoch": 0.54, + "learning_rate": 4.631854109706369e-08, + "logits/chosen": -1.9467942714691162, + "logits/rejected": -1.9456971883773804, + "logps/chosen": -23.32374382019043, + "logps/rejected": -127.98798370361328, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6515203714370728, + "rewards/margins": 1.2336516380310059, + "rewards/rejected": -0.5821312069892883, + "step": 9241 + }, + { + "epoch": 0.54, + "learning_rate": 4.630914271821067e-08, + "logits/chosen": -1.9624888896942139, + "logits/rejected": -1.9437141418457031, + "logps/chosen": -228.35501098632812, + "logps/rejected": -531.6998901367188, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7894959449768066, + "rewards/margins": 9.056488037109375, + "rewards/rejected": -6.266992092132568, + "step": 9242 + }, + { + "epoch": 0.54, + "learning_rate": 4.629974447047496e-08, + "logits/chosen": -1.9126029014587402, + "logits/rejected": -1.894242763519287, + "logps/chosen": -298.7422180175781, + "logps/rejected": -428.66998291015625, + "loss": 0.1001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.793853759765625, + "rewards/margins": 1.5986510515213013, + "rewards/rejected": 1.1952027082443237, + "step": 9243 + }, + { + "epoch": 0.54, + "learning_rate": 4.629034635419043e-08, + "logits/chosen": -2.052607536315918, + "logits/rejected": -2.0499777793884277, + "logps/chosen": -57.080299377441406, + "logps/rejected": -191.45697021484375, + "loss": 0.2226, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2271416187286377, + "rewards/margins": 1.3120307922363281, + "rewards/rejected": -0.08488922566175461, + "step": 9244 + }, + { + "epoch": 0.54, + "learning_rate": 4.6280948369690944e-08, + "logits/chosen": -1.899882435798645, + "logits/rejected": -1.9020553827285767, + "logps/chosen": -0.001180559629574418, + "logps/rejected": -134.34573364257812, + "loss": 0.5903, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3150247216108255e-05, + "rewards/margins": 0.3648419678211212, + "rewards/rejected": -0.36486512422561646, + "step": 9245 + }, + { + "epoch": 0.54, + "learning_rate": 4.627155051731035e-08, + "logits/chosen": -2.0364489555358887, + "logits/rejected": -2.025097608566284, + "logps/chosen": -0.9413030743598938, + "logps/rejected": -266.01361083984375, + "loss": 0.2799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009579867124557495, + "rewards/margins": 3.9304113388061523, + "rewards/rejected": -3.9208314418792725, + "step": 9246 + }, + { + "epoch": 0.54, + "learning_rate": 4.626215279738254e-08, + "logits/chosen": -2.218048334121704, + "logits/rejected": -2.2020151615142822, + "logps/chosen": -0.0005749563570134342, + "logps/rejected": -345.55633544921875, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755759957537521e-06, + "rewards/margins": 5.5509467124938965, + "rewards/rejected": -5.550952434539795, + "step": 9247 + }, + { + "epoch": 0.54, + "learning_rate": 4.625275521024132e-08, + "logits/chosen": -1.856562614440918, + "logits/rejected": -1.8491147756576538, + "logps/chosen": -174.80841064453125, + "logps/rejected": -386.4587097167969, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8267501592636108, + "rewards/margins": 2.2448959350585938, + "rewards/rejected": -0.4181457459926605, + "step": 9248 + }, + { + "epoch": 0.54, + "learning_rate": 4.6243357756220597e-08, + "logits/chosen": -2.0695924758911133, + "logits/rejected": -2.0720713138580322, + "logps/chosen": -13.862299919128418, + "logps/rejected": -153.44406127929688, + "loss": 0.3299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29548197984695435, + "rewards/margins": 1.8917741775512695, + "rewards/rejected": -1.5962921380996704, + "step": 9249 + }, + { + "epoch": 0.54, + "learning_rate": 4.6233960435654154e-08, + "logits/chosen": -1.808828592300415, + "logits/rejected": -1.8197110891342163, + "logps/chosen": -141.1735076904297, + "logps/rejected": -441.506103515625, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1661529541015625, + "rewards/margins": 4.169100761413574, + "rewards/rejected": -2.002948045730591, + "step": 9250 + }, + { + "epoch": 0.54, + "learning_rate": 4.6224563248875884e-08, + "logits/chosen": -2.106900930404663, + "logits/rejected": -2.1066575050354004, + "logps/chosen": -0.012751943431794643, + "logps/rejected": -276.46722412109375, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039077913970686495, + "rewards/margins": 4.719851493835449, + "rewards/rejected": -4.720242500305176, + "step": 9251 + }, + { + "epoch": 0.54, + "learning_rate": 4.621516619621957e-08, + "logits/chosen": -1.8542208671569824, + "logits/rejected": -1.8812745809555054, + "logps/chosen": -178.39422607421875, + "logps/rejected": -346.4162902832031, + "loss": 0.1008, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3621567487716675, + "rewards/margins": 2.4157791137695312, + "rewards/rejected": -1.0536224842071533, + "step": 9252 + }, + { + "epoch": 0.54, + "learning_rate": 4.620576927801909e-08, + "logits/chosen": -1.8190754652023315, + "logits/rejected": -1.819443702697754, + "logps/chosen": -175.35250854492188, + "logps/rejected": -233.90785217285156, + "loss": 0.1733, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6306138038635254, + "rewards/margins": 0.9297715425491333, + "rewards/rejected": 1.700842261314392, + "step": 9253 + }, + { + "epoch": 0.54, + "learning_rate": 4.619637249460822e-08, + "logits/chosen": -1.8786826133728027, + "logits/rejected": -1.8883659839630127, + "logps/chosen": -0.0004100044025108218, + "logps/rejected": -182.42483520507812, + "loss": 0.3857, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.643395600898657e-05, + "rewards/margins": 2.482992172241211, + "rewards/rejected": -2.483018636703491, + "step": 9254 + }, + { + "epoch": 0.54, + "learning_rate": 4.618697584632083e-08, + "logits/chosen": -1.8151382207870483, + "logits/rejected": -1.7942860126495361, + "logps/chosen": -199.84030151367188, + "logps/rejected": -456.880126953125, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7805298566818237, + "rewards/margins": 3.7595551013946533, + "rewards/rejected": -1.9790252447128296, + "step": 9255 + }, + { + "epoch": 0.54, + "learning_rate": 4.617757933349068e-08, + "logits/chosen": -2.0768823623657227, + "logits/rejected": -2.082378387451172, + "logps/chosen": -208.5541229248047, + "logps/rejected": -442.0454406738281, + "loss": 0.0654, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5721482038497925, + "rewards/margins": 2.5563340187072754, + "rewards/rejected": -0.9841858148574829, + "step": 9256 + }, + { + "epoch": 0.54, + "learning_rate": 4.616818295645162e-08, + "logits/chosen": -1.9778289794921875, + "logits/rejected": -1.9566361904144287, + "logps/chosen": -172.0451202392578, + "logps/rejected": -310.6070861816406, + "loss": 0.1093, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6733856201171875, + "rewards/margins": 2.4983458518981934, + "rewards/rejected": -0.8249603509902954, + "step": 9257 + }, + { + "epoch": 0.54, + "learning_rate": 4.6158786715537456e-08, + "logits/chosen": -1.9701238870620728, + "logits/rejected": -1.9688019752502441, + "logps/chosen": -26.033885955810547, + "logps/rejected": -297.97052001953125, + "loss": 0.2254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2841871380805969, + "rewards/margins": 4.108582973480225, + "rewards/rejected": -3.8243958950042725, + "step": 9258 + }, + { + "epoch": 0.54, + "learning_rate": 4.6149390611081964e-08, + "logits/chosen": -1.5899062156677246, + "logits/rejected": -1.5882760286331177, + "logps/chosen": -89.18406677246094, + "logps/rejected": -213.38375854492188, + "loss": 0.5415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04291076585650444, + "rewards/margins": 0.3668609857559204, + "rewards/rejected": -0.40977174043655396, + "step": 9259 + }, + { + "epoch": 0.54, + "learning_rate": 4.613999464341897e-08, + "logits/chosen": -2.0586156845092773, + "logits/rejected": -2.0486738681793213, + "logps/chosen": -6.626373291015625, + "logps/rejected": -121.09567260742188, + "loss": 0.5341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1873864233493805, + "rewards/margins": 0.5235055088996887, + "rewards/rejected": -0.3361190855503082, + "step": 9260 + }, + { + "epoch": 0.54, + "learning_rate": 4.6130598812882235e-08, + "logits/chosen": -1.9718515872955322, + "logits/rejected": -1.9715663194656372, + "logps/chosen": -157.02984619140625, + "logps/rejected": -227.37216186523438, + "loss": 0.2321, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2083908319473267, + "rewards/margins": 0.7944977283477783, + "rewards/rejected": 0.4138931334018707, + "step": 9261 + }, + { + "epoch": 0.54, + "learning_rate": 4.6121203119805566e-08, + "logits/chosen": -2.022571325302124, + "logits/rejected": -2.016237258911133, + "logps/chosen": -0.00393013097345829, + "logps/rejected": -125.19369506835938, + "loss": 0.4133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010110675793839619, + "rewards/margins": 1.686819076538086, + "rewards/rejected": -1.686920166015625, + "step": 9262 + }, + { + "epoch": 0.54, + "learning_rate": 4.611180756452273e-08, + "logits/chosen": -1.7923839092254639, + "logits/rejected": -1.7822569608688354, + "logps/chosen": -0.22920969128608704, + "logps/rejected": -230.658203125, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014484049752354622, + "rewards/margins": 3.1027612686157227, + "rewards/rejected": -3.1172454357147217, + "step": 9263 + }, + { + "epoch": 0.54, + "learning_rate": 4.610241214736751e-08, + "logits/chosen": -2.0224432945251465, + "logits/rejected": -1.990638256072998, + "logps/chosen": -241.69970703125, + "logps/rejected": -544.8749389648438, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.451037645339966, + "rewards/margins": 4.372937202453613, + "rewards/rejected": -1.921899437904358, + "step": 9264 + }, + { + "epoch": 0.54, + "learning_rate": 4.609301686867367e-08, + "logits/chosen": -2.0659265518188477, + "logits/rejected": -2.0614709854125977, + "logps/chosen": -28.62750816345215, + "logps/rejected": -153.59750366210938, + "loss": 0.3404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9076005816459656, + "rewards/margins": 0.5767046213150024, + "rewards/rejected": 0.3308959901332855, + "step": 9265 + }, + { + "epoch": 0.54, + "learning_rate": 4.608362172877499e-08, + "logits/chosen": -1.938880443572998, + "logits/rejected": -1.9463999271392822, + "logps/chosen": -98.18091583251953, + "logps/rejected": -191.8649444580078, + "loss": 0.4405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37966156005859375, + "rewards/margins": 0.4637390077114105, + "rewards/rejected": -0.08407745510339737, + "step": 9266 + }, + { + "epoch": 0.54, + "learning_rate": 4.6074226728005216e-08, + "logits/chosen": -1.9088752269744873, + "logits/rejected": -1.8983932733535767, + "logps/chosen": -222.12060546875, + "logps/rejected": -228.48297119140625, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.232452392578125, + "rewards/margins": 1.925994873046875, + "rewards/rejected": 1.30645751953125, + "step": 9267 + }, + { + "epoch": 0.54, + "learning_rate": 4.6064831866698126e-08, + "logits/chosen": -2.139214277267456, + "logits/rejected": -2.131160020828247, + "logps/chosen": -9.491130828857422, + "logps/rejected": -123.71678924560547, + "loss": 0.419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013908482156693935, + "rewards/margins": 1.3911665678024292, + "rewards/rejected": -1.4050750732421875, + "step": 9268 + }, + { + "epoch": 0.54, + "learning_rate": 4.6055437145187424e-08, + "logits/chosen": -1.7756494283676147, + "logits/rejected": -1.7853854894638062, + "logps/chosen": -149.3041229248047, + "logps/rejected": -178.68258666992188, + "loss": 0.1489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4904892444610596, + "rewards/margins": 1.400044322013855, + "rewards/rejected": 0.09044494479894638, + "step": 9269 + }, + { + "epoch": 0.54, + "learning_rate": 4.604604256380693e-08, + "logits/chosen": -1.9171618223190308, + "logits/rejected": -1.921417474746704, + "logps/chosen": -52.57942199707031, + "logps/rejected": -231.7537384033203, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31754493713378906, + "rewards/margins": 1.7514301538467407, + "rewards/rejected": -1.4338852167129517, + "step": 9270 + }, + { + "epoch": 0.54, + "learning_rate": 4.603664812289032e-08, + "logits/chosen": -2.0775246620178223, + "logits/rejected": -2.0778326988220215, + "logps/chosen": -45.609230041503906, + "logps/rejected": -132.15097045898438, + "loss": 0.745, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00024108887009788305, + "rewards/margins": -0.2922714054584503, + "rewards/rejected": 0.2925125062465668, + "step": 9271 + }, + { + "epoch": 0.54, + "learning_rate": 4.602725382277137e-08, + "logits/chosen": -1.7171778678894043, + "logits/rejected": -1.7231031656265259, + "logps/chosen": -6.378639221191406, + "logps/rejected": -103.67958068847656, + "loss": 0.5099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008482694625854492, + "rewards/margins": 0.8682224154472351, + "rewards/rejected": -0.8597397208213806, + "step": 9272 + }, + { + "epoch": 0.54, + "learning_rate": 4.6017859663783775e-08, + "logits/chosen": -1.8487063646316528, + "logits/rejected": -1.7371951341629028, + "logps/chosen": -254.1168212890625, + "logps/rejected": -464.11822509765625, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2093505859375, + "rewards/margins": 3.279064893722534, + "rewards/rejected": -0.06971435993909836, + "step": 9273 + }, + { + "epoch": 0.54, + "learning_rate": 4.600846564626132e-08, + "logits/chosen": -1.8641725778579712, + "logits/rejected": -1.8571670055389404, + "logps/chosen": -47.13462448120117, + "logps/rejected": -176.4725341796875, + "loss": 0.2644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3297298550605774, + "rewards/margins": 1.704122543334961, + "rewards/rejected": -1.3743927478790283, + "step": 9274 + }, + { + "epoch": 0.54, + "learning_rate": 4.599907177053765e-08, + "logits/chosen": -1.889198899269104, + "logits/rejected": -1.8885079622268677, + "logps/chosen": -43.271949768066406, + "logps/rejected": -152.21334838867188, + "loss": 0.4535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03117675893008709, + "rewards/margins": 1.3622193336486816, + "rewards/rejected": -1.3310425281524658, + "step": 9275 + }, + { + "epoch": 0.54, + "learning_rate": 4.598967803694656e-08, + "logits/chosen": -1.9689651727676392, + "logits/rejected": -2.000563383102417, + "logps/chosen": -161.349365234375, + "logps/rejected": -344.459228515625, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.211669921875, + "rewards/margins": 3.8401002883911133, + "rewards/rejected": -1.6284302473068237, + "step": 9276 + }, + { + "epoch": 0.54, + "learning_rate": 4.598028444582168e-08, + "logits/chosen": -1.8867273330688477, + "logits/rejected": -1.8817293643951416, + "logps/chosen": -58.10924530029297, + "logps/rejected": -236.7935791015625, + "loss": 0.2243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7936515808105469, + "rewards/margins": 2.555413246154785, + "rewards/rejected": -1.7617615461349487, + "step": 9277 + }, + { + "epoch": 0.54, + "learning_rate": 4.5970890997496776e-08, + "logits/chosen": -1.8451637029647827, + "logits/rejected": -1.8333839178085327, + "logps/chosen": -137.5827178955078, + "logps/rejected": -198.0580291748047, + "loss": 0.3513, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.249424695968628, + "rewards/margins": 0.10274338722229004, + "rewards/rejected": 2.146681308746338, + "step": 9278 + }, + { + "epoch": 0.54, + "learning_rate": 4.5961497692305545e-08, + "logits/chosen": -1.8786122798919678, + "logits/rejected": -1.8613008260726929, + "logps/chosen": -170.85305786132812, + "logps/rejected": -443.2167053222656, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3592255115509033, + "rewards/margins": 4.306634426116943, + "rewards/rejected": -1.9474090337753296, + "step": 9279 + }, + { + "epoch": 0.54, + "learning_rate": 4.595210453058166e-08, + "logits/chosen": -1.6935664415359497, + "logits/rejected": -1.6594862937927246, + "logps/chosen": -275.1263732910156, + "logps/rejected": -420.6066589355469, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2354583740234375, + "rewards/margins": 3.7163758277893066, + "rewards/rejected": -0.480917364358902, + "step": 9280 + }, + { + "epoch": 0.54, + "learning_rate": 4.594271151265883e-08, + "logits/chosen": -1.6569786071777344, + "logits/rejected": -1.6523420810699463, + "logps/chosen": -105.00593566894531, + "logps/rejected": -358.81829833984375, + "loss": 0.193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5784576535224915, + "rewards/margins": 2.52573561668396, + "rewards/rejected": -1.9472779035568237, + "step": 9281 + }, + { + "epoch": 0.54, + "learning_rate": 4.593331863887072e-08, + "logits/chosen": -1.994325876235962, + "logits/rejected": -1.989245057106018, + "logps/chosen": -44.03662872314453, + "logps/rejected": -157.83621215820312, + "loss": 0.4613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33807680010795593, + "rewards/margins": 0.3458191156387329, + "rewards/rejected": -0.0077423094771802425, + "step": 9282 + }, + { + "epoch": 0.54, + "learning_rate": 4.592392590955104e-08, + "logits/chosen": -1.9229542016983032, + "logits/rejected": -1.9155081510543823, + "logps/chosen": -97.13946533203125, + "logps/rejected": -385.6145935058594, + "loss": 0.2315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2690177857875824, + "rewards/margins": 8.44469928741455, + "rewards/rejected": -8.175681114196777, + "step": 9283 + }, + { + "epoch": 0.54, + "learning_rate": 4.591453332503344e-08, + "logits/chosen": -2.0852181911468506, + "logits/rejected": -2.0796711444854736, + "logps/chosen": -8.128406524658203, + "logps/rejected": -150.31808471679688, + "loss": 0.4966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06978435814380646, + "rewards/margins": 0.9492796063423157, + "rewards/rejected": -0.8794952630996704, + "step": 9284 + }, + { + "epoch": 0.54, + "learning_rate": 4.5905140885651606e-08, + "logits/chosen": -1.723636269569397, + "logits/rejected": -1.7092050313949585, + "logps/chosen": -274.6783447265625, + "logps/rejected": -390.9605712890625, + "loss": 0.5088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24044494330883026, + "rewards/margins": 0.5667327642440796, + "rewards/rejected": -0.3262878358364105, + "step": 9285 + }, + { + "epoch": 0.54, + "learning_rate": 4.589574859173919e-08, + "logits/chosen": -1.9572234153747559, + "logits/rejected": -1.949008822441101, + "logps/chosen": -62.87549591064453, + "logps/rejected": -343.416259765625, + "loss": 0.2659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6988670229911804, + "rewards/margins": 1.3198661804199219, + "rewards/rejected": -0.6209991574287415, + "step": 9286 + }, + { + "epoch": 0.54, + "learning_rate": 4.588635644362988e-08, + "logits/chosen": -2.140397310256958, + "logits/rejected": -2.1392688751220703, + "logps/chosen": -71.00897979736328, + "logps/rejected": -280.3882751464844, + "loss": 0.1893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.68685382604599, + "rewards/margins": 3.1976494789123535, + "rewards/rejected": -2.5107955932617188, + "step": 9287 + }, + { + "epoch": 0.54, + "learning_rate": 4.587696444165728e-08, + "logits/chosen": -1.938748836517334, + "logits/rejected": -1.936571478843689, + "logps/chosen": -14.30099105834961, + "logps/rejected": -142.35867309570312, + "loss": 0.5609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2020677626132965, + "rewards/margins": 0.3774217665195465, + "rewards/rejected": -0.17535400390625, + "step": 9288 + }, + { + "epoch": 0.54, + "learning_rate": 4.58675725861551e-08, + "logits/chosen": -1.914876937866211, + "logits/rejected": -1.9220913648605347, + "logps/chosen": -227.09591674804688, + "logps/rejected": -363.31280517578125, + "loss": 0.0514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.506704807281494, + "rewards/margins": 2.669146776199341, + "rewards/rejected": -0.16244201362133026, + "step": 9289 + }, + { + "epoch": 0.54, + "learning_rate": 4.5858180877456925e-08, + "logits/chosen": -2.1125521659851074, + "logits/rejected": -2.1239092350006104, + "logps/chosen": -163.2682342529297, + "logps/rejected": -358.40924072265625, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.02695631980896, + "rewards/margins": 3.6787872314453125, + "rewards/rejected": -1.651831030845642, + "step": 9290 + }, + { + "epoch": 0.54, + "learning_rate": 4.584878931589646e-08, + "logits/chosen": -2.0212185382843018, + "logits/rejected": -2.0198566913604736, + "logps/chosen": -3.778036117553711, + "logps/rejected": -98.09249877929688, + "loss": 0.6278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013062954531051219, + "rewards/margins": 0.2521093785762787, + "rewards/rejected": -0.25224000215530396, + "step": 9291 + }, + { + "epoch": 0.54, + "learning_rate": 4.583939790180727e-08, + "logits/chosen": -1.8068522214889526, + "logits/rejected": -1.7981715202331543, + "logps/chosen": -154.7933349609375, + "logps/rejected": -382.0972900390625, + "loss": 0.067, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7988083362579346, + "rewards/margins": 2.8482985496520996, + "rewards/rejected": -1.0494903326034546, + "step": 9292 + }, + { + "epoch": 0.54, + "learning_rate": 4.583000663552305e-08, + "logits/chosen": -1.8478161096572876, + "logits/rejected": -1.833893060684204, + "logps/chosen": -39.349647521972656, + "logps/rejected": -162.6631622314453, + "loss": 0.2112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4411598443984985, + "rewards/margins": 1.4413551092147827, + "rewards/rejected": -0.00019531250291038305, + "step": 9293 + }, + { + "epoch": 0.54, + "learning_rate": 4.582061551737736e-08, + "logits/chosen": -2.1142988204956055, + "logits/rejected": -2.1032731533050537, + "logps/chosen": -2.4804675579071045, + "logps/rejected": -119.86182403564453, + "loss": 0.3936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036603283137083054, + "rewards/margins": 1.489504098892212, + "rewards/rejected": -1.452900767326355, + "step": 9294 + }, + { + "epoch": 0.54, + "learning_rate": 4.581122454770389e-08, + "logits/chosen": -1.8495523929595947, + "logits/rejected": -1.8429540395736694, + "logps/chosen": -189.4609375, + "logps/rejected": -256.411376953125, + "loss": 0.4423, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.84918212890625, + "rewards/margins": -0.01967775821685791, + "rewards/rejected": 0.8688598871231079, + "step": 9295 + }, + { + "epoch": 0.54, + "learning_rate": 4.580183372683618e-08, + "logits/chosen": -1.950848937034607, + "logits/rejected": -1.9020051956176758, + "logps/chosen": -173.42330932617188, + "logps/rejected": -260.4031066894531, + "loss": 0.1442, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.772760033607483, + "rewards/margins": 2.003875732421875, + "rewards/rejected": -0.23111572861671448, + "step": 9296 + }, + { + "epoch": 0.54, + "learning_rate": 4.57924430551079e-08, + "logits/chosen": -1.8320859670639038, + "logits/rejected": -1.8309203386306763, + "logps/chosen": -186.18594360351562, + "logps/rejected": -361.8804931640625, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5423157215118408, + "rewards/margins": 2.44866943359375, + "rewards/rejected": -0.906353771686554, + "step": 9297 + }, + { + "epoch": 0.54, + "learning_rate": 4.578305253285259e-08, + "logits/chosen": -1.8686143159866333, + "logits/rejected": -1.8663259744644165, + "logps/chosen": -15.59639835357666, + "logps/rejected": -77.25334167480469, + "loss": 0.4538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5470755696296692, + "rewards/margins": 0.4993750751018524, + "rewards/rejected": 0.04770050197839737, + "step": 9298 + }, + { + "epoch": 0.54, + "learning_rate": 4.5773662160403895e-08, + "logits/chosen": -2.090510368347168, + "logits/rejected": -2.085287570953369, + "logps/chosen": -74.2679214477539, + "logps/rejected": -93.53953552246094, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.90853351354599, + "rewards/margins": 0.8249107599258423, + "rewards/rejected": 0.08362274616956711, + "step": 9299 + }, + { + "epoch": 0.54, + "learning_rate": 4.57642719380954e-08, + "logits/chosen": -1.7728281021118164, + "logits/rejected": -1.7109752893447876, + "logps/chosen": -225.1976318359375, + "logps/rejected": -629.3551025390625, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4077117443084717, + "rewards/margins": 4.2780609130859375, + "rewards/rejected": -1.8703491687774658, + "step": 9300 + }, + { + "epoch": 0.54, + "learning_rate": 4.5754881866260675e-08, + "logits/chosen": -2.0621488094329834, + "logits/rejected": -2.0555999279022217, + "logps/chosen": -3.288073778152466, + "logps/rejected": -231.50933837890625, + "loss": 0.359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0476078987121582, + "rewards/margins": 5.368578910827637, + "rewards/rejected": -5.416186809539795, + "step": 9301 + }, + { + "epoch": 0.54, + "learning_rate": 4.574549194523332e-08, + "logits/chosen": -1.9040114879608154, + "logits/rejected": -1.9108614921569824, + "logps/chosen": -0.694035530090332, + "logps/rejected": -155.5527801513672, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07133438438177109, + "rewards/margins": 3.594787120819092, + "rewards/rejected": -3.5234527587890625, + "step": 9302 + }, + { + "epoch": 0.54, + "learning_rate": 4.57361021753469e-08, + "logits/chosen": -1.8205636739730835, + "logits/rejected": -1.818071722984314, + "logps/chosen": -25.581199645996094, + "logps/rejected": -194.6653289794922, + "loss": 0.2743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38752347230911255, + "rewards/margins": 1.6875007152557373, + "rewards/rejected": -1.29997718334198, + "step": 9303 + }, + { + "epoch": 0.54, + "learning_rate": 4.572671255693499e-08, + "logits/chosen": -2.1551156044006348, + "logits/rejected": -2.148698091506958, + "logps/chosen": -9.023939492180943e-05, + "logps/rejected": -74.6390151977539, + "loss": 0.6836, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5138386970647844e-06, + "rewards/margins": -0.014559948816895485, + "rewards/rejected": 0.014561462216079235, + "step": 9304 + }, + { + "epoch": 0.54, + "learning_rate": 4.571732309033115e-08, + "logits/chosen": -2.019052743911743, + "logits/rejected": -2.0176498889923096, + "logps/chosen": -31.817298889160156, + "logps/rejected": -117.79718017578125, + "loss": 0.3759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2163536548614502, + "rewards/margins": 0.3768661618232727, + "rewards/rejected": 0.8394874930381775, + "step": 9305 + }, + { + "epoch": 0.54, + "learning_rate": 4.570793377586895e-08, + "logits/chosen": -1.9940465688705444, + "logits/rejected": -1.9856761693954468, + "logps/chosen": -63.627323150634766, + "logps/rejected": -218.65231323242188, + "loss": 0.4448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007596206851303577, + "rewards/margins": 1.3799769878387451, + "rewards/rejected": -1.3875732421875, + "step": 9306 + }, + { + "epoch": 0.54, + "learning_rate": 4.5698544613881925e-08, + "logits/chosen": -2.053846597671509, + "logits/rejected": -2.055722951889038, + "logps/chosen": -25.684724807739258, + "logps/rejected": -79.94507598876953, + "loss": 0.5255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10559406131505966, + "rewards/margins": 0.6890435814857483, + "rewards/rejected": -0.5834495425224304, + "step": 9307 + }, + { + "epoch": 0.54, + "learning_rate": 4.568915560470364e-08, + "logits/chosen": -1.9468669891357422, + "logits/rejected": -1.9675761461257935, + "logps/chosen": -182.19857788085938, + "logps/rejected": -402.6463623046875, + "loss": 0.0782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.999963402748108, + "rewards/margins": 2.102160692214966, + "rewards/rejected": -0.10219726711511612, + "step": 9308 + }, + { + "epoch": 0.54, + "learning_rate": 4.5679766748667634e-08, + "logits/chosen": -1.8431236743927002, + "logits/rejected": -1.8429131507873535, + "logps/chosen": -0.00031455716816708446, + "logps/rejected": -186.58692932128906, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.314715624786913e-06, + "rewards/margins": 4.760246753692627, + "rewards/rejected": -4.760251045227051, + "step": 9309 + }, + { + "epoch": 0.54, + "learning_rate": 4.567037804610746e-08, + "logits/chosen": -1.8752381801605225, + "logits/rejected": -1.8791249990463257, + "logps/chosen": -82.98860168457031, + "logps/rejected": -250.39671325683594, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15192566812038422, + "rewards/margins": 3.6539857387542725, + "rewards/rejected": -3.8059113025665283, + "step": 9310 + }, + { + "epoch": 0.54, + "learning_rate": 4.5660989497356605e-08, + "logits/chosen": -1.9265265464782715, + "logits/rejected": -1.9281940460205078, + "logps/chosen": -49.440086364746094, + "logps/rejected": -213.5926513671875, + "loss": 0.4579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3996681272983551, + "rewards/margins": 0.4828544855117798, + "rewards/rejected": -0.08318634331226349, + "step": 9311 + }, + { + "epoch": 0.54, + "learning_rate": 4.565160110274866e-08, + "logits/chosen": -1.812456488609314, + "logits/rejected": -1.797589659690857, + "logps/chosen": -187.5906524658203, + "logps/rejected": -323.67095947265625, + "loss": 0.1742, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.076887607574463, + "rewards/margins": 1.269953966140747, + "rewards/rejected": 0.806933581829071, + "step": 9312 + }, + { + "epoch": 0.54, + "learning_rate": 4.5642212862617084e-08, + "logits/chosen": -1.985610008239746, + "logits/rejected": -1.971407413482666, + "logps/chosen": -14.46748161315918, + "logps/rejected": -341.68328857421875, + "loss": 0.2693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2630006968975067, + "rewards/margins": 4.147976875305176, + "rewards/rejected": -3.8849761486053467, + "step": 9313 + }, + { + "epoch": 0.54, + "learning_rate": 4.563282477729546e-08, + "logits/chosen": -1.756848931312561, + "logits/rejected": -1.7573997974395752, + "logps/chosen": -247.10476684570312, + "logps/rejected": -336.3388671875, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0758607387542725, + "rewards/margins": 2.803637742996216, + "rewards/rejected": 0.2722229063510895, + "step": 9314 + }, + { + "epoch": 0.54, + "learning_rate": 4.5623436847117225e-08, + "logits/chosen": -2.03611159324646, + "logits/rejected": -2.031644105911255, + "logps/chosen": -3.9934689993970096e-05, + "logps/rejected": -125.97215270996094, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.867664066907309e-07, + "rewards/margins": 2.7573952674865723, + "rewards/rejected": -2.7573959827423096, + "step": 9315 + }, + { + "epoch": 0.54, + "learning_rate": 4.5614049072415957e-08, + "logits/chosen": -1.9565168619155884, + "logits/rejected": -1.9495375156402588, + "logps/chosen": -0.0016347593627870083, + "logps/rejected": -174.4652557373047, + "loss": 0.3969, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.060348171857186e-05, + "rewards/margins": 2.2054946422576904, + "rewards/rejected": -2.2054641246795654, + "step": 9316 + }, + { + "epoch": 0.54, + "learning_rate": 4.560466145352508e-08, + "logits/chosen": -1.9676144123077393, + "logits/rejected": -1.966264009475708, + "logps/chosen": -58.796810150146484, + "logps/rejected": -209.37423706054688, + "loss": 0.4484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14210473001003265, + "rewards/margins": 0.9589352011680603, + "rewards/rejected": -0.8168304562568665, + "step": 9317 + }, + { + "epoch": 0.54, + "learning_rate": 4.559527399077815e-08, + "logits/chosen": -2.0091073513031006, + "logits/rejected": -1.9771286249160767, + "logps/chosen": -163.49481201171875, + "logps/rejected": -360.00946044921875, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4942444562911987, + "rewards/margins": 0.5516235828399658, + "rewards/rejected": 0.9426208734512329, + "step": 9318 + }, + { + "epoch": 0.54, + "learning_rate": 4.558588668450863e-08, + "logits/chosen": -1.8094148635864258, + "logits/rejected": -1.7943620681762695, + "logps/chosen": -209.54815673828125, + "logps/rejected": -367.9499206542969, + "loss": 0.4796, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.4292114973068237, + "rewards/margins": -0.3740386962890625, + "rewards/rejected": 1.8032501935958862, + "step": 9319 + }, + { + "epoch": 0.54, + "learning_rate": 4.5576499535050014e-08, + "logits/chosen": -1.8551698923110962, + "logits/rejected": -1.9137574434280396, + "logps/chosen": -298.5982360839844, + "logps/rejected": -366.38336181640625, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9979705810546875, + "rewards/margins": 2.1936097145080566, + "rewards/rejected": -0.19563904404640198, + "step": 9320 + }, + { + "epoch": 0.54, + "learning_rate": 4.556711254273577e-08, + "logits/chosen": -1.9745286703109741, + "logits/rejected": -1.976349115371704, + "logps/chosen": -173.58566284179688, + "logps/rejected": -237.34852600097656, + "loss": 0.3673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.990155041217804, + "rewards/margins": 0.616363525390625, + "rewards/rejected": 0.37379151582717896, + "step": 9321 + }, + { + "epoch": 0.54, + "learning_rate": 4.5557725707899367e-08, + "logits/chosen": -1.8728911876678467, + "logits/rejected": -1.8619521856307983, + "logps/chosen": -9.34813117980957, + "logps/rejected": -134.77484130859375, + "loss": 0.459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0428503043949604, + "rewards/margins": 1.5637372732162476, + "rewards/rejected": -1.520887017250061, + "step": 9322 + }, + { + "epoch": 0.54, + "learning_rate": 4.554833903087428e-08, + "logits/chosen": -2.0358657836914062, + "logits/rejected": -2.0246825218200684, + "logps/chosen": -215.9332275390625, + "logps/rejected": -374.4133605957031, + "loss": 0.1613, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9124329090118408, + "rewards/margins": 1.1884307861328125, + "rewards/rejected": 0.7240020632743835, + "step": 9323 + }, + { + "epoch": 0.54, + "learning_rate": 4.553895251199397e-08, + "logits/chosen": -2.1014840602874756, + "logits/rejected": -2.0967020988464355, + "logps/chosen": -12.772228240966797, + "logps/rejected": -100.25749969482422, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27162906527519226, + "rewards/margins": 2.709883689880371, + "rewards/rejected": -2.4382545948028564, + "step": 9324 + }, + { + "epoch": 0.54, + "learning_rate": 4.552956615159188e-08, + "logits/chosen": -2.080692768096924, + "logits/rejected": -2.0762851238250732, + "logps/chosen": -48.0657844543457, + "logps/rejected": -257.3987121582031, + "loss": 0.6221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6298977136611938, + "rewards/margins": 1.587623953819275, + "rewards/rejected": -2.2175216674804688, + "step": 9325 + }, + { + "epoch": 0.54, + "learning_rate": 4.552017995000147e-08, + "logits/chosen": -1.8146708011627197, + "logits/rejected": -1.7973415851593018, + "logps/chosen": -45.59568405151367, + "logps/rejected": -179.31834411621094, + "loss": 0.8398, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.27881813049316406, + "rewards/margins": -0.5165241360664368, + "rewards/rejected": 0.2377059906721115, + "step": 9326 + }, + { + "epoch": 0.54, + "learning_rate": 4.551079390755619e-08, + "logits/chosen": -2.1307666301727295, + "logits/rejected": -2.1400105953216553, + "logps/chosen": -64.64955139160156, + "logps/rejected": -314.42950439453125, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27669450640678406, + "rewards/margins": 3.8647024631500244, + "rewards/rejected": -4.141396999359131, + "step": 9327 + }, + { + "epoch": 0.54, + "learning_rate": 4.5501408024589455e-08, + "logits/chosen": -1.830458402633667, + "logits/rejected": -1.8260622024536133, + "logps/chosen": -99.9558334350586, + "logps/rejected": -344.0792236328125, + "loss": 0.4519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44126206636428833, + "rewards/margins": 3.6486704349517822, + "rewards/rejected": -4.089932441711426, + "step": 9328 + }, + { + "epoch": 0.54, + "learning_rate": 4.549202230143472e-08, + "logits/chosen": -1.8820431232452393, + "logits/rejected": -1.8825591802597046, + "logps/chosen": -45.131370544433594, + "logps/rejected": -135.7733154296875, + "loss": 0.4469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28893738985061646, + "rewards/margins": 0.7626953125, + "rewards/rejected": -0.47375795245170593, + "step": 9329 + }, + { + "epoch": 0.54, + "learning_rate": 4.54826367384254e-08, + "logits/chosen": -1.9894821643829346, + "logits/rejected": -1.970260500907898, + "logps/chosen": -53.46009826660156, + "logps/rejected": -183.45834350585938, + "loss": 0.1823, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6216506958007812, + "rewards/margins": 1.3995559215545654, + "rewards/rejected": 0.22209472954273224, + "step": 9330 + }, + { + "epoch": 0.54, + "learning_rate": 4.547325133589493e-08, + "logits/chosen": -1.859185814857483, + "logits/rejected": -1.8771345615386963, + "logps/chosen": -172.30445861816406, + "logps/rejected": -360.8641357421875, + "loss": 0.0798, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1236374378204346, + "rewards/margins": 3.6716020107269287, + "rewards/rejected": -2.547964572906494, + "step": 9331 + }, + { + "epoch": 0.54, + "learning_rate": 4.546386609417669e-08, + "logits/chosen": -1.9208836555480957, + "logits/rejected": -1.9040967226028442, + "logps/chosen": -15.924749374389648, + "logps/rejected": -133.54794311523438, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04536733776330948, + "rewards/margins": 2.45418643951416, + "rewards/rejected": -2.499553680419922, + "step": 9332 + }, + { + "epoch": 0.54, + "learning_rate": 4.545448101360415e-08, + "logits/chosen": -1.9913763999938965, + "logits/rejected": -1.973441243171692, + "logps/chosen": -170.64366149902344, + "logps/rejected": -307.28021240234375, + "loss": 0.059, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4009475708007812, + "rewards/margins": 2.8200759887695312, + "rewards/rejected": -0.41912841796875, + "step": 9333 + }, + { + "epoch": 0.54, + "learning_rate": 4.544509609451065e-08, + "logits/chosen": -1.9198907613754272, + "logits/rejected": -1.9225407838821411, + "logps/chosen": -446.05657958984375, + "logps/rejected": -736.7684936523438, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.637097120285034, + "rewards/margins": 7.951776504516602, + "rewards/rejected": -5.314679145812988, + "step": 9334 + }, + { + "epoch": 0.54, + "learning_rate": 4.5435711337229645e-08, + "logits/chosen": -1.6533936262130737, + "logits/rejected": -1.657077670097351, + "logps/chosen": -148.38388061523438, + "logps/rejected": -293.09942626953125, + "loss": 0.1923, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0127580165863037, + "rewards/margins": 0.9537278413772583, + "rewards/rejected": 1.0590301752090454, + "step": 9335 + }, + { + "epoch": 0.54, + "learning_rate": 4.542632674209446e-08, + "logits/chosen": -1.8814637660980225, + "logits/rejected": -1.9194976091384888, + "logps/chosen": -240.8973388671875, + "logps/rejected": -453.9418640136719, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0973756313323975, + "rewards/margins": 3.653411865234375, + "rewards/rejected": -1.556036353111267, + "step": 9336 + }, + { + "epoch": 0.54, + "learning_rate": 4.541694230943856e-08, + "logits/chosen": -1.9740532636642456, + "logits/rejected": -1.974866509437561, + "logps/chosen": -15.282635688781738, + "logps/rejected": -153.6303253173828, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.149485781788826, + "rewards/margins": 1.54035484790802, + "rewards/rejected": -1.6898406744003296, + "step": 9337 + }, + { + "epoch": 0.54, + "learning_rate": 4.540755803959526e-08, + "logits/chosen": -1.792418122291565, + "logits/rejected": -1.794022560119629, + "logps/chosen": -184.35781860351562, + "logps/rejected": -273.5231628417969, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8699554204940796, + "rewards/margins": 2.739056348800659, + "rewards/rejected": -0.8691009879112244, + "step": 9338 + }, + { + "epoch": 0.54, + "learning_rate": 4.5398173932897974e-08, + "logits/chosen": -1.9864635467529297, + "logits/rejected": -1.9621055126190186, + "logps/chosen": -183.36956787109375, + "logps/rejected": -326.79998779296875, + "loss": 0.3397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2699249982833862, + "rewards/margins": 0.6020905375480652, + "rewards/rejected": 0.667834460735321, + "step": 9339 + }, + { + "epoch": 0.54, + "learning_rate": 4.5388789989680064e-08, + "logits/chosen": -1.4473928213119507, + "logits/rejected": -1.4379304647445679, + "logps/chosen": -193.13809204101562, + "logps/rejected": -368.189697265625, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2541656494140625, + "rewards/margins": 2.6906464099884033, + "rewards/rejected": -0.43648073077201843, + "step": 9340 + }, + { + "epoch": 0.54, + "learning_rate": 4.537940621027488e-08, + "logits/chosen": -1.850896954536438, + "logits/rejected": -1.8271318674087524, + "logps/chosen": -174.9042205810547, + "logps/rejected": -323.5953063964844, + "loss": 0.1963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.63922119140625, + "rewards/margins": 1.300537109375, + "rewards/rejected": 0.33868408203125, + "step": 9341 + }, + { + "epoch": 0.54, + "learning_rate": 4.53700225950158e-08, + "logits/chosen": -1.9772902727127075, + "logits/rejected": -1.9768866300582886, + "logps/chosen": -0.0024474095553159714, + "logps/rejected": -315.68450927734375, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.303182868170552e-05, + "rewards/margins": 5.36552095413208, + "rewards/rejected": -5.365563869476318, + "step": 9342 + }, + { + "epoch": 0.54, + "learning_rate": 4.536063914423616e-08, + "logits/chosen": -1.907024621963501, + "logits/rejected": -1.9321414232254028, + "logps/chosen": -270.95428466796875, + "logps/rejected": -321.6355895996094, + "loss": 0.0568, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4426910877227783, + "rewards/margins": 3.2462799549102783, + "rewards/rejected": -1.8035888671875, + "step": 9343 + }, + { + "epoch": 0.54, + "learning_rate": 4.5351255858269315e-08, + "logits/chosen": -2.046569347381592, + "logits/rejected": -2.0282716751098633, + "logps/chosen": -286.25433349609375, + "logps/rejected": -363.6666259765625, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3585267066955566, + "rewards/margins": 2.649737596511841, + "rewards/rejected": -0.29121094942092896, + "step": 9344 + }, + { + "epoch": 0.54, + "learning_rate": 4.534187273744859e-08, + "logits/chosen": -1.8999433517456055, + "logits/rejected": -1.8798339366912842, + "logps/chosen": -269.554931640625, + "logps/rejected": -376.0008850097656, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.26739501953125, + "rewards/margins": 2.858376979827881, + "rewards/rejected": -0.5909820795059204, + "step": 9345 + }, + { + "epoch": 0.54, + "learning_rate": 4.533248978210735e-08, + "logits/chosen": -1.8501075506210327, + "logits/rejected": -1.8665053844451904, + "logps/chosen": -149.89187622070312, + "logps/rejected": -246.35595703125, + "loss": 0.3676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5993896722793579, + "rewards/margins": 0.6749405264854431, + "rewards/rejected": -0.07555084675550461, + "step": 9346 + }, + { + "epoch": 0.54, + "learning_rate": 4.5323106992578886e-08, + "logits/chosen": -1.9824246168136597, + "logits/rejected": -1.979529857635498, + "logps/chosen": -12.235422134399414, + "logps/rejected": -188.005615234375, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003104686737060547, + "rewards/margins": 3.536447763442993, + "rewards/rejected": -3.5395524501800537, + "step": 9347 + }, + { + "epoch": 0.54, + "learning_rate": 4.531372436919655e-08, + "logits/chosen": -2.0778586864471436, + "logits/rejected": -2.0801258087158203, + "logps/chosen": -1.3461976051330566, + "logps/rejected": -95.11480712890625, + "loss": 0.3024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19069913029670715, + "rewards/margins": 3.3376619815826416, + "rewards/rejected": -3.146962881088257, + "step": 9348 + }, + { + "epoch": 0.54, + "learning_rate": 4.530434191229364e-08, + "logits/chosen": -1.9942893981933594, + "logits/rejected": -1.9806877374649048, + "logps/chosen": -111.45970153808594, + "logps/rejected": -206.888671875, + "loss": 0.1477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9857650995254517, + "rewards/margins": 2.5733978748321533, + "rewards/rejected": -1.5876327753067017, + "step": 9349 + }, + { + "epoch": 0.54, + "learning_rate": 4.5294959622203476e-08, + "logits/chosen": -1.558142900466919, + "logits/rejected": -1.5808779001235962, + "logps/chosen": -344.85595703125, + "logps/rejected": -495.7122497558594, + "loss": 0.1708, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0548095703125, + "rewards/margins": 0.9769377708435059, + "rewards/rejected": 2.077871799468994, + "step": 9350 + }, + { + "epoch": 0.54, + "learning_rate": 4.5285577499259344e-08, + "logits/chosen": -1.9870654344558716, + "logits/rejected": -1.9646486043930054, + "logps/chosen": -121.1863021850586, + "logps/rejected": -305.77764892578125, + "loss": 0.1238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9662452936172485, + "rewards/margins": 3.6643776893615723, + "rewards/rejected": -2.698132276535034, + "step": 9351 + }, + { + "epoch": 0.54, + "learning_rate": 4.52761955437946e-08, + "logits/chosen": -1.7426819801330566, + "logits/rejected": -1.7343482971191406, + "logps/chosen": -15.598939895629883, + "logps/rejected": -230.89837646484375, + "loss": 0.3341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09588565677404404, + "rewards/margins": 5.341250419616699, + "rewards/rejected": -5.245364665985107, + "step": 9352 + }, + { + "epoch": 0.54, + "learning_rate": 4.5266813756142455e-08, + "logits/chosen": -2.003946304321289, + "logits/rejected": -2.0042290687561035, + "logps/chosen": -37.416603088378906, + "logps/rejected": -192.72738647460938, + "loss": 0.3945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37087783217430115, + "rewards/margins": 1.1818764209747314, + "rewards/rejected": -0.8109985589981079, + "step": 9353 + }, + { + "epoch": 0.54, + "learning_rate": 4.525743213663628e-08, + "logits/chosen": -1.8913708925247192, + "logits/rejected": -1.8983280658721924, + "logps/chosen": -204.33712768554688, + "logps/rejected": -339.88739013671875, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9577301740646362, + "rewards/margins": 3.51792311668396, + "rewards/rejected": -1.5601929426193237, + "step": 9354 + }, + { + "epoch": 0.54, + "learning_rate": 4.524805068560927e-08, + "logits/chosen": -2.09464693069458, + "logits/rejected": -2.083578586578369, + "logps/chosen": -5.16018009185791, + "logps/rejected": -165.59698486328125, + "loss": 0.409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040534354746341705, + "rewards/margins": 1.583595871925354, + "rewards/rejected": -1.6241302490234375, + "step": 9355 + }, + { + "epoch": 0.54, + "learning_rate": 4.5238669403394785e-08, + "logits/chosen": -1.9029415845870972, + "logits/rejected": -1.8982131481170654, + "logps/chosen": -92.9466781616211, + "logps/rejected": -361.8136901855469, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3029625117778778, + "rewards/margins": 2.342578887939453, + "rewards/rejected": -2.6455414295196533, + "step": 9356 + }, + { + "epoch": 0.54, + "learning_rate": 4.522928829032602e-08, + "logits/chosen": -1.8694204092025757, + "logits/rejected": -1.8812974691390991, + "logps/chosen": -165.12054443359375, + "logps/rejected": -186.06324768066406, + "loss": 0.2224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.43511962890625, + "rewards/margins": 0.8147384524345398, + "rewards/rejected": 0.6203811764717102, + "step": 9357 + }, + { + "epoch": 0.54, + "learning_rate": 4.52199073467363e-08, + "logits/chosen": -1.875359296798706, + "logits/rejected": -1.8947933912277222, + "logps/chosen": -321.88360595703125, + "logps/rejected": -360.8905334472656, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.35650634765625, + "rewards/margins": 0.8142181038856506, + "rewards/rejected": 0.5422882437705994, + "step": 9358 + }, + { + "epoch": 0.54, + "learning_rate": 4.521052657295882e-08, + "logits/chosen": -1.9056577682495117, + "logits/rejected": -1.8928840160369873, + "logps/chosen": -209.4110107421875, + "logps/rejected": -473.6506652832031, + "loss": 0.0656, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.157702684402466, + "rewards/margins": 2.5576446056365967, + "rewards/rejected": -0.399942010641098, + "step": 9359 + }, + { + "epoch": 0.54, + "learning_rate": 4.520114596932688e-08, + "logits/chosen": -1.953829050064087, + "logits/rejected": -1.9533872604370117, + "logps/chosen": -196.92002868652344, + "logps/rejected": -311.16253662109375, + "loss": 0.0827, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8915956020355225, + "rewards/margins": 2.15582275390625, + "rewards/rejected": 0.7357727289199829, + "step": 9360 + }, + { + "epoch": 0.54, + "learning_rate": 4.5191765536173715e-08, + "logits/chosen": -2.0596156120300293, + "logits/rejected": -2.0674381256103516, + "logps/chosen": -4.904844760894775, + "logps/rejected": -73.50186157226562, + "loss": 0.8231, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2501474618911743, + "rewards/margins": -0.20678655803203583, + "rewards/rejected": -0.04336090013384819, + "step": 9361 + }, + { + "epoch": 0.54, + "learning_rate": 4.518238527383255e-08, + "logits/chosen": -1.9269359111785889, + "logits/rejected": -1.9307239055633545, + "logps/chosen": -0.00012230622814968228, + "logps/rejected": -34.863502502441406, + "loss": 0.4771, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410641849972308e-06, + "rewards/margins": 1.049678087234497, + "rewards/rejected": -1.0496824979782104, + "step": 9362 + }, + { + "epoch": 0.54, + "learning_rate": 4.517300518263664e-08, + "logits/chosen": -1.8771260976791382, + "logits/rejected": -1.858944058418274, + "logps/chosen": -257.5045166015625, + "logps/rejected": -440.12994384765625, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2588441371917725, + "rewards/margins": 3.6395294666290283, + "rewards/rejected": -0.380685418844223, + "step": 9363 + }, + { + "epoch": 0.54, + "learning_rate": 4.5163625262919175e-08, + "logits/chosen": -1.9184433221817017, + "logits/rejected": -1.8928756713867188, + "logps/chosen": -187.95571899414062, + "logps/rejected": -245.4084930419922, + "loss": 0.1078, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.229038953781128, + "rewards/margins": 1.7929916381835938, + "rewards/rejected": 0.43604737520217896, + "step": 9364 + }, + { + "epoch": 0.54, + "learning_rate": 4.515424551501342e-08, + "logits/chosen": -1.816499948501587, + "logits/rejected": -1.817346215248108, + "logps/chosen": -35.990726470947266, + "logps/rejected": -129.3553009033203, + "loss": 0.4565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31460267305374146, + "rewards/margins": 0.882300615310669, + "rewards/rejected": -0.5676979422569275, + "step": 9365 + }, + { + "epoch": 0.55, + "learning_rate": 4.5144865939252546e-08, + "logits/chosen": -1.9150396585464478, + "logits/rejected": -1.926582932472229, + "logps/chosen": -0.005429205484688282, + "logps/rejected": -312.7140197753906, + "loss": 0.329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003008018247783184, + "rewards/margins": 6.772459030151367, + "rewards/rejected": -6.772759914398193, + "step": 9366 + }, + { + "epoch": 0.55, + "learning_rate": 4.51354865359698e-08, + "logits/chosen": -1.5172491073608398, + "logits/rejected": -1.526060700416565, + "logps/chosen": -78.24959564208984, + "logps/rejected": -231.2471466064453, + "loss": 0.3154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5857208371162415, + "rewards/margins": 1.2633652687072754, + "rewards/rejected": -0.6776443719863892, + "step": 9367 + }, + { + "epoch": 0.55, + "learning_rate": 4.5126107305498355e-08, + "logits/chosen": -2.022879123687744, + "logits/rejected": -2.050173044204712, + "logps/chosen": -187.51809692382812, + "logps/rejected": -337.5316162109375, + "loss": 0.229, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.583471655845642, + "rewards/margins": 0.7773864269256592, + "rewards/rejected": 0.8060852289199829, + "step": 9368 + }, + { + "epoch": 0.55, + "learning_rate": 4.5116728248171434e-08, + "logits/chosen": -1.7767919301986694, + "logits/rejected": -1.775227427482605, + "logps/chosen": -5.007004261016846, + "logps/rejected": -197.28199768066406, + "loss": 0.4095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14770948886871338, + "rewards/margins": 4.955008506774902, + "rewards/rejected": -5.102717876434326, + "step": 9369 + }, + { + "epoch": 0.55, + "learning_rate": 4.51073493643222e-08, + "logits/chosen": -1.8975714445114136, + "logits/rejected": -1.8890081644058228, + "logps/chosen": -51.36671447753906, + "logps/rejected": -241.12661743164062, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09689483791589737, + "rewards/margins": 3.171858310699463, + "rewards/rejected": -3.2687530517578125, + "step": 9370 + }, + { + "epoch": 0.55, + "learning_rate": 4.509797065428386e-08, + "logits/chosen": -1.850841999053955, + "logits/rejected": -1.8505853414535522, + "logps/chosen": -0.8839772343635559, + "logps/rejected": -71.92386627197266, + "loss": 0.6397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010232865810394287, + "rewards/margins": 0.20113787055015564, + "rewards/rejected": -0.19090500473976135, + "step": 9371 + }, + { + "epoch": 0.55, + "learning_rate": 4.5088592118389553e-08, + "logits/chosen": -1.9042408466339111, + "logits/rejected": -1.9081411361694336, + "logps/chosen": -0.000888835871592164, + "logps/rejected": -102.28763580322266, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.037867099919822e-05, + "rewards/margins": 1.3723868131637573, + "rewards/rejected": -1.3723564147949219, + "step": 9372 + }, + { + "epoch": 0.55, + "learning_rate": 4.5079213756972514e-08, + "logits/chosen": -1.8346891403198242, + "logits/rejected": -1.8275163173675537, + "logps/chosen": -60.153018951416016, + "logps/rejected": -184.4127960205078, + "loss": 0.6774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8503711819648743, + "rewards/margins": 1.696362018585205, + "rewards/rejected": -2.5467331409454346, + "step": 9373 + }, + { + "epoch": 0.55, + "learning_rate": 4.5069835570365844e-08, + "logits/chosen": -2.0428969860076904, + "logits/rejected": -2.0404727458953857, + "logps/chosen": -0.0020441627129912376, + "logps/rejected": -196.9327392578125, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016976642655208707, + "rewards/margins": 1.7242625951766968, + "rewards/rejected": -1.724432349205017, + "step": 9374 + }, + { + "epoch": 0.55, + "learning_rate": 4.506045755890276e-08, + "logits/chosen": -2.146548271179199, + "logits/rejected": -2.143084764480591, + "logps/chosen": -56.68379211425781, + "logps/rejected": -221.96728515625, + "loss": 0.5062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13880959153175354, + "rewards/margins": 1.0940532684326172, + "rewards/rejected": -1.2328628301620483, + "step": 9375 + }, + { + "epoch": 0.55, + "learning_rate": 4.505107972291635e-08, + "logits/chosen": -1.9524645805358887, + "logits/rejected": -1.9660786390304565, + "logps/chosen": -83.18224334716797, + "logps/rejected": -353.2375793457031, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4510048627853394, + "rewards/margins": 3.5311622619628906, + "rewards/rejected": -2.080157518386841, + "step": 9376 + }, + { + "epoch": 0.55, + "learning_rate": 4.5041702062739835e-08, + "logits/chosen": -2.092731237411499, + "logits/rejected": -2.078730821609497, + "logps/chosen": -0.21589502692222595, + "logps/rejected": -179.41326904296875, + "loss": 0.3074, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005890116095542908, + "rewards/margins": 6.239012241363525, + "rewards/rejected": -6.2331223487854, + "step": 9377 + }, + { + "epoch": 0.55, + "learning_rate": 4.5032324578706277e-08, + "logits/chosen": -1.9597654342651367, + "logits/rejected": -2.04738450050354, + "logps/chosen": -286.0303955078125, + "logps/rejected": -178.3633270263672, + "loss": 0.0936, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0758392810821533, + "rewards/margins": 1.6565262079238892, + "rewards/rejected": 1.4193130731582642, + "step": 9378 + }, + { + "epoch": 0.55, + "learning_rate": 4.5022947271148864e-08, + "logits/chosen": -1.771851897239685, + "logits/rejected": -1.73954176902771, + "logps/chosen": -195.89743041992188, + "logps/rejected": -294.6480407714844, + "loss": 0.1082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9317184686660767, + "rewards/margins": 2.3750686645507812, + "rewards/rejected": -0.443350225687027, + "step": 9379 + }, + { + "epoch": 0.55, + "learning_rate": 4.501357014040072e-08, + "logits/chosen": -1.6296738386154175, + "logits/rejected": -1.6345044374465942, + "logps/chosen": -30.32438087463379, + "logps/rejected": -187.54647827148438, + "loss": 0.5032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.174061581492424, + "rewards/margins": 1.1280426979064941, + "rewards/rejected": -1.3021042346954346, + "step": 9380 + }, + { + "epoch": 0.55, + "learning_rate": 4.500419318679495e-08, + "logits/chosen": -1.886041283607483, + "logits/rejected": -1.8809868097305298, + "logps/chosen": -54.693641662597656, + "logps/rejected": -369.6363220214844, + "loss": 0.2067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5659553408622742, + "rewards/margins": 3.2078073024749756, + "rewards/rejected": -2.6418519020080566, + "step": 9381 + }, + { + "epoch": 0.55, + "learning_rate": 4.4994816410664675e-08, + "logits/chosen": -1.8130717277526855, + "logits/rejected": -1.809658408164978, + "logps/chosen": -11.28392505645752, + "logps/rejected": -210.4316864013672, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06005125120282173, + "rewards/margins": 4.281972885131836, + "rewards/rejected": -4.342024326324463, + "step": 9382 + }, + { + "epoch": 0.55, + "learning_rate": 4.4985439812343004e-08, + "logits/chosen": -1.8280826807022095, + "logits/rejected": -1.8342506885528564, + "logps/chosen": -220.7532958984375, + "logps/rejected": -257.39495849609375, + "loss": 0.1349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.128002882003784, + "rewards/margins": 1.7102293968200684, + "rewards/rejected": 0.41777345538139343, + "step": 9383 + }, + { + "epoch": 0.55, + "learning_rate": 4.497606339216304e-08, + "logits/chosen": -2.1501994132995605, + "logits/rejected": -2.1443235874176025, + "logps/chosen": -74.29656219482422, + "logps/rejected": -339.823486328125, + "loss": 0.1913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6721855401992798, + "rewards/margins": 4.679747581481934, + "rewards/rejected": -4.007562160491943, + "step": 9384 + }, + { + "epoch": 0.55, + "learning_rate": 4.496668715045788e-08, + "logits/chosen": -2.0457725524902344, + "logits/rejected": -2.0459542274475098, + "logps/chosen": -129.3233642578125, + "logps/rejected": -321.93804931640625, + "loss": 0.3255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.113555908203125, + "rewards/margins": 4.120068550109863, + "rewards/rejected": -4.233624458312988, + "step": 9385 + }, + { + "epoch": 0.55, + "learning_rate": 4.4957311087560615e-08, + "logits/chosen": -1.9667863845825195, + "logits/rejected": -1.9532414674758911, + "logps/chosen": -42.45412063598633, + "logps/rejected": -315.5455322265625, + "loss": 0.1491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1012299060821533, + "rewards/margins": 1.9911835193634033, + "rewards/rejected": -0.88995361328125, + "step": 9386 + }, + { + "epoch": 0.55, + "learning_rate": 4.494793520380432e-08, + "logits/chosen": -1.7507528066635132, + "logits/rejected": -1.7635987997055054, + "logps/chosen": -247.47348022460938, + "logps/rejected": -239.84500122070312, + "loss": 0.4128, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9135040044784546, + "rewards/margins": -0.16346442699432373, + "rewards/rejected": 2.0769684314727783, + "step": 9387 + }, + { + "epoch": 0.55, + "learning_rate": 4.493855949952209e-08, + "logits/chosen": -2.1105148792266846, + "logits/rejected": -2.0887224674224854, + "logps/chosen": -30.8727970123291, + "logps/rejected": -576.9326171875, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39795514941215515, + "rewards/margins": 7.206231594085693, + "rewards/rejected": -6.808276653289795, + "step": 9388 + }, + { + "epoch": 0.55, + "learning_rate": 4.492918397504697e-08, + "logits/chosen": -1.7264368534088135, + "logits/rejected": -1.7258503437042236, + "logps/chosen": -62.186397552490234, + "logps/rejected": -209.8731689453125, + "loss": 0.4285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42518350481987, + "rewards/margins": 3.805797815322876, + "rewards/rejected": -4.230981349945068, + "step": 9389 + }, + { + "epoch": 0.55, + "learning_rate": 4.4919808630712055e-08, + "logits/chosen": -1.9927376508712769, + "logits/rejected": -1.997302532196045, + "logps/chosen": -1.444389820098877, + "logps/rejected": -95.77548217773438, + "loss": 0.6035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003820109413936734, + "rewards/margins": 0.3764365315437317, + "rewards/rejected": -0.38025665283203125, + "step": 9390 + }, + { + "epoch": 0.55, + "learning_rate": 4.4910433466850374e-08, + "logits/chosen": -1.9756652116775513, + "logits/rejected": -1.9740371704101562, + "logps/chosen": -0.04869557172060013, + "logps/rejected": -219.1596221923828, + "loss": 0.3558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00783390924334526, + "rewards/margins": 3.750300645828247, + "rewards/rejected": -3.742466688156128, + "step": 9391 + }, + { + "epoch": 0.55, + "learning_rate": 4.4901058483795e-08, + "logits/chosen": -2.116093158721924, + "logits/rejected": -2.117330312728882, + "logps/chosen": -9.829813003540039, + "logps/rejected": -186.9209442138672, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24679450690746307, + "rewards/margins": 2.2888095378875732, + "rewards/rejected": -2.0420150756835938, + "step": 9392 + }, + { + "epoch": 0.55, + "learning_rate": 4.489168368187895e-08, + "logits/chosen": -2.060396909713745, + "logits/rejected": -2.0516064167022705, + "logps/chosen": -0.00381737039424479, + "logps/rejected": -165.23580932617188, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002531511418055743, + "rewards/margins": 5.149448394775391, + "rewards/rejected": -5.1497015953063965, + "step": 9393 + }, + { + "epoch": 0.55, + "learning_rate": 4.488230906143532e-08, + "logits/chosen": -1.9395438432693481, + "logits/rejected": -1.9458458423614502, + "logps/chosen": -0.16240456700325012, + "logps/rejected": -212.1605682373047, + "loss": 0.3882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001131552504375577, + "rewards/margins": 2.4007301330566406, + "rewards/rejected": -2.4018616676330566, + "step": 9394 + }, + { + "epoch": 0.55, + "learning_rate": 4.487293462279707e-08, + "logits/chosen": -1.7530230283737183, + "logits/rejected": -1.7459478378295898, + "logps/chosen": -185.03860473632812, + "logps/rejected": -221.89065551757812, + "loss": 0.486, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1772003173828125, + "rewards/margins": 0.6939972639083862, + "rewards/rejected": -1.8711975812911987, + "step": 9395 + }, + { + "epoch": 0.55, + "learning_rate": 4.486356036629729e-08, + "logits/chosen": -1.9056867361068726, + "logits/rejected": -1.898109793663025, + "logps/chosen": -170.1809539794922, + "logps/rejected": -262.1441345214844, + "loss": 0.2097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1402939558029175, + "rewards/margins": 1.571241855621338, + "rewards/rejected": -0.430947870016098, + "step": 9396 + }, + { + "epoch": 0.55, + "learning_rate": 4.485418629226894e-08, + "logits/chosen": -2.2226502895355225, + "logits/rejected": -2.2237446308135986, + "logps/chosen": -0.00025247351732105017, + "logps/rejected": -115.9449234008789, + "loss": 0.4118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1121363968413789e-05, + "rewards/margins": 1.969241738319397, + "rewards/rejected": -1.9692528247833252, + "step": 9397 + }, + { + "epoch": 0.55, + "learning_rate": 4.48448124010451e-08, + "logits/chosen": -1.9704831838607788, + "logits/rejected": -1.9815380573272705, + "logps/chosen": -206.8045196533203, + "logps/rejected": -269.9349365234375, + "loss": 0.0705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7839279174804688, + "rewards/margins": 2.4196090698242188, + "rewards/rejected": -0.63568115234375, + "step": 9398 + }, + { + "epoch": 0.55, + "learning_rate": 4.483543869295871e-08, + "logits/chosen": -2.001023769378662, + "logits/rejected": -1.9943026304244995, + "logps/chosen": -0.06776886433362961, + "logps/rejected": -192.1724395751953, + "loss": 0.4886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008835498592816293, + "rewards/margins": 0.9743590354919434, + "rewards/rejected": -0.9752426147460938, + "step": 9399 + }, + { + "epoch": 0.55, + "learning_rate": 4.482606516834281e-08, + "logits/chosen": -2.061954975128174, + "logits/rejected": -2.056466817855835, + "logps/chosen": -7.175784111022949, + "logps/rejected": -187.28213500976562, + "loss": 0.4865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012426376342773438, + "rewards/margins": 1.0627567768096924, + "rewards/rejected": -1.0751831531524658, + "step": 9400 + }, + { + "epoch": 0.55, + "learning_rate": 4.481669182753039e-08, + "logits/chosen": -1.8270041942596436, + "logits/rejected": -1.8274937868118286, + "logps/chosen": -11.96909236907959, + "logps/rejected": -109.10578155517578, + "loss": 0.6043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07921314239501953, + "rewards/margins": 0.6100088357925415, + "rewards/rejected": -0.689221978187561, + "step": 9401 + }, + { + "epoch": 0.55, + "learning_rate": 4.480731867085442e-08, + "logits/chosen": -1.7942311763763428, + "logits/rejected": -1.7800277471542358, + "logps/chosen": -151.15187072753906, + "logps/rejected": -310.374755859375, + "loss": 0.1194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6700973510742188, + "rewards/margins": 2.231980800628662, + "rewards/rejected": -0.5618835687637329, + "step": 9402 + }, + { + "epoch": 0.55, + "learning_rate": 4.47979456986479e-08, + "logits/chosen": -2.0477049350738525, + "logits/rejected": -2.052853584289551, + "logps/chosen": -3.265220880508423, + "logps/rejected": -79.73052215576172, + "loss": 0.4689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.043487098067998886, + "rewards/margins": 1.4097036123275757, + "rewards/rejected": -1.4531906843185425, + "step": 9403 + }, + { + "epoch": 0.55, + "learning_rate": 4.478857291124379e-08, + "logits/chosen": -1.7427775859832764, + "logits/rejected": -1.735571265220642, + "logps/chosen": -210.2831573486328, + "logps/rejected": -324.26605224609375, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.635122776031494, + "rewards/margins": 2.510366916656494, + "rewards/rejected": 0.124755859375, + "step": 9404 + }, + { + "epoch": 0.55, + "learning_rate": 4.477920030897507e-08, + "logits/chosen": -1.9274870157241821, + "logits/rejected": -1.9239273071289062, + "logps/chosen": -0.0040293363854289055, + "logps/rejected": -211.4642333984375, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0011323251528665423, + "rewards/margins": 5.822506904602051, + "rewards/rejected": -5.821374416351318, + "step": 9405 + }, + { + "epoch": 0.55, + "learning_rate": 4.4769827892174686e-08, + "logits/chosen": -1.7829166650772095, + "logits/rejected": -1.7318270206451416, + "logps/chosen": -217.1510009765625, + "logps/rejected": -432.6903076171875, + "loss": 0.0966, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.04119873046875, + "rewards/margins": 2.347918748855591, + "rewards/rejected": -0.30671998858451843, + "step": 9406 + }, + { + "epoch": 0.55, + "learning_rate": 4.47604556611756e-08, + "logits/chosen": -1.7871408462524414, + "logits/rejected": -1.800865888595581, + "logps/chosen": -2.579123020172119, + "logps/rejected": -203.918212890625, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09507768601179123, + "rewards/margins": 3.4179553985595703, + "rewards/rejected": -3.3228776454925537, + "step": 9407 + }, + { + "epoch": 0.55, + "learning_rate": 4.475108361631075e-08, + "logits/chosen": -1.9929877519607544, + "logits/rejected": -2.01416015625, + "logps/chosen": -216.120849609375, + "logps/rejected": -406.3699951171875, + "loss": 0.0635, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8656433820724487, + "rewards/margins": 2.3711304664611816, + "rewards/rejected": -0.5054870843887329, + "step": 9408 + }, + { + "epoch": 0.55, + "learning_rate": 4.4741711757913105e-08, + "logits/chosen": -2.0146079063415527, + "logits/rejected": -1.9973924160003662, + "logps/chosen": -0.02398795448243618, + "logps/rejected": -339.1417541503906, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014030654914677143, + "rewards/margins": 5.713639736175537, + "rewards/rejected": -5.699609279632568, + "step": 9409 + }, + { + "epoch": 0.55, + "learning_rate": 4.4732340086315565e-08, + "logits/chosen": -2.104196310043335, + "logits/rejected": -2.085331439971924, + "logps/chosen": -2.473541736602783, + "logps/rejected": -321.9315490722656, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06623230129480362, + "rewards/margins": 3.4172606468200684, + "rewards/rejected": -3.3510284423828125, + "step": 9410 + }, + { + "epoch": 0.55, + "learning_rate": 4.4722968601851074e-08, + "logits/chosen": -1.886369228363037, + "logits/rejected": -1.873147964477539, + "logps/chosen": -150.21954345703125, + "logps/rejected": -230.72549438476562, + "loss": 0.2642, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.048725962638855, + "rewards/margins": 1.1348968744277954, + "rewards/rejected": -0.08617096394300461, + "step": 9411 + }, + { + "epoch": 0.55, + "learning_rate": 4.471359730485255e-08, + "logits/chosen": -2.03893780708313, + "logits/rejected": -2.0445449352264404, + "logps/chosen": -0.06399435549974442, + "logps/rejected": -204.89993286132812, + "loss": 0.3609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00469596590846777, + "rewards/margins": 3.600600481033325, + "rewards/rejected": -3.6052963733673096, + "step": 9412 + }, + { + "epoch": 0.55, + "learning_rate": 4.470422619565292e-08, + "logits/chosen": -2.174929141998291, + "logits/rejected": -2.1735379695892334, + "logps/chosen": -24.007944107055664, + "logps/rejected": -43.84431838989258, + "loss": 0.6064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23326416313648224, + "rewards/margins": 0.08663634955883026, + "rewards/rejected": 0.14662781357765198, + "step": 9413 + }, + { + "epoch": 0.55, + "learning_rate": 4.4694855274585054e-08, + "logits/chosen": -1.9092177152633667, + "logits/rejected": -1.9110420942306519, + "logps/chosen": -4.2080475395778194e-05, + "logps/rejected": -115.16535949707031, + "loss": 0.3873, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3496767173346598e-06, + "rewards/margins": 2.42128324508667, + "rewards/rejected": -2.4212799072265625, + "step": 9414 + }, + { + "epoch": 0.55, + "learning_rate": 4.468548454198191e-08, + "logits/chosen": -1.7983096837997437, + "logits/rejected": -1.791176199913025, + "logps/chosen": -98.39419555664062, + "logps/rejected": -396.819091796875, + "loss": 0.072, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.526410698890686, + "rewards/margins": 6.873859405517578, + "rewards/rejected": -5.347448825836182, + "step": 9415 + }, + { + "epoch": 0.55, + "learning_rate": 4.467611399817632e-08, + "logits/chosen": -1.9903546571731567, + "logits/rejected": -1.9890713691711426, + "logps/chosen": -62.095909118652344, + "logps/rejected": -328.5244445800781, + "loss": 0.1405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.78689044713974, + "rewards/margins": 4.308863162994385, + "rewards/rejected": -3.52197265625, + "step": 9416 + }, + { + "epoch": 0.55, + "learning_rate": 4.466674364350124e-08, + "logits/chosen": -2.039149522781372, + "logits/rejected": -2.0583302974700928, + "logps/chosen": -276.86041259765625, + "logps/rejected": -411.26336669921875, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.807641625404358, + "rewards/margins": 4.973883152008057, + "rewards/rejected": -3.166241407394409, + "step": 9417 + }, + { + "epoch": 0.55, + "learning_rate": 4.465737347828947e-08, + "logits/chosen": -2.030545711517334, + "logits/rejected": -2.034625291824341, + "logps/chosen": -123.36241149902344, + "logps/rejected": -372.3584899902344, + "loss": 0.0486, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2472381591796875, + "rewards/margins": 4.5935211181640625, + "rewards/rejected": -2.346282958984375, + "step": 9418 + }, + { + "epoch": 0.55, + "learning_rate": 4.4648003502873966e-08, + "logits/chosen": -1.9990127086639404, + "logits/rejected": -1.982581615447998, + "logps/chosen": -204.68765258789062, + "logps/rejected": -468.821044921875, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7318482398986816, + "rewards/margins": 3.0230865478515625, + "rewards/rejected": -0.291238397359848, + "step": 9419 + }, + { + "epoch": 0.55, + "learning_rate": 4.4638633717587525e-08, + "logits/chosen": -1.9749364852905273, + "logits/rejected": -1.9767934083938599, + "logps/chosen": -20.665861129760742, + "logps/rejected": -108.53919982910156, + "loss": 0.4864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3908618986606598, + "rewards/margins": 0.5879985690116882, + "rewards/rejected": -0.19713668525218964, + "step": 9420 + }, + { + "epoch": 0.55, + "learning_rate": 4.462926412276305e-08, + "logits/chosen": -1.879520297050476, + "logits/rejected": -1.8429988622665405, + "logps/chosen": -278.228759765625, + "logps/rejected": -485.438232421875, + "loss": 0.1079, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.289440870285034, + "rewards/margins": 1.4574767351150513, + "rewards/rejected": 1.831964135169983, + "step": 9421 + }, + { + "epoch": 0.55, + "learning_rate": 4.46198947187334e-08, + "logits/chosen": -2.1765213012695312, + "logits/rejected": -2.1774239540100098, + "logps/chosen": -22.596471786499023, + "logps/rejected": -62.54750061035156, + "loss": 0.4979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15749035775661469, + "rewards/margins": 0.6590200662612915, + "rewards/rejected": -0.5015296936035156, + "step": 9422 + }, + { + "epoch": 0.55, + "learning_rate": 4.4610525505831386e-08, + "logits/chosen": -1.9737128019332886, + "logits/rejected": -1.971906065940857, + "logps/chosen": -66.13908386230469, + "logps/rejected": -226.83282470703125, + "loss": 0.4114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3130233883857727, + "rewards/margins": 1.273707628250122, + "rewards/rejected": -0.9606842398643494, + "step": 9423 + }, + { + "epoch": 0.55, + "learning_rate": 4.460115648438989e-08, + "logits/chosen": -1.8639930486679077, + "logits/rejected": -1.8590681552886963, + "logps/chosen": -178.649169921875, + "logps/rejected": -343.84637451171875, + "loss": 0.0704, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9074554443359375, + "rewards/margins": 2.0951995849609375, + "rewards/rejected": -0.187744140625, + "step": 9424 + }, + { + "epoch": 0.55, + "learning_rate": 4.4591787654741706e-08, + "logits/chosen": -1.7722175121307373, + "logits/rejected": -1.7657289505004883, + "logps/chosen": -0.005843005608767271, + "logps/rejected": -173.66006469726562, + "loss": 0.3356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016001435869839042, + "rewards/margins": 3.6902010440826416, + "rewards/rejected": -3.6903610229492188, + "step": 9425 + }, + { + "epoch": 0.55, + "learning_rate": 4.4582419017219696e-08, + "logits/chosen": -1.8228414058685303, + "logits/rejected": -1.820072054862976, + "logps/chosen": -0.18188615143299103, + "logps/rejected": -364.711669921875, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010187417268753052, + "rewards/margins": 6.088744163513184, + "rewards/rejected": -6.098931789398193, + "step": 9426 + }, + { + "epoch": 0.55, + "learning_rate": 4.457305057215665e-08, + "logits/chosen": -1.9734487533569336, + "logits/rejected": -1.997421383857727, + "logps/chosen": -204.75881958007812, + "logps/rejected": -377.1783447265625, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0785751342773438, + "rewards/margins": 4.593467712402344, + "rewards/rejected": -3.514892578125, + "step": 9427 + }, + { + "epoch": 0.55, + "learning_rate": 4.45636823198854e-08, + "logits/chosen": -2.1792452335357666, + "logits/rejected": -2.172299861907959, + "logps/chosen": -0.46743929386138916, + "logps/rejected": -186.24574279785156, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04361734166741371, + "rewards/margins": 4.63844633102417, + "rewards/rejected": -4.682063579559326, + "step": 9428 + }, + { + "epoch": 0.55, + "learning_rate": 4.455431426073874e-08, + "logits/chosen": -1.9262434244155884, + "logits/rejected": -1.9190329313278198, + "logps/chosen": -22.960369110107422, + "logps/rejected": -198.62652587890625, + "loss": 0.7493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8124291300773621, + "rewards/margins": 0.9743140339851379, + "rewards/rejected": -1.7867431640625, + "step": 9429 + }, + { + "epoch": 0.55, + "learning_rate": 4.4544946395049485e-08, + "logits/chosen": -2.0584583282470703, + "logits/rejected": -2.0621914863586426, + "logps/chosen": -14.9103364944458, + "logps/rejected": -192.15017700195312, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2957770526409149, + "rewards/margins": 1.6970558166503906, + "rewards/rejected": -1.4012787342071533, + "step": 9430 + }, + { + "epoch": 0.55, + "learning_rate": 4.45355787231504e-08, + "logits/chosen": -1.8563120365142822, + "logits/rejected": -1.8709121942520142, + "logps/chosen": -254.89981079101562, + "logps/rejected": -304.31024169921875, + "loss": 0.3614, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.718658447265625, + "rewards/margins": -0.03775644302368164, + "rewards/rejected": 2.7564148902893066, + "step": 9431 + }, + { + "epoch": 0.55, + "learning_rate": 4.452621124537431e-08, + "logits/chosen": -1.8418793678283691, + "logits/rejected": -1.8293933868408203, + "logps/chosen": -289.1103210449219, + "logps/rejected": -484.2954406738281, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6411529779434204, + "rewards/margins": 5.168774604797363, + "rewards/rejected": -3.5276215076446533, + "step": 9432 + }, + { + "epoch": 0.55, + "learning_rate": 4.4516843962053954e-08, + "logits/chosen": -1.812822699546814, + "logits/rejected": -1.8172321319580078, + "logps/chosen": -3.766977897612378e-05, + "logps/rejected": -127.85340118408203, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.536568654766597e-07, + "rewards/margins": 2.4489169120788574, + "rewards/rejected": -2.448915958404541, + "step": 9433 + }, + { + "epoch": 0.55, + "learning_rate": 4.450747687352213e-08, + "logits/chosen": -1.7793840169906616, + "logits/rejected": -1.8172025680541992, + "logps/chosen": -230.55673217773438, + "logps/rejected": -368.02069091796875, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.27726149559021, + "rewards/margins": 3.19370436668396, + "rewards/rejected": -0.91644287109375, + "step": 9434 + }, + { + "epoch": 0.55, + "learning_rate": 4.449810998011158e-08, + "logits/chosen": -1.9208896160125732, + "logits/rejected": -1.9199714660644531, + "logps/chosen": -7.2542266845703125, + "logps/rejected": -150.34274291992188, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2568167746067047, + "rewards/margins": 2.54670786857605, + "rewards/rejected": -2.8035247325897217, + "step": 9435 + }, + { + "epoch": 0.55, + "learning_rate": 4.448874328215511e-08, + "logits/chosen": -1.9698421955108643, + "logits/rejected": -1.9879200458526611, + "logps/chosen": -265.36669921875, + "logps/rejected": -369.304931640625, + "loss": 0.1184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.308172583580017, + "rewards/margins": 2.024090528488159, + "rewards/rejected": -0.7159180045127869, + "step": 9436 + }, + { + "epoch": 0.55, + "learning_rate": 4.44793767799854e-08, + "logits/chosen": -2.0422158241271973, + "logits/rejected": -2.0340795516967773, + "logps/chosen": -16.118839263916016, + "logps/rejected": -192.10626220703125, + "loss": 0.5166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015514755621552467, + "rewards/margins": 0.7648685574531555, + "rewards/rejected": -0.780383288860321, + "step": 9437 + }, + { + "epoch": 0.55, + "learning_rate": 4.447001047393526e-08, + "logits/chosen": -1.9335963726043701, + "logits/rejected": -1.908035397529602, + "logps/chosen": -138.00157165527344, + "logps/rejected": -307.1433410644531, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0224380493164062, + "rewards/margins": 4.1635332107543945, + "rewards/rejected": -2.141094923019409, + "step": 9438 + }, + { + "epoch": 0.55, + "learning_rate": 4.446064436433737e-08, + "logits/chosen": -1.8286185264587402, + "logits/rejected": -1.8292601108551025, + "logps/chosen": -0.2820645570755005, + "logps/rejected": -59.72872543334961, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04765060171484947, + "rewards/margins": 2.0671727657318115, + "rewards/rejected": -2.019522190093994, + "step": 9439 + }, + { + "epoch": 0.55, + "learning_rate": 4.44512784515245e-08, + "logits/chosen": -2.011293649673462, + "logits/rejected": -1.9998173713684082, + "logps/chosen": -39.282981872558594, + "logps/rejected": -256.5792236328125, + "loss": 0.1536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7788631319999695, + "rewards/margins": 3.3790950775146484, + "rewards/rejected": -2.600231885910034, + "step": 9440 + }, + { + "epoch": 0.55, + "learning_rate": 4.444191273582937e-08, + "logits/chosen": -1.6124383211135864, + "logits/rejected": -1.6105804443359375, + "logps/chosen": -76.97711181640625, + "logps/rejected": -126.38807678222656, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4251556396484375, + "rewards/margins": 1.9394654035568237, + "rewards/rejected": -1.5143097639083862, + "step": 9441 + }, + { + "epoch": 0.55, + "learning_rate": 4.4432547217584675e-08, + "logits/chosen": -1.7923705577850342, + "logits/rejected": -1.7940285205841064, + "logps/chosen": -3.4618208408355713, + "logps/rejected": -51.825523376464844, + "loss": 0.6608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21434350311756134, + "rewards/margins": 0.37716805934906006, + "rewards/rejected": -0.5915115475654602, + "step": 9442 + }, + { + "epoch": 0.55, + "learning_rate": 4.442318189712315e-08, + "logits/chosen": -1.8700666427612305, + "logits/rejected": -1.8534905910491943, + "logps/chosen": -251.27513122558594, + "logps/rejected": -424.4142761230469, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7813096046447754, + "rewards/margins": 3.2716875076293945, + "rewards/rejected": -0.490377813577652, + "step": 9443 + }, + { + "epoch": 0.55, + "learning_rate": 4.441381677477747e-08, + "logits/chosen": -2.0774734020233154, + "logits/rejected": -2.0705649852752686, + "logps/chosen": -129.21543884277344, + "logps/rejected": -352.03302001953125, + "loss": 0.1093, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7639954090118408, + "rewards/margins": 1.7636597156524658, + "rewards/rejected": 0.000335693359375, + "step": 9444 + }, + { + "epoch": 0.55, + "learning_rate": 4.4404451850880356e-08, + "logits/chosen": -1.8879903554916382, + "logits/rejected": -1.8876088857650757, + "logps/chosen": -45.74871063232422, + "logps/rejected": -303.19866943359375, + "loss": 0.1812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7781704068183899, + "rewards/margins": 3.8187222480773926, + "rewards/rejected": -3.0405519008636475, + "step": 9445 + }, + { + "epoch": 0.55, + "learning_rate": 4.439508712576447e-08, + "logits/chosen": -1.8768163919448853, + "logits/rejected": -1.8695470094680786, + "logps/chosen": -204.7978973388672, + "logps/rejected": -279.42425537109375, + "loss": 0.3502, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.104893445968628, + "rewards/margins": 0.03279566764831543, + "rewards/rejected": 2.0720977783203125, + "step": 9446 + }, + { + "epoch": 0.55, + "learning_rate": 4.438572259976251e-08, + "logits/chosen": -1.9800621271133423, + "logits/rejected": -1.9795448780059814, + "logps/chosen": -181.61940002441406, + "logps/rejected": -366.9180908203125, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4462417364120483, + "rewards/margins": 2.6585373878479004, + "rewards/rejected": -1.2122955322265625, + "step": 9447 + }, + { + "epoch": 0.55, + "learning_rate": 4.437635827320715e-08, + "logits/chosen": -1.6790109872817993, + "logits/rejected": -1.6727933883666992, + "logps/chosen": -186.30105590820312, + "logps/rejected": -468.77294921875, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.422964572906494, + "rewards/margins": 4.692178726196289, + "rewards/rejected": -2.269213914871216, + "step": 9448 + }, + { + "epoch": 0.55, + "learning_rate": 4.436699414643105e-08, + "logits/chosen": -1.8707531690597534, + "logits/rejected": -1.8600081205368042, + "logps/chosen": -7.331146480282769e-05, + "logps/rejected": -81.08447265625, + "loss": 0.6749, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2648092112831364e-07, + "rewards/margins": 0.0734960213303566, + "rewards/rejected": -0.0734962448477745, + "step": 9449 + }, + { + "epoch": 0.55, + "learning_rate": 4.4357630219766874e-08, + "logits/chosen": -1.8566685914993286, + "logits/rejected": -1.8460962772369385, + "logps/chosen": -51.34260177612305, + "logps/rejected": -267.2615051269531, + "loss": 0.1377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7186203002929688, + "rewards/margins": 4.830142498016357, + "rewards/rejected": -4.111522197723389, + "step": 9450 + }, + { + "epoch": 0.55, + "learning_rate": 4.434826649354727e-08, + "logits/chosen": -1.8822698593139648, + "logits/rejected": -1.9858282804489136, + "logps/chosen": -355.753173828125, + "logps/rejected": -400.9970397949219, + "loss": 0.0846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.570165991783142, + "rewards/margins": 2.1929595470428467, + "rewards/rejected": -0.6227936148643494, + "step": 9451 + }, + { + "epoch": 0.55, + "learning_rate": 4.4338902968104885e-08, + "logits/chosen": -2.0140926837921143, + "logits/rejected": -2.0513248443603516, + "logps/chosen": -299.696044921875, + "logps/rejected": -471.55328369140625, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8982452154159546, + "rewards/margins": 4.5435638427734375, + "rewards/rejected": -2.6453187465667725, + "step": 9452 + }, + { + "epoch": 0.55, + "learning_rate": 4.432953964377236e-08, + "logits/chosen": -1.9296250343322754, + "logits/rejected": -1.9324097633361816, + "logps/chosen": -8.309469223022461, + "logps/rejected": -83.55628204345703, + "loss": 0.4041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18564824759960175, + "rewards/margins": 1.0466139316558838, + "rewards/rejected": -0.8609657287597656, + "step": 9453 + }, + { + "epoch": 0.55, + "learning_rate": 4.4320176520882314e-08, + "logits/chosen": -1.8141074180603027, + "logits/rejected": -1.811189889907837, + "logps/chosen": -37.26756286621094, + "logps/rejected": -200.3205108642578, + "loss": 0.2765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3474186062812805, + "rewards/margins": 2.6212849617004395, + "rewards/rejected": -2.2738664150238037, + "step": 9454 + }, + { + "epoch": 0.55, + "learning_rate": 4.43108135997674e-08, + "logits/chosen": -1.9552637338638306, + "logits/rejected": -1.9452826976776123, + "logps/chosen": -27.01166343688965, + "logps/rejected": -266.3758544921875, + "loss": 0.4122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005240440368652344, + "rewards/margins": 2.1877965927124023, + "rewards/rejected": -2.18255615234375, + "step": 9455 + }, + { + "epoch": 0.55, + "learning_rate": 4.4301450880760196e-08, + "logits/chosen": -2.036410331726074, + "logits/rejected": -2.030461311340332, + "logps/chosen": -104.73362731933594, + "logps/rejected": -380.73046875, + "loss": 0.142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8458053469657898, + "rewards/margins": 2.6005935668945312, + "rewards/rejected": -1.7547882795333862, + "step": 9456 + }, + { + "epoch": 0.55, + "learning_rate": 4.4292088364193355e-08, + "logits/chosen": -1.9897515773773193, + "logits/rejected": -1.9880740642547607, + "logps/chosen": -24.64025115966797, + "logps/rejected": -150.00454711914062, + "loss": 0.4763, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0023420334327965975, + "rewards/margins": 1.3819806575775146, + "rewards/rejected": -1.379638671875, + "step": 9457 + }, + { + "epoch": 0.55, + "learning_rate": 4.4282726050399425e-08, + "logits/chosen": -1.8098136186599731, + "logits/rejected": -1.8118853569030762, + "logps/chosen": -7.296329021453857, + "logps/rejected": -106.45833587646484, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10690026730298996, + "rewards/margins": 2.335686683654785, + "rewards/rejected": -2.2287864685058594, + "step": 9458 + }, + { + "epoch": 0.55, + "learning_rate": 4.4273363939711066e-08, + "logits/chosen": -2.0400023460388184, + "logits/rejected": -2.0446085929870605, + "logps/chosen": -1.8071686029434204, + "logps/rejected": -58.8497428894043, + "loss": 0.6826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028181790839880705, + "rewards/margins": 0.03761570528149605, + "rewards/rejected": -0.04043388366699219, + "step": 9459 + }, + { + "epoch": 0.55, + "learning_rate": 4.42640020324608e-08, + "logits/chosen": -1.832277536392212, + "logits/rejected": -1.8327064514160156, + "logps/chosen": -80.32115173339844, + "logps/rejected": -344.7791748046875, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4596160650253296, + "rewards/margins": 4.201199531555176, + "rewards/rejected": -2.7415833473205566, + "step": 9460 + }, + { + "epoch": 0.55, + "learning_rate": 4.425464032898124e-08, + "logits/chosen": -1.72468101978302, + "logits/rejected": -1.6070306301116943, + "logps/chosen": -315.775634765625, + "logps/rejected": -652.787353515625, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.982492208480835, + "rewards/margins": 3.3785736560821533, + "rewards/rejected": -0.3960815370082855, + "step": 9461 + }, + { + "epoch": 0.55, + "learning_rate": 4.4245278829604986e-08, + "logits/chosen": -1.8665101528167725, + "logits/rejected": -1.8624659776687622, + "logps/chosen": -49.081634521484375, + "logps/rejected": -178.87741088867188, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38722726702690125, + "rewards/margins": 1.7326985597610474, + "rewards/rejected": -1.3454712629318237, + "step": 9462 + }, + { + "epoch": 0.55, + "learning_rate": 4.423591753466456e-08, + "logits/chosen": -2.03670597076416, + "logits/rejected": -2.0387558937072754, + "logps/chosen": -0.0002864315756596625, + "logps/rejected": -152.60716247558594, + "loss": 0.3821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00026952053303830326, + "rewards/margins": 2.1963236331939697, + "rewards/rejected": -2.196054220199585, + "step": 9463 + }, + { + "epoch": 0.55, + "learning_rate": 4.4226556444492543e-08, + "logits/chosen": -1.874578595161438, + "logits/rejected": -1.8764631748199463, + "logps/chosen": -28.079851150512695, + "logps/rejected": -131.26124572753906, + "loss": 0.2333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3584541380405426, + "rewards/margins": 2.6144158840179443, + "rewards/rejected": -2.2559616565704346, + "step": 9464 + }, + { + "epoch": 0.55, + "learning_rate": 4.4217195559421485e-08, + "logits/chosen": -1.8438869714736938, + "logits/rejected": -1.8413331508636475, + "logps/chosen": -0.12723270058631897, + "logps/rejected": -183.16009521484375, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013989091385155916, + "rewards/margins": 2.8158864974975586, + "rewards/rejected": -2.8172852993011475, + "step": 9465 + }, + { + "epoch": 0.55, + "learning_rate": 4.420783487978393e-08, + "logits/chosen": -1.9335376024246216, + "logits/rejected": -1.9257843494415283, + "logps/chosen": -0.008529746904969215, + "logps/rejected": -213.123046875, + "loss": 0.4111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0076739550568163395, + "rewards/margins": 1.936970829963684, + "rewards/rejected": -1.929296851158142, + "step": 9466 + }, + { + "epoch": 0.55, + "learning_rate": 4.41984744059124e-08, + "logits/chosen": -1.8264786005020142, + "logits/rejected": -1.7697515487670898, + "logps/chosen": -326.574951171875, + "logps/rejected": -557.6728515625, + "loss": 0.0727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2535858154296875, + "rewards/margins": 2.582693576812744, + "rewards/rejected": -1.329107642173767, + "step": 9467 + }, + { + "epoch": 0.55, + "learning_rate": 4.418911413813945e-08, + "logits/chosen": -1.7572771310806274, + "logits/rejected": -1.7593588829040527, + "logps/chosen": -227.19644165039062, + "logps/rejected": -280.5873718261719, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.96136474609375, + "rewards/margins": 1.7944244146347046, + "rewards/rejected": 1.1669403314590454, + "step": 9468 + }, + { + "epoch": 0.55, + "learning_rate": 4.417975407679758e-08, + "logits/chosen": -2.09185528755188, + "logits/rejected": -2.089351177215576, + "logps/chosen": -5.38020658493042, + "logps/rejected": -149.01881408691406, + "loss": 0.274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33704039454460144, + "rewards/margins": 3.0158560276031494, + "rewards/rejected": -2.6788156032562256, + "step": 9469 + }, + { + "epoch": 0.55, + "learning_rate": 4.417039422221932e-08, + "logits/chosen": -1.9421873092651367, + "logits/rejected": -1.9354758262634277, + "logps/chosen": -7.87956960266456e-05, + "logps/rejected": -179.72203063964844, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0145525922998786e-06, + "rewards/margins": 4.037046432495117, + "rewards/rejected": -4.03704833984375, + "step": 9470 + }, + { + "epoch": 0.55, + "learning_rate": 4.416103457473717e-08, + "logits/chosen": -1.8137881755828857, + "logits/rejected": -1.8246763944625854, + "logps/chosen": -214.58099365234375, + "logps/rejected": -644.8421630859375, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.933955430984497, + "rewards/margins": 12.201601028442383, + "rewards/rejected": -9.267645835876465, + "step": 9471 + }, + { + "epoch": 0.55, + "learning_rate": 4.415167513468365e-08, + "logits/chosen": -1.7805896997451782, + "logits/rejected": -1.7542451620101929, + "logps/chosen": -201.3193359375, + "logps/rejected": -357.9246826171875, + "loss": 0.141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7572662830352783, + "rewards/margins": 1.3901734352111816, + "rewards/rejected": 0.36709290742874146, + "step": 9472 + }, + { + "epoch": 0.55, + "learning_rate": 4.414231590239121e-08, + "logits/chosen": -2.0184922218322754, + "logits/rejected": -2.051353931427002, + "logps/chosen": -167.89523315429688, + "logps/rejected": -283.2835693359375, + "loss": 0.0771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.490687608718872, + "rewards/margins": 2.2247514724731445, + "rewards/rejected": -0.7340637445449829, + "step": 9473 + }, + { + "epoch": 0.55, + "learning_rate": 4.4132956878192386e-08, + "logits/chosen": -1.8189915418624878, + "logits/rejected": -1.800933599472046, + "logps/chosen": -270.2413330078125, + "logps/rejected": -524.375732421875, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0451691150665283, + "rewards/margins": 6.3799285888671875, + "rewards/rejected": -4.334759712219238, + "step": 9474 + }, + { + "epoch": 0.55, + "learning_rate": 4.41235980624196e-08, + "logits/chosen": -2.2089900970458984, + "logits/rejected": -2.205300807952881, + "logps/chosen": -0.030851105228066444, + "logps/rejected": -229.62892150878906, + "loss": 0.3616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008273454732261598, + "rewards/margins": 3.2018964290618896, + "rewards/rejected": -3.202723741531372, + "step": 9475 + }, + { + "epoch": 0.55, + "learning_rate": 4.411423945540539e-08, + "logits/chosen": -2.0535333156585693, + "logits/rejected": -2.0428428649902344, + "logps/chosen": -0.0016719452105462551, + "logps/rejected": -184.80703735351562, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.525580829475075e-05, + "rewards/margins": 3.381401300430298, + "rewards/rejected": -3.3814666271209717, + "step": 9476 + }, + { + "epoch": 0.55, + "learning_rate": 4.410488105748216e-08, + "logits/chosen": -2.000575065612793, + "logits/rejected": -1.9911854267120361, + "logps/chosen": -9.786828013602644e-05, + "logps/rejected": -125.11599731445312, + "loss": 0.4736, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.162734734971309e-06, + "rewards/margins": 1.23216712474823, + "rewards/rejected": -1.2321609258651733, + "step": 9477 + }, + { + "epoch": 0.55, + "learning_rate": 4.409552286898242e-08, + "logits/chosen": -2.207580327987671, + "logits/rejected": -2.207659959793091, + "logps/chosen": -0.004150871187448502, + "logps/rejected": -129.574951171875, + "loss": 0.5437, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220376573968679e-05, + "rewards/margins": 0.7190341353416443, + "rewards/rejected": -0.7191063165664673, + "step": 9478 + }, + { + "epoch": 0.55, + "learning_rate": 4.408616489023855e-08, + "logits/chosen": -1.8829491138458252, + "logits/rejected": -1.9155341386795044, + "logps/chosen": -228.0190887451172, + "logps/rejected": -468.9796142578125, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1982955932617188, + "rewards/margins": 5.580278396606445, + "rewards/rejected": -2.3819825649261475, + "step": 9479 + }, + { + "epoch": 0.55, + "learning_rate": 4.4076807121583076e-08, + "logits/chosen": -1.799176573753357, + "logits/rejected": -1.7972691059112549, + "logps/chosen": -89.442138671875, + "logps/rejected": -237.43545532226562, + "loss": 0.176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0003684759140015, + "rewards/margins": 1.9871070384979248, + "rewards/rejected": -0.9867386221885681, + "step": 9480 + }, + { + "epoch": 0.55, + "learning_rate": 4.4067449563348346e-08, + "logits/chosen": -1.9832998514175415, + "logits/rejected": -1.9872585535049438, + "logps/chosen": -2.066532611846924, + "logps/rejected": -38.93190002441406, + "loss": 0.6765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015299391932785511, + "rewards/margins": 0.07459618896245956, + "rewards/rejected": -0.05929679796099663, + "step": 9481 + }, + { + "epoch": 0.55, + "learning_rate": 4.4058092215866836e-08, + "logits/chosen": -1.9256399869918823, + "logits/rejected": -1.936518669128418, + "logps/chosen": -150.88465881347656, + "logps/rejected": -247.02557373046875, + "loss": 0.1262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.319392442703247, + "rewards/margins": 2.1927080154418945, + "rewards/rejected": -0.8733154535293579, + "step": 9482 + }, + { + "epoch": 0.55, + "learning_rate": 4.404873507947097e-08, + "logits/chosen": -2.0851528644561768, + "logits/rejected": -2.075730085372925, + "logps/chosen": -1.5031770467758179, + "logps/rejected": -150.1756134033203, + "loss": 0.3715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036612797528505325, + "rewards/margins": 3.429081678390503, + "rewards/rejected": -3.4656944274902344, + "step": 9483 + }, + { + "epoch": 0.55, + "learning_rate": 4.4039378154493135e-08, + "logits/chosen": -1.927061915397644, + "logits/rejected": -1.9183861017227173, + "logps/chosen": -321.4551086425781, + "logps/rejected": -409.7323913574219, + "loss": 0.1657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.902362048625946, + "rewards/margins": 1.3773162364959717, + "rewards/rejected": -0.474954217672348, + "step": 9484 + }, + { + "epoch": 0.55, + "learning_rate": 4.403002144126575e-08, + "logits/chosen": -2.0004870891571045, + "logits/rejected": -2.0025646686553955, + "logps/chosen": -0.08601319789886475, + "logps/rejected": -156.405517578125, + "loss": 0.3523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014301084447652102, + "rewards/margins": 3.045444965362549, + "rewards/rejected": -3.046875, + "step": 9485 + }, + { + "epoch": 0.55, + "learning_rate": 4.4020664940121206e-08, + "logits/chosen": -1.735191822052002, + "logits/rejected": -1.7082263231277466, + "logps/chosen": -221.63638305664062, + "logps/rejected": -488.5819091796875, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.879730224609375, + "rewards/margins": 6.097455024719238, + "rewards/rejected": -3.217724561691284, + "step": 9486 + }, + { + "epoch": 0.55, + "learning_rate": 4.4011308651391896e-08, + "logits/chosen": -2.1097795963287354, + "logits/rejected": -2.109058141708374, + "logps/chosen": -12.30369758605957, + "logps/rejected": -87.10328674316406, + "loss": 0.7495, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.23832301795482635, + "rewards/margins": -0.560477614402771, + "rewards/rejected": 0.7988006472587585, + "step": 9487 + }, + { + "epoch": 0.55, + "learning_rate": 4.400195257541018e-08, + "logits/chosen": -1.982707142829895, + "logits/rejected": -1.974563479423523, + "logps/chosen": -179.50299072265625, + "logps/rejected": -383.22259521484375, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.339343309402466, + "rewards/margins": 3.9477968215942383, + "rewards/rejected": -1.608453392982483, + "step": 9488 + }, + { + "epoch": 0.55, + "learning_rate": 4.3992596712508464e-08, + "logits/chosen": -1.8281505107879639, + "logits/rejected": -1.8271607160568237, + "logps/chosen": -169.6874542236328, + "logps/rejected": -360.79571533203125, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3668625354766846, + "rewards/margins": 2.5615220069885254, + "rewards/rejected": -0.19465942680835724, + "step": 9489 + }, + { + "epoch": 0.55, + "learning_rate": 4.3983241063019084e-08, + "logits/chosen": -1.9749237298965454, + "logits/rejected": -1.9852064847946167, + "logps/chosen": -93.18235778808594, + "logps/rejected": -296.86138916015625, + "loss": 0.2467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5234543085098267, + "rewards/margins": 2.8343887329101562, + "rewards/rejected": -2.310934543609619, + "step": 9490 + }, + { + "epoch": 0.55, + "learning_rate": 4.3973885627274425e-08, + "logits/chosen": -1.8103816509246826, + "logits/rejected": -1.7907434701919556, + "logps/chosen": -202.7982635498047, + "logps/rejected": -246.1890869140625, + "loss": 0.2366, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2330001592636108, + "rewards/margins": 1.1660293340682983, + "rewards/rejected": 0.0669708251953125, + "step": 9491 + }, + { + "epoch": 0.55, + "learning_rate": 4.396453040560682e-08, + "logits/chosen": -2.0334551334381104, + "logits/rejected": -2.0273330211639404, + "logps/chosen": -54.774658203125, + "logps/rejected": -304.3894348144531, + "loss": 0.1352, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.010156273841858, + "rewards/margins": 5.026641845703125, + "rewards/rejected": -4.016485691070557, + "step": 9492 + }, + { + "epoch": 0.55, + "learning_rate": 4.395517539834862e-08, + "logits/chosen": -1.873840570449829, + "logits/rejected": -1.923717975616455, + "logps/chosen": -333.9117431640625, + "logps/rejected": -458.5758056640625, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.510815382003784, + "rewards/margins": 2.825360059738159, + "rewards/rejected": -0.314544677734375, + "step": 9493 + }, + { + "epoch": 0.55, + "learning_rate": 4.3945820605832154e-08, + "logits/chosen": -1.8090406656265259, + "logits/rejected": -1.773650884628296, + "logps/chosen": -222.3235626220703, + "logps/rejected": -464.81591796875, + "loss": 0.0989, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.766597032546997, + "rewards/margins": 2.4616410732269287, + "rewards/rejected": -0.6950439810752869, + "step": 9494 + }, + { + "epoch": 0.55, + "learning_rate": 4.3936466028389765e-08, + "logits/chosen": -1.9645389318466187, + "logits/rejected": -1.959083914756775, + "logps/chosen": -3.529869556427002, + "logps/rejected": -365.8336486816406, + "loss": 0.3158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812450408935547, + "rewards/margins": 7.9307355880737305, + "rewards/rejected": -7.832611083984375, + "step": 9495 + }, + { + "epoch": 0.55, + "learning_rate": 4.392711166635374e-08, + "logits/chosen": -1.7998392581939697, + "logits/rejected": -1.804971694946289, + "logps/chosen": -222.71644592285156, + "logps/rejected": -511.16925048828125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0677077770233154, + "rewards/margins": 8.586021423339844, + "rewards/rejected": -5.518313884735107, + "step": 9496 + }, + { + "epoch": 0.55, + "learning_rate": 4.391775752005644e-08, + "logits/chosen": -1.7327533960342407, + "logits/rejected": -1.7298258543014526, + "logps/chosen": -31.379077911376953, + "logps/rejected": -205.64035034179688, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1253402680158615, + "rewards/margins": 2.3928513526916504, + "rewards/rejected": -2.2675111293792725, + "step": 9497 + }, + { + "epoch": 0.55, + "learning_rate": 4.390840358983012e-08, + "logits/chosen": -2.0287880897521973, + "logits/rejected": -2.030163526535034, + "logps/chosen": -56.05489730834961, + "logps/rejected": -206.8782958984375, + "loss": 0.6268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7505332827568054, + "rewards/margins": 1.6202874183654785, + "rewards/rejected": -2.3708207607269287, + "step": 9498 + }, + { + "epoch": 0.55, + "learning_rate": 4.389904987600713e-08, + "logits/chosen": -1.7422847747802734, + "logits/rejected": -1.7474015951156616, + "logps/chosen": -171.67083740234375, + "logps/rejected": -282.3221435546875, + "loss": 0.1544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.057904005050659, + "rewards/margins": 1.44692063331604, + "rewards/rejected": 0.6109833121299744, + "step": 9499 + }, + { + "epoch": 0.55, + "learning_rate": 4.3889696378919706e-08, + "logits/chosen": -1.9711123704910278, + "logits/rejected": -1.9265508651733398, + "logps/chosen": -129.32676696777344, + "logps/rejected": -233.54888916015625, + "loss": 0.2629, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8307541608810425, + "rewards/margins": 0.6282837390899658, + "rewards/rejected": 1.2024704217910767, + "step": 9500 + }, + { + "epoch": 0.55, + "learning_rate": 4.3880343098900156e-08, + "logits/chosen": -1.9983183145523071, + "logits/rejected": -2.004796028137207, + "logps/chosen": -285.7519226074219, + "logps/rejected": -486.95379638671875, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.104840040206909, + "rewards/margins": 8.500317573547363, + "rewards/rejected": -6.395477294921875, + "step": 9501 + }, + { + "epoch": 0.55, + "learning_rate": 4.387099003628077e-08, + "logits/chosen": -1.9781748056411743, + "logits/rejected": -1.9709992408752441, + "logps/chosen": -124.45478820800781, + "logps/rejected": -267.3260498046875, + "loss": 1.5771, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8368027210235596, + "rewards/margins": 1.45707106590271, + "rewards/rejected": -4.2938737869262695, + "step": 9502 + }, + { + "epoch": 0.55, + "learning_rate": 4.3861637191393784e-08, + "logits/chosen": -1.9484281539916992, + "logits/rejected": -1.9315696954727173, + "logps/chosen": -162.62335205078125, + "logps/rejected": -207.55056762695312, + "loss": 0.361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8380218744277954, + "rewards/margins": 0.05193328857421875, + "rewards/rejected": 1.7860885858535767, + "step": 9503 + }, + { + "epoch": 0.55, + "learning_rate": 4.3852284564571486e-08, + "logits/chosen": -1.9317876100540161, + "logits/rejected": -1.9294694662094116, + "logps/chosen": -16.37116050720215, + "logps/rejected": -199.96568298339844, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07914657890796661, + "rewards/margins": 2.9293999671936035, + "rewards/rejected": -2.8502533435821533, + "step": 9504 + }, + { + "epoch": 0.55, + "learning_rate": 4.38429321561461e-08, + "logits/chosen": -1.7973655462265015, + "logits/rejected": -1.7997907400131226, + "logps/chosen": -7.8035359382629395, + "logps/rejected": -264.47039794921875, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0731077715754509, + "rewards/margins": 4.696006774902344, + "rewards/rejected": -4.622899055480957, + "step": 9505 + }, + { + "epoch": 0.55, + "learning_rate": 4.383357996644989e-08, + "logits/chosen": -2.006685972213745, + "logits/rejected": -2.0246212482452393, + "logps/chosen": -183.98495483398438, + "logps/rejected": -327.8221435546875, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6260589361190796, + "rewards/margins": 4.224203586578369, + "rewards/rejected": -2.59814453125, + "step": 9506 + }, + { + "epoch": 0.55, + "learning_rate": 4.382422799581508e-08, + "logits/chosen": -1.944584608078003, + "logits/rejected": -1.9454829692840576, + "logps/chosen": -117.96147155761719, + "logps/rejected": -221.75267028808594, + "loss": 0.0992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2091354131698608, + "rewards/margins": 1.9761199951171875, + "rewards/rejected": -0.7669845819473267, + "step": 9507 + }, + { + "epoch": 0.55, + "learning_rate": 4.38148762445739e-08, + "logits/chosen": -1.7311770915985107, + "logits/rejected": -1.712300419807434, + "logps/chosen": -307.378662109375, + "logps/rejected": -400.2340087890625, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2257537841796875, + "rewards/margins": 3.5522186756134033, + "rewards/rejected": -1.3264648914337158, + "step": 9508 + }, + { + "epoch": 0.55, + "learning_rate": 4.380552471305857e-08, + "logits/chosen": -1.9818546772003174, + "logits/rejected": -1.9701578617095947, + "logps/chosen": -28.034954071044922, + "logps/rejected": -212.09237670898438, + "loss": 0.2709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.540042519569397, + "rewards/margins": 1.9915913343429565, + "rewards/rejected": -1.4515488147735596, + "step": 9509 + }, + { + "epoch": 0.55, + "learning_rate": 4.3796173401601305e-08, + "logits/chosen": -1.8160711526870728, + "logits/rejected": -1.809455156326294, + "logps/chosen": -0.00017606161418370903, + "logps/rejected": -527.8518676757812, + "loss": 0.326, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6255585794388026e-07, + "rewards/margins": 12.850104331970215, + "rewards/rejected": -12.850104331970215, + "step": 9510 + }, + { + "epoch": 0.55, + "learning_rate": 4.37868223105343e-08, + "logits/chosen": -1.856985092163086, + "logits/rejected": -1.8485387563705444, + "logps/chosen": -193.756103515625, + "logps/rejected": -300.32183837890625, + "loss": 0.5238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22198791801929474, + "rewards/margins": 0.43643492460250854, + "rewards/rejected": -0.214447021484375, + "step": 9511 + }, + { + "epoch": 0.55, + "learning_rate": 4.3777471440189765e-08, + "logits/chosen": -1.9719306230545044, + "logits/rejected": -1.9665859937667847, + "logps/chosen": -66.70771026611328, + "logps/rejected": -243.421875, + "loss": 0.1349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9639686346054077, + "rewards/margins": 1.5669898986816406, + "rewards/rejected": 0.3969787657260895, + "step": 9512 + }, + { + "epoch": 0.55, + "learning_rate": 4.376812079089988e-08, + "logits/chosen": -1.9138259887695312, + "logits/rejected": -1.8784470558166504, + "logps/chosen": -268.145751953125, + "logps/rejected": -305.40887451171875, + "loss": 0.1283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8036590814590454, + "rewards/margins": 2.024279832839966, + "rewards/rejected": -0.22062073647975922, + "step": 9513 + }, + { + "epoch": 0.55, + "learning_rate": 4.3758770362996824e-08, + "logits/chosen": -2.0198163986206055, + "logits/rejected": -2.014218807220459, + "logps/chosen": -52.019962310791016, + "logps/rejected": -239.0777130126953, + "loss": 0.4493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030909348279237747, + "rewards/margins": 0.9547069668769836, + "rewards/rejected": -0.923797607421875, + "step": 9514 + }, + { + "epoch": 0.55, + "learning_rate": 4.3749420156812776e-08, + "logits/chosen": -1.839424729347229, + "logits/rejected": -1.8266849517822266, + "logps/chosen": -50.282257080078125, + "logps/rejected": -376.23138427734375, + "loss": 0.1729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6808937191963196, + "rewards/margins": 3.738288164138794, + "rewards/rejected": -3.057394504547119, + "step": 9515 + }, + { + "epoch": 0.55, + "learning_rate": 4.374007017267991e-08, + "logits/chosen": -1.7827650308609009, + "logits/rejected": -1.8020930290222168, + "logps/chosen": -109.6514892578125, + "logps/rejected": -330.4461364746094, + "loss": 0.2335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4592125117778778, + "rewards/margins": 2.7386741638183594, + "rewards/rejected": -2.279461622238159, + "step": 9516 + }, + { + "epoch": 0.55, + "learning_rate": 4.3730720410930345e-08, + "logits/chosen": -2.064420223236084, + "logits/rejected": -2.0694284439086914, + "logps/chosen": -3.6716148315463215e-05, + "logps/rejected": -148.10987854003906, + "loss": 0.3883, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.774943237061962e-07, + "rewards/margins": 2.2355761528015137, + "rewards/rejected": -2.2355751991271973, + "step": 9517 + }, + { + "epoch": 0.55, + "learning_rate": 4.372137087189629e-08, + "logits/chosen": -2.0808775424957275, + "logits/rejected": -2.086043119430542, + "logps/chosen": -4.3759589195251465, + "logps/rejected": -37.97398376464844, + "loss": 0.5911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09337430447340012, + "rewards/margins": 0.27224355936050415, + "rewards/rejected": -0.17886924743652344, + "step": 9518 + }, + { + "epoch": 0.55, + "learning_rate": 4.3712021555909826e-08, + "logits/chosen": -1.8475526571273804, + "logits/rejected": -1.8476788997650146, + "logps/chosen": -10.627364158630371, + "logps/rejected": -156.38311767578125, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09689588844776154, + "rewards/margins": 2.33385968208313, + "rewards/rejected": -2.430755615234375, + "step": 9519 + }, + { + "epoch": 0.55, + "learning_rate": 4.370267246330314e-08, + "logits/chosen": -2.1038830280303955, + "logits/rejected": -2.107377052307129, + "logps/chosen": -3.6126527786254883, + "logps/rejected": -100.5541763305664, + "loss": 0.4729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23390427231788635, + "rewards/margins": 0.7501777410507202, + "rewards/rejected": -0.5162734985351562, + "step": 9520 + }, + { + "epoch": 0.55, + "learning_rate": 4.36933235944083e-08, + "logits/chosen": -1.927829623222351, + "logits/rejected": -1.9154590368270874, + "logps/chosen": -54.05595397949219, + "logps/rejected": -331.72235107421875, + "loss": 0.1119, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.056817650794983, + "rewards/margins": 4.45881986618042, + "rewards/rejected": -3.4020020961761475, + "step": 9521 + }, + { + "epoch": 0.55, + "learning_rate": 4.368397494955747e-08, + "logits/chosen": -1.8024364709854126, + "logits/rejected": -1.80183744430542, + "logps/chosen": -0.02900397777557373, + "logps/rejected": -58.28567123413086, + "loss": 0.6533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018796444637700915, + "rewards/margins": 0.22555223107337952, + "rewards/rejected": -0.22743187844753265, + "step": 9522 + }, + { + "epoch": 0.55, + "learning_rate": 4.367462652908276e-08, + "logits/chosen": -1.8428465127944946, + "logits/rejected": -1.8379278182983398, + "logps/chosen": -32.220970153808594, + "logps/rejected": -233.99441528320312, + "loss": 0.2349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7314422726631165, + "rewards/margins": 2.0630264282226562, + "rewards/rejected": -1.3315842151641846, + "step": 9523 + }, + { + "epoch": 0.55, + "learning_rate": 4.3665278333316236e-08, + "logits/chosen": -1.8781602382659912, + "logits/rejected": -1.943075180053711, + "logps/chosen": -232.33824157714844, + "logps/rejected": -284.996337890625, + "loss": 0.0514, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7917709350585938, + "rewards/margins": 2.3556442260742188, + "rewards/rejected": 0.436126708984375, + "step": 9524 + }, + { + "epoch": 0.55, + "learning_rate": 4.3655930362590024e-08, + "logits/chosen": -1.8069747686386108, + "logits/rejected": -1.9002678394317627, + "logps/chosen": -383.44573974609375, + "logps/rejected": -484.6564025878906, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.804644823074341, + "rewards/margins": 6.059832572937012, + "rewards/rejected": -3.25518798828125, + "step": 9525 + }, + { + "epoch": 0.55, + "learning_rate": 4.36465826172362e-08, + "logits/chosen": -1.9076569080352783, + "logits/rejected": -1.9125604629516602, + "logps/chosen": -17.77457618713379, + "logps/rejected": -331.88800048828125, + "loss": 0.2068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6088590621948242, + "rewards/margins": 8.244552612304688, + "rewards/rejected": -7.635693550109863, + "step": 9526 + }, + { + "epoch": 0.55, + "learning_rate": 4.363723509758683e-08, + "logits/chosen": -1.9805101156234741, + "logits/rejected": -1.9713058471679688, + "logps/chosen": -16.275592803955078, + "logps/rejected": -227.4619140625, + "loss": 0.3472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011189460754394531, + "rewards/margins": 5.080592632293701, + "rewards/rejected": -5.069403171539307, + "step": 9527 + }, + { + "epoch": 0.55, + "learning_rate": 4.3627887803973993e-08, + "logits/chosen": -2.10260009765625, + "logits/rejected": -2.0866849422454834, + "logps/chosen": -175.7075653076172, + "logps/rejected": -229.23622131347656, + "loss": 0.2082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3427261114120483, + "rewards/margins": 0.9164702892303467, + "rewards/rejected": 0.4262557923793793, + "step": 9528 + }, + { + "epoch": 0.55, + "learning_rate": 4.361854073672976e-08, + "logits/chosen": -1.9878184795379639, + "logits/rejected": -1.954976201057434, + "logps/chosen": -191.59121704101562, + "logps/rejected": -333.17950439453125, + "loss": 0.1046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4749786853790283, + "rewards/margins": 2.05735182762146, + "rewards/rejected": -0.5823730826377869, + "step": 9529 + }, + { + "epoch": 0.55, + "learning_rate": 4.360919389618617e-08, + "logits/chosen": -1.7782052755355835, + "logits/rejected": -1.78769052028656, + "logps/chosen": -212.44049072265625, + "logps/rejected": -389.69403076171875, + "loss": 0.0497, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8667526245117188, + "rewards/margins": 3.6328353881835938, + "rewards/rejected": -1.766082763671875, + "step": 9530 + }, + { + "epoch": 0.55, + "learning_rate": 4.359984728267528e-08, + "logits/chosen": -1.9089292287826538, + "logits/rejected": -1.9076478481292725, + "logps/chosen": -214.89181518554688, + "logps/rejected": -392.54376220703125, + "loss": 0.0964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8803131580352783, + "rewards/margins": 1.9241364002227783, + "rewards/rejected": -0.0438232421875, + "step": 9531 + }, + { + "epoch": 0.55, + "learning_rate": 4.35905008965291e-08, + "logits/chosen": -1.8998738527297974, + "logits/rejected": -1.8843165636062622, + "logps/chosen": -8.574305534362793, + "logps/rejected": -151.46142578125, + "loss": 0.4252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0817112922668457, + "rewards/margins": 1.4220998287200928, + "rewards/rejected": -1.340388536453247, + "step": 9532 + }, + { + "epoch": 0.55, + "learning_rate": 4.358115473807971e-08, + "logits/chosen": -1.8464980125427246, + "logits/rejected": -1.8275889158248901, + "logps/chosen": -182.98129272460938, + "logps/rejected": -327.3815612792969, + "loss": 0.3342, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6252624988555908, + "rewards/margins": 0.3703399896621704, + "rewards/rejected": 1.2549225091934204, + "step": 9533 + }, + { + "epoch": 0.55, + "learning_rate": 4.357180880765908e-08, + "logits/chosen": -1.7761434316635132, + "logits/rejected": -1.8062622547149658, + "logps/chosen": -231.91415405273438, + "logps/rejected": -260.7120361328125, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6960983276367188, + "rewards/margins": 0.32252657413482666, + "rewards/rejected": 1.373571753501892, + "step": 9534 + }, + { + "epoch": 0.55, + "learning_rate": 4.3562463105599255e-08, + "logits/chosen": -2.0816538333892822, + "logits/rejected": -2.081932544708252, + "logps/chosen": -47.006248474121094, + "logps/rejected": -215.93243408203125, + "loss": 0.1351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3515923023223877, + "rewards/margins": 2.9223809242248535, + "rewards/rejected": -1.5707886219024658, + "step": 9535 + }, + { + "epoch": 0.55, + "learning_rate": 4.355311763223222e-08, + "logits/chosen": -1.7092866897583008, + "logits/rejected": -1.7372685670852661, + "logps/chosen": -269.06646728515625, + "logps/rejected": -246.60186767578125, + "loss": 0.1842, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5695741176605225, + "rewards/margins": 1.0025880336761475, + "rewards/rejected": 1.566986083984375, + "step": 9536 + }, + { + "epoch": 0.55, + "learning_rate": 4.354377238789e-08, + "logits/chosen": -1.892648458480835, + "logits/rejected": -1.8819681406021118, + "logps/chosen": -0.0002258887980133295, + "logps/rejected": -89.0682601928711, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00013381887401919812, + "rewards/margins": 0.7285731434822083, + "rewards/rejected": -0.7284393310546875, + "step": 9537 + }, + { + "epoch": 0.56, + "learning_rate": 4.353442737290454e-08, + "logits/chosen": -1.8844830989837646, + "logits/rejected": -1.8787165880203247, + "logps/chosen": -65.23768615722656, + "logps/rejected": -183.96771240234375, + "loss": 0.6991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8059551119804382, + "rewards/margins": 1.3875172138214111, + "rewards/rejected": -2.193472385406494, + "step": 9538 + }, + { + "epoch": 0.56, + "learning_rate": 4.352508258760787e-08, + "logits/chosen": -1.8547714948654175, + "logits/rejected": -1.8488372564315796, + "logps/chosen": -175.2365264892578, + "logps/rejected": -209.61337280273438, + "loss": 0.1082, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0690994262695312, + "rewards/margins": 1.7746292352676392, + "rewards/rejected": 0.2944702208042145, + "step": 9539 + }, + { + "epoch": 0.56, + "learning_rate": 4.351573803233191e-08, + "logits/chosen": -2.094273328781128, + "logits/rejected": -2.0796852111816406, + "logps/chosen": -80.45442962646484, + "logps/rejected": -316.67498779296875, + "loss": 0.3719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18560639023780823, + "rewards/margins": 3.8272171020507812, + "rewards/rejected": -4.012823581695557, + "step": 9540 + }, + { + "epoch": 0.56, + "learning_rate": 4.350639370740869e-08, + "logits/chosen": -1.8283038139343262, + "logits/rejected": -1.8204153776168823, + "logps/chosen": -1.0578583478927612, + "logps/rejected": -234.13916015625, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28826746344566345, + "rewards/margins": 4.760534763336182, + "rewards/rejected": -4.472267150878906, + "step": 9541 + }, + { + "epoch": 0.56, + "learning_rate": 4.3497049613170097e-08, + "logits/chosen": -1.7472681999206543, + "logits/rejected": -1.725935697555542, + "logps/chosen": -185.74472045898438, + "logps/rejected": -443.3951110839844, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2840423583984375, + "rewards/margins": 2.52490234375, + "rewards/rejected": -0.2408599853515625, + "step": 9542 + }, + { + "epoch": 0.56, + "learning_rate": 4.3487705749948116e-08, + "logits/chosen": -1.9019927978515625, + "logits/rejected": -1.879814624786377, + "logps/chosen": -40.70020294189453, + "logps/rejected": -199.56765747070312, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25297853350639343, + "rewards/margins": 2.714425563812256, + "rewards/rejected": -2.9674041271209717, + "step": 9543 + }, + { + "epoch": 0.56, + "learning_rate": 4.3478362118074696e-08, + "logits/chosen": -1.8660571575164795, + "logits/rejected": -1.881126046180725, + "logps/chosen": -241.92991638183594, + "logps/rejected": -270.8763732910156, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7793930172920227, + "rewards/margins": 0.4386276304721832, + "rewards/rejected": 0.3407653868198395, + "step": 9544 + }, + { + "epoch": 0.56, + "learning_rate": 4.346901871788175e-08, + "logits/chosen": -1.9698795080184937, + "logits/rejected": -1.9625962972640991, + "logps/chosen": -33.67747497558594, + "logps/rejected": -223.6627960205078, + "loss": 0.4467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6009674072265625, + "rewards/margins": 0.26100462675094604, + "rewards/rejected": 0.33996278047561646, + "step": 9545 + }, + { + "epoch": 0.56, + "learning_rate": 4.345967554970121e-08, + "logits/chosen": -2.148191452026367, + "logits/rejected": -2.135148525238037, + "logps/chosen": -22.19785499572754, + "logps/rejected": -201.68136596679688, + "loss": 0.3469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0808509811758995, + "rewards/margins": 2.4663689136505127, + "rewards/rejected": -2.54721999168396, + "step": 9546 + }, + { + "epoch": 0.56, + "learning_rate": 4.3450332613864975e-08, + "logits/chosen": -1.8804547786712646, + "logits/rejected": -1.8919122219085693, + "logps/chosen": -174.85635375976562, + "logps/rejected": -408.416748046875, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4860291481018066, + "rewards/margins": 8.290750503540039, + "rewards/rejected": -5.804721355438232, + "step": 9547 + }, + { + "epoch": 0.56, + "learning_rate": 4.3440989910704975e-08, + "logits/chosen": -1.8033751249313354, + "logits/rejected": -1.8135179281234741, + "logps/chosen": -208.66616821289062, + "logps/rejected": -663.3295288085938, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.274623155593872, + "rewards/margins": 8.294539451599121, + "rewards/rejected": -7.01991605758667, + "step": 9548 + }, + { + "epoch": 0.56, + "learning_rate": 4.343164744055309e-08, + "logits/chosen": -1.8859562873840332, + "logits/rejected": -1.8841222524642944, + "logps/chosen": -2.4538989067077637, + "logps/rejected": -92.53862762451172, + "loss": 0.5365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04962220415472984, + "rewards/margins": 0.6912199258804321, + "rewards/rejected": -0.6415977478027344, + "step": 9549 + }, + { + "epoch": 0.56, + "learning_rate": 4.342230520374122e-08, + "logits/chosen": -2.055297374725342, + "logits/rejected": -2.0527169704437256, + "logps/chosen": -9.81672477722168, + "logps/rejected": -194.71946716308594, + "loss": 0.6164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6421952843666077, + "rewards/margins": 1.5490539073944092, + "rewards/rejected": -2.191249132156372, + "step": 9550 + }, + { + "epoch": 0.56, + "learning_rate": 4.3412963200601236e-08, + "logits/chosen": -1.8603264093399048, + "logits/rejected": -1.8871691226959229, + "logps/chosen": -181.8585205078125, + "logps/rejected": -331.4700012207031, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5367798805236816, + "rewards/margins": 3.671963691711426, + "rewards/rejected": -1.1351836919784546, + "step": 9551 + }, + { + "epoch": 0.56, + "learning_rate": 4.3403621431465035e-08, + "logits/chosen": -1.9013530015945435, + "logits/rejected": -1.9061305522918701, + "logps/chosen": -3.8764212131500244, + "logps/rejected": -108.64570617675781, + "loss": 0.5114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10828860104084015, + "rewards/margins": 0.6529297232627869, + "rewards/rejected": -0.5446411371231079, + "step": 9552 + }, + { + "epoch": 0.56, + "learning_rate": 4.3394279896664457e-08, + "logits/chosen": -1.9644306898117065, + "logits/rejected": -2.010478973388672, + "logps/chosen": -217.06719970703125, + "logps/rejected": -362.9440002441406, + "loss": 0.0631, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.076127767562866, + "rewards/margins": 2.1931779384613037, + "rewards/rejected": 0.8829498291015625, + "step": 9553 + }, + { + "epoch": 0.56, + "learning_rate": 4.338493859653137e-08, + "logits/chosen": -1.8997708559036255, + "logits/rejected": -1.9040230512619019, + "logps/chosen": -228.25018310546875, + "logps/rejected": -207.61183166503906, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.19383704662323, + "rewards/margins": 0.5110809803009033, + "rewards/rejected": 0.6827560663223267, + "step": 9554 + }, + { + "epoch": 0.56, + "learning_rate": 4.3375597531397625e-08, + "logits/chosen": -2.0788135528564453, + "logits/rejected": -2.066202163696289, + "logps/chosen": -49.17413330078125, + "logps/rejected": -177.04067993164062, + "loss": 0.2155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48853379487991333, + "rewards/margins": 2.7788169384002686, + "rewards/rejected": -2.290283203125, + "step": 9555 + }, + { + "epoch": 0.56, + "learning_rate": 4.336625670159506e-08, + "logits/chosen": -1.7420730590820312, + "logits/rejected": -1.7444995641708374, + "logps/chosen": -0.025223640725016594, + "logps/rejected": -353.2330322265625, + "loss": 0.3202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001709027448669076, + "rewards/margins": 7.318792819976807, + "rewards/rejected": -7.320501804351807, + "step": 9556 + }, + { + "epoch": 0.56, + "learning_rate": 4.335691610745549e-08, + "logits/chosen": -1.917941927909851, + "logits/rejected": -1.8890199661254883, + "logps/chosen": -202.36370849609375, + "logps/rejected": -280.1700439453125, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0622071027755737, + "rewards/margins": 0.9896637797355652, + "rewards/rejected": 0.07254333794116974, + "step": 9557 + }, + { + "epoch": 0.56, + "learning_rate": 4.334757574931078e-08, + "logits/chosen": -1.8811761140823364, + "logits/rejected": -1.889048457145691, + "logps/chosen": -0.00027452450012788177, + "logps/rejected": -126.40901947021484, + "loss": 0.5962, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.852634785696864e-06, + "rewards/margins": 0.41952693462371826, + "rewards/rejected": -0.41953277587890625, + "step": 9558 + }, + { + "epoch": 0.56, + "learning_rate": 4.33382356274927e-08, + "logits/chosen": -1.9000471830368042, + "logits/rejected": -1.8909640312194824, + "logps/chosen": -8.746753692626953, + "logps/rejected": -256.4549560546875, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02031116560101509, + "rewards/margins": 6.25206184387207, + "rewards/rejected": -6.23175048828125, + "step": 9559 + }, + { + "epoch": 0.56, + "learning_rate": 4.33288957423331e-08, + "logits/chosen": -1.7512216567993164, + "logits/rejected": -1.7549103498458862, + "logps/chosen": -188.20364379882812, + "logps/rejected": -341.23834228515625, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9887237548828125, + "rewards/margins": 0.12808537483215332, + "rewards/rejected": 2.860638380050659, + "step": 9560 + }, + { + "epoch": 0.56, + "learning_rate": 4.3319556094163724e-08, + "logits/chosen": -1.9394512176513672, + "logits/rejected": -1.9328289031982422, + "logps/chosen": -176.97525024414062, + "logps/rejected": -199.56468200683594, + "loss": 0.4258, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.329949975013733, + "rewards/margins": -0.16563570499420166, + "rewards/rejected": 1.4955856800079346, + "step": 9561 + }, + { + "epoch": 0.56, + "learning_rate": 4.3310216683316406e-08, + "logits/chosen": -1.9700255393981934, + "logits/rejected": -1.9569745063781738, + "logps/chosen": -205.68475341796875, + "logps/rejected": -335.1949462890625, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.388235569000244, + "rewards/margins": 2.5354645252227783, + "rewards/rejected": -0.14722900092601776, + "step": 9562 + }, + { + "epoch": 0.56, + "learning_rate": 4.330087751012292e-08, + "logits/chosen": -1.6759836673736572, + "logits/rejected": -1.6898410320281982, + "logps/chosen": -193.71702575683594, + "logps/rejected": -154.27001953125, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0602829456329346, + "rewards/margins": 0.2827301025390625, + "rewards/rejected": 2.777552843093872, + "step": 9563 + }, + { + "epoch": 0.56, + "learning_rate": 4.329153857491502e-08, + "logits/chosen": -1.8954774141311646, + "logits/rejected": -1.8952504396438599, + "logps/chosen": -0.002056252909824252, + "logps/rejected": -85.72514343261719, + "loss": 0.4578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018424075096845627, + "rewards/margins": 1.0870158672332764, + "rewards/rejected": -1.0872001647949219, + "step": 9564 + }, + { + "epoch": 0.56, + "learning_rate": 4.3282199878024504e-08, + "logits/chosen": -1.8245117664337158, + "logits/rejected": -1.8847469091415405, + "logps/chosen": -271.04559326171875, + "logps/rejected": -368.96484375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.321697950363159, + "rewards/margins": 6.093838691711426, + "rewards/rejected": -2.7721405029296875, + "step": 9565 + }, + { + "epoch": 0.56, + "learning_rate": 4.3272861419783095e-08, + "logits/chosen": -1.9893068075180054, + "logits/rejected": -1.9911643266677856, + "logps/chosen": -208.39935302734375, + "logps/rejected": -515.2139282226562, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.621052622795105, + "rewards/margins": 4.452144145965576, + "rewards/rejected": -2.8310914039611816, + "step": 9566 + }, + { + "epoch": 0.56, + "learning_rate": 4.326352320052257e-08, + "logits/chosen": -2.0188424587249756, + "logits/rejected": -2.0129692554473877, + "logps/chosen": -4.148417428950779e-05, + "logps/rejected": -206.61077880859375, + "loss": 0.3598, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.737377356737852e-06, + "rewards/margins": 3.8562099933624268, + "rewards/rejected": -3.856201171875, + "step": 9567 + }, + { + "epoch": 0.56, + "learning_rate": 4.325418522057463e-08, + "logits/chosen": -2.076093912124634, + "logits/rejected": -2.0799906253814697, + "logps/chosen": -20.68318748474121, + "logps/rejected": -84.15919494628906, + "loss": 0.5616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15071068704128265, + "rewards/margins": 0.37775689363479614, + "rewards/rejected": -0.2270462065935135, + "step": 9568 + }, + { + "epoch": 0.56, + "learning_rate": 4.324484748027105e-08, + "logits/chosen": -1.8574681282043457, + "logits/rejected": -1.8606467247009277, + "logps/chosen": -49.86500930786133, + "logps/rejected": -221.26416015625, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07162857055664062, + "rewards/margins": 3.03962779045105, + "rewards/rejected": -3.1112563610076904, + "step": 9569 + }, + { + "epoch": 0.56, + "learning_rate": 4.323550997994352e-08, + "logits/chosen": -1.9778521060943604, + "logits/rejected": -1.9760023355484009, + "logps/chosen": -0.1627710908651352, + "logps/rejected": -196.9793701171875, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00983367022126913, + "rewards/margins": 3.682140350341797, + "rewards/rejected": -3.691973924636841, + "step": 9570 + }, + { + "epoch": 0.56, + "learning_rate": 4.322617271992377e-08, + "logits/chosen": -1.9531219005584717, + "logits/rejected": -1.9582029581069946, + "logps/chosen": -210.31248474121094, + "logps/rejected": -420.4892272949219, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.919603109359741, + "rewards/margins": 4.6389360427856445, + "rewards/rejected": -1.7193329334259033, + "step": 9571 + }, + { + "epoch": 0.56, + "learning_rate": 4.321683570054349e-08, + "logits/chosen": -1.5947153568267822, + "logits/rejected": -1.5784424543380737, + "logps/chosen": -189.5870361328125, + "logps/rejected": -363.67437744140625, + "loss": 0.0681, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.39459228515625, + "rewards/margins": 3.7989563941955566, + "rewards/rejected": -2.4043641090393066, + "step": 9572 + }, + { + "epoch": 0.56, + "learning_rate": 4.320749892213439e-08, + "logits/chosen": -1.9122962951660156, + "logits/rejected": -1.8998342752456665, + "logps/chosen": -26.00542640686035, + "logps/rejected": -148.09181213378906, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48559150099754333, + "rewards/margins": 1.3955204486846924, + "rewards/rejected": -0.9099289178848267, + "step": 9573 + }, + { + "epoch": 0.56, + "learning_rate": 4.3198162385028156e-08, + "logits/chosen": -1.9610103368759155, + "logits/rejected": -1.9415045976638794, + "logps/chosen": -218.34530639648438, + "logps/rejected": -457.72796630859375, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.38736891746521, + "rewards/margins": 3.76462721824646, + "rewards/rejected": -1.37725830078125, + "step": 9574 + }, + { + "epoch": 0.56, + "learning_rate": 4.318882608955647e-08, + "logits/chosen": -2.0298638343811035, + "logits/rejected": -2.0356593132019043, + "logps/chosen": -12.453810691833496, + "logps/rejected": -41.811851501464844, + "loss": 0.5521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.059946443885564804, + "rewards/margins": 0.564082682132721, + "rewards/rejected": -0.5041362643241882, + "step": 9575 + }, + { + "epoch": 0.56, + "learning_rate": 4.317949003605099e-08, + "logits/chosen": -2.223226308822632, + "logits/rejected": -2.2147328853607178, + "logps/chosen": -63.44670867919922, + "logps/rejected": -250.81797790527344, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010158539516851306, + "rewards/margins": 2.1114795207977295, + "rewards/rejected": -2.1124954223632812, + "step": 9576 + }, + { + "epoch": 0.56, + "learning_rate": 4.317015422484339e-08, + "logits/chosen": -1.9024380445480347, + "logits/rejected": -1.9103034734725952, + "logps/chosen": -213.33834838867188, + "logps/rejected": -303.7989501953125, + "loss": 0.1339, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5013580322265625, + "rewards/margins": 1.4133422374725342, + "rewards/rejected": 1.0880157947540283, + "step": 9577 + }, + { + "epoch": 0.56, + "learning_rate": 4.3160818656265305e-08, + "logits/chosen": -1.7373145818710327, + "logits/rejected": -1.7432684898376465, + "logps/chosen": -285.497314453125, + "logps/rejected": -549.4031982421875, + "loss": 0.0254, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.059213399887085, + "rewards/margins": 6.083139419555664, + "rewards/rejected": -4.02392578125, + "step": 9578 + }, + { + "epoch": 0.56, + "learning_rate": 4.315148333064843e-08, + "logits/chosen": -2.045445203781128, + "logits/rejected": -2.0378618240356445, + "logps/chosen": -10.446871757507324, + "logps/rejected": -131.554931640625, + "loss": 0.6981, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1379023641347885, + "rewards/margins": 0.13957108557224274, + "rewards/rejected": -0.27747344970703125, + "step": 9579 + }, + { + "epoch": 0.56, + "learning_rate": 4.314214824832433e-08, + "logits/chosen": -1.9567371606826782, + "logits/rejected": -1.95160973072052, + "logps/chosen": -79.68453979492188, + "logps/rejected": -278.94537353515625, + "loss": 0.1824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7708763480186462, + "rewards/margins": 6.246683597564697, + "rewards/rejected": -5.475807189941406, + "step": 9580 + }, + { + "epoch": 0.56, + "learning_rate": 4.31328134096247e-08, + "logits/chosen": -1.8899633884429932, + "logits/rejected": -1.8827557563781738, + "logps/chosen": -5.102074646856636e-05, + "logps/rejected": -254.04061889648438, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0441799531690776e-05, + "rewards/margins": 4.839808464050293, + "rewards/rejected": -4.8397979736328125, + "step": 9581 + }, + { + "epoch": 0.56, + "learning_rate": 4.31234788148811e-08, + "logits/chosen": -1.9437592029571533, + "logits/rejected": -2.0169060230255127, + "logps/chosen": -199.39385986328125, + "logps/rejected": -215.35836791992188, + "loss": 0.1414, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.401666283607483, + "rewards/margins": 1.2541016340255737, + "rewards/rejected": 0.14756469428539276, + "step": 9582 + }, + { + "epoch": 0.56, + "learning_rate": 4.311414446442518e-08, + "logits/chosen": -1.8945657014846802, + "logits/rejected": -1.8520286083221436, + "logps/chosen": -172.50924682617188, + "logps/rejected": -450.8607482910156, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0036256313323975, + "rewards/margins": 2.399685859680176, + "rewards/rejected": -0.39606019854545593, + "step": 9583 + }, + { + "epoch": 0.56, + "learning_rate": 4.310481035858854e-08, + "logits/chosen": -2.0292251110076904, + "logits/rejected": -2.0101637840270996, + "logps/chosen": -0.0006425044848583639, + "logps/rejected": -135.0982666015625, + "loss": 0.5563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006537814042530954, + "rewards/margins": 0.6491203308105469, + "rewards/rejected": -0.6484665274620056, + "step": 9584 + }, + { + "epoch": 0.56, + "learning_rate": 4.309547649770275e-08, + "logits/chosen": -2.115323543548584, + "logits/rejected": -2.108386516571045, + "logps/chosen": -42.28563690185547, + "logps/rejected": -123.23886108398438, + "loss": 0.603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4228813350200653, + "rewards/margins": 1.0238486528396606, + "rewards/rejected": -1.4467300176620483, + "step": 9585 + }, + { + "epoch": 0.56, + "learning_rate": 4.308614288209943e-08, + "logits/chosen": -1.9633985757827759, + "logits/rejected": -1.9527652263641357, + "logps/chosen": -17.88766098022461, + "logps/rejected": -348.1522521972656, + "loss": 0.258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2869873046875, + "rewards/margins": 7.003024578094482, + "rewards/rejected": -6.716037273406982, + "step": 9586 + }, + { + "epoch": 0.56, + "learning_rate": 4.3076809512110116e-08, + "logits/chosen": -2.060762643814087, + "logits/rejected": -2.0507826805114746, + "logps/chosen": -3.8524763584136963, + "logps/rejected": -213.80731201171875, + "loss": 0.3319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10745041817426682, + "rewards/margins": 2.9871273040771484, + "rewards/rejected": -2.8796768188476562, + "step": 9587 + }, + { + "epoch": 0.56, + "learning_rate": 4.30674763880664e-08, + "logits/chosen": -1.947448968887329, + "logits/rejected": -1.9271572828292847, + "logps/chosen": -219.41372680664062, + "logps/rejected": -508.2589416503906, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5388245582580566, + "rewards/margins": 6.098940849304199, + "rewards/rejected": -3.5601165294647217, + "step": 9588 + }, + { + "epoch": 0.56, + "learning_rate": 4.305814351029983e-08, + "logits/chosen": -1.9309321641921997, + "logits/rejected": -1.9314438104629517, + "logps/chosen": -7.700128555297852, + "logps/rejected": -50.35070037841797, + "loss": 0.7207, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01598663441836834, + "rewards/margins": -0.1716056764125824, + "rewards/rejected": 0.1875923126935959, + "step": 9589 + }, + { + "epoch": 0.56, + "learning_rate": 4.3048810879141964e-08, + "logits/chosen": -1.8424419164657593, + "logits/rejected": -1.84018075466156, + "logps/chosen": -39.10919952392578, + "logps/rejected": -97.00184631347656, + "loss": 0.7375, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.049733735620975494, + "rewards/margins": -0.22862550616264343, + "rewards/rejected": 0.17889176309108734, + "step": 9590 + }, + { + "epoch": 0.56, + "learning_rate": 4.3039478494924335e-08, + "logits/chosen": -1.8487905263900757, + "logits/rejected": -1.8323452472686768, + "logps/chosen": -174.151123046875, + "logps/rejected": -501.67706298828125, + "loss": 0.119, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5664520263671875, + "rewards/margins": 2.27752685546875, + "rewards/rejected": -0.7110748291015625, + "step": 9591 + }, + { + "epoch": 0.56, + "learning_rate": 4.303014635797848e-08, + "logits/chosen": -2.0068438053131104, + "logits/rejected": -2.011547327041626, + "logps/chosen": -2.0980756744393148e-05, + "logps/rejected": -233.56459045410156, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.668933720111454e-07, + "rewards/margins": 7.036210536956787, + "rewards/rejected": -7.036210536956787, + "step": 9592 + }, + { + "epoch": 0.56, + "learning_rate": 4.302081446863591e-08, + "logits/chosen": -2.125049591064453, + "logits/rejected": -2.1243319511413574, + "logps/chosen": -0.05722383037209511, + "logps/rejected": -152.10482788085938, + "loss": 0.5008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002313921693712473, + "rewards/margins": 0.8533074259757996, + "rewards/rejected": -0.855621337890625, + "step": 9593 + }, + { + "epoch": 0.56, + "learning_rate": 4.301148282722816e-08, + "logits/chosen": -1.8058611154556274, + "logits/rejected": -1.7847836017608643, + "logps/chosen": -171.9558563232422, + "logps/rejected": -378.0809326171875, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.464373826980591, + "rewards/margins": 4.6273956298828125, + "rewards/rejected": -2.1630218029022217, + "step": 9594 + }, + { + "epoch": 0.56, + "learning_rate": 4.300215143408672e-08, + "logits/chosen": -1.8272500038146973, + "logits/rejected": -1.8225172758102417, + "logps/chosen": -111.45625305175781, + "logps/rejected": -643.5288696289062, + "loss": 0.3295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034449007362127304, + "rewards/margins": 11.377155303955078, + "rewards/rejected": -11.342706680297852, + "step": 9595 + }, + { + "epoch": 0.56, + "learning_rate": 4.2992820289543094e-08, + "logits/chosen": -1.919237494468689, + "logits/rejected": -1.9143577814102173, + "logps/chosen": -171.81277465820312, + "logps/rejected": -626.9080200195312, + "loss": 0.0743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7984634637832642, + "rewards/margins": 5.740321159362793, + "rewards/rejected": -4.941857814788818, + "step": 9596 + }, + { + "epoch": 0.56, + "learning_rate": 4.298348939392876e-08, + "logits/chosen": -1.8345376253128052, + "logits/rejected": -1.8707836866378784, + "logps/chosen": -235.5752410888672, + "logps/rejected": -373.0584411621094, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.57611083984375, + "rewards/margins": 4.078878879547119, + "rewards/rejected": -2.502768039703369, + "step": 9597 + }, + { + "epoch": 0.56, + "learning_rate": 4.2974158747575214e-08, + "logits/chosen": -1.8302134275436401, + "logits/rejected": -1.8404922485351562, + "logps/chosen": -0.001045689103193581, + "logps/rejected": -265.2832336425781, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.39294779728516e-05, + "rewards/margins": 5.8837175369262695, + "rewards/rejected": -5.883731365203857, + "step": 9598 + }, + { + "epoch": 0.56, + "learning_rate": 4.2964828350813895e-08, + "logits/chosen": -1.9132585525512695, + "logits/rejected": -1.899253010749817, + "logps/chosen": -221.7360076904297, + "logps/rejected": -361.9866943359375, + "loss": 0.1971, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5232772827148438, + "rewards/margins": 1.5259994268417358, + "rewards/rejected": -0.002722168108448386, + "step": 9599 + }, + { + "epoch": 0.56, + "learning_rate": 4.295549820397632e-08, + "logits/chosen": -1.8887732028961182, + "logits/rejected": -1.850137710571289, + "logps/chosen": -143.67811584472656, + "logps/rejected": -441.1405029296875, + "loss": 0.0554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6953445672988892, + "rewards/margins": 5.4447922706604, + "rewards/rejected": -3.7494475841522217, + "step": 9600 + }, + { + "epoch": 0.56, + "learning_rate": 4.2946168307393875e-08, + "logits/chosen": -1.8074297904968262, + "logits/rejected": -1.8058514595031738, + "logps/chosen": -32.87631607055664, + "logps/rejected": -146.51455688476562, + "loss": 0.2122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.773211658000946, + "rewards/margins": 2.163801670074463, + "rewards/rejected": -1.390589952468872, + "step": 9601 + }, + { + "epoch": 0.56, + "learning_rate": 4.2936838661398064e-08, + "logits/chosen": -1.9571986198425293, + "logits/rejected": -1.9487521648406982, + "logps/chosen": -177.5361328125, + "logps/rejected": -322.4129638671875, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3829498291015625, + "rewards/margins": 0.21310114860534668, + "rewards/rejected": 2.169848680496216, + "step": 9602 + }, + { + "epoch": 0.56, + "learning_rate": 4.2927509266320266e-08, + "logits/chosen": -1.9612001180648804, + "logits/rejected": -1.9619032144546509, + "logps/chosen": -172.77593994140625, + "logps/rejected": -391.277587890625, + "loss": 0.0699, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4354370832443237, + "rewards/margins": 2.90610671043396, + "rewards/rejected": -1.4706696271896362, + "step": 9603 + }, + { + "epoch": 0.56, + "learning_rate": 4.291818012249194e-08, + "logits/chosen": -1.876092553138733, + "logits/rejected": -1.8741456270217896, + "logps/chosen": -0.002514560939744115, + "logps/rejected": -178.3109130859375, + "loss": 0.4453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001277238188777119, + "rewards/margins": 1.5121877193450928, + "rewards/rejected": -1.5123153924942017, + "step": 9604 + }, + { + "epoch": 0.56, + "learning_rate": 4.290885123024451e-08, + "logits/chosen": -1.9449821710586548, + "logits/rejected": -1.9523483514785767, + "logps/chosen": -63.1057014465332, + "logps/rejected": -166.7610321044922, + "loss": 0.173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8170360922813416, + "rewards/margins": 2.3878414630889893, + "rewards/rejected": -1.5708054304122925, + "step": 9605 + }, + { + "epoch": 0.56, + "learning_rate": 4.2899522589909366e-08, + "logits/chosen": -2.0277252197265625, + "logits/rejected": -2.0243165493011475, + "logps/chosen": -62.208595275878906, + "logps/rejected": -228.0200958251953, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3909294307231903, + "rewards/margins": 1.7979546785354614, + "rewards/rejected": -1.4070252180099487, + "step": 9606 + }, + { + "epoch": 0.56, + "learning_rate": 4.289019420181791e-08, + "logits/chosen": -1.852178931236267, + "logits/rejected": -1.852885365486145, + "logps/chosen": -40.136722564697266, + "logps/rejected": -226.5858154296875, + "loss": 0.1804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9352413415908813, + "rewards/margins": 2.900297164916992, + "rewards/rejected": -1.9650558233261108, + "step": 9607 + }, + { + "epoch": 0.56, + "learning_rate": 4.288086606630153e-08, + "logits/chosen": -1.8803173303604126, + "logits/rejected": -1.8728116750717163, + "logps/chosen": -187.21263122558594, + "logps/rejected": -218.54808044433594, + "loss": 0.3827, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4518569707870483, + "rewards/margins": 0.20492851734161377, + "rewards/rejected": 1.2469284534454346, + "step": 9608 + }, + { + "epoch": 0.56, + "learning_rate": 4.287153818369162e-08, + "logits/chosen": -2.0713934898376465, + "logits/rejected": -2.0825133323669434, + "logps/chosen": -212.01303100585938, + "logps/rejected": -271.9679870605469, + "loss": 0.1187, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.587359666824341, + "rewards/margins": 1.6427216529846191, + "rewards/rejected": 0.9446380734443665, + "step": 9609 + }, + { + "epoch": 0.56, + "learning_rate": 4.286221055431954e-08, + "logits/chosen": -2.135187864303589, + "logits/rejected": -2.128615379333496, + "logps/chosen": -43.03004455566406, + "logps/rejected": -221.36300659179688, + "loss": 0.1255, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5351219177246094, + "rewards/margins": 2.9628257751464844, + "rewards/rejected": -1.427703857421875, + "step": 9610 + }, + { + "epoch": 0.56, + "learning_rate": 4.285288317851666e-08, + "logits/chosen": -1.7657043933868408, + "logits/rejected": -1.7661502361297607, + "logps/chosen": -118.01463317871094, + "logps/rejected": -279.4822692871094, + "loss": 0.1586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.751312255859375, + "rewards/margins": 2.7540009021759033, + "rewards/rejected": -2.0026886463165283, + "step": 9611 + }, + { + "epoch": 0.56, + "learning_rate": 4.284355605661432e-08, + "logits/chosen": -2.055821657180786, + "logits/rejected": -2.0426573753356934, + "logps/chosen": -13.424571990966797, + "logps/rejected": -90.05105590820312, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07418622821569443, + "rewards/margins": 0.7723256349563599, + "rewards/rejected": -0.8465118408203125, + "step": 9612 + }, + { + "epoch": 0.56, + "learning_rate": 4.283422918894388e-08, + "logits/chosen": -1.9845550060272217, + "logits/rejected": -1.996647834777832, + "logps/chosen": -48.107181549072266, + "logps/rejected": -231.66091918945312, + "loss": 0.186, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.214943289756775, + "rewards/margins": 2.8029773235321045, + "rewards/rejected": -1.5880340337753296, + "step": 9613 + }, + { + "epoch": 0.56, + "learning_rate": 4.282490257583667e-08, + "logits/chosen": -2.016031265258789, + "logits/rejected": -2.0410573482513428, + "logps/chosen": -254.0528564453125, + "logps/rejected": -375.73309326171875, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.265393018722534, + "rewards/margins": 2.7751526832580566, + "rewards/rejected": -0.5097595453262329, + "step": 9614 + }, + { + "epoch": 0.56, + "learning_rate": 4.281557621762402e-08, + "logits/chosen": -2.1595711708068848, + "logits/rejected": -2.1559202671051025, + "logps/chosen": -26.307771682739258, + "logps/rejected": -252.69802856445312, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04358673095703125, + "rewards/margins": 5.5932159423828125, + "rewards/rejected": -5.549629211425781, + "step": 9615 + }, + { + "epoch": 0.56, + "learning_rate": 4.280625011463724e-08, + "logits/chosen": -1.9269869327545166, + "logits/rejected": -1.9202080965042114, + "logps/chosen": -64.42955017089844, + "logps/rejected": -150.50994873046875, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1456390619277954, + "rewards/margins": 1.0996369123458862, + "rewards/rejected": 0.04600219801068306, + "step": 9616 + }, + { + "epoch": 0.56, + "learning_rate": 4.279692426720765e-08, + "logits/chosen": -2.0711607933044434, + "logits/rejected": -2.0730884075164795, + "logps/chosen": -96.34175109863281, + "logps/rejected": -228.27398681640625, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37709352374076843, + "rewards/margins": 5.854485988616943, + "rewards/rejected": -5.477392673492432, + "step": 9617 + }, + { + "epoch": 0.56, + "learning_rate": 4.278759867566654e-08, + "logits/chosen": -1.8965082168579102, + "logits/rejected": -1.8895931243896484, + "logps/chosen": -11.12904167175293, + "logps/rejected": -169.32528686523438, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003643894335255027, + "rewards/margins": 3.6214842796325684, + "rewards/rejected": -3.6251282691955566, + "step": 9618 + }, + { + "epoch": 0.56, + "learning_rate": 4.277827334034521e-08, + "logits/chosen": -1.9560023546218872, + "logits/rejected": -1.9478168487548828, + "logps/chosen": -110.02122497558594, + "logps/rejected": -190.50701904296875, + "loss": 0.5459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47142335772514343, + "rewards/margins": 1.3444640636444092, + "rewards/rejected": -1.815887451171875, + "step": 9619 + }, + { + "epoch": 0.56, + "learning_rate": 4.276894826157492e-08, + "logits/chosen": -1.9146311283111572, + "logits/rejected": -1.914099097251892, + "logps/chosen": -17.38872718811035, + "logps/rejected": -80.10958862304688, + "loss": 0.4142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025414466857910156, + "rewards/margins": 1.7513312101364136, + "rewards/rejected": -1.7767456769943237, + "step": 9620 + }, + { + "epoch": 0.56, + "learning_rate": 4.2759623439686984e-08, + "logits/chosen": -1.8412961959838867, + "logits/rejected": -1.8383424282073975, + "logps/chosen": -44.214046478271484, + "logps/rejected": -194.12615966796875, + "loss": 0.351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02122955396771431, + "rewards/margins": 2.5720183849334717, + "rewards/rejected": -2.5507888793945312, + "step": 9621 + }, + { + "epoch": 0.56, + "learning_rate": 4.275029887501261e-08, + "logits/chosen": -2.1420958042144775, + "logits/rejected": -2.13767147064209, + "logps/chosen": -0.018277939409017563, + "logps/rejected": -62.44823455810547, + "loss": 0.4102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007086846511811018, + "rewards/margins": 1.8392802476882935, + "rewards/rejected": -1.832193374633789, + "step": 9622 + }, + { + "epoch": 0.56, + "learning_rate": 4.274097456788309e-08, + "logits/chosen": -1.7265034914016724, + "logits/rejected": -1.7222567796707153, + "logps/chosen": -237.77633666992188, + "logps/rejected": -452.26190185546875, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4393310546875, + "rewards/margins": 4.747641086578369, + "rewards/rejected": -2.308310031890869, + "step": 9623 + }, + { + "epoch": 0.56, + "learning_rate": 4.273165051862968e-08, + "logits/chosen": -1.9465515613555908, + "logits/rejected": -1.936389446258545, + "logps/chosen": -37.5682487487793, + "logps/rejected": -237.19544982910156, + "loss": 0.3428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06640167534351349, + "rewards/margins": 2.617823839187622, + "rewards/rejected": -2.684225559234619, + "step": 9624 + }, + { + "epoch": 0.56, + "learning_rate": 4.272232672758358e-08, + "logits/chosen": -1.9338383674621582, + "logits/rejected": -1.9194557666778564, + "logps/chosen": -2.223273754119873, + "logps/rejected": -96.24730682373047, + "loss": 0.5185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09754550457000732, + "rewards/margins": 0.6447340846061707, + "rewards/rejected": -0.5471885800361633, + "step": 9625 + }, + { + "epoch": 0.56, + "learning_rate": 4.2713003195076046e-08, + "logits/chosen": -2.199143171310425, + "logits/rejected": -2.186671018600464, + "logps/chosen": -75.55583190917969, + "logps/rejected": -422.5066223144531, + "loss": 0.1428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9039711356163025, + "rewards/margins": 2.4112069606781006, + "rewards/rejected": -1.5072357654571533, + "step": 9626 + }, + { + "epoch": 0.56, + "learning_rate": 4.270367992143828e-08, + "logits/chosen": -1.843424677848816, + "logits/rejected": -1.8234940767288208, + "logps/chosen": -68.65877532958984, + "logps/rejected": -294.248779296875, + "loss": 0.214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7037467956542969, + "rewards/margins": 2.857659101486206, + "rewards/rejected": -2.153912305831909, + "step": 9627 + }, + { + "epoch": 0.56, + "learning_rate": 4.2694356907001496e-08, + "logits/chosen": -2.0035486221313477, + "logits/rejected": -1.9814144372940063, + "logps/chosen": -16.634227752685547, + "logps/rejected": -282.894287109375, + "loss": 0.2709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23646984994411469, + "rewards/margins": 5.4354658126831055, + "rewards/rejected": -5.198996067047119, + "step": 9628 + }, + { + "epoch": 0.56, + "learning_rate": 4.2685034152096875e-08, + "logits/chosen": -1.8917591571807861, + "logits/rejected": -1.7709424495697021, + "logps/chosen": -331.1830139160156, + "logps/rejected": -888.2921142578125, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.375457763671875, + "rewards/margins": 5.460436820983887, + "rewards/rejected": -3.084979295730591, + "step": 9629 + }, + { + "epoch": 0.56, + "learning_rate": 4.267571165705564e-08, + "logits/chosen": -2.012798309326172, + "logits/rejected": -1.9987667798995972, + "logps/chosen": -9.011984366225079e-05, + "logps/rejected": -94.45712280273438, + "loss": 0.5591, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.687612843350507e-05, + "rewards/margins": 0.5546572804450989, + "rewards/rejected": -0.5546203851699829, + "step": 9630 + }, + { + "epoch": 0.56, + "learning_rate": 4.266638942220894e-08, + "logits/chosen": -1.8170937299728394, + "logits/rejected": -1.9165844917297363, + "logps/chosen": -201.28509521484375, + "logps/rejected": -294.05218505859375, + "loss": 0.187, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4875259399414062, + "rewards/margins": 1.0034011602401733, + "rewards/rejected": 0.4841247498989105, + "step": 9631 + }, + { + "epoch": 0.56, + "learning_rate": 4.2657067447887975e-08, + "logits/chosen": -1.9384372234344482, + "logits/rejected": -1.9334958791732788, + "logps/chosen": -190.1148681640625, + "logps/rejected": -451.27197265625, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.519662618637085, + "rewards/margins": 4.807888984680176, + "rewards/rejected": -2.288226366043091, + "step": 9632 + }, + { + "epoch": 0.56, + "learning_rate": 4.264774573442388e-08, + "logits/chosen": -1.9265356063842773, + "logits/rejected": -1.90658438205719, + "logps/chosen": -4.247467994689941, + "logps/rejected": -254.59140014648438, + "loss": 0.3434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1083347350358963, + "rewards/margins": 3.054144859313965, + "rewards/rejected": -2.945810079574585, + "step": 9633 + }, + { + "epoch": 0.56, + "learning_rate": 4.263842428214783e-08, + "logits/chosen": -1.804824709892273, + "logits/rejected": -1.798378825187683, + "logps/chosen": -0.00018965288472827524, + "logps/rejected": -183.79039001464844, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.33816533384379e-05, + "rewards/margins": 3.5936293601989746, + "rewards/rejected": -3.5935959815979004, + "step": 9634 + }, + { + "epoch": 0.56, + "learning_rate": 4.2629103091390945e-08, + "logits/chosen": -1.988173246383667, + "logits/rejected": -1.978204369544983, + "logps/chosen": -0.07925059646368027, + "logps/rejected": -156.22708129882812, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.044470373541116714, + "rewards/margins": 2.903212070465088, + "rewards/rejected": -2.8587417602539062, + "step": 9635 + }, + { + "epoch": 0.56, + "learning_rate": 4.261978216248439e-08, + "logits/chosen": -1.966970682144165, + "logits/rejected": -1.9516838788986206, + "logps/chosen": -197.88734436035156, + "logps/rejected": -292.4715576171875, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2771285772323608, + "rewards/margins": 0.8774642944335938, + "rewards/rejected": 0.3996643126010895, + "step": 9636 + }, + { + "epoch": 0.56, + "learning_rate": 4.261046149575926e-08, + "logits/chosen": -1.7542418241500854, + "logits/rejected": -1.7607948780059814, + "logps/chosen": -283.7869567871094, + "logps/rejected": -341.8146057128906, + "loss": 0.2776, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.385223388671875, + "rewards/margins": 0.43796998262405396, + "rewards/rejected": 0.947253406047821, + "step": 9637 + }, + { + "epoch": 0.56, + "learning_rate": 4.260114109154669e-08, + "logits/chosen": -1.8677607774734497, + "logits/rejected": -1.8667963743209839, + "logps/chosen": -38.518741607666016, + "logps/rejected": -155.74571228027344, + "loss": 0.2479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48475342988967896, + "rewards/margins": 2.4715287685394287, + "rewards/rejected": -1.986775279045105, + "step": 9638 + }, + { + "epoch": 0.56, + "learning_rate": 4.2591820950177775e-08, + "logits/chosen": -1.867164969444275, + "logits/rejected": -1.9135353565216064, + "logps/chosen": -173.3907470703125, + "logps/rejected": -254.59002685546875, + "loss": 0.1975, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.431779623031616, + "rewards/margins": 0.7961503267288208, + "rewards/rejected": 1.6356292963027954, + "step": 9639 + }, + { + "epoch": 0.56, + "learning_rate": 4.258250107198363e-08, + "logits/chosen": -2.1820261478424072, + "logits/rejected": -2.188197374343872, + "logps/chosen": -7.340308666229248, + "logps/rejected": -69.04559326171875, + "loss": 0.4716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1339953988790512, + "rewards/margins": 0.6666229367256165, + "rewards/rejected": -0.532627522945404, + "step": 9640 + }, + { + "epoch": 0.56, + "learning_rate": 4.25731814572953e-08, + "logits/chosen": -1.839064359664917, + "logits/rejected": -1.8283777236938477, + "logps/chosen": -18.76883888244629, + "logps/rejected": -221.93959045410156, + "loss": 0.2338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5905332565307617, + "rewards/margins": 2.9793989658355713, + "rewards/rejected": -2.3888657093048096, + "step": 9641 + }, + { + "epoch": 0.56, + "learning_rate": 4.256386210644393e-08, + "logits/chosen": -1.900997519493103, + "logits/rejected": -1.9074923992156982, + "logps/chosen": -17.94725799560547, + "logps/rejected": -202.93116760253906, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42829057574272156, + "rewards/margins": 2.027175188064575, + "rewards/rejected": -1.5988845825195312, + "step": 9642 + }, + { + "epoch": 0.56, + "learning_rate": 4.255454301976053e-08, + "logits/chosen": -1.8221442699432373, + "logits/rejected": -1.8197110891342163, + "logps/chosen": -7.564764022827148, + "logps/rejected": -232.0228729248047, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20426760613918304, + "rewards/margins": 1.1432950496673584, + "rewards/rejected": -0.9390274286270142, + "step": 9643 + }, + { + "epoch": 0.56, + "learning_rate": 4.254522419757617e-08, + "logits/chosen": -1.8977231979370117, + "logits/rejected": -1.90362548828125, + "logps/chosen": -176.53570556640625, + "logps/rejected": -331.98394775390625, + "loss": 0.0902, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.247483968734741, + "rewards/margins": 2.3581011295318604, + "rewards/rejected": -0.11061706393957138, + "step": 9644 + }, + { + "epoch": 0.56, + "learning_rate": 4.253590564022194e-08, + "logits/chosen": -1.9141768217086792, + "logits/rejected": -1.9180984497070312, + "logps/chosen": -20.985498428344727, + "logps/rejected": -267.7359619140625, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16240635514259338, + "rewards/margins": 4.3171491622924805, + "rewards/rejected": -4.15474271774292, + "step": 9645 + }, + { + "epoch": 0.56, + "learning_rate": 4.2526587348028836e-08, + "logits/chosen": -1.9372599124908447, + "logits/rejected": -1.9190547466278076, + "logps/chosen": -236.4222412109375, + "logps/rejected": -526.1036376953125, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.648489475250244, + "rewards/margins": 4.548788547515869, + "rewards/rejected": -1.900299072265625, + "step": 9646 + }, + { + "epoch": 0.56, + "learning_rate": 4.2517269321327917e-08, + "logits/chosen": -1.9667783975601196, + "logits/rejected": -1.9635916948318481, + "logps/chosen": -0.12010977417230606, + "logps/rejected": -43.79494857788086, + "loss": 0.5798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05138775333762169, + "rewards/margins": 0.41097867488861084, + "rewards/rejected": -0.35959091782569885, + "step": 9647 + }, + { + "epoch": 0.56, + "learning_rate": 4.250795156045018e-08, + "logits/chosen": -2.0573642253875732, + "logits/rejected": -2.053027868270874, + "logps/chosen": -0.003136090934276581, + "logps/rejected": -31.620141983032227, + "loss": 0.6545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013664843572769314, + "rewards/margins": 0.29505226016044617, + "rewards/rejected": -0.29518890380859375, + "step": 9648 + }, + { + "epoch": 0.56, + "learning_rate": 4.249863406572667e-08, + "logits/chosen": -1.9533432722091675, + "logits/rejected": -1.8448469638824463, + "logps/chosen": -259.4494934082031, + "logps/rejected": -736.7677001953125, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.677389621734619, + "rewards/margins": 3.214871406555176, + "rewards/rejected": -0.5374817252159119, + "step": 9649 + }, + { + "epoch": 0.56, + "learning_rate": 4.2489316837488354e-08, + "logits/chosen": -2.0618820190429688, + "logits/rejected": -2.046879529953003, + "logps/chosen": -0.0006053795805200934, + "logps/rejected": -155.55401611328125, + "loss": 0.3885, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4373486161930487e-05, + "rewards/margins": 2.2681288719177246, + "rewards/rejected": -2.2681732177734375, + "step": 9650 + }, + { + "epoch": 0.56, + "learning_rate": 4.2479999876066256e-08, + "logits/chosen": -1.9161962270736694, + "logits/rejected": -1.916157603263855, + "logps/chosen": -41.83572006225586, + "logps/rejected": -232.49508666992188, + "loss": 0.1508, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0409824848175049, + "rewards/margins": 2.96620512008667, + "rewards/rejected": -1.9252227544784546, + "step": 9651 + }, + { + "epoch": 0.56, + "learning_rate": 4.2470683181791335e-08, + "logits/chosen": -1.900477409362793, + "logits/rejected": -1.7801079750061035, + "logps/chosen": -214.92022705078125, + "logps/rejected": -544.6597900390625, + "loss": 0.0462, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9072036743164062, + "rewards/margins": 2.966578722000122, + "rewards/rejected": -0.05937499925494194, + "step": 9652 + }, + { + "epoch": 0.56, + "learning_rate": 4.246136675499459e-08, + "logits/chosen": -1.788426399230957, + "logits/rejected": -1.7909810543060303, + "logps/chosen": -106.19605255126953, + "logps/rejected": -312.29278564453125, + "loss": 0.3606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19778823852539062, + "rewards/margins": 4.220589637756348, + "rewards/rejected": -4.418377876281738, + "step": 9653 + }, + { + "epoch": 0.56, + "learning_rate": 4.245205059600696e-08, + "logits/chosen": -1.908150553703308, + "logits/rejected": -1.9102176427841187, + "logps/chosen": -54.79745864868164, + "logps/rejected": -156.39102172851562, + "loss": 0.2208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5950992703437805, + "rewards/margins": 2.8388614654541016, + "rewards/rejected": -2.243762254714966, + "step": 9654 + }, + { + "epoch": 0.56, + "learning_rate": 4.244273470515942e-08, + "logits/chosen": -1.7978556156158447, + "logits/rejected": -1.7954548597335815, + "logps/chosen": -0.8456128835678101, + "logps/rejected": -12.749933242797852, + "loss": 0.6615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09947887808084488, + "rewards/margins": 0.041023220866918564, + "rewards/rejected": 0.058455657213926315, + "step": 9655 + }, + { + "epoch": 0.56, + "learning_rate": 4.2433419082782904e-08, + "logits/chosen": -2.0780930519104004, + "logits/rejected": -2.0666205883026123, + "logps/chosen": -16.86050796508789, + "logps/rejected": -267.64495849609375, + "loss": 0.2512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2202014923095703, + "rewards/margins": 3.9764468669891357, + "rewards/rejected": -3.7562453746795654, + "step": 9656 + }, + { + "epoch": 0.56, + "learning_rate": 4.242410372920837e-08, + "logits/chosen": -2.127629518508911, + "logits/rejected": -2.1265218257904053, + "logps/chosen": -45.16120147705078, + "logps/rejected": -122.74897766113281, + "loss": 0.5361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5661209225654602, + "rewards/margins": 1.590151309967041, + "rewards/rejected": -2.1562721729278564, + "step": 9657 + }, + { + "epoch": 0.56, + "learning_rate": 4.2414788644766705e-08, + "logits/chosen": -1.9348623752593994, + "logits/rejected": -1.894282579421997, + "logps/chosen": -275.1200866699219, + "logps/rejected": -551.5845947265625, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.057809591293335, + "rewards/margins": 3.8649630546569824, + "rewards/rejected": -1.807153344154358, + "step": 9658 + }, + { + "epoch": 0.56, + "learning_rate": 4.2405473829788875e-08, + "logits/chosen": -1.9083350896835327, + "logits/rejected": -1.8986296653747559, + "logps/chosen": -266.0469970703125, + "logps/rejected": -402.5652160644531, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4643616676330566, + "rewards/margins": 2.6647796630859375, + "rewards/rejected": 0.7995819449424744, + "step": 9659 + }, + { + "epoch": 0.56, + "learning_rate": 4.2396159284605736e-08, + "logits/chosen": -1.901898980140686, + "logits/rejected": -1.8797861337661743, + "logps/chosen": -177.94366455078125, + "logps/rejected": -461.82733154296875, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9612457752227783, + "rewards/margins": 4.0746307373046875, + "rewards/rejected": -2.113384962081909, + "step": 9660 + }, + { + "epoch": 0.56, + "learning_rate": 4.238684500954825e-08, + "logits/chosen": -1.8414840698242188, + "logits/rejected": -1.8384709358215332, + "logps/chosen": -13.100180625915527, + "logps/rejected": -184.82675170898438, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41166144609451294, + "rewards/margins": 0.5277594923973083, + "rewards/rejected": -0.11609802395105362, + "step": 9661 + }, + { + "epoch": 0.56, + "learning_rate": 4.237753100494723e-08, + "logits/chosen": -2.034255027770996, + "logits/rejected": -2.0373013019561768, + "logps/chosen": -169.4132080078125, + "logps/rejected": -232.90841674804688, + "loss": 0.142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4498382806777954, + "rewards/margins": 1.418360948562622, + "rewards/rejected": 0.03147735819220543, + "step": 9662 + }, + { + "epoch": 0.56, + "learning_rate": 4.236821727113363e-08, + "logits/chosen": -1.5978654623031616, + "logits/rejected": -1.5936046838760376, + "logps/chosen": -9.114990234375, + "logps/rejected": -89.48724365234375, + "loss": 0.6579, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19749966263771057, + "rewards/margins": 0.2858942151069641, + "rewards/rejected": -0.4833938777446747, + "step": 9663 + }, + { + "epoch": 0.56, + "learning_rate": 4.2358903808438256e-08, + "logits/chosen": -1.8199000358581543, + "logits/rejected": -1.8144054412841797, + "logps/chosen": -26.78847885131836, + "logps/rejected": -174.3403778076172, + "loss": 0.6847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7106661200523376, + "rewards/margins": 0.8984602093696594, + "rewards/rejected": -1.609126329421997, + "step": 9664 + }, + { + "epoch": 0.56, + "learning_rate": 4.2349590617192e-08, + "logits/chosen": -1.9210543632507324, + "logits/rejected": -1.9670029878616333, + "logps/chosen": -221.93045043945312, + "logps/rejected": -425.8997802734375, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6434906721115112, + "rewards/margins": 4.514035224914551, + "rewards/rejected": -2.87054443359375, + "step": 9665 + }, + { + "epoch": 0.56, + "learning_rate": 4.234027769772572e-08, + "logits/chosen": -1.835938811302185, + "logits/rejected": -1.8333686590194702, + "logps/chosen": -6.241503715515137, + "logps/rejected": -158.37258911132812, + "loss": 0.2866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.278571218252182, + "rewards/margins": 3.017667293548584, + "rewards/rejected": -2.739096164703369, + "step": 9666 + }, + { + "epoch": 0.56, + "learning_rate": 4.233096505037023e-08, + "logits/chosen": -1.942490816116333, + "logits/rejected": -1.9450387954711914, + "logps/chosen": -268.6131591796875, + "logps/rejected": -502.2187805175781, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.863909959793091, + "rewards/margins": 7.027335166931152, + "rewards/rejected": -4.163424968719482, + "step": 9667 + }, + { + "epoch": 0.56, + "learning_rate": 4.2321652675456393e-08, + "logits/chosen": -1.984348177909851, + "logits/rejected": -1.9835723638534546, + "logps/chosen": -9.655687608756125e-05, + "logps/rejected": -165.00416564941406, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0951545340940356e-05, + "rewards/margins": 2.951526403427124, + "rewards/rejected": -2.951495409011841, + "step": 9668 + }, + { + "epoch": 0.56, + "learning_rate": 4.2312340573314996e-08, + "logits/chosen": -2.0153112411499023, + "logits/rejected": -2.004791498184204, + "logps/chosen": -6.75324821472168, + "logps/rejected": -166.0816192626953, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11230039596557617, + "rewards/margins": 2.6320927143096924, + "rewards/rejected": -2.519792318344116, + "step": 9669 + }, + { + "epoch": 0.56, + "learning_rate": 4.230302874427688e-08, + "logits/chosen": -1.8267604112625122, + "logits/rejected": -1.8128283023834229, + "logps/chosen": -168.7198486328125, + "logps/rejected": -356.3443908691406, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0403244495391846, + "rewards/margins": 2.1645615100860596, + "rewards/rejected": -0.124237060546875, + "step": 9670 + }, + { + "epoch": 0.56, + "learning_rate": 4.229371718867282e-08, + "logits/chosen": -1.6939177513122559, + "logits/rejected": -1.689328670501709, + "logps/chosen": -32.39845657348633, + "logps/rejected": -241.1284637451172, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025741576682776213, + "rewards/margins": 2.5618042945861816, + "rewards/rejected": -2.564378499984741, + "step": 9671 + }, + { + "epoch": 0.56, + "learning_rate": 4.228440590683363e-08, + "logits/chosen": -2.1070613861083984, + "logits/rejected": -2.103532552719116, + "logps/chosen": -0.0003326876030769199, + "logps/rejected": -114.35853576660156, + "loss": 0.4985, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0882038623094559e-05, + "rewards/margins": 0.9388471245765686, + "rewards/rejected": -0.9388580322265625, + "step": 9672 + }, + { + "epoch": 0.56, + "learning_rate": 4.2275094899090073e-08, + "logits/chosen": -1.9132555723190308, + "logits/rejected": -1.916497826576233, + "logps/chosen": -86.10621643066406, + "logps/rejected": -549.2908935546875, + "loss": 0.0427, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.019700765609741, + "rewards/margins": 9.729948997497559, + "rewards/rejected": -7.710247993469238, + "step": 9673 + }, + { + "epoch": 0.56, + "learning_rate": 4.2265784165772944e-08, + "logits/chosen": -1.9488904476165771, + "logits/rejected": -1.9472887516021729, + "logps/chosen": -24.40296173095703, + "logps/rejected": -242.5337677001953, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2417072355747223, + "rewards/margins": 3.6996266841888428, + "rewards/rejected": -3.4579193592071533, + "step": 9674 + }, + { + "epoch": 0.56, + "learning_rate": 4.225647370721298e-08, + "logits/chosen": -1.8397809267044067, + "logits/rejected": -1.8428208827972412, + "logps/chosen": -3.540497709764168e-05, + "logps/rejected": -239.2477264404297, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079632157707238e-07, + "rewards/margins": 4.220026969909668, + "rewards/rejected": -4.220027446746826, + "step": 9675 + }, + { + "epoch": 0.56, + "learning_rate": 4.224716352374096e-08, + "logits/chosen": -1.9537460803985596, + "logits/rejected": -1.9486578702926636, + "logps/chosen": -0.0017112715868279338, + "logps/rejected": -330.053955078125, + "loss": 0.3518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001101758680306375, + "rewards/margins": 3.151000738143921, + "rewards/rejected": -3.151110887527466, + "step": 9676 + }, + { + "epoch": 0.56, + "learning_rate": 4.223785361568761e-08, + "logits/chosen": -2.1430134773254395, + "logits/rejected": -2.1262929439544678, + "logps/chosen": -14.750897407531738, + "logps/rejected": -255.98629760742188, + "loss": 0.3482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025101853534579277, + "rewards/margins": 3.7561535835266113, + "rewards/rejected": -3.7310516834259033, + "step": 9677 + }, + { + "epoch": 0.56, + "learning_rate": 4.222854398338367e-08, + "logits/chosen": -1.8115801811218262, + "logits/rejected": -1.8103282451629639, + "logps/chosen": -154.64224243164062, + "logps/rejected": -225.48251342773438, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.038787841796875, + "rewards/margins": 0.3995940685272217, + "rewards/rejected": 1.6391937732696533, + "step": 9678 + }, + { + "epoch": 0.56, + "learning_rate": 4.2219234627159854e-08, + "logits/chosen": -1.97116219997406, + "logits/rejected": -1.965059757232666, + "logps/chosen": -107.61090850830078, + "logps/rejected": -246.23085021972656, + "loss": 0.25, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47220537066459656, + "rewards/margins": 2.2863197326660156, + "rewards/rejected": -1.8141144514083862, + "step": 9679 + }, + { + "epoch": 0.56, + "learning_rate": 4.22099255473469e-08, + "logits/chosen": -1.956945776939392, + "logits/rejected": -1.9483883380889893, + "logps/chosen": -27.696250915527344, + "logps/rejected": -160.9544677734375, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035776328295469284, + "rewards/margins": 3.5496878623962402, + "rewards/rejected": -3.513911485671997, + "step": 9680 + }, + { + "epoch": 0.56, + "learning_rate": 4.220061674427547e-08, + "logits/chosen": -2.175758123397827, + "logits/rejected": -2.169576406478882, + "logps/chosen": -109.95015716552734, + "logps/rejected": -259.20147705078125, + "loss": 0.4221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5612846612930298, + "rewards/margins": 0.49165576696395874, + "rewards/rejected": 0.06962890923023224, + "step": 9681 + }, + { + "epoch": 0.56, + "learning_rate": 4.219130821827632e-08, + "logits/chosen": -1.726025938987732, + "logits/rejected": -1.7236981391906738, + "logps/chosen": -48.54597473144531, + "logps/rejected": -111.07705688476562, + "loss": 0.7407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0114974975585938, + "rewards/margins": 1.2881081104278564, + "rewards/rejected": -2.29960560798645, + "step": 9682 + }, + { + "epoch": 0.56, + "learning_rate": 4.218199996968006e-08, + "logits/chosen": -1.851555347442627, + "logits/rejected": -1.8552005290985107, + "logps/chosen": -12.242269515991211, + "logps/rejected": -92.3015365600586, + "loss": 0.4527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18186597526073456, + "rewards/margins": 1.5998647212982178, + "rewards/rejected": -1.7817306518554688, + "step": 9683 + }, + { + "epoch": 0.56, + "learning_rate": 4.2172691998817414e-08, + "logits/chosen": -2.069448709487915, + "logits/rejected": -2.071828842163086, + "logps/chosen": -37.3746452331543, + "logps/rejected": -123.67547607421875, + "loss": 0.5307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3148635923862457, + "rewards/margins": 1.3810768127441406, + "rewards/rejected": -1.695940375328064, + "step": 9684 + }, + { + "epoch": 0.56, + "learning_rate": 4.216338430601905e-08, + "logits/chosen": -2.032212257385254, + "logits/rejected": -2.0687708854675293, + "logps/chosen": -357.58026123046875, + "logps/rejected": -482.8061218261719, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9231202602386475, + "rewards/margins": 6.713101387023926, + "rewards/rejected": -3.7899811267852783, + "step": 9685 + }, + { + "epoch": 0.56, + "learning_rate": 4.21540768916156e-08, + "logits/chosen": -1.8791730403900146, + "logits/rejected": -1.8522533178329468, + "logps/chosen": -242.28305053710938, + "logps/rejected": -332.9026794433594, + "loss": 0.1544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.109182834625244, + "rewards/margins": 1.107110619544983, + "rewards/rejected": 1.0020722150802612, + "step": 9686 + }, + { + "epoch": 0.56, + "learning_rate": 4.2144769755937724e-08, + "logits/chosen": -1.7781134843826294, + "logits/rejected": -1.7817806005477905, + "logps/chosen": -63.332252502441406, + "logps/rejected": -299.0194396972656, + "loss": 0.1769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5695862174034119, + "rewards/margins": 4.2480621337890625, + "rewards/rejected": -3.678476095199585, + "step": 9687 + }, + { + "epoch": 0.56, + "learning_rate": 4.213546289931604e-08, + "logits/chosen": -1.975886583328247, + "logits/rejected": -1.960978627204895, + "logps/chosen": -14.911046981811523, + "logps/rejected": -257.29937744140625, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44541263580322266, + "rewards/margins": 2.1751046180725098, + "rewards/rejected": -1.7296921014785767, + "step": 9688 + }, + { + "epoch": 0.56, + "learning_rate": 4.2126156322081194e-08, + "logits/chosen": -1.9922306537628174, + "logits/rejected": -2.0070626735687256, + "logps/chosen": -200.8679656982422, + "logps/rejected": -253.2236785888672, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.352888584136963, + "rewards/margins": 3.2542724609375, + "rewards/rejected": -0.9013839960098267, + "step": 9689 + }, + { + "epoch": 0.56, + "learning_rate": 4.211685002456378e-08, + "logits/chosen": -1.87836492061615, + "logits/rejected": -1.929329752922058, + "logps/chosen": -181.3790740966797, + "logps/rejected": -383.7992248535156, + "loss": 0.0956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.84588623046875, + "rewards/margins": 3.0446534156799316, + "rewards/rejected": -2.1987671852111816, + "step": 9690 + }, + { + "epoch": 0.56, + "learning_rate": 4.2107544007094426e-08, + "logits/chosen": -1.9419111013412476, + "logits/rejected": -1.9348334074020386, + "logps/chosen": -19.17919921875, + "logps/rejected": -275.35870361328125, + "loss": 0.1994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5177593231201172, + "rewards/margins": 5.529224872589111, + "rewards/rejected": -5.011465549468994, + "step": 9691 + }, + { + "epoch": 0.56, + "learning_rate": 4.2098238270003704e-08, + "logits/chosen": -2.037001132965088, + "logits/rejected": -2.0348925590515137, + "logps/chosen": -0.022843442857265472, + "logps/rejected": -247.52276611328125, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014599085552617908, + "rewards/margins": 4.751480579376221, + "rewards/rejected": -4.752940654754639, + "step": 9692 + }, + { + "epoch": 0.56, + "learning_rate": 4.208893281362222e-08, + "logits/chosen": -1.955492615699768, + "logits/rejected": -1.9602059125900269, + "logps/chosen": -41.58204650878906, + "logps/rejected": -248.6001739501953, + "loss": 0.5032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2562374174594879, + "rewards/margins": 0.26139336824417114, + "rewards/rejected": -0.0051559447310864925, + "step": 9693 + }, + { + "epoch": 0.56, + "learning_rate": 4.2079627638280526e-08, + "logits/chosen": -1.6923209428787231, + "logits/rejected": -1.6837186813354492, + "logps/chosen": -44.260704040527344, + "logps/rejected": -405.2613525390625, + "loss": 0.2589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028455352410674095, + "rewards/margins": 4.896409034729004, + "rewards/rejected": -4.867953777313232, + "step": 9694 + }, + { + "epoch": 0.56, + "learning_rate": 4.2070322744309214e-08, + "logits/chosen": -1.847644329071045, + "logits/rejected": -1.8466603755950928, + "logps/chosen": -205.0143280029297, + "logps/rejected": -556.486083984375, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9409072399139404, + "rewards/margins": 8.40807056427002, + "rewards/rejected": -5.4671630859375, + "step": 9695 + }, + { + "epoch": 0.56, + "learning_rate": 4.2061018132038825e-08, + "logits/chosen": -1.8951882123947144, + "logits/rejected": -1.8957535028457642, + "logps/chosen": -174.50950622558594, + "logps/rejected": -308.1253356933594, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4825423955917358, + "rewards/margins": 0.6210586428642273, + "rewards/rejected": 0.8614837527275085, + "step": 9696 + }, + { + "epoch": 0.56, + "learning_rate": 4.20517138017999e-08, + "logits/chosen": -1.8973190784454346, + "logits/rejected": -1.8695615530014038, + "logps/chosen": -194.48123168945312, + "logps/rejected": -422.3691711425781, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.992761254310608, + "rewards/margins": 3.705331325531006, + "rewards/rejected": -1.7125701904296875, + "step": 9697 + }, + { + "epoch": 0.56, + "learning_rate": 4.204240975392298e-08, + "logits/chosen": -1.8807927370071411, + "logits/rejected": -1.8787187337875366, + "logps/chosen": -43.05818176269531, + "logps/rejected": -152.98965454101562, + "loss": 0.3092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6621101498603821, + "rewards/margins": 1.5911552906036377, + "rewards/rejected": -0.9290451407432556, + "step": 9698 + }, + { + "epoch": 0.56, + "learning_rate": 4.20331059887386e-08, + "logits/chosen": -2.0352697372436523, + "logits/rejected": -2.036184310913086, + "logps/chosen": -229.8647003173828, + "logps/rejected": -393.9851989746094, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.757380723953247, + "rewards/margins": 3.5953874588012695, + "rewards/rejected": -0.8380066156387329, + "step": 9699 + }, + { + "epoch": 0.56, + "learning_rate": 4.2023802506577255e-08, + "logits/chosen": -1.9711581468582153, + "logits/rejected": -2.0172340869903564, + "logps/chosen": -231.77389526367188, + "logps/rejected": -384.15423583984375, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.854304552078247, + "rewards/margins": 3.9483354091644287, + "rewards/rejected": -2.0940308570861816, + "step": 9700 + }, + { + "epoch": 0.56, + "learning_rate": 4.2014499307769466e-08, + "logits/chosen": -1.9515042304992676, + "logits/rejected": -1.953721523284912, + "logps/chosen": -0.2413492649793625, + "logps/rejected": -87.61290740966797, + "loss": 0.4418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011597098782658577, + "rewards/margins": 1.4130572080612183, + "rewards/rejected": -1.4246543645858765, + "step": 9701 + }, + { + "epoch": 0.56, + "learning_rate": 4.200519639264571e-08, + "logits/chosen": -1.957507610321045, + "logits/rejected": -1.9651511907577515, + "logps/chosen": -0.0002321975480299443, + "logps/rejected": -169.4452667236328, + "loss": 0.4179, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2515270100266207e-05, + "rewards/margins": 1.7908810377120972, + "rewards/rejected": -1.7908935546875, + "step": 9702 + }, + { + "epoch": 0.56, + "learning_rate": 4.199589376153651e-08, + "logits/chosen": -2.1475419998168945, + "logits/rejected": -2.1507253646850586, + "logps/chosen": -36.55533981323242, + "logps/rejected": -148.7924346923828, + "loss": 0.3633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3278064727783203, + "rewards/margins": 1.1457066535949707, + "rewards/rejected": -0.8179001212120056, + "step": 9703 + }, + { + "epoch": 0.56, + "learning_rate": 4.198659141477229e-08, + "logits/chosen": -1.842435359954834, + "logits/rejected": -1.8399128913879395, + "logps/chosen": -94.24115753173828, + "logps/rejected": -389.8883972167969, + "loss": 0.0972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2307090759277344, + "rewards/margins": 6.872356414794922, + "rewards/rejected": -5.6416473388671875, + "step": 9704 + }, + { + "epoch": 0.56, + "learning_rate": 4.1977289352683544e-08, + "logits/chosen": -2.009239673614502, + "logits/rejected": -2.010289430618286, + "logps/chosen": -0.09337183088064194, + "logps/rejected": -68.9236831665039, + "loss": 0.3961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004970808979123831, + "rewards/margins": 1.9674447774887085, + "rewards/rejected": -1.972415566444397, + "step": 9705 + }, + { + "epoch": 0.56, + "learning_rate": 4.196798757560074e-08, + "logits/chosen": -2.0819225311279297, + "logits/rejected": -2.080787181854248, + "logps/chosen": -6.722872734069824, + "logps/rejected": -97.60545349121094, + "loss": 0.4559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6161271333694458, + "rewards/margins": 0.3776581883430481, + "rewards/rejected": 0.2384689301252365, + "step": 9706 + }, + { + "epoch": 0.56, + "learning_rate": 4.1958686083854294e-08, + "logits/chosen": -1.9544243812561035, + "logits/rejected": -1.963225245475769, + "logps/chosen": -24.134361267089844, + "logps/rejected": -120.61697387695312, + "loss": 0.3434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2465190887451172, + "rewards/margins": 2.461484909057617, + "rewards/rejected": -2.2149658203125, + "step": 9707 + }, + { + "epoch": 0.56, + "learning_rate": 4.194938487777466e-08, + "logits/chosen": -1.8244715929031372, + "logits/rejected": -1.8140207529067993, + "logps/chosen": -0.018991999328136444, + "logps/rejected": -289.33160400390625, + "loss": 0.3295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001342687988653779, + "rewards/margins": 6.753657817840576, + "rewards/rejected": -6.755000591278076, + "step": 9708 + }, + { + "epoch": 0.57, + "learning_rate": 4.194008395769225e-08, + "logits/chosen": -1.7228869199752808, + "logits/rejected": -1.723750352859497, + "logps/chosen": -5.637500762939453, + "logps/rejected": -358.2081298828125, + "loss": 0.3245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1116679236292839, + "rewards/margins": 5.615479946136475, + "rewards/rejected": -5.503811836242676, + "step": 9709 + }, + { + "epoch": 0.57, + "learning_rate": 4.1930783323937494e-08, + "logits/chosen": -1.8618789911270142, + "logits/rejected": -1.8592761754989624, + "logps/chosen": -99.98689270019531, + "logps/rejected": -218.94369506835938, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7077118158340454, + "rewards/margins": 1.4666534662246704, + "rewards/rejected": -0.758941650390625, + "step": 9710 + }, + { + "epoch": 0.57, + "learning_rate": 4.192148297684076e-08, + "logits/chosen": -1.8121960163116455, + "logits/rejected": -1.7893481254577637, + "logps/chosen": -0.002308179857209325, + "logps/rejected": -167.7059326171875, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00071769516216591, + "rewards/margins": 2.146960973739624, + "rewards/rejected": -2.1462433338165283, + "step": 9711 + }, + { + "epoch": 0.57, + "learning_rate": 4.19121829167325e-08, + "logits/chosen": -1.8640748262405396, + "logits/rejected": -1.8671354055404663, + "logps/chosen": -66.28614044189453, + "logps/rejected": -357.08154296875, + "loss": 0.4295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.489227294921875, + "rewards/margins": 6.235388278961182, + "rewards/rejected": -6.724615573883057, + "step": 9712 + }, + { + "epoch": 0.57, + "learning_rate": 4.190288314394304e-08, + "logits/chosen": -1.9440943002700806, + "logits/rejected": -1.9451884031295776, + "logps/chosen": -173.7677459716797, + "logps/rejected": -332.94000244140625, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4740891456604004, + "rewards/margins": 2.6966660022735596, + "rewards/rejected": -0.22257690131664276, + "step": 9713 + }, + { + "epoch": 0.57, + "learning_rate": 4.189358365880279e-08, + "logits/chosen": -2.0544543266296387, + "logits/rejected": -2.053337335586548, + "logps/chosen": -6.633924961090088, + "logps/rejected": -95.28707885742188, + "loss": 0.651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12519855797290802, + "rewards/margins": 0.011815071105957031, + "rewards/rejected": 0.11338348686695099, + "step": 9714 + }, + { + "epoch": 0.57, + "learning_rate": 4.188428446164209e-08, + "logits/chosen": -1.8518894910812378, + "logits/rejected": -1.839293360710144, + "logps/chosen": -45.3772087097168, + "logps/rejected": -219.43936157226562, + "loss": 0.4076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3660038113594055, + "rewards/margins": 3.248250961303711, + "rewards/rejected": -3.6142547130584717, + "step": 9715 + }, + { + "epoch": 0.57, + "learning_rate": 4.187498555279132e-08, + "logits/chosen": -1.848313808441162, + "logits/rejected": -1.836350679397583, + "logps/chosen": -221.5823211669922, + "logps/rejected": -402.66217041015625, + "loss": 0.1669, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1325668096542358, + "rewards/margins": 1.8752120733261108, + "rewards/rejected": -0.742645263671875, + "step": 9716 + }, + { + "epoch": 0.57, + "learning_rate": 4.186568693258079e-08, + "logits/chosen": -1.9578487873077393, + "logits/rejected": -1.9611815214157104, + "logps/chosen": -0.0466548427939415, + "logps/rejected": -54.29499816894531, + "loss": 0.573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002253148704767227, + "rewards/margins": 0.5611623525619507, + "rewards/rejected": -0.56341552734375, + "step": 9717 + }, + { + "epoch": 0.57, + "learning_rate": 4.185638860134086e-08, + "logits/chosen": -1.9512430429458618, + "logits/rejected": -1.9578909873962402, + "logps/chosen": -16.361207962036133, + "logps/rejected": -164.57492065429688, + "loss": 0.4643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11996708065271378, + "rewards/margins": 0.9991937875747681, + "rewards/rejected": -0.8792266845703125, + "step": 9718 + }, + { + "epoch": 0.57, + "learning_rate": 4.1847090559401846e-08, + "logits/chosen": -1.9978647232055664, + "logits/rejected": -2.0034515857696533, + "logps/chosen": -161.81192016601562, + "logps/rejected": -351.08697509765625, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7563308477401733, + "rewards/margins": 3.7571916580200195, + "rewards/rejected": -2.0008606910705566, + "step": 9719 + }, + { + "epoch": 0.57, + "learning_rate": 4.1837792807094054e-08, + "logits/chosen": -1.6640071868896484, + "logits/rejected": -1.665611982345581, + "logps/chosen": -58.81626892089844, + "logps/rejected": -76.52323913574219, + "loss": 0.8997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9589244723320007, + "rewards/margins": 0.05705755949020386, + "rewards/rejected": -1.0159820318222046, + "step": 9720 + }, + { + "epoch": 0.57, + "learning_rate": 4.1828495344747774e-08, + "logits/chosen": -2.0839414596557617, + "logits/rejected": -2.078005790710449, + "logps/chosen": -33.39870834350586, + "logps/rejected": -157.2195587158203, + "loss": 0.5143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1430656462907791, + "rewards/margins": 0.3567344546318054, + "rewards/rejected": -0.2136688232421875, + "step": 9721 + }, + { + "epoch": 0.57, + "learning_rate": 4.181919817269332e-08, + "logits/chosen": -1.8356785774230957, + "logits/rejected": -1.8340697288513184, + "logps/chosen": -172.57998657226562, + "logps/rejected": -286.75390625, + "loss": 0.0721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6005219221115112, + "rewards/margins": 2.7048280239105225, + "rewards/rejected": -1.1043061017990112, + "step": 9722 + }, + { + "epoch": 0.57, + "learning_rate": 4.180990129126095e-08, + "logits/chosen": -2.0443499088287354, + "logits/rejected": -2.0346615314483643, + "logps/chosen": -24.00567054748535, + "logps/rejected": -137.21836853027344, + "loss": 0.5757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5234686136245728, + "rewards/margins": 0.04432585835456848, + "rewards/rejected": 0.4791427552700043, + "step": 9723 + }, + { + "epoch": 0.57, + "learning_rate": 4.1800604700780974e-08, + "logits/chosen": -1.9021334648132324, + "logits/rejected": -1.8899723291397095, + "logps/chosen": -0.019404267892241478, + "logps/rejected": -134.6848602294922, + "loss": 0.3725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0072257122956216335, + "rewards/margins": 2.789134979248047, + "rewards/rejected": -2.781909227371216, + "step": 9724 + }, + { + "epoch": 0.57, + "learning_rate": 4.17913084015836e-08, + "logits/chosen": -1.7684215307235718, + "logits/rejected": -1.758593201637268, + "logps/chosen": -349.6598815917969, + "logps/rejected": -518.48828125, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.15818190574646, + "rewards/margins": 4.160269260406494, + "rewards/rejected": -2.002087354660034, + "step": 9725 + }, + { + "epoch": 0.57, + "learning_rate": 4.178201239399911e-08, + "logits/chosen": -2.009864330291748, + "logits/rejected": -2.0031700134277344, + "logps/chosen": -24.843029022216797, + "logps/rejected": -259.527587890625, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3616781234741211, + "rewards/margins": 2.5944783687591553, + "rewards/rejected": -2.232800245285034, + "step": 9726 + }, + { + "epoch": 0.57, + "learning_rate": 4.177271667835775e-08, + "logits/chosen": -1.9205352067947388, + "logits/rejected": -1.9140523672103882, + "logps/chosen": -8.541196823120117, + "logps/rejected": -313.3846740722656, + "loss": 0.2648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34296733140945435, + "rewards/margins": 4.182151794433594, + "rewards/rejected": -3.839184522628784, + "step": 9727 + }, + { + "epoch": 0.57, + "learning_rate": 4.1763421254989726e-08, + "logits/chosen": -1.9083524942398071, + "logits/rejected": -1.9090462923049927, + "logps/chosen": -248.8866729736328, + "logps/rejected": -391.05999755859375, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0395736694335938, + "rewards/margins": 2.7486984729766846, + "rewards/rejected": -0.709124743938446, + "step": 9728 + }, + { + "epoch": 0.57, + "learning_rate": 4.175412612422529e-08, + "logits/chosen": -1.751222848892212, + "logits/rejected": -1.7461923360824585, + "logps/chosen": -46.151424407958984, + "logps/rejected": -191.34368896484375, + "loss": 0.5514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45253220200538635, + "rewards/margins": 1.431856632232666, + "rewards/rejected": -1.88438880443573, + "step": 9729 + }, + { + "epoch": 0.57, + "learning_rate": 4.174483128639461e-08, + "logits/chosen": -1.8735560178756714, + "logits/rejected": -1.8294017314910889, + "logps/chosen": -124.9848861694336, + "logps/rejected": -296.16259765625, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1198128461837769, + "rewards/margins": 0.6378426551818848, + "rewards/rejected": 0.4819702208042145, + "step": 9730 + }, + { + "epoch": 0.57, + "learning_rate": 4.173553674182791e-08, + "logits/chosen": -1.864282488822937, + "logits/rejected": -1.859224796295166, + "logps/chosen": -0.0013694862136617303, + "logps/rejected": -115.2921371459961, + "loss": 0.6099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011079629621235654, + "rewards/margins": 0.35318127274513245, + "rewards/rejected": -0.3532920777797699, + "step": 9731 + }, + { + "epoch": 0.57, + "learning_rate": 4.1726242490855365e-08, + "logits/chosen": -1.7770133018493652, + "logits/rejected": -1.7578120231628418, + "logps/chosen": -190.9305419921875, + "logps/rejected": -323.4451599121094, + "loss": 0.1677, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5218414068222046, + "rewards/margins": 1.1421904563903809, + "rewards/rejected": 0.37965089082717896, + "step": 9732 + }, + { + "epoch": 0.57, + "learning_rate": 4.1716948533807164e-08, + "logits/chosen": -1.9017629623413086, + "logits/rejected": -1.896444320678711, + "logps/chosen": -0.049802061170339584, + "logps/rejected": -182.22312927246094, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00017262808978557587, + "rewards/margins": 4.022067546844482, + "rewards/rejected": -4.021894931793213, + "step": 9733 + }, + { + "epoch": 0.57, + "learning_rate": 4.170765487101346e-08, + "logits/chosen": -1.9576228857040405, + "logits/rejected": -1.9641807079315186, + "logps/chosen": -1.575355887413025, + "logps/rejected": -113.5124282836914, + "loss": 0.3893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040260374546051025, + "rewards/margins": 1.8771369457244873, + "rewards/rejected": -1.917397379875183, + "step": 9734 + }, + { + "epoch": 0.57, + "learning_rate": 4.169836150280443e-08, + "logits/chosen": -2.0185070037841797, + "logits/rejected": -2.0108323097229004, + "logps/chosen": -2.713088274002075, + "logps/rejected": -239.65333557128906, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1562885046005249, + "rewards/margins": 6.565977096557617, + "rewards/rejected": -6.722265720367432, + "step": 9735 + }, + { + "epoch": 0.57, + "learning_rate": 4.1689068429510197e-08, + "logits/chosen": -1.9573566913604736, + "logits/rejected": -1.9441242218017578, + "logps/chosen": -39.22462844848633, + "logps/rejected": -208.43218994140625, + "loss": 0.5448, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.776358425617218, + "rewards/margins": -0.12072259187698364, + "rewards/rejected": 0.8970810174942017, + "step": 9736 + }, + { + "epoch": 0.57, + "learning_rate": 4.167977565146091e-08, + "logits/chosen": -2.093104600906372, + "logits/rejected": -2.0898804664611816, + "logps/chosen": -14.971762657165527, + "logps/rejected": -357.5687255859375, + "loss": 0.2671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17980872094631195, + "rewards/margins": 9.009441375732422, + "rewards/rejected": -8.829632759094238, + "step": 9737 + }, + { + "epoch": 0.57, + "learning_rate": 4.1670483168986686e-08, + "logits/chosen": -1.8891918659210205, + "logits/rejected": -1.890088677406311, + "logps/chosen": -218.11338806152344, + "logps/rejected": -341.6181335449219, + "loss": 0.0671, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8847182989120483, + "rewards/margins": 2.447026014328003, + "rewards/rejected": -0.5623077750205994, + "step": 9738 + }, + { + "epoch": 0.57, + "learning_rate": 4.166119098241766e-08, + "logits/chosen": -1.9373894929885864, + "logits/rejected": -1.9801464080810547, + "logps/chosen": -136.04689025878906, + "logps/rejected": -314.28497314453125, + "loss": 0.1299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7380844354629517, + "rewards/margins": 1.5190781354904175, + "rewards/rejected": 0.21900634467601776, + "step": 9739 + }, + { + "epoch": 0.57, + "learning_rate": 4.1651899092083895e-08, + "logits/chosen": -2.0966668128967285, + "logits/rejected": -2.0879364013671875, + "logps/chosen": -6.738216876983643, + "logps/rejected": -101.1490478515625, + "loss": 0.4426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29687628149986267, + "rewards/margins": 0.8985714912414551, + "rewards/rejected": -0.6016952395439148, + "step": 9740 + }, + { + "epoch": 0.57, + "learning_rate": 4.164260749831553e-08, + "logits/chosen": -1.8865439891815186, + "logits/rejected": -1.8891034126281738, + "logps/chosen": -1.2485120296478271, + "logps/rejected": -149.1022491455078, + "loss": 0.3549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09004189819097519, + "rewards/margins": 2.6539762020111084, + "rewards/rejected": -2.563934326171875, + "step": 9741 + }, + { + "epoch": 0.57, + "learning_rate": 4.1633316201442614e-08, + "logits/chosen": -2.0490150451660156, + "logits/rejected": -2.018239736557007, + "logps/chosen": -242.621337890625, + "logps/rejected": -475.7388916015625, + "loss": 0.1004, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.051684617996216, + "rewards/margins": 2.1328887939453125, + "rewards/rejected": -0.08120422810316086, + "step": 9742 + }, + { + "epoch": 0.57, + "learning_rate": 4.1624025201795244e-08, + "logits/chosen": -1.922715663909912, + "logits/rejected": -1.913469672203064, + "logps/chosen": -7.164270209614187e-05, + "logps/rejected": -92.9971923828125, + "loss": 0.5038, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6583111321087927e-06, + "rewards/margins": 0.9724361896514893, + "rewards/rejected": -0.9724335074424744, + "step": 9743 + }, + { + "epoch": 0.57, + "learning_rate": 4.161473449970345e-08, + "logits/chosen": -1.875113844871521, + "logits/rejected": -1.8714150190353394, + "logps/chosen": -1.2735992670059204, + "logps/rejected": -198.76852416992188, + "loss": 0.3407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04938904196023941, + "rewards/margins": 2.8269877433776855, + "rewards/rejected": -2.7775986194610596, + "step": 9744 + }, + { + "epoch": 0.57, + "learning_rate": 4.1605444095497304e-08, + "logits/chosen": -2.046274185180664, + "logits/rejected": -2.0480265617370605, + "logps/chosen": -68.90501403808594, + "logps/rejected": -226.2261962890625, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6250579953193665, + "rewards/margins": 1.9088852405548096, + "rewards/rejected": -1.2838271856307983, + "step": 9745 + }, + { + "epoch": 0.57, + "learning_rate": 4.1596153989506863e-08, + "logits/chosen": -1.8115259408950806, + "logits/rejected": -1.7965258359909058, + "logps/chosen": -85.50626373291016, + "logps/rejected": -335.08367919921875, + "loss": 0.5955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2169700711965561, + "rewards/margins": 0.883985161781311, + "rewards/rejected": -1.1009552478790283, + "step": 9746 + }, + { + "epoch": 0.57, + "learning_rate": 4.158686418206214e-08, + "logits/chosen": -1.9859378337860107, + "logits/rejected": -1.9743698835372925, + "logps/chosen": -14.386497497558594, + "logps/rejected": -225.7130889892578, + "loss": 0.3882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06716003268957138, + "rewards/margins": 2.732182264328003, + "rewards/rejected": -2.799342393875122, + "step": 9747 + }, + { + "epoch": 0.57, + "learning_rate": 4.1577574673493155e-08, + "logits/chosen": -1.8454947471618652, + "logits/rejected": -1.8408149480819702, + "logps/chosen": -6.986067295074463, + "logps/rejected": -92.63542175292969, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9069086313247681, + "rewards/margins": 1.0687586069107056, + "rewards/rejected": -0.1618499755859375, + "step": 9748 + }, + { + "epoch": 0.57, + "learning_rate": 4.1568285464129904e-08, + "logits/chosen": -1.9327105283737183, + "logits/rejected": -1.9655660390853882, + "logps/chosen": -189.35140991210938, + "logps/rejected": -577.3916015625, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3667815923690796, + "rewards/margins": 6.3215789794921875, + "rewards/rejected": -4.954797267913818, + "step": 9749 + }, + { + "epoch": 0.57, + "learning_rate": 4.1558996554302404e-08, + "logits/chosen": -1.8135796785354614, + "logits/rejected": -1.7728214263916016, + "logps/chosen": -139.19358825683594, + "logps/rejected": -314.78472900390625, + "loss": 0.1771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.306127905845642, + "rewards/margins": 1.9780211448669434, + "rewards/rejected": -0.671893298625946, + "step": 9750 + }, + { + "epoch": 0.57, + "learning_rate": 4.1549707944340636e-08, + "logits/chosen": -2.1297404766082764, + "logits/rejected": -2.1337687969207764, + "logps/chosen": -71.67817687988281, + "logps/rejected": -236.75430297851562, + "loss": 0.2025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4842781126499176, + "rewards/margins": 3.1925089359283447, + "rewards/rejected": -2.70823073387146, + "step": 9751 + }, + { + "epoch": 0.57, + "learning_rate": 4.154041963457458e-08, + "logits/chosen": -1.75233793258667, + "logits/rejected": -1.7451740503311157, + "logps/chosen": -0.0011594746029004455, + "logps/rejected": -394.0086669921875, + "loss": 0.3385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.965808380395174e-05, + "rewards/margins": 7.968329906463623, + "rewards/rejected": -7.968359470367432, + "step": 9752 + }, + { + "epoch": 0.57, + "learning_rate": 4.153113162533419e-08, + "logits/chosen": -1.9634281396865845, + "logits/rejected": -1.9494696855545044, + "logps/chosen": -39.864166259765625, + "logps/rejected": -197.62545776367188, + "loss": 0.0955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7445964813232422, + "rewards/margins": 2.7311840057373047, + "rewards/rejected": -0.9865875244140625, + "step": 9753 + }, + { + "epoch": 0.57, + "learning_rate": 4.152184391694945e-08, + "logits/chosen": -2.0034961700439453, + "logits/rejected": -1.997949481010437, + "logps/chosen": -23.62782859802246, + "logps/rejected": -162.91958618164062, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48338204622268677, + "rewards/margins": 1.3154683113098145, + "rewards/rejected": -0.8320862054824829, + "step": 9754 + }, + { + "epoch": 0.57, + "learning_rate": 4.1512556509750264e-08, + "logits/chosen": -1.7895375490188599, + "logits/rejected": -1.7854132652282715, + "logps/chosen": -12.07485294342041, + "logps/rejected": -153.5592041015625, + "loss": 0.4367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12127342075109482, + "rewards/margins": 1.1882939338684082, + "rewards/rejected": -1.3095673322677612, + "step": 9755 + }, + { + "epoch": 0.57, + "learning_rate": 4.15032694040666e-08, + "logits/chosen": -1.9806129932403564, + "logits/rejected": -1.983460783958435, + "logps/chosen": -6.9834208488464355, + "logps/rejected": -144.22909545898438, + "loss": 0.4223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2120085209608078, + "rewards/margins": 1.2589292526245117, + "rewards/rejected": -1.0469207763671875, + "step": 9756 + }, + { + "epoch": 0.57, + "learning_rate": 4.1493982600228353e-08, + "logits/chosen": -2.018615961074829, + "logits/rejected": -2.009107828140259, + "logps/chosen": -18.804039001464844, + "logps/rejected": -209.20809936523438, + "loss": 0.3706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1764509230852127, + "rewards/margins": 1.9606062173843384, + "rewards/rejected": -1.784155249595642, + "step": 9757 + }, + { + "epoch": 0.57, + "learning_rate": 4.148469609856547e-08, + "logits/chosen": -1.896715521812439, + "logits/rejected": -1.9016149044036865, + "logps/chosen": -79.53083801269531, + "logps/rejected": -222.01361083984375, + "loss": 0.2345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4741256833076477, + "rewards/margins": 2.6545028686523438, + "rewards/rejected": -2.180377244949341, + "step": 9758 + }, + { + "epoch": 0.57, + "learning_rate": 4.147540989940782e-08, + "logits/chosen": -1.8873860836029053, + "logits/rejected": -1.8126157522201538, + "logps/chosen": -179.28927612304688, + "logps/rejected": -513.915283203125, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3970627784729004, + "rewards/margins": 5.527742385864258, + "rewards/rejected": -3.1306793689727783, + "step": 9759 + }, + { + "epoch": 0.57, + "learning_rate": 4.1466124003085326e-08, + "logits/chosen": -1.9612207412719727, + "logits/rejected": -1.9538618326187134, + "logps/chosen": -55.2718391418457, + "logps/rejected": -201.74473571777344, + "loss": 0.2274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0331677198410034, + "rewards/margins": 1.4394619464874268, + "rewards/rejected": -0.4062942564487457, + "step": 9760 + }, + { + "epoch": 0.57, + "learning_rate": 4.145683840992783e-08, + "logits/chosen": -1.8951163291931152, + "logits/rejected": -1.898164987564087, + "logps/chosen": -37.29063415527344, + "logps/rejected": -192.4896240234375, + "loss": 0.3232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16360321640968323, + "rewards/margins": 4.264445781707764, + "rewards/rejected": -4.100842475891113, + "step": 9761 + }, + { + "epoch": 0.57, + "learning_rate": 4.1447553120265247e-08, + "logits/chosen": -1.8960001468658447, + "logits/rejected": -1.855826735496521, + "logps/chosen": -222.37257385253906, + "logps/rejected": -413.40478515625, + "loss": 0.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2491012811660767, + "rewards/margins": 1.660810947418213, + "rewards/rejected": -0.41170960664749146, + "step": 9762 + }, + { + "epoch": 0.57, + "learning_rate": 4.143826813442738e-08, + "logits/chosen": -1.9452400207519531, + "logits/rejected": -1.9447968006134033, + "logps/chosen": -19.993783950805664, + "logps/rejected": -45.08879852294922, + "loss": 0.8283, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.23850059509277344, + "rewards/margins": -0.1931995451450348, + "rewards/rejected": -0.045301057398319244, + "step": 9763 + }, + { + "epoch": 0.57, + "learning_rate": 4.142898345274414e-08, + "logits/chosen": -1.841530680656433, + "logits/rejected": -1.960824728012085, + "logps/chosen": -268.53125, + "logps/rejected": -569.2363891601562, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7687348127365112, + "rewards/margins": 9.74548625946045, + "rewards/rejected": -7.976751804351807, + "step": 9764 + }, + { + "epoch": 0.57, + "learning_rate": 4.14196990755453e-08, + "logits/chosen": -2.101335048675537, + "logits/rejected": -2.075878381729126, + "logps/chosen": -5.924591823713854e-05, + "logps/rejected": -221.96820068359375, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0490093700354919e-06, + "rewards/margins": 5.143413543701172, + "rewards/rejected": -5.143414497375488, + "step": 9765 + }, + { + "epoch": 0.57, + "learning_rate": 4.141041500316074e-08, + "logits/chosen": -1.9841516017913818, + "logits/rejected": -1.981236457824707, + "logps/chosen": -7.486238609999418e-05, + "logps/rejected": -72.02374267578125, + "loss": 0.5919, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4807912925316487e-06, + "rewards/margins": 0.4562298357486725, + "rewards/rejected": -0.4562263488769531, + "step": 9766 + }, + { + "epoch": 0.57, + "learning_rate": 4.1401131235920255e-08, + "logits/chosen": -1.8905270099639893, + "logits/rejected": -1.8804322481155396, + "logps/chosen": -237.13491821289062, + "logps/rejected": -356.06304931640625, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1661834716796875, + "rewards/margins": 2.0879454612731934, + "rewards/rejected": 0.07823791354894638, + "step": 9767 + }, + { + "epoch": 0.57, + "learning_rate": 4.139184777415364e-08, + "logits/chosen": -1.8753653764724731, + "logits/rejected": -1.8707717657089233, + "logps/chosen": -15.1824369430542, + "logps/rejected": -186.69041442871094, + "loss": 0.222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7411438822746277, + "rewards/margins": 1.922307014465332, + "rewards/rejected": -1.1811630725860596, + "step": 9768 + }, + { + "epoch": 0.57, + "learning_rate": 4.138256461819072e-08, + "logits/chosen": -1.6922028064727783, + "logits/rejected": -1.666818380355835, + "logps/chosen": -137.62855529785156, + "logps/rejected": -272.6395263671875, + "loss": 0.0932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.723530650138855, + "rewards/margins": 2.1705522537231445, + "rewards/rejected": -0.447021484375, + "step": 9769 + }, + { + "epoch": 0.57, + "learning_rate": 4.137328176836124e-08, + "logits/chosen": -2.0190787315368652, + "logits/rejected": -1.9593472480773926, + "logps/chosen": -205.21136474609375, + "logps/rejected": -510.60302734375, + "loss": 0.0901, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4905457496643066, + "rewards/margins": 2.2948365211486816, + "rewards/rejected": 0.195709228515625, + "step": 9770 + }, + { + "epoch": 0.57, + "learning_rate": 4.136399922499501e-08, + "logits/chosen": -2.017995595932007, + "logits/rejected": -1.987028956413269, + "logps/chosen": -279.16644287109375, + "logps/rejected": -473.68792724609375, + "loss": 0.1073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.444787621498108, + "rewards/margins": 2.0594482421875, + "rewards/rejected": -0.6146606802940369, + "step": 9771 + }, + { + "epoch": 0.57, + "learning_rate": 4.135471698842174e-08, + "logits/chosen": -2.0534274578094482, + "logits/rejected": -2.054295301437378, + "logps/chosen": -13.059233665466309, + "logps/rejected": -203.82147216796875, + "loss": 0.3808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.041730593889951706, + "rewards/margins": 1.9734594821929932, + "rewards/rejected": -2.0151901245117188, + "step": 9772 + }, + { + "epoch": 0.57, + "learning_rate": 4.1345435058971235e-08, + "logits/chosen": -1.919106364250183, + "logits/rejected": -1.9221807718276978, + "logps/chosen": -11.820581436157227, + "logps/rejected": -98.96305847167969, + "loss": 0.4696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19254007935523987, + "rewards/margins": 1.3535842895507812, + "rewards/rejected": -1.5461243391036987, + "step": 9773 + }, + { + "epoch": 0.57, + "learning_rate": 4.13361534369732e-08, + "logits/chosen": -1.9284940958023071, + "logits/rejected": -2.005120277404785, + "logps/chosen": -281.73345947265625, + "logps/rejected": -271.6997375488281, + "loss": 0.2281, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.287072777748108, + "rewards/margins": 0.7139801383018494, + "rewards/rejected": 0.5730926394462585, + "step": 9774 + }, + { + "epoch": 0.57, + "learning_rate": 4.1326872122757384e-08, + "logits/chosen": -1.8156015872955322, + "logits/rejected": -1.8135526180267334, + "logps/chosen": -49.91697311401367, + "logps/rejected": -192.63406372070312, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.200083926320076, + "rewards/margins": 2.7283432483673096, + "rewards/rejected": -2.52825927734375, + "step": 9775 + }, + { + "epoch": 0.57, + "learning_rate": 4.131759111665348e-08, + "logits/chosen": -1.840067744255066, + "logits/rejected": -1.8428560495376587, + "logps/chosen": -1.9018439054489136, + "logps/rejected": -192.18438720703125, + "loss": 0.3365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03795722872018814, + "rewards/margins": 3.726861000061035, + "rewards/rejected": -3.68890380859375, + "step": 9776 + }, + { + "epoch": 0.57, + "learning_rate": 4.130831041899123e-08, + "logits/chosen": -1.916782259941101, + "logits/rejected": -1.9168566465377808, + "logps/chosen": -14.145120620727539, + "logps/rejected": -167.41690063476562, + "loss": 0.3747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14952078461647034, + "rewards/margins": 2.1551194190979004, + "rewards/rejected": -2.005598545074463, + "step": 9777 + }, + { + "epoch": 0.57, + "learning_rate": 4.129903003010029e-08, + "logits/chosen": -1.779536247253418, + "logits/rejected": -1.7390016317367554, + "logps/chosen": -199.24359130859375, + "logps/rejected": -346.18115234375, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2519806623458862, + "rewards/margins": 0.5764374136924744, + "rewards/rejected": 0.6755432486534119, + "step": 9778 + }, + { + "epoch": 0.57, + "learning_rate": 4.1289749950310375e-08, + "logits/chosen": -2.1414132118225098, + "logits/rejected": -2.134556531906128, + "logps/chosen": -8.527640342712402, + "logps/rejected": -222.88726806640625, + "loss": 0.2023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3103075921535492, + "rewards/margins": 2.946977376937866, + "rewards/rejected": -2.636669874191284, + "step": 9779 + }, + { + "epoch": 0.57, + "learning_rate": 4.1280470179951144e-08, + "logits/chosen": -1.7945548295974731, + "logits/rejected": -1.782720923423767, + "logps/chosen": -40.700767517089844, + "logps/rejected": -280.4688415527344, + "loss": 0.2539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24256058037281036, + "rewards/margins": 3.309922218322754, + "rewards/rejected": -3.06736159324646, + "step": 9780 + }, + { + "epoch": 0.57, + "learning_rate": 4.127119071935227e-08, + "logits/chosen": -1.84518301486969, + "logits/rejected": -1.8439710140228271, + "logps/chosen": -42.72718048095703, + "logps/rejected": -132.27703857421875, + "loss": 0.5574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25761985778808594, + "rewards/margins": 1.132545828819275, + "rewards/rejected": -1.3901656866073608, + "step": 9781 + }, + { + "epoch": 0.57, + "learning_rate": 4.126191156884339e-08, + "logits/chosen": -1.9312024116516113, + "logits/rejected": -1.9471604824066162, + "logps/chosen": -297.3226318359375, + "logps/rejected": -366.3513488769531, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9875214099884033, + "rewards/margins": 2.53055739402771, + "rewards/rejected": -0.5430359244346619, + "step": 9782 + }, + { + "epoch": 0.57, + "learning_rate": 4.125263272875417e-08, + "logits/chosen": -2.0496838092803955, + "logits/rejected": -2.072801113128662, + "logps/chosen": -189.25926208496094, + "logps/rejected": -340.0039978027344, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.227407932281494, + "rewards/margins": 3.240530490875244, + "rewards/rejected": -1.01312255859375, + "step": 9783 + }, + { + "epoch": 0.57, + "learning_rate": 4.124335419941419e-08, + "logits/chosen": -1.7346481084823608, + "logits/rejected": -1.7323702573776245, + "logps/chosen": -217.21878051757812, + "logps/rejected": -270.1531982421875, + "loss": 0.1618, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7817047834396362, + "rewards/margins": 1.566186547279358, + "rewards/rejected": 0.21551819145679474, + "step": 9784 + }, + { + "epoch": 0.57, + "learning_rate": 4.123407598115314e-08, + "logits/chosen": -2.10213303565979, + "logits/rejected": -2.101933002471924, + "logps/chosen": -2.5985867977142334, + "logps/rejected": -93.2905502319336, + "loss": 0.5441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06897936016321182, + "rewards/margins": 0.5651684999465942, + "rewards/rejected": -0.4961891174316406, + "step": 9785 + }, + { + "epoch": 0.57, + "learning_rate": 4.122479807430056e-08, + "logits/chosen": -2.1114919185638428, + "logits/rejected": -2.109809637069702, + "logps/chosen": -0.014510474167764187, + "logps/rejected": -168.32159423828125, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00060361681971699, + "rewards/margins": 1.8929449319839478, + "rewards/rejected": -1.893548607826233, + "step": 9786 + }, + { + "epoch": 0.57, + "learning_rate": 4.121552047918608e-08, + "logits/chosen": -1.9775322675704956, + "logits/rejected": -1.9799641370773315, + "logps/chosen": -14.24994945526123, + "logps/rejected": -171.1743927001953, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05616350099444389, + "rewards/margins": 1.858355164527893, + "rewards/rejected": -1.9145187139511108, + "step": 9787 + }, + { + "epoch": 0.57, + "learning_rate": 4.12062431961393e-08, + "logits/chosen": -2.0417556762695312, + "logits/rejected": -2.0133352279663086, + "logps/chosen": -25.758968353271484, + "logps/rejected": -321.37359619140625, + "loss": 0.7792, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.410021185874939, + "rewards/margins": 3.283170223236084, + "rewards/rejected": -4.6931915283203125, + "step": 9788 + }, + { + "epoch": 0.57, + "learning_rate": 4.119696622548976e-08, + "logits/chosen": -2.048762559890747, + "logits/rejected": -2.0553042888641357, + "logps/chosen": -111.54730224609375, + "logps/rejected": -242.2244415283203, + "loss": 0.114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4280990362167358, + "rewards/margins": 1.8030608892440796, + "rewards/rejected": -0.37496185302734375, + "step": 9789 + }, + { + "epoch": 0.57, + "learning_rate": 4.118768956756706e-08, + "logits/chosen": -1.9673130512237549, + "logits/rejected": -1.9656521081924438, + "logps/chosen": -194.61083984375, + "logps/rejected": -347.8376159667969, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9369125366210938, + "rewards/margins": 3.8904128074645996, + "rewards/rejected": -1.9535003900527954, + "step": 9790 + }, + { + "epoch": 0.57, + "learning_rate": 4.1178413222700714e-08, + "logits/chosen": -1.8662728071212769, + "logits/rejected": -1.8809484243392944, + "logps/chosen": -237.00955200195312, + "logps/rejected": -471.57781982421875, + "loss": 0.1229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9867035150527954, + "rewards/margins": 2.620532274246216, + "rewards/rejected": -1.6338287591934204, + "step": 9791 + }, + { + "epoch": 0.57, + "learning_rate": 4.116913719122029e-08, + "logits/chosen": -1.8185025453567505, + "logits/rejected": -1.7437909841537476, + "logps/chosen": -228.39381408691406, + "logps/rejected": -495.00152587890625, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.980119466781616, + "rewards/margins": 4.004429817199707, + "rewards/rejected": -1.0243103504180908, + "step": 9792 + }, + { + "epoch": 0.57, + "learning_rate": 4.115986147345531e-08, + "logits/chosen": -1.9005061388015747, + "logits/rejected": -1.8907513618469238, + "logps/chosen": -244.4163818359375, + "logps/rejected": -522.0109252929688, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.300433397293091, + "rewards/margins": 6.155703544616699, + "rewards/rejected": -3.8552703857421875, + "step": 9793 + }, + { + "epoch": 0.57, + "learning_rate": 4.115058606973529e-08, + "logits/chosen": -1.872414469718933, + "logits/rejected": -1.8662859201431274, + "logps/chosen": -34.82781219482422, + "logps/rejected": -51.268898010253906, + "loss": 0.5992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11956138908863068, + "rewards/margins": 0.29051512479782104, + "rewards/rejected": -0.17095375061035156, + "step": 9794 + }, + { + "epoch": 0.57, + "learning_rate": 4.114131098038974e-08, + "logits/chosen": -2.028447389602661, + "logits/rejected": -1.9613399505615234, + "logps/chosen": -180.46279907226562, + "logps/rejected": -395.974365234375, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6373047828674316, + "rewards/margins": 2.61297607421875, + "rewards/rejected": 0.02432861365377903, + "step": 9795 + }, + { + "epoch": 0.57, + "learning_rate": 4.113203620574816e-08, + "logits/chosen": -2.009960889816284, + "logits/rejected": -2.0324032306671143, + "logps/chosen": -259.6438293457031, + "logps/rejected": -479.0146179199219, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6399567127227783, + "rewards/margins": 4.704998970031738, + "rewards/rejected": -1.0650421380996704, + "step": 9796 + }, + { + "epoch": 0.57, + "learning_rate": 4.1122761746140024e-08, + "logits/chosen": -1.9308812618255615, + "logits/rejected": -1.921492099761963, + "logps/chosen": -61.6367073059082, + "logps/rejected": -265.1101379394531, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1723712980747223, + "rewards/margins": 2.3822453022003174, + "rewards/rejected": -2.209873914718628, + "step": 9797 + }, + { + "epoch": 0.57, + "learning_rate": 4.111348760189483e-08, + "logits/chosen": -2.0742084980010986, + "logits/rejected": -2.05610990524292, + "logps/chosen": -33.066165924072266, + "logps/rejected": -175.1220245361328, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9001384973526001, + "rewards/margins": 0.6867015957832336, + "rewards/rejected": 0.21343688666820526, + "step": 9798 + }, + { + "epoch": 0.57, + "learning_rate": 4.1104213773342e-08, + "logits/chosen": -2.0446226596832275, + "logits/rejected": -2.042829990386963, + "logps/chosen": -208.03424072265625, + "logps/rejected": -275.40411376953125, + "loss": 0.0914, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.495166063308716, + "rewards/margins": 1.7530548572540283, + "rewards/rejected": 0.7421112060546875, + "step": 9799 + }, + { + "epoch": 0.57, + "learning_rate": 4.109494026081103e-08, + "logits/chosen": -1.9957293272018433, + "logits/rejected": -1.9986521005630493, + "logps/chosen": -90.59706115722656, + "logps/rejected": -192.6914825439453, + "loss": 0.4487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2106063812971115, + "rewards/margins": 0.4626861810684204, + "rewards/rejected": -0.2520797848701477, + "step": 9800 + }, + { + "epoch": 0.57, + "learning_rate": 4.108566706463134e-08, + "logits/chosen": -1.7227489948272705, + "logits/rejected": -1.705728530883789, + "logps/chosen": -0.0032211835496127605, + "logps/rejected": -237.34495544433594, + "loss": 0.3478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0004762158205267042, + "rewards/margins": 5.998454570770264, + "rewards/rejected": -5.997978210449219, + "step": 9801 + }, + { + "epoch": 0.57, + "learning_rate": 4.1076394185132355e-08, + "logits/chosen": -2.216027021408081, + "logits/rejected": -2.215249538421631, + "logps/chosen": -0.0343879871070385, + "logps/rejected": -91.13810729980469, + "loss": 0.4878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020566117018461227, + "rewards/margins": 1.051990032196045, + "rewards/rejected": -1.054046630859375, + "step": 9802 + }, + { + "epoch": 0.57, + "learning_rate": 4.10671216226435e-08, + "logits/chosen": -2.1003589630126953, + "logits/rejected": -2.0950841903686523, + "logps/chosen": -34.90430450439453, + "logps/rejected": -233.9279022216797, + "loss": 0.2663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42236098647117615, + "rewards/margins": 2.6504995822906494, + "rewards/rejected": -2.2281386852264404, + "step": 9803 + }, + { + "epoch": 0.57, + "learning_rate": 4.105784937749418e-08, + "logits/chosen": -2.0001041889190674, + "logits/rejected": -2.0012784004211426, + "logps/chosen": -26.627891540527344, + "logps/rejected": -248.73165893554688, + "loss": 0.1772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0732498168945312, + "rewards/margins": 4.128684997558594, + "rewards/rejected": -3.0554351806640625, + "step": 9804 + }, + { + "epoch": 0.57, + "learning_rate": 4.1048577450013775e-08, + "logits/chosen": -1.797377347946167, + "logits/rejected": -1.7810475826263428, + "logps/chosen": -281.4376525878906, + "logps/rejected": -444.80169677734375, + "loss": 0.0455, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5226348638534546, + "rewards/margins": 4.416159152984619, + "rewards/rejected": -2.893524169921875, + "step": 9805 + }, + { + "epoch": 0.57, + "learning_rate": 4.1039305840531694e-08, + "logits/chosen": -2.043226718902588, + "logits/rejected": -2.0446271896362305, + "logps/chosen": -0.0016630676109343767, + "logps/rejected": -187.94822692871094, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010708433546824381, + "rewards/margins": 2.897050380706787, + "rewards/rejected": -2.8971574306488037, + "step": 9806 + }, + { + "epoch": 0.57, + "learning_rate": 4.103003454937732e-08, + "logits/chosen": -2.003948211669922, + "logits/rejected": -1.992266297340393, + "logps/chosen": -94.57722473144531, + "logps/rejected": -343.2012023925781, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.483428955078125, + "rewards/margins": 5.247027397155762, + "rewards/rejected": -3.763598680496216, + "step": 9807 + }, + { + "epoch": 0.57, + "learning_rate": 4.102076357687998e-08, + "logits/chosen": -1.9012389183044434, + "logits/rejected": -1.895039439201355, + "logps/chosen": -13.378999710083008, + "logps/rejected": -164.0680694580078, + "loss": 0.3866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01861257664859295, + "rewards/margins": 1.9709807634353638, + "rewards/rejected": -1.952368140220642, + "step": 9808 + }, + { + "epoch": 0.57, + "learning_rate": 4.1011492923369046e-08, + "logits/chosen": -2.022110939025879, + "logits/rejected": -2.0302183628082275, + "logps/chosen": -36.255191802978516, + "logps/rejected": -192.1895294189453, + "loss": 0.2275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3622402250766754, + "rewards/margins": 2.276000738143921, + "rewards/rejected": -1.9137604236602783, + "step": 9809 + }, + { + "epoch": 0.57, + "learning_rate": 4.100222258917385e-08, + "logits/chosen": -1.7746012210845947, + "logits/rejected": -1.7518885135650635, + "logps/chosen": -212.701904296875, + "logps/rejected": -287.2422180175781, + "loss": 0.4014, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.523431420326233, + "rewards/margins": -0.0916595458984375, + "rewards/rejected": 1.6150909662246704, + "step": 9810 + }, + { + "epoch": 0.57, + "learning_rate": 4.099295257462372e-08, + "logits/chosen": -2.0502326488494873, + "logits/rejected": -2.034292459487915, + "logps/chosen": -17.49127197265625, + "logps/rejected": -288.8158264160156, + "loss": 0.252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2946409285068512, + "rewards/margins": 5.168347358703613, + "rewards/rejected": -4.873706340789795, + "step": 9811 + }, + { + "epoch": 0.57, + "learning_rate": 4.0983682880047965e-08, + "logits/chosen": -1.7297333478927612, + "logits/rejected": -1.7287522554397583, + "logps/chosen": -0.00036962691228836775, + "logps/rejected": -92.33372497558594, + "loss": 0.436, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5685778382467106e-05, + "rewards/margins": 1.3357913494110107, + "rewards/rejected": -1.3358169794082642, + "step": 9812 + }, + { + "epoch": 0.57, + "learning_rate": 4.097441350577591e-08, + "logits/chosen": -1.9144474267959595, + "logits/rejected": -1.911690592765808, + "logps/chosen": -182.59109497070312, + "logps/rejected": -415.4584045410156, + "loss": 0.0738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9740204215049744, + "rewards/margins": 3.7615082263946533, + "rewards/rejected": -2.787487745285034, + "step": 9813 + }, + { + "epoch": 0.57, + "learning_rate": 4.0965144452136826e-08, + "logits/chosen": -1.8217155933380127, + "logits/rejected": -1.8159972429275513, + "logps/chosen": -22.071577072143555, + "logps/rejected": -91.02022552490234, + "loss": 0.6568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2839048504829407, + "rewards/margins": 0.4886896014213562, + "rewards/rejected": -0.7725944519042969, + "step": 9814 + }, + { + "epoch": 0.57, + "learning_rate": 4.095587571946002e-08, + "logits/chosen": -2.1610379219055176, + "logits/rejected": -2.152850389480591, + "logps/chosen": -18.68769073486328, + "logps/rejected": -300.76959228515625, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0983438491821289, + "rewards/margins": 6.027013778686523, + "rewards/rejected": -5.9286699295043945, + "step": 9815 + }, + { + "epoch": 0.57, + "learning_rate": 4.094660730807473e-08, + "logits/chosen": -2.013462781906128, + "logits/rejected": -2.003185987472534, + "logps/chosen": -103.48066711425781, + "logps/rejected": -387.36175537109375, + "loss": 0.0894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2324104309082031, + "rewards/margins": 5.207859039306641, + "rewards/rejected": -3.9754486083984375, + "step": 9816 + }, + { + "epoch": 0.57, + "learning_rate": 4.0937339218310245e-08, + "logits/chosen": -1.6597206592559814, + "logits/rejected": -1.6707463264465332, + "logps/chosen": -2.9921393434051424e-05, + "logps/rejected": -136.4462432861328, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5510278192086844e-06, + "rewards/margins": 3.376640796661377, + "rewards/rejected": -3.376638174057007, + "step": 9817 + }, + { + "epoch": 0.57, + "learning_rate": 4.092807145049579e-08, + "logits/chosen": -1.9803390502929688, + "logits/rejected": -1.9890096187591553, + "logps/chosen": -304.52099609375, + "logps/rejected": -534.7242431640625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.996206760406494, + "rewards/margins": 8.000537872314453, + "rewards/rejected": -4.004330635070801, + "step": 9818 + }, + { + "epoch": 0.57, + "learning_rate": 4.0918804004960636e-08, + "logits/chosen": -2.0921826362609863, + "logits/rejected": -2.0882174968719482, + "logps/chosen": -22.00366973876953, + "logps/rejected": -92.91715240478516, + "loss": 0.4734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5274425745010376, + "rewards/margins": 0.35894203186035156, + "rewards/rejected": 0.16850052773952484, + "step": 9819 + }, + { + "epoch": 0.57, + "learning_rate": 4.0909536882033955e-08, + "logits/chosen": -1.921955943107605, + "logits/rejected": -1.8542470932006836, + "logps/chosen": -229.2600860595703, + "logps/rejected": -591.1761474609375, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8854035139083862, + "rewards/margins": 0.2773590087890625, + "rewards/rejected": 1.6080445051193237, + "step": 9820 + }, + { + "epoch": 0.57, + "learning_rate": 4.0900270082045016e-08, + "logits/chosen": -2.1207435131073, + "logits/rejected": -2.1083171367645264, + "logps/chosen": -0.7220434546470642, + "logps/rejected": -218.9990997314453, + "loss": 0.5531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15450827777385712, + "rewards/margins": 0.6377251148223877, + "rewards/rejected": -0.4832168519496918, + "step": 9821 + }, + { + "epoch": 0.57, + "learning_rate": 4.089100360532298e-08, + "logits/chosen": -1.8783628940582275, + "logits/rejected": -1.8481056690216064, + "logps/chosen": -212.01333618164062, + "logps/rejected": -397.1131591796875, + "loss": 0.1917, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2389190196990967, + "rewards/margins": 0.8964476585388184, + "rewards/rejected": 1.3424713611602783, + "step": 9822 + }, + { + "epoch": 0.57, + "learning_rate": 4.088173745219706e-08, + "logits/chosen": -1.9253214597702026, + "logits/rejected": -1.9276056289672852, + "logps/chosen": -0.0002630797680467367, + "logps/rejected": -78.66898345947266, + "loss": 0.5871, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.086806049803272e-05, + "rewards/margins": 0.48176246881484985, + "rewards/rejected": -0.48171159625053406, + "step": 9823 + }, + { + "epoch": 0.57, + "learning_rate": 4.087247162299642e-08, + "logits/chosen": -2.098173141479492, + "logits/rejected": -2.090615749359131, + "logps/chosen": -33.0106201171875, + "logps/rejected": -209.2752227783203, + "loss": 0.4887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05910797044634819, + "rewards/margins": 0.6143432855606079, + "rewards/rejected": -0.5552353262901306, + "step": 9824 + }, + { + "epoch": 0.57, + "learning_rate": 4.086320611805025e-08, + "logits/chosen": -1.853554368019104, + "logits/rejected": -1.8657796382904053, + "logps/chosen": -178.39175415039062, + "logps/rejected": -339.3759765625, + "loss": 0.0643, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0332093238830566, + "rewards/margins": 2.2947633266448975, + "rewards/rejected": -0.26155397295951843, + "step": 9825 + }, + { + "epoch": 0.57, + "learning_rate": 4.085394093768766e-08, + "logits/chosen": -1.8845690488815308, + "logits/rejected": -1.8895198106765747, + "logps/chosen": -30.83690643310547, + "logps/rejected": -178.99053955078125, + "loss": 0.4923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20021477341651917, + "rewards/margins": 0.8439983129501343, + "rewards/rejected": -0.6437835693359375, + "step": 9826 + }, + { + "epoch": 0.57, + "learning_rate": 4.084467608223784e-08, + "logits/chosen": -1.7354310750961304, + "logits/rejected": -1.736038327217102, + "logps/chosen": -186.85391235351562, + "logps/rejected": -321.99481201171875, + "loss": 0.0533, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4057159423828125, + "rewards/margins": 3.101550340652466, + "rewards/rejected": -1.6958343982696533, + "step": 9827 + }, + { + "epoch": 0.57, + "learning_rate": 4.0835411552029924e-08, + "logits/chosen": -2.0203890800476074, + "logits/rejected": -2.021604061126709, + "logps/chosen": -55.825531005859375, + "logps/rejected": -172.6916961669922, + "loss": 0.2533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7680145502090454, + "rewards/margins": 1.342494249343872, + "rewards/rejected": -0.5744796991348267, + "step": 9828 + }, + { + "epoch": 0.57, + "learning_rate": 4.082614734739301e-08, + "logits/chosen": -1.880584955215454, + "logits/rejected": -1.8857783079147339, + "logps/chosen": -336.50177001953125, + "logps/rejected": -553.48876953125, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2145631313323975, + "rewards/margins": 4.245080947875977, + "rewards/rejected": -2.030517578125, + "step": 9829 + }, + { + "epoch": 0.57, + "learning_rate": 4.081688346865622e-08, + "logits/chosen": -1.887311577796936, + "logits/rejected": -1.8812493085861206, + "logps/chosen": -8.501201629638672, + "logps/rejected": -321.3737487792969, + "loss": 0.219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37807533144950867, + "rewards/margins": 8.762502670288086, + "rewards/rejected": -8.384427070617676, + "step": 9830 + }, + { + "epoch": 0.57, + "learning_rate": 4.0807619916148644e-08, + "logits/chosen": -1.820349931716919, + "logits/rejected": -1.8212114572525024, + "logps/chosen": -0.0002507949247956276, + "logps/rejected": -127.08549499511719, + "loss": 0.5511, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.939218787418213e-06, + "rewards/margins": 0.6718286871910095, + "rewards/rejected": -0.6718376278877258, + "step": 9831 + }, + { + "epoch": 0.57, + "learning_rate": 4.0798356690199384e-08, + "logits/chosen": -1.8002445697784424, + "logits/rejected": -1.780992031097412, + "logps/chosen": -3.7738122940063477, + "logps/rejected": -162.83648681640625, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29249247908592224, + "rewards/margins": 3.68656587600708, + "rewards/rejected": -3.394073486328125, + "step": 9832 + }, + { + "epoch": 0.57, + "learning_rate": 4.078909379113748e-08, + "logits/chosen": -1.7684584856033325, + "logits/rejected": -1.770782232284546, + "logps/chosen": -18.615819931030273, + "logps/rejected": -77.31969451904297, + "loss": 0.4321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7107650637626648, + "rewards/margins": 0.4815177917480469, + "rewards/rejected": 0.2292472869157791, + "step": 9833 + }, + { + "epoch": 0.57, + "learning_rate": 4.0779831219292047e-08, + "logits/chosen": -2.0136959552764893, + "logits/rejected": -2.0143117904663086, + "logps/chosen": -3.6596848076442257e-05, + "logps/rejected": -179.49069213867188, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.079626473365352e-07, + "rewards/margins": 3.6214118003845215, + "rewards/rejected": -3.621411085128784, + "step": 9834 + }, + { + "epoch": 0.57, + "learning_rate": 4.07705689749921e-08, + "logits/chosen": -1.9717339277267456, + "logits/rejected": -1.968080997467041, + "logps/chosen": -35.46719741821289, + "logps/rejected": -204.24392700195312, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2951164245605469, + "rewards/margins": 1.3759269714355469, + "rewards/rejected": -1.080810546875, + "step": 9835 + }, + { + "epoch": 0.57, + "learning_rate": 4.076130705856669e-08, + "logits/chosen": -1.8217763900756836, + "logits/rejected": -1.8194479942321777, + "logps/chosen": -80.973388671875, + "logps/rejected": -224.1215362548828, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7572952508926392, + "rewards/margins": 1.3136887550354004, + "rewards/rejected": -0.5563934445381165, + "step": 9836 + }, + { + "epoch": 0.57, + "learning_rate": 4.075204547034484e-08, + "logits/chosen": -1.8046998977661133, + "logits/rejected": -1.8028812408447266, + "logps/chosen": -21.622127532958984, + "logps/rejected": -296.1945495605469, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.052767183631658554, + "rewards/margins": 4.522497177124023, + "rewards/rejected": -4.469729900360107, + "step": 9837 + }, + { + "epoch": 0.57, + "learning_rate": 4.074278421065558e-08, + "logits/chosen": -1.9393879175186157, + "logits/rejected": -1.9249392747879028, + "logps/chosen": -22.27983283996582, + "logps/rejected": -191.83682250976562, + "loss": 0.2338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5126747488975525, + "rewards/margins": 4.444171905517578, + "rewards/rejected": -3.93149733543396, + "step": 9838 + }, + { + "epoch": 0.57, + "learning_rate": 4.073352327982789e-08, + "logits/chosen": -1.7517197132110596, + "logits/rejected": -1.7564831972122192, + "logps/chosen": -119.2518310546875, + "logps/rejected": -206.98397827148438, + "loss": 0.222, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3601974248886108, + "rewards/margins": 1.1831833124160767, + "rewards/rejected": 0.17701415717601776, + "step": 9839 + }, + { + "epoch": 0.57, + "learning_rate": 4.0724262678190805e-08, + "logits/chosen": -2.016014575958252, + "logits/rejected": -1.99872887134552, + "logps/chosen": -33.54561233520508, + "logps/rejected": -257.26641845703125, + "loss": 0.194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7771698236465454, + "rewards/margins": 2.934584140777588, + "rewards/rejected": -2.157414197921753, + "step": 9840 + }, + { + "epoch": 0.57, + "learning_rate": 4.071500240607326e-08, + "logits/chosen": -1.9902236461639404, + "logits/rejected": -1.9642401933670044, + "logps/chosen": -182.448974609375, + "logps/rejected": -299.0587463378906, + "loss": 0.3727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4774048328399658, + "rewards/margins": 0.17874455451965332, + "rewards/rejected": 1.2986602783203125, + "step": 9841 + }, + { + "epoch": 0.57, + "learning_rate": 4.070574246380426e-08, + "logits/chosen": -1.8945965766906738, + "logits/rejected": -1.8701868057250977, + "logps/chosen": -35.763694763183594, + "logps/rejected": -534.8348388671875, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0766143798828125, + "rewards/margins": 13.543057441711426, + "rewards/rejected": -13.466443061828613, + "step": 9842 + }, + { + "epoch": 0.57, + "learning_rate": 4.069648285171274e-08, + "logits/chosen": -1.6330615282058716, + "logits/rejected": -1.5934027433395386, + "logps/chosen": -185.60260009765625, + "logps/rejected": -371.3489990234375, + "loss": 0.0656, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2723145484924316, + "rewards/margins": 2.4870972633361816, + "rewards/rejected": -0.21478271484375, + "step": 9843 + }, + { + "epoch": 0.57, + "learning_rate": 4.068722357012767e-08, + "logits/chosen": -2.0408716201782227, + "logits/rejected": -2.0445570945739746, + "logps/chosen": -0.00012099306331947446, + "logps/rejected": -144.23959350585938, + "loss": 0.4779, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.634318661876023e-06, + "rewards/margins": 1.195091724395752, + "rewards/rejected": -1.195094347000122, + "step": 9844 + }, + { + "epoch": 0.57, + "learning_rate": 4.0677964619377964e-08, + "logits/chosen": -1.9225683212280273, + "logits/rejected": -1.9615408182144165, + "logps/chosen": -177.9712371826172, + "logps/rejected": -358.63897705078125, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.644068956375122, + "rewards/margins": 2.6569321155548096, + "rewards/rejected": -1.0128631591796875, + "step": 9845 + }, + { + "epoch": 0.57, + "learning_rate": 4.066870599979256e-08, + "logits/chosen": -1.819127082824707, + "logits/rejected": -1.7911843061447144, + "logps/chosen": -193.2918701171875, + "logps/rejected": -275.49725341796875, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9764190912246704, + "rewards/margins": 0.2258087396621704, + "rewards/rejected": 0.7506103515625, + "step": 9846 + }, + { + "epoch": 0.57, + "learning_rate": 4.065944771170034e-08, + "logits/chosen": -1.976159691810608, + "logits/rejected": -1.9928240776062012, + "logps/chosen": -204.42279052734375, + "logps/rejected": -326.0727844238281, + "loss": 0.0689, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.175467014312744, + "rewards/margins": 2.0375795364379883, + "rewards/rejected": 1.1378875970840454, + "step": 9847 + }, + { + "epoch": 0.57, + "learning_rate": 4.0650189755430246e-08, + "logits/chosen": -1.7947943210601807, + "logits/rejected": -1.809838891029358, + "logps/chosen": -267.2872009277344, + "logps/rejected": -467.6576232910156, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6471405029296875, + "rewards/margins": 4.300244331359863, + "rewards/rejected": -0.6531036496162415, + "step": 9848 + }, + { + "epoch": 0.57, + "learning_rate": 4.0640932131311155e-08, + "logits/chosen": -2.070169687271118, + "logits/rejected": -2.0738396644592285, + "logps/chosen": -50.598426818847656, + "logps/rejected": -158.43067932128906, + "loss": 0.2362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6491379141807556, + "rewards/margins": 3.0191071033477783, + "rewards/rejected": -2.369969129562378, + "step": 9849 + }, + { + "epoch": 0.57, + "learning_rate": 4.063167483967192e-08, + "logits/chosen": -1.9508320093154907, + "logits/rejected": -1.9433170557022095, + "logps/chosen": -69.45834350585938, + "logps/rejected": -258.2330322265625, + "loss": 0.2211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7656814455986023, + "rewards/margins": 3.0943727493286133, + "rewards/rejected": -2.328691244125366, + "step": 9850 + }, + { + "epoch": 0.57, + "learning_rate": 4.062241788084143e-08, + "logits/chosen": -1.9410321712493896, + "logits/rejected": -1.9208118915557861, + "logps/chosen": -206.00340270996094, + "logps/rejected": -367.6874694824219, + "loss": 0.5798, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5442856550216675, + "rewards/margins": -0.6422713994979858, + "rewards/rejected": 2.1865570545196533, + "step": 9851 + }, + { + "epoch": 0.57, + "learning_rate": 4.061316125514852e-08, + "logits/chosen": -1.9638702869415283, + "logits/rejected": -1.957495927810669, + "logps/chosen": -4.537100791931152, + "logps/rejected": -244.25662231445312, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1323043406009674, + "rewards/margins": 1.2765487432479858, + "rewards/rejected": -1.1442444324493408, + "step": 9852 + }, + { + "epoch": 0.57, + "learning_rate": 4.0603904962922056e-08, + "logits/chosen": -2.065964460372925, + "logits/rejected": -2.0609052181243896, + "logps/chosen": -50.5623893737793, + "logps/rejected": -386.7001953125, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14838829636573792, + "rewards/margins": 5.060735702514648, + "rewards/rejected": -4.912347316741943, + "step": 9853 + }, + { + "epoch": 0.57, + "learning_rate": 4.059464900449083e-08, + "logits/chosen": -2.011124849319458, + "logits/rejected": -1.9950505495071411, + "logps/chosen": -211.23110961914062, + "logps/rejected": -296.30718994140625, + "loss": 0.2395, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2421021461486816, + "rewards/margins": 0.648056149482727, + "rewards/rejected": 1.5940459966659546, + "step": 9854 + }, + { + "epoch": 0.57, + "learning_rate": 4.058539338018368e-08, + "logits/chosen": -1.9235312938690186, + "logits/rejected": -1.9218087196350098, + "logps/chosen": -15.078782081604004, + "logps/rejected": -177.21441650390625, + "loss": 0.2654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8230641484260559, + "rewards/margins": 1.4203388690948486, + "rewards/rejected": -0.5972747802734375, + "step": 9855 + }, + { + "epoch": 0.57, + "learning_rate": 4.0576138090329416e-08, + "logits/chosen": -1.8795260190963745, + "logits/rejected": -1.866138219833374, + "logps/chosen": -158.14602661132812, + "logps/rejected": -254.7493438720703, + "loss": 0.0879, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.440093994140625, + "rewards/margins": 1.8799575567245483, + "rewards/rejected": 0.5601364374160767, + "step": 9856 + }, + { + "epoch": 0.57, + "learning_rate": 4.056688313525682e-08, + "logits/chosen": -2.0117874145507812, + "logits/rejected": -2.058302402496338, + "logps/chosen": -143.72360229492188, + "logps/rejected": -346.6601867675781, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1982421875, + "rewards/margins": 3.482492208480835, + "rewards/rejected": -2.284250020980835, + "step": 9857 + }, + { + "epoch": 0.57, + "learning_rate": 4.0557628515294676e-08, + "logits/chosen": -1.9749761819839478, + "logits/rejected": -1.9812054634094238, + "logps/chosen": -24.097148895263672, + "logps/rejected": -242.010009765625, + "loss": 0.1274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0215762853622437, + "rewards/margins": 5.967714309692383, + "rewards/rejected": -4.94613790512085, + "step": 9858 + }, + { + "epoch": 0.57, + "learning_rate": 4.0548374230771764e-08, + "logits/chosen": -1.927618145942688, + "logits/rejected": -1.9230947494506836, + "logps/chosen": -273.4937438964844, + "logps/rejected": -492.421630859375, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.437213182449341, + "rewards/margins": 4.972933769226074, + "rewards/rejected": -2.5357208251953125, + "step": 9859 + }, + { + "epoch": 0.57, + "learning_rate": 4.053912028201682e-08, + "logits/chosen": -1.9158484935760498, + "logits/rejected": -1.9197343587875366, + "logps/chosen": -163.30355834960938, + "logps/rejected": -300.3468322753906, + "loss": 0.6275, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.279260277748108, + "rewards/margins": -0.7642577886581421, + "rewards/rejected": 2.04351806640625, + "step": 9860 + }, + { + "epoch": 0.57, + "learning_rate": 4.052986666935861e-08, + "logits/chosen": -2.024942636489868, + "logits/rejected": -2.0263125896453857, + "logps/chosen": -0.0006946544745005667, + "logps/rejected": -39.75409698486328, + "loss": 0.5914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002880717802327126, + "rewards/margins": 0.4228111803531647, + "rewards/rejected": -0.4225231111049652, + "step": 9861 + }, + { + "epoch": 0.57, + "learning_rate": 4.052061339312586e-08, + "logits/chosen": -1.8958148956298828, + "logits/rejected": -1.9116147756576538, + "logps/chosen": -195.62649536132812, + "logps/rejected": -210.37069702148438, + "loss": 0.1145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9189209938049316, + "rewards/margins": 1.4952118396759033, + "rewards/rejected": 1.4237091541290283, + "step": 9862 + }, + { + "epoch": 0.57, + "learning_rate": 4.051136045364729e-08, + "logits/chosen": -2.1093239784240723, + "logits/rejected": -2.102816343307495, + "logps/chosen": -0.001730172662064433, + "logps/rejected": -187.95675659179688, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001245986932190135, + "rewards/margins": 2.8136205673217773, + "rewards/rejected": -2.8137452602386475, + "step": 9863 + }, + { + "epoch": 0.57, + "learning_rate": 4.05021078512516e-08, + "logits/chosen": -1.9505535364151, + "logits/rejected": -1.9515819549560547, + "logps/chosen": -15.427621841430664, + "logps/rejected": -75.88993072509766, + "loss": 0.7281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3196743130683899, + "rewards/margins": 0.15360450744628906, + "rewards/rejected": -0.47327882051467896, + "step": 9864 + }, + { + "epoch": 0.57, + "learning_rate": 4.0492855586267516e-08, + "logits/chosen": -2.1340291500091553, + "logits/rejected": -2.131776809692383, + "logps/chosen": -10.6596040725708, + "logps/rejected": -231.81466674804688, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02647104300558567, + "rewards/margins": 3.2818682193756104, + "rewards/rejected": -3.255397081375122, + "step": 9865 + }, + { + "epoch": 0.57, + "learning_rate": 4.048360365902368e-08, + "logits/chosen": -1.8378113508224487, + "logits/rejected": -1.8306825160980225, + "logps/chosen": -7.8268585205078125, + "logps/rejected": -452.9058837890625, + "loss": 0.271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28066036105155945, + "rewards/margins": 9.488259315490723, + "rewards/rejected": -9.207598686218262, + "step": 9866 + }, + { + "epoch": 0.57, + "learning_rate": 4.04743520698488e-08, + "logits/chosen": -1.9704723358154297, + "logits/rejected": -1.9672142267227173, + "logps/chosen": -5.172393798828125, + "logps/rejected": -131.47463989257812, + "loss": 0.3577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15586386620998383, + "rewards/margins": 2.04923152923584, + "rewards/rejected": -1.8933677673339844, + "step": 9867 + }, + { + "epoch": 0.57, + "learning_rate": 4.046510081907154e-08, + "logits/chosen": -1.957972526550293, + "logits/rejected": -1.9030194282531738, + "logps/chosen": -415.2830810546875, + "logps/rejected": -746.0338134765625, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.224468946456909, + "rewards/margins": 4.442578315734863, + "rewards/rejected": -2.218109130859375, + "step": 9868 + }, + { + "epoch": 0.57, + "learning_rate": 4.045584990702053e-08, + "logits/chosen": -1.938301920890808, + "logits/rejected": -1.9551540613174438, + "logps/chosen": -259.4669189453125, + "logps/rejected": -262.6558532714844, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.94989013671875, + "rewards/margins": 2.68060302734375, + "rewards/rejected": 0.269287109375, + "step": 9869 + }, + { + "epoch": 0.57, + "learning_rate": 4.044659933402443e-08, + "logits/chosen": -1.8631826639175415, + "logits/rejected": -1.8577568531036377, + "logps/chosen": -156.83901977539062, + "logps/rejected": -374.5205993652344, + "loss": 0.0793, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.752691626548767, + "rewards/margins": 5.113284111022949, + "rewards/rejected": -3.3605926036834717, + "step": 9870 + }, + { + "epoch": 0.57, + "learning_rate": 4.043734910041185e-08, + "logits/chosen": -1.7703279256820679, + "logits/rejected": -1.7578949928283691, + "logps/chosen": -231.82986450195312, + "logps/rejected": -408.9124755859375, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.543112277984619, + "rewards/margins": 3.9786500930786133, + "rewards/rejected": -1.4355376958847046, + "step": 9871 + }, + { + "epoch": 0.57, + "learning_rate": 4.04280992065114e-08, + "logits/chosen": -1.9979909658432007, + "logits/rejected": -2.0213265419006348, + "logps/chosen": -163.1343536376953, + "logps/rejected": -290.8388671875, + "loss": 0.1006, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.190263509750366, + "rewards/margins": 1.7519913911819458, + "rewards/rejected": 0.438272088766098, + "step": 9872 + }, + { + "epoch": 0.57, + "learning_rate": 4.0418849652651685e-08, + "logits/chosen": -1.8443796634674072, + "logits/rejected": -1.8410385847091675, + "logps/chosen": -0.03159301355481148, + "logps/rejected": -188.81410217285156, + "loss": 0.3355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012392580974847078, + "rewards/margins": 5.26963472366333, + "rewards/rejected": -5.2708740234375, + "step": 9873 + }, + { + "epoch": 0.57, + "learning_rate": 4.0409600439161305e-08, + "logits/chosen": -1.849208116531372, + "logits/rejected": -1.8612990379333496, + "logps/chosen": -182.20404052734375, + "logps/rejected": -298.5180358886719, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9508758783340454, + "rewards/margins": 3.2266907691955566, + "rewards/rejected": -1.2758148908615112, + "step": 9874 + }, + { + "epoch": 0.57, + "learning_rate": 4.0400351566368826e-08, + "logits/chosen": -1.9144471883773804, + "logits/rejected": -1.9092013835906982, + "logps/chosen": -8.60311222076416, + "logps/rejected": -205.8233184814453, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17009468376636505, + "rewards/margins": 2.161006450653076, + "rewards/rejected": -1.990911841392517, + "step": 9875 + }, + { + "epoch": 0.57, + "learning_rate": 4.039110303460282e-08, + "logits/chosen": -1.8932503461837769, + "logits/rejected": -1.8906604051589966, + "logps/chosen": -48.32276153564453, + "logps/rejected": -136.67620849609375, + "loss": 0.6641, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11142349243164062, + "rewards/margins": 0.2356407344341278, + "rewards/rejected": -0.34706422686576843, + "step": 9876 + }, + { + "epoch": 0.57, + "learning_rate": 4.0381854844191824e-08, + "logits/chosen": -1.896486520767212, + "logits/rejected": -1.8835943937301636, + "logps/chosen": -129.329345703125, + "logps/rejected": -243.20846557617188, + "loss": 0.0939, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.414799451828003, + "rewards/margins": 2.2468674182891846, + "rewards/rejected": 0.16793213784694672, + "step": 9877 + }, + { + "epoch": 0.57, + "learning_rate": 4.037260699546441e-08, + "logits/chosen": -1.872438907623291, + "logits/rejected": -1.9070916175842285, + "logps/chosen": -253.80682373046875, + "logps/rejected": -361.56048583984375, + "loss": 0.2045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.548437476158142, + "rewards/margins": 0.9535827040672302, + "rewards/rejected": 0.5948547720909119, + "step": 9878 + }, + { + "epoch": 0.57, + "learning_rate": 4.036335948874907e-08, + "logits/chosen": -2.01883864402771, + "logits/rejected": -2.01597261428833, + "logps/chosen": -50.180538177490234, + "logps/rejected": -190.55172729492188, + "loss": 0.189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.347031831741333, + "rewards/margins": 1.458054780960083, + "rewards/rejected": -0.11102294921875, + "step": 9879 + }, + { + "epoch": 0.57, + "learning_rate": 4.035411232437435e-08, + "logits/chosen": -1.6802895069122314, + "logits/rejected": -1.6613408327102661, + "logps/chosen": -23.247886657714844, + "logps/rejected": -336.6195068359375, + "loss": 0.1926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7171531915664673, + "rewards/margins": 7.820522308349609, + "rewards/rejected": -7.103369235992432, + "step": 9880 + }, + { + "epoch": 0.58, + "learning_rate": 4.034486550266873e-08, + "logits/chosen": -1.9748563766479492, + "logits/rejected": -1.9752475023269653, + "logps/chosen": -12.28291130065918, + "logps/rejected": -105.0169677734375, + "loss": 0.357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4252031445503235, + "rewards/margins": 1.3050317764282227, + "rewards/rejected": -0.8798286318778992, + "step": 9881 + }, + { + "epoch": 0.58, + "learning_rate": 4.0335619023960735e-08, + "logits/chosen": -1.9772143363952637, + "logits/rejected": -1.9741896390914917, + "logps/chosen": -54.851985931396484, + "logps/rejected": -122.10357666015625, + "loss": 0.2272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9983463287353516, + "rewards/margins": 1.5667911767959595, + "rewards/rejected": -0.5684448480606079, + "step": 9882 + }, + { + "epoch": 0.58, + "learning_rate": 4.0326372888578814e-08, + "logits/chosen": -1.8919323682785034, + "logits/rejected": -1.8844252824783325, + "logps/chosen": -130.6151580810547, + "logps/rejected": -196.8513946533203, + "loss": 0.4585, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7886489629745483, + "rewards/margins": -0.24344027042388916, + "rewards/rejected": 2.0320892333984375, + "step": 9883 + }, + { + "epoch": 0.58, + "learning_rate": 4.0317127096851455e-08, + "logits/chosen": -1.9611765146255493, + "logits/rejected": -1.9518091678619385, + "logps/chosen": -177.55908203125, + "logps/rejected": -512.82666015625, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4984054565429688, + "rewards/margins": 5.978764533996582, + "rewards/rejected": -3.480358839035034, + "step": 9884 + }, + { + "epoch": 0.58, + "learning_rate": 4.0307881649107094e-08, + "logits/chosen": -1.9148151874542236, + "logits/rejected": -1.889355182647705, + "logps/chosen": -229.38592529296875, + "logps/rejected": -347.92938232421875, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3409576416015625, + "rewards/margins": 3.4197235107421875, + "rewards/rejected": -1.078765869140625, + "step": 9885 + }, + { + "epoch": 0.58, + "learning_rate": 4.0298636545674194e-08, + "logits/chosen": -1.9616203308105469, + "logits/rejected": -1.9371845722198486, + "logps/chosen": -173.9958953857422, + "logps/rejected": -228.81484985351562, + "loss": 0.1665, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7469894886016846, + "rewards/margins": 1.5373245477676392, + "rewards/rejected": 0.20966492593288422, + "step": 9886 + }, + { + "epoch": 0.58, + "learning_rate": 4.028939178688117e-08, + "logits/chosen": -1.856618046760559, + "logits/rejected": -1.9178078174591064, + "logps/chosen": -239.72305297851562, + "logps/rejected": -242.8197021484375, + "loss": 0.0726, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9810577630996704, + "rewards/margins": 2.196276903152466, + "rewards/rejected": -0.21521912515163422, + "step": 9887 + }, + { + "epoch": 0.58, + "learning_rate": 4.028014737305644e-08, + "logits/chosen": -1.9252264499664307, + "logits/rejected": -1.8275946378707886, + "logps/chosen": -228.5420684814453, + "logps/rejected": -632.79052734375, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7528092861175537, + "rewards/margins": 4.106386184692383, + "rewards/rejected": -1.35357666015625, + "step": 9888 + }, + { + "epoch": 0.58, + "learning_rate": 4.0270903304528444e-08, + "logits/chosen": -2.044875144958496, + "logits/rejected": -2.02736759185791, + "logps/chosen": -8.921060562133789, + "logps/rejected": -202.7821044921875, + "loss": 0.3404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15792246162891388, + "rewards/margins": 2.9830567836761475, + "rewards/rejected": -2.82513427734375, + "step": 9889 + }, + { + "epoch": 0.58, + "learning_rate": 4.026165958162554e-08, + "logits/chosen": -2.0489957332611084, + "logits/rejected": -2.019658088684082, + "logps/chosen": -190.94317626953125, + "logps/rejected": -372.001220703125, + "loss": 0.0835, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8098113536834717, + "rewards/margins": 1.9438261985778809, + "rewards/rejected": 0.865985095500946, + "step": 9890 + }, + { + "epoch": 0.58, + "learning_rate": 4.025241620467613e-08, + "logits/chosen": -2.0218935012817383, + "logits/rejected": -2.0065698623657227, + "logps/chosen": -134.78990173339844, + "logps/rejected": -448.4982604980469, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2145187854766846, + "rewards/margins": 4.529402256011963, + "rewards/rejected": -3.3148834705352783, + "step": 9891 + }, + { + "epoch": 0.58, + "learning_rate": 4.024317317400857e-08, + "logits/chosen": -1.8416930437088013, + "logits/rejected": -1.8436235189437866, + "logps/chosen": -53.46991729736328, + "logps/rejected": -317.79168701171875, + "loss": 0.181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5278129577636719, + "rewards/margins": 4.279994964599609, + "rewards/rejected": -3.7521820068359375, + "step": 9892 + }, + { + "epoch": 0.58, + "learning_rate": 4.023393048995124e-08, + "logits/chosen": -1.8406356573104858, + "logits/rejected": -1.8382198810577393, + "logps/chosen": -37.81105041503906, + "logps/rejected": -254.75494384765625, + "loss": 0.1781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6784691214561462, + "rewards/margins": 3.947298526763916, + "rewards/rejected": -3.268829345703125, + "step": 9893 + }, + { + "epoch": 0.58, + "learning_rate": 4.0224688152832455e-08, + "logits/chosen": -1.8085581064224243, + "logits/rejected": -1.8395037651062012, + "logps/chosen": -247.3814239501953, + "logps/rejected": -481.55242919921875, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.431623935699463, + "rewards/margins": 3.1851608753204346, + "rewards/rejected": -0.7535369992256165, + "step": 9894 + }, + { + "epoch": 0.58, + "learning_rate": 4.021544616298057e-08, + "logits/chosen": -1.917639970779419, + "logits/rejected": -1.9240702390670776, + "logps/chosen": -0.0031053307466208935, + "logps/rejected": -152.55824279785156, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00014476753131020814, + "rewards/margins": 2.6860525608062744, + "rewards/rejected": -2.6859078407287598, + "step": 9895 + }, + { + "epoch": 0.58, + "learning_rate": 4.020620452072389e-08, + "logits/chosen": -1.7325026988983154, + "logits/rejected": -1.733589768409729, + "logps/chosen": -7.056164264678955, + "logps/rejected": -134.55821228027344, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5183963179588318, + "rewards/margins": 2.0175182819366455, + "rewards/rejected": -1.499121904373169, + "step": 9896 + }, + { + "epoch": 0.58, + "learning_rate": 4.019696322639075e-08, + "logits/chosen": -2.0608532428741455, + "logits/rejected": -2.0559816360473633, + "logps/chosen": -1.6028287410736084, + "logps/rejected": -130.80038452148438, + "loss": 0.388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08419440686702728, + "rewards/margins": 3.178284168243408, + "rewards/rejected": -3.2624785900115967, + "step": 9897 + }, + { + "epoch": 0.58, + "learning_rate": 4.0187722280309414e-08, + "logits/chosen": -1.8115190267562866, + "logits/rejected": -1.8106777667999268, + "logps/chosen": -163.14395141601562, + "logps/rejected": -289.6661376953125, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1541794538497925, + "rewards/margins": 0.26098179817199707, + "rewards/rejected": 0.8931976556777954, + "step": 9898 + }, + { + "epoch": 0.58, + "learning_rate": 4.0178481682808176e-08, + "logits/chosen": -1.8019520044326782, + "logits/rejected": -1.7325812578201294, + "logps/chosen": -163.26156616210938, + "logps/rejected": -407.53375244140625, + "loss": 0.1157, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.67291259765625, + "rewards/margins": 2.3443236351013184, + "rewards/rejected": -0.6714111566543579, + "step": 9899 + }, + { + "epoch": 0.58, + "learning_rate": 4.0169241434215315e-08, + "logits/chosen": -1.9201040267944336, + "logits/rejected": -1.9139161109924316, + "logps/chosen": -209.69363403320312, + "logps/rejected": -350.8482971191406, + "loss": 0.079, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.300624132156372, + "rewards/margins": 2.076677083969116, + "rewards/rejected": 0.22394715249538422, + "step": 9900 + }, + { + "epoch": 0.58, + "learning_rate": 4.0160001534859094e-08, + "logits/chosen": -1.864335536956787, + "logits/rejected": -1.8682281970977783, + "logps/chosen": -137.6395263671875, + "logps/rejected": -277.9698486328125, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.41318678855896, + "rewards/margins": 2.303152561187744, + "rewards/rejected": 0.11003418266773224, + "step": 9901 + }, + { + "epoch": 0.58, + "learning_rate": 4.015076198506774e-08, + "logits/chosen": -1.9776620864868164, + "logits/rejected": -1.9678186178207397, + "logps/chosen": -96.21484375, + "logps/rejected": -322.78131103515625, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1228431686758995, + "rewards/margins": 2.0849831104278564, + "rewards/rejected": -1.9621399641036987, + "step": 9902 + }, + { + "epoch": 0.58, + "learning_rate": 4.014152278516951e-08, + "logits/chosen": -1.93381667137146, + "logits/rejected": -1.907612919807434, + "logps/chosen": -38.75468063354492, + "logps/rejected": -398.07501220703125, + "loss": 0.3414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019768906757235527, + "rewards/margins": 9.00141716003418, + "rewards/rejected": -9.021185874938965, + "step": 9903 + }, + { + "epoch": 0.58, + "learning_rate": 4.01322839354926e-08, + "logits/chosen": -2.0401453971862793, + "logits/rejected": -2.0416512489318848, + "logps/chosen": -42.31660461425781, + "logps/rejected": -169.2320556640625, + "loss": 0.2528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5282692313194275, + "rewards/margins": 2.2809898853302, + "rewards/rejected": -1.7527207136154175, + "step": 9904 + }, + { + "epoch": 0.58, + "learning_rate": 4.012304543636524e-08, + "logits/chosen": -1.8529672622680664, + "logits/rejected": -1.9131232500076294, + "logps/chosen": -218.5890350341797, + "logps/rejected": -383.48162841796875, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4240998029708862, + "rewards/margins": 4.532690525054932, + "rewards/rejected": -3.108590841293335, + "step": 9905 + }, + { + "epoch": 0.58, + "learning_rate": 4.011380728811562e-08, + "logits/chosen": -1.804024338722229, + "logits/rejected": -1.7839075326919556, + "logps/chosen": -279.263427734375, + "logps/rejected": -516.8477783203125, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.544872999191284, + "rewards/margins": 1.6618834733963013, + "rewards/rejected": 0.8829895257949829, + "step": 9906 + }, + { + "epoch": 0.58, + "learning_rate": 4.01045694910719e-08, + "logits/chosen": -1.670889139175415, + "logits/rejected": -1.6667429208755493, + "logps/chosen": -243.94606018066406, + "logps/rejected": -490.8311767578125, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6512680053710938, + "rewards/margins": 5.207728385925293, + "rewards/rejected": -2.5564606189727783, + "step": 9907 + }, + { + "epoch": 0.58, + "learning_rate": 4.009533204556231e-08, + "logits/chosen": -1.8173623085021973, + "logits/rejected": -1.7573102712631226, + "logps/chosen": -277.7791442871094, + "logps/rejected": -420.05621337890625, + "loss": 0.236, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1152771711349487, + "rewards/margins": 1.6623505353927612, + "rewards/rejected": -0.5470733642578125, + "step": 9908 + }, + { + "epoch": 0.58, + "learning_rate": 4.008609495191496e-08, + "logits/chosen": -1.7901475429534912, + "logits/rejected": -1.807643175125122, + "logps/chosen": -172.2261962890625, + "logps/rejected": -556.0381469726562, + "loss": 0.055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3766571283340454, + "rewards/margins": 7.19036865234375, + "rewards/rejected": -5.813711643218994, + "step": 9909 + }, + { + "epoch": 0.58, + "learning_rate": 4.007685821045801e-08, + "logits/chosen": -2.010545492172241, + "logits/rejected": -1.9981274604797363, + "logps/chosen": -69.7881088256836, + "logps/rejected": -355.99798583984375, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09401398152112961, + "rewards/margins": 4.627018451690674, + "rewards/rejected": -4.721032619476318, + "step": 9910 + }, + { + "epoch": 0.58, + "learning_rate": 4.00676218215196e-08, + "logits/chosen": -1.868072748184204, + "logits/rejected": -1.8657807111740112, + "logps/chosen": -227.57958984375, + "logps/rejected": -440.7107238769531, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8640687465667725, + "rewards/margins": 5.0536346435546875, + "rewards/rejected": -2.189566135406494, + "step": 9911 + }, + { + "epoch": 0.58, + "learning_rate": 4.005838578542785e-08, + "logits/chosen": -1.7204976081848145, + "logits/rejected": -1.7112228870391846, + "logps/chosen": -21.336795806884766, + "logps/rejected": -149.1824493408203, + "loss": 0.1462, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.02887761592865, + "rewards/margins": 3.199751853942871, + "rewards/rejected": -2.1708741188049316, + "step": 9912 + }, + { + "epoch": 0.58, + "learning_rate": 4.0049150102510864e-08, + "logits/chosen": -1.9442424774169922, + "logits/rejected": -1.9402821063995361, + "logps/chosen": -0.07085948437452316, + "logps/rejected": -256.58148193359375, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006846261210739613, + "rewards/margins": 4.200743198394775, + "rewards/rejected": -4.207589626312256, + "step": 9913 + }, + { + "epoch": 0.58, + "learning_rate": 4.0039914773096744e-08, + "logits/chosen": -1.7810274362564087, + "logits/rejected": -1.803983449935913, + "logps/chosen": -216.469970703125, + "logps/rejected": -313.49530029296875, + "loss": 0.0358, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.518019199371338, + "rewards/margins": 3.1789567470550537, + "rewards/rejected": -0.660937488079071, + "step": 9914 + }, + { + "epoch": 0.58, + "learning_rate": 4.003067979751356e-08, + "logits/chosen": -2.0709946155548096, + "logits/rejected": -2.026646852493286, + "logps/chosen": -233.3360137939453, + "logps/rejected": -486.2454833984375, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3530960083007812, + "rewards/margins": 3.8908586502075195, + "rewards/rejected": -1.5377625226974487, + "step": 9915 + }, + { + "epoch": 0.58, + "learning_rate": 4.002144517608941e-08, + "logits/chosen": -1.8271799087524414, + "logits/rejected": -1.8169347047805786, + "logps/chosen": -272.07818603515625, + "logps/rejected": -567.941650390625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.9099977016448975, + "rewards/margins": 4.438562393188477, + "rewards/rejected": -0.528564453125, + "step": 9916 + }, + { + "epoch": 0.58, + "learning_rate": 4.001221090915233e-08, + "logits/chosen": -1.8230252265930176, + "logits/rejected": -1.8113508224487305, + "logps/chosen": -111.64441680908203, + "logps/rejected": -172.7884979248047, + "loss": 0.4338, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0290863513946533, + "rewards/margins": 0.25810396671295166, + "rewards/rejected": 0.7709823846817017, + "step": 9917 + }, + { + "epoch": 0.58, + "learning_rate": 4.000297699703038e-08, + "logits/chosen": -2.026319742202759, + "logits/rejected": -2.0240695476531982, + "logps/chosen": -0.5719284415245056, + "logps/rejected": -187.37997436523438, + "loss": 0.3784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033551525324583054, + "rewards/margins": 3.0359127521514893, + "rewards/rejected": -3.0694642066955566, + "step": 9918 + }, + { + "epoch": 0.58, + "learning_rate": 3.999374344005158e-08, + "logits/chosen": -1.7358113527297974, + "logits/rejected": -1.7287373542785645, + "logps/chosen": -38.87540054321289, + "logps/rejected": -308.864013671875, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11913567036390305, + "rewards/margins": 5.987537860870361, + "rewards/rejected": -5.868402004241943, + "step": 9919 + }, + { + "epoch": 0.58, + "learning_rate": 3.998451023854396e-08, + "logits/chosen": -1.9356064796447754, + "logits/rejected": -1.9400209188461304, + "logps/chosen": -244.34890747070312, + "logps/rejected": -479.67633056640625, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.388745069503784, + "rewards/margins": 3.6213836669921875, + "rewards/rejected": -1.2326385974884033, + "step": 9920 + }, + { + "epoch": 0.58, + "learning_rate": 3.997527739283553e-08, + "logits/chosen": -1.9156745672225952, + "logits/rejected": -1.908930778503418, + "logps/chosen": -23.60309600830078, + "logps/rejected": -168.35317993164062, + "loss": 0.3811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34589120745658875, + "rewards/margins": 1.054355263710022, + "rewards/rejected": -0.7084640860557556, + "step": 9921 + }, + { + "epoch": 0.58, + "learning_rate": 3.996604490325428e-08, + "logits/chosen": -1.6865911483764648, + "logits/rejected": -1.6871581077575684, + "logps/chosen": -0.28534752130508423, + "logps/rejected": -42.79378890991211, + "loss": 0.6284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012494772672653198, + "rewards/margins": 0.046619635075330734, + "rewards/rejected": -0.047869112342596054, + "step": 9922 + }, + { + "epoch": 0.58, + "learning_rate": 3.9956812770128203e-08, + "logits/chosen": -1.8618851900100708, + "logits/rejected": -1.8580845594406128, + "logps/chosen": -16.885835647583008, + "logps/rejected": -211.40956115722656, + "loss": 0.2287, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2611219584941864, + "rewards/margins": 2.929394483566284, + "rewards/rejected": -2.6682724952697754, + "step": 9923 + }, + { + "epoch": 0.58, + "learning_rate": 3.9947580993785257e-08, + "logits/chosen": -1.9194756746292114, + "logits/rejected": -1.9157987833023071, + "logps/chosen": -0.034925173968076706, + "logps/rejected": -254.32382202148438, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015593005809932947, + "rewards/margins": 3.818063497543335, + "rewards/rejected": -3.819622755050659, + "step": 9924 + }, + { + "epoch": 0.58, + "learning_rate": 3.9938349574553405e-08, + "logits/chosen": -2.094815492630005, + "logits/rejected": -2.090388774871826, + "logps/chosen": -0.003947703633457422, + "logps/rejected": -368.2217102050781, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0021947775967419147, + "rewards/margins": 7.02520227432251, + "rewards/rejected": -7.023007392883301, + "step": 9925 + }, + { + "epoch": 0.58, + "learning_rate": 3.9929118512760604e-08, + "logits/chosen": -2.076826810836792, + "logits/rejected": -2.067953586578369, + "logps/chosen": -15.345088958740234, + "logps/rejected": -207.25096130371094, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025185203179717064, + "rewards/margins": 1.670224666595459, + "rewards/rejected": -1.645039439201355, + "step": 9926 + }, + { + "epoch": 0.58, + "learning_rate": 3.991988780873476e-08, + "logits/chosen": -2.0825273990631104, + "logits/rejected": -2.0726706981658936, + "logps/chosen": -7.82815408706665, + "logps/rejected": -155.482177734375, + "loss": 0.3907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017159318551421165, + "rewards/margins": 2.607654571533203, + "rewards/rejected": -2.6248137950897217, + "step": 9927 + }, + { + "epoch": 0.58, + "learning_rate": 3.99106574628038e-08, + "logits/chosen": -1.975334644317627, + "logits/rejected": -1.9690061807632446, + "logps/chosen": -10.655558586120605, + "logps/rejected": -176.30686950683594, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45143815875053406, + "rewards/margins": 1.291368842124939, + "rewards/rejected": -0.8399307131767273, + "step": 9928 + }, + { + "epoch": 0.58, + "learning_rate": 3.990142747529566e-08, + "logits/chosen": -1.9094096422195435, + "logits/rejected": -1.9626247882843018, + "logps/chosen": -316.5795593261719, + "logps/rejected": -451.0970153808594, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9052826166152954, + "rewards/margins": 2.453573703765869, + "rewards/rejected": -1.5482910871505737, + "step": 9929 + }, + { + "epoch": 0.58, + "learning_rate": 3.98921978465382e-08, + "logits/chosen": -1.916770100593567, + "logits/rejected": -1.9195365905761719, + "logps/chosen": -75.17288970947266, + "logps/rejected": -203.74566650390625, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4115554988384247, + "rewards/margins": 1.2271744012832642, + "rewards/rejected": -0.8156189322471619, + "step": 9930 + }, + { + "epoch": 0.58, + "learning_rate": 3.9882968576859325e-08, + "logits/chosen": -1.9317363500595093, + "logits/rejected": -1.9232256412506104, + "logps/chosen": -33.91048049926758, + "logps/rejected": -146.02749633789062, + "loss": 0.4772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13785210251808167, + "rewards/margins": 1.2667301893234253, + "rewards/rejected": -1.4045822620391846, + "step": 9931 + }, + { + "epoch": 0.58, + "learning_rate": 3.9873739666586894e-08, + "logits/chosen": -1.874783992767334, + "logits/rejected": -1.8613916635513306, + "logps/chosen": -78.26280975341797, + "logps/rejected": -242.42518615722656, + "loss": 0.4344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9293075799942017, + "rewards/margins": 0.2459869384765625, + "rewards/rejected": 0.6833206415176392, + "step": 9932 + }, + { + "epoch": 0.58, + "learning_rate": 3.986451111604876e-08, + "logits/chosen": -2.011396646499634, + "logits/rejected": -2.002728223800659, + "logps/chosen": -32.21208953857422, + "logps/rejected": -160.65147399902344, + "loss": 0.2241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6036842465400696, + "rewards/margins": 4.433815002441406, + "rewards/rejected": -3.8301308155059814, + "step": 9933 + }, + { + "epoch": 0.58, + "learning_rate": 3.9855282925572766e-08, + "logits/chosen": -1.9210205078125, + "logits/rejected": -1.901405692100525, + "logps/chosen": -69.55307006835938, + "logps/rejected": -245.9939422607422, + "loss": 0.126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0641250610351562, + "rewards/margins": 3.8210999965667725, + "rewards/rejected": -2.756974935531616, + "step": 9934 + }, + { + "epoch": 0.58, + "learning_rate": 3.984605509548675e-08, + "logits/chosen": -2.0414822101593018, + "logits/rejected": -2.0424206256866455, + "logps/chosen": -7.85475492477417, + "logps/rejected": -102.43479919433594, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07035417854785919, + "rewards/margins": 0.6606791615486145, + "rewards/rejected": -0.7310333251953125, + "step": 9935 + }, + { + "epoch": 0.58, + "learning_rate": 3.9836827626118525e-08, + "logits/chosen": -1.881169319152832, + "logits/rejected": -1.915877103805542, + "logps/chosen": -253.79055786132812, + "logps/rejected": -321.562255859375, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.183279514312744, + "rewards/margins": 1.2464599609375, + "rewards/rejected": 0.9368194937705994, + "step": 9936 + }, + { + "epoch": 0.58, + "learning_rate": 3.98276005177959e-08, + "logits/chosen": -1.8023463487625122, + "logits/rejected": -1.7885562181472778, + "logps/chosen": -74.53965759277344, + "logps/rejected": -188.29644775390625, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0072036744095385075, + "rewards/margins": 1.95842444896698, + "rewards/rejected": -1.9512207508087158, + "step": 9937 + }, + { + "epoch": 0.58, + "learning_rate": 3.981837377084666e-08, + "logits/chosen": -1.8655006885528564, + "logits/rejected": -1.8647079467773438, + "logps/chosen": -252.39630126953125, + "logps/rejected": -343.2220458984375, + "loss": 0.2249, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3229737281799316, + "rewards/margins": 0.6450928449630737, + "rewards/rejected": 1.677880883216858, + "step": 9938 + }, + { + "epoch": 0.58, + "learning_rate": 3.980914738559859e-08, + "logits/chosen": -1.9929609298706055, + "logits/rejected": -1.9860550165176392, + "logps/chosen": -106.91316223144531, + "logps/rejected": -377.9289245605469, + "loss": 0.1039, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5299538373947144, + "rewards/margins": 4.4570231437683105, + "rewards/rejected": -2.9270691871643066, + "step": 9939 + }, + { + "epoch": 0.58, + "learning_rate": 3.979992136237944e-08, + "logits/chosen": -1.9583053588867188, + "logits/rejected": -1.9505070447921753, + "logps/chosen": -0.15937376022338867, + "logps/rejected": -165.3869171142578, + "loss": 0.3792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01200189907103777, + "rewards/margins": 2.2930445671081543, + "rewards/rejected": -2.2810425758361816, + "step": 9940 + }, + { + "epoch": 0.58, + "learning_rate": 3.9790695701516996e-08, + "logits/chosen": -1.9656614065170288, + "logits/rejected": -1.9460301399230957, + "logps/chosen": -38.66948699951172, + "logps/rejected": -320.0736389160156, + "loss": 0.1515, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1222854852676392, + "rewards/margins": 2.7789719104766846, + "rewards/rejected": -1.6566864252090454, + "step": 9941 + }, + { + "epoch": 0.58, + "learning_rate": 3.9781470403338966e-08, + "logits/chosen": -1.7691521644592285, + "logits/rejected": -1.7719688415527344, + "logps/chosen": -66.56519317626953, + "logps/rejected": -218.63162231445312, + "loss": 0.2193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5156418085098267, + "rewards/margins": 2.917036533355713, + "rewards/rejected": -2.4013946056365967, + "step": 9942 + }, + { + "epoch": 0.58, + "learning_rate": 3.9772245468173105e-08, + "logits/chosen": -2.0572378635406494, + "logits/rejected": -2.057512044906616, + "logps/chosen": -109.23125457763672, + "logps/rejected": -175.6345672607422, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5408989191055298, + "rewards/margins": 1.010291337966919, + "rewards/rejected": -0.4693923890590668, + "step": 9943 + }, + { + "epoch": 0.58, + "learning_rate": 3.9763020896347096e-08, + "logits/chosen": -1.962808609008789, + "logits/rejected": -1.9655823707580566, + "logps/chosen": -53.40544128417969, + "logps/rejected": -120.73649597167969, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.740441918373108, + "rewards/margins": 2.9852447509765625, + "rewards/rejected": -1.2448028326034546, + "step": 9944 + }, + { + "epoch": 0.58, + "learning_rate": 3.975379668818868e-08, + "logits/chosen": -1.9019765853881836, + "logits/rejected": -1.8671374320983887, + "logps/chosen": -337.96197509765625, + "logps/rejected": -424.35089111328125, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0706727504730225, + "rewards/margins": 5.865759372711182, + "rewards/rejected": -3.795086622238159, + "step": 9945 + }, + { + "epoch": 0.58, + "learning_rate": 3.974457284402551e-08, + "logits/chosen": -1.7847713232040405, + "logits/rejected": -1.7033222913742065, + "logps/chosen": -167.30770874023438, + "logps/rejected": -494.667724609375, + "loss": 0.1437, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2863448858261108, + "rewards/margins": 2.8937149047851562, + "rewards/rejected": -1.6073700189590454, + "step": 9946 + }, + { + "epoch": 0.58, + "learning_rate": 3.9735349364185283e-08, + "logits/chosen": -1.9324816465377808, + "logits/rejected": -1.930249810218811, + "logps/chosen": -1.2057191133499146, + "logps/rejected": -25.810195922851562, + "loss": 0.6349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04842744022607803, + "rewards/margins": 0.04700074344873428, + "rewards/rejected": 0.00142669677734375, + "step": 9947 + }, + { + "epoch": 0.58, + "learning_rate": 3.972612624899565e-08, + "logits/chosen": -1.8207018375396729, + "logits/rejected": -1.8217923641204834, + "logps/chosen": -58.145240783691406, + "logps/rejected": -119.13800811767578, + "loss": 0.5051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3629619777202606, + "rewards/margins": 0.4484317898750305, + "rewards/rejected": -0.0854698196053505, + "step": 9948 + }, + { + "epoch": 0.58, + "learning_rate": 3.971690349878426e-08, + "logits/chosen": -1.6886122226715088, + "logits/rejected": -1.6597398519515991, + "logps/chosen": -267.6183166503906, + "logps/rejected": -245.02960205078125, + "loss": 0.4855, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.202246069908142, + "rewards/margins": -0.13812875747680664, + "rewards/rejected": 1.3403748273849487, + "step": 9949 + }, + { + "epoch": 0.58, + "learning_rate": 3.970768111387878e-08, + "logits/chosen": -1.984246015548706, + "logits/rejected": -1.9425288438796997, + "logps/chosen": -194.95114135742188, + "logps/rejected": -315.8138732910156, + "loss": 0.1145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.27866530418396, + "rewards/margins": 1.7046815156936646, + "rewards/rejected": 0.5739837884902954, + "step": 9950 + }, + { + "epoch": 0.58, + "learning_rate": 3.96984590946068e-08, + "logits/chosen": -1.9860137701034546, + "logits/rejected": -1.9905357360839844, + "logps/chosen": -101.23143005371094, + "logps/rejected": -184.21920776367188, + "loss": 0.4276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3524589538574219, + "rewards/margins": 1.8380317687988281, + "rewards/rejected": -2.19049072265625, + "step": 9951 + }, + { + "epoch": 0.58, + "learning_rate": 3.9689237441295945e-08, + "logits/chosen": -1.7911782264709473, + "logits/rejected": -1.7890318632125854, + "logps/chosen": -0.18736617267131805, + "logps/rejected": -172.65211486816406, + "loss": 0.425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08668389916419983, + "rewards/margins": 1.2586398124694824, + "rewards/rejected": -1.171955943107605, + "step": 9952 + }, + { + "epoch": 0.58, + "learning_rate": 3.9680016154273804e-08, + "logits/chosen": -1.9846704006195068, + "logits/rejected": -1.9831032752990723, + "logps/chosen": -22.711490631103516, + "logps/rejected": -178.326171875, + "loss": 0.6326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.671626091003418, + "rewards/margins": 1.7640750408172607, + "rewards/rejected": -2.4357011318206787, + "step": 9953 + }, + { + "epoch": 0.58, + "learning_rate": 3.967079523386799e-08, + "logits/chosen": -2.031552314758301, + "logits/rejected": -2.02663254737854, + "logps/chosen": -0.46715793013572693, + "logps/rejected": -288.83123779296875, + "loss": 0.3501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012023535557091236, + "rewards/margins": 7.398632526397705, + "rewards/rejected": -7.386609077453613, + "step": 9954 + }, + { + "epoch": 0.58, + "learning_rate": 3.9661574680406033e-08, + "logits/chosen": -1.7852957248687744, + "logits/rejected": -1.7837247848510742, + "logps/chosen": -0.009507265873253345, + "logps/rejected": -28.251811981201172, + "loss": 0.6729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002621762454509735, + "rewards/margins": 0.14146912097930908, + "rewards/rejected": -0.13884735107421875, + "step": 9955 + }, + { + "epoch": 0.58, + "learning_rate": 3.965235449421553e-08, + "logits/chosen": -1.7701315879821777, + "logits/rejected": -1.7756352424621582, + "logps/chosen": -282.257080078125, + "logps/rejected": -553.3052978515625, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.675384521484375, + "rewards/margins": 9.108606338500977, + "rewards/rejected": -7.433221340179443, + "step": 9956 + }, + { + "epoch": 0.58, + "learning_rate": 3.9643134675624e-08, + "logits/chosen": -2.0201828479766846, + "logits/rejected": -2.049928665161133, + "logps/chosen": -223.4755859375, + "logps/rejected": -543.92919921875, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7726119756698608, + "rewards/margins": 5.789561748504639, + "rewards/rejected": -4.016949653625488, + "step": 9957 + }, + { + "epoch": 0.58, + "learning_rate": 3.9633915224959e-08, + "logits/chosen": -2.0555269718170166, + "logits/rejected": -2.0606131553649902, + "logps/chosen": -9.553513526916504, + "logps/rejected": -194.48989868164062, + "loss": 0.3054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14159022271633148, + "rewards/margins": 3.525684356689453, + "rewards/rejected": -3.38409423828125, + "step": 9958 + }, + { + "epoch": 0.58, + "learning_rate": 3.9624696142548017e-08, + "logits/chosen": -1.9193952083587646, + "logits/rejected": -1.9108953475952148, + "logps/chosen": -128.7743682861328, + "logps/rejected": -191.58343505859375, + "loss": 0.1008, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.015852451324463, + "rewards/margins": 2.5754075050354004, + "rewards/rejected": -0.5595550537109375, + "step": 9959 + }, + { + "epoch": 0.58, + "learning_rate": 3.9615477428718603e-08, + "logits/chosen": -1.833787441253662, + "logits/rejected": -1.848262906074524, + "logps/chosen": -246.91883850097656, + "logps/rejected": -434.4078674316406, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8064987659454346, + "rewards/margins": 5.155708312988281, + "rewards/rejected": -2.3492095470428467, + "step": 9960 + }, + { + "epoch": 0.58, + "learning_rate": 3.960625908379821e-08, + "logits/chosen": -2.073716878890991, + "logits/rejected": -2.081306219100952, + "logps/chosen": -163.04855346679688, + "logps/rejected": -433.8482666015625, + "loss": 0.0749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2089935541152954, + "rewards/margins": 4.762197971343994, + "rewards/rejected": -3.553204298019409, + "step": 9961 + }, + { + "epoch": 0.58, + "learning_rate": 3.959704110811434e-08, + "logits/chosen": -1.8513591289520264, + "logits/rejected": -1.8428468704223633, + "logps/chosen": -179.77484130859375, + "logps/rejected": -415.84967041015625, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0571746826171875, + "rewards/margins": 3.043548583984375, + "rewards/rejected": -0.9863739013671875, + "step": 9962 + }, + { + "epoch": 0.58, + "learning_rate": 3.958782350199446e-08, + "logits/chosen": -1.967026948928833, + "logits/rejected": -1.963626742362976, + "logps/chosen": -41.90102005004883, + "logps/rejected": -271.4208984375, + "loss": 0.2442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5479931235313416, + "rewards/margins": 3.960282564163208, + "rewards/rejected": -3.4122893810272217, + "step": 9963 + }, + { + "epoch": 0.58, + "learning_rate": 3.957860626576602e-08, + "logits/chosen": -2.0245401859283447, + "logits/rejected": -2.009084939956665, + "logps/chosen": -0.11288339644670486, + "logps/rejected": -151.7735137939453, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008870343677699566, + "rewards/margins": 2.3997249603271484, + "rewards/rejected": -2.390854597091675, + "step": 9964 + }, + { + "epoch": 0.58, + "learning_rate": 3.9569389399756455e-08, + "logits/chosen": -1.823535680770874, + "logits/rejected": -1.8029077053070068, + "logps/chosen": -149.11868286132812, + "logps/rejected": -204.58364868164062, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7084487676620483, + "rewards/margins": 0.2948882579803467, + "rewards/rejected": 1.4135605096817017, + "step": 9965 + }, + { + "epoch": 0.58, + "learning_rate": 3.956017290429321e-08, + "logits/chosen": -2.011084794998169, + "logits/rejected": -2.0081968307495117, + "logps/chosen": -222.25177001953125, + "logps/rejected": -349.6208190917969, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8643935918807983, + "rewards/margins": 3.623356580734253, + "rewards/rejected": -1.7589629888534546, + "step": 9966 + }, + { + "epoch": 0.58, + "learning_rate": 3.9550956779703675e-08, + "logits/chosen": -2.084049701690674, + "logits/rejected": -2.0765912532806396, + "logps/chosen": -4.635247230529785, + "logps/rejected": -139.84027099609375, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11183185875415802, + "rewards/margins": 0.47340261936187744, + "rewards/rejected": -0.3615707457065582, + "step": 9967 + }, + { + "epoch": 0.58, + "learning_rate": 3.954174102631526e-08, + "logits/chosen": -2.1629323959350586, + "logits/rejected": -2.154533863067627, + "logps/chosen": -11.558923721313477, + "logps/rejected": -63.90238571166992, + "loss": 0.5512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18410825729370117, + "rewards/margins": 0.11907491832971573, + "rewards/rejected": 0.06503333896398544, + "step": 9968 + }, + { + "epoch": 0.58, + "learning_rate": 3.9532525644455394e-08, + "logits/chosen": -1.707851767539978, + "logits/rejected": -1.7155712842941284, + "logps/chosen": -45.19842529296875, + "logps/rejected": -225.55352783203125, + "loss": 0.4061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5336494445800781, + "rewards/margins": 0.7343452572822571, + "rewards/rejected": -0.20069579780101776, + "step": 9969 + }, + { + "epoch": 0.58, + "learning_rate": 3.952331063445139e-08, + "logits/chosen": -1.8094249963760376, + "logits/rejected": -1.8008466958999634, + "logps/chosen": -0.0006994418217800558, + "logps/rejected": -163.57244873046875, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2886729311721865e-06, + "rewards/margins": 4.1901469230651855, + "rewards/rejected": -4.1901445388793945, + "step": 9970 + }, + { + "epoch": 0.58, + "learning_rate": 3.951409599663066e-08, + "logits/chosen": -1.747746229171753, + "logits/rejected": -1.740928053855896, + "logps/chosen": -49.18450164794922, + "logps/rejected": -128.0722198486328, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8910552859306335, + "rewards/margins": 0.5484695434570312, + "rewards/rejected": 0.3425857722759247, + "step": 9971 + }, + { + "epoch": 0.58, + "learning_rate": 3.950488173132052e-08, + "logits/chosen": -1.9441081285476685, + "logits/rejected": -1.9369474649429321, + "logps/chosen": -208.91873168945312, + "logps/rejected": -366.2175598144531, + "loss": 0.103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9949829578399658, + "rewards/margins": 2.3347229957580566, + "rewards/rejected": -0.33974000811576843, + "step": 9972 + }, + { + "epoch": 0.58, + "learning_rate": 3.949566783884832e-08, + "logits/chosen": -2.011420488357544, + "logits/rejected": -2.0149292945861816, + "logps/chosen": -0.028418783098459244, + "logps/rejected": -117.66893005371094, + "loss": 0.4184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020121298439335078, + "rewards/margins": 1.7093279361724854, + "rewards/rejected": -1.709529161453247, + "step": 9973 + }, + { + "epoch": 0.58, + "learning_rate": 3.948645431954138e-08, + "logits/chosen": -1.917574167251587, + "logits/rejected": -1.9084903001785278, + "logps/chosen": -158.9571533203125, + "logps/rejected": -406.5436706542969, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.460742235183716, + "rewards/margins": 3.5808746814727783, + "rewards/rejected": -1.1201324462890625, + "step": 9974 + }, + { + "epoch": 0.58, + "learning_rate": 3.947724117372702e-08, + "logits/chosen": -1.7519803047180176, + "logits/rejected": -1.7243489027023315, + "logps/chosen": -244.32220458984375, + "logps/rejected": -276.1786193847656, + "loss": 0.1528, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8242005109786987, + "rewards/margins": 1.5849213600158691, + "rewards/rejected": 0.23927918076515198, + "step": 9975 + }, + { + "epoch": 0.58, + "learning_rate": 3.946802840173251e-08, + "logits/chosen": -1.9539567232131958, + "logits/rejected": -1.9552099704742432, + "logps/chosen": -28.672805786132812, + "logps/rejected": -82.9534683227539, + "loss": 0.6662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04236793518066406, + "rewards/margins": 0.05548133701086044, + "rewards/rejected": -0.0978492721915245, + "step": 9976 + }, + { + "epoch": 0.58, + "learning_rate": 3.9458816003885165e-08, + "logits/chosen": -1.7304390668869019, + "logits/rejected": -1.7099859714508057, + "logps/chosen": -369.2798767089844, + "logps/rejected": -364.021240234375, + "loss": 0.5098, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.486993432044983, + "rewards/margins": -0.3887878656387329, + "rewards/rejected": 1.8757812976837158, + "step": 9977 + }, + { + "epoch": 0.58, + "learning_rate": 3.9449603980512226e-08, + "logits/chosen": -1.8308064937591553, + "logits/rejected": -1.8328206539154053, + "logps/chosen": -4.541829548543319e-05, + "logps/rejected": -149.81625366210938, + "loss": 0.4712, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.582981649851718e-07, + "rewards/margins": 1.2299308776855469, + "rewards/rejected": -1.2299317121505737, + "step": 9978 + }, + { + "epoch": 0.58, + "learning_rate": 3.944039233194096e-08, + "logits/chosen": -1.8270111083984375, + "logits/rejected": -1.826042890548706, + "logps/chosen": -26.275684356689453, + "logps/rejected": -235.06503295898438, + "loss": 0.2498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32421475648880005, + "rewards/margins": 3.2360098361968994, + "rewards/rejected": -2.911795139312744, + "step": 9979 + }, + { + "epoch": 0.58, + "learning_rate": 3.943118105849862e-08, + "logits/chosen": -2.016589641571045, + "logits/rejected": -1.9905027151107788, + "logps/chosen": -110.48283386230469, + "logps/rejected": -267.41351318359375, + "loss": 0.2526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5086776614189148, + "rewards/margins": 1.797947645187378, + "rewards/rejected": -1.289270043373108, + "step": 9980 + }, + { + "epoch": 0.58, + "learning_rate": 3.9421970160512423e-08, + "logits/chosen": -2.0114049911499023, + "logits/rejected": -1.9689269065856934, + "logps/chosen": -194.87075805664062, + "logps/rejected": -311.9615173339844, + "loss": 0.0683, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.613656759262085, + "rewards/margins": 2.4595491886138916, + "rewards/rejected": 0.15410767495632172, + "step": 9981 + }, + { + "epoch": 0.58, + "learning_rate": 3.9412759638309586e-08, + "logits/chosen": -2.001533031463623, + "logits/rejected": -1.9490045309066772, + "logps/chosen": -207.61605834960938, + "logps/rejected": -394.265869140625, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7748627066612244, + "rewards/margins": 0.5162445306777954, + "rewards/rejected": 0.25861817598342896, + "step": 9982 + }, + { + "epoch": 0.58, + "learning_rate": 3.940354949221732e-08, + "logits/chosen": -2.025480270385742, + "logits/rejected": -2.013124465942383, + "logps/chosen": -188.81637573242188, + "logps/rejected": -411.76983642578125, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8673737049102783, + "rewards/margins": 3.060629367828369, + "rewards/rejected": -1.1932556629180908, + "step": 9983 + }, + { + "epoch": 0.58, + "learning_rate": 3.93943397225628e-08, + "logits/chosen": -2.0121493339538574, + "logits/rejected": -2.0008609294891357, + "logps/chosen": -0.0001157502192654647, + "logps/rejected": -98.32511138916016, + "loss": 0.8089, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.1828051305637928e-06, + "rewards/margins": -0.4429437220096588, + "rewards/rejected": 0.44294053316116333, + "step": 9984 + }, + { + "epoch": 0.58, + "learning_rate": 3.9385130329673224e-08, + "logits/chosen": -1.9797511100769043, + "logits/rejected": -1.9866094589233398, + "logps/chosen": -48.977840423583984, + "logps/rejected": -192.76812744140625, + "loss": 0.2568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8629921078681946, + "rewards/margins": 1.3123786449432373, + "rewards/rejected": -0.4493865966796875, + "step": 9985 + }, + { + "epoch": 0.58, + "learning_rate": 3.937592131387573e-08, + "logits/chosen": -1.7639089822769165, + "logits/rejected": -1.7026678323745728, + "logps/chosen": -217.30233764648438, + "logps/rejected": -327.7462158203125, + "loss": 0.227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.016841173171997, + "rewards/margins": 1.298671007156372, + "rewards/rejected": -0.281829833984375, + "step": 9986 + }, + { + "epoch": 0.58, + "learning_rate": 3.9366712675497485e-08, + "logits/chosen": -1.855037808418274, + "logits/rejected": -1.8110061883926392, + "logps/chosen": -253.725830078125, + "logps/rejected": -441.0234069824219, + "loss": 0.1644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6757873892784119, + "rewards/margins": 3.401010274887085, + "rewards/rejected": -2.7252228260040283, + "step": 9987 + }, + { + "epoch": 0.58, + "learning_rate": 3.9357504414865615e-08, + "logits/chosen": -1.8168442249298096, + "logits/rejected": -1.8126808404922485, + "logps/chosen": -0.16031339764595032, + "logps/rejected": -162.30734252929688, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008410988375544548, + "rewards/margins": 2.1305418014526367, + "rewards/rejected": -2.1389527320861816, + "step": 9988 + }, + { + "epoch": 0.58, + "learning_rate": 3.9348296532307226e-08, + "logits/chosen": -1.7715634107589722, + "logits/rejected": -1.7482887506484985, + "logps/chosen": -186.17251586914062, + "logps/rejected": -269.4287414550781, + "loss": 0.1232, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3111984729766846, + "rewards/margins": 1.642451524734497, + "rewards/rejected": 0.6687469482421875, + "step": 9989 + }, + { + "epoch": 0.58, + "learning_rate": 3.9339089028149483e-08, + "logits/chosen": -2.073598861694336, + "logits/rejected": -2.0781846046447754, + "logps/chosen": -0.042431965470314026, + "logps/rejected": -127.7874755859375, + "loss": 0.7428, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.021939998492598534, + "rewards/margins": -0.22786621749401093, + "rewards/rejected": 0.249806210398674, + "step": 9990 + }, + { + "epoch": 0.58, + "learning_rate": 3.932988190271943e-08, + "logits/chosen": -2.0125396251678467, + "logits/rejected": -1.9584318399429321, + "logps/chosen": -176.82894897460938, + "logps/rejected": -325.0530700683594, + "loss": 0.072, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8874359130859375, + "rewards/margins": 3.3404784202575684, + "rewards/rejected": -1.4530426263809204, + "step": 9991 + }, + { + "epoch": 0.58, + "learning_rate": 3.932067515634416e-08, + "logits/chosen": -1.9238673448562622, + "logits/rejected": -1.9294058084487915, + "logps/chosen": -128.92330932617188, + "logps/rejected": -202.0166473388672, + "loss": 0.2079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1183655261993408, + "rewards/margins": 1.2559127807617188, + "rewards/rejected": -0.1375472992658615, + "step": 9992 + }, + { + "epoch": 0.58, + "learning_rate": 3.9311468789350755e-08, + "logits/chosen": -1.863003134727478, + "logits/rejected": -1.8390412330627441, + "logps/chosen": -144.6366729736328, + "logps/rejected": -475.2432556152344, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2824783325195312, + "rewards/margins": 6.132652282714844, + "rewards/rejected": -3.8501739501953125, + "step": 9993 + }, + { + "epoch": 0.58, + "learning_rate": 3.9302262802066265e-08, + "logits/chosen": -2.0687124729156494, + "logits/rejected": -2.0755198001861572, + "logps/chosen": -10.138294219970703, + "logps/rejected": -226.7371063232422, + "loss": 0.3402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03358478471636772, + "rewards/margins": 3.1983509063720703, + "rewards/rejected": -3.1647660732269287, + "step": 9994 + }, + { + "epoch": 0.58, + "learning_rate": 3.929305719481772e-08, + "logits/chosen": -1.8414180278778076, + "logits/rejected": -1.8458884954452515, + "logps/chosen": -22.326251983642578, + "logps/rejected": -92.33511352539062, + "loss": 0.5567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20777778327465057, + "rewards/margins": 0.12175483256578445, + "rewards/rejected": 0.08602295070886612, + "step": 9995 + }, + { + "epoch": 0.58, + "learning_rate": 3.928385196793217e-08, + "logits/chosen": -1.9511539936065674, + "logits/rejected": -1.9512437582015991, + "logps/chosen": -32.366783142089844, + "logps/rejected": -85.1981201171875, + "loss": 0.5494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22316665947437286, + "rewards/margins": 0.5526466369628906, + "rewards/rejected": -0.32947999238967896, + "step": 9996 + }, + { + "epoch": 0.58, + "learning_rate": 3.92746471217366e-08, + "logits/chosen": -1.9789217710494995, + "logits/rejected": -1.968921184539795, + "logps/chosen": -45.45295715332031, + "logps/rejected": -146.30018615722656, + "loss": 0.4811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28266677260398865, + "rewards/margins": 0.4391365051269531, + "rewards/rejected": -0.15646973252296448, + "step": 9997 + }, + { + "epoch": 0.58, + "learning_rate": 3.926544265655804e-08, + "logits/chosen": -2.018601179122925, + "logits/rejected": -2.0131332874298096, + "logps/chosen": -2.9442615509033203, + "logps/rejected": -204.19471740722656, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09203832596540451, + "rewards/margins": 4.407003879547119, + "rewards/rejected": -4.49904203414917, + "step": 9998 + }, + { + "epoch": 0.58, + "learning_rate": 3.925623857272345e-08, + "logits/chosen": -1.8740389347076416, + "logits/rejected": -1.879887580871582, + "logps/chosen": -1.4645049571990967, + "logps/rejected": -180.62571716308594, + "loss": 0.3866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06804607063531876, + "rewards/margins": 1.9798839092254639, + "rewards/rejected": -1.9118378162384033, + "step": 9999 + }, + { + "epoch": 0.58, + "learning_rate": 3.924703487055984e-08, + "logits/chosen": -2.227010488510132, + "logits/rejected": -2.2227063179016113, + "logps/chosen": -0.5274345874786377, + "logps/rejected": -154.2903289794922, + "loss": 0.33, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04991252347826958, + "rewards/margins": 4.404508113861084, + "rewards/rejected": -4.454420566558838, + "step": 10000 + }, + { + "epoch": 0.58, + "learning_rate": 3.9237831550394134e-08, + "logits/chosen": -1.7295571565628052, + "logits/rejected": -1.7541061639785767, + "logps/chosen": -323.204345703125, + "logps/rejected": -479.4746398925781, + "loss": 0.2074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8835572004318237, + "rewards/margins": 0.7083832025527954, + "rewards/rejected": 1.1751739978790283, + "step": 10001 + }, + { + "epoch": 0.58, + "learning_rate": 3.9228628612553304e-08, + "logits/chosen": -1.9596863985061646, + "logits/rejected": -1.9802451133728027, + "logps/chosen": -154.5562286376953, + "logps/rejected": -314.28851318359375, + "loss": 0.1738, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4654556512832642, + "rewards/margins": 1.2851638793945312, + "rewards/rejected": 0.18029175698757172, + "step": 10002 + }, + { + "epoch": 0.58, + "learning_rate": 3.9219426057364266e-08, + "logits/chosen": -1.9764328002929688, + "logits/rejected": -1.9731600284576416, + "logps/chosen": -57.77354431152344, + "logps/rejected": -359.57977294921875, + "loss": 0.235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15142822265625, + "rewards/margins": 2.5728118419647217, + "rewards/rejected": -2.4213836193084717, + "step": 10003 + }, + { + "epoch": 0.58, + "learning_rate": 3.9210223885153956e-08, + "logits/chosen": -2.008279323577881, + "logits/rejected": -2.008617401123047, + "logps/chosen": -8.379465103149414, + "logps/rejected": -64.9276123046875, + "loss": 0.5917, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24553337693214417, + "rewards/margins": 0.1244380995631218, + "rewards/rejected": 0.12109527736902237, + "step": 10004 + }, + { + "epoch": 0.58, + "learning_rate": 3.920102209624927e-08, + "logits/chosen": -2.025688648223877, + "logits/rejected": -2.029129981994629, + "logps/chosen": -95.6299057006836, + "logps/rejected": -268.912841796875, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28587648272514343, + "rewards/margins": 1.6119431257247925, + "rewards/rejected": -1.3260666131973267, + "step": 10005 + }, + { + "epoch": 0.58, + "learning_rate": 3.9191820690977106e-08, + "logits/chosen": -1.7703989744186401, + "logits/rejected": -1.7789306640625, + "logps/chosen": -54.70159149169922, + "logps/rejected": -254.93218994140625, + "loss": 0.4356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035811614245176315, + "rewards/margins": 1.3313210010528564, + "rewards/rejected": -1.2955093383789062, + "step": 10006 + }, + { + "epoch": 0.58, + "learning_rate": 3.918261966966433e-08, + "logits/chosen": -2.038578748703003, + "logits/rejected": -2.036930799484253, + "logps/chosen": -4.4702799641527236e-05, + "logps/rejected": -135.07366943359375, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.386652562970994e-06, + "rewards/margins": 2.5186734199523926, + "rewards/rejected": -2.5186691284179688, + "step": 10007 + }, + { + "epoch": 0.58, + "learning_rate": 3.917341903263782e-08, + "logits/chosen": -1.8925262689590454, + "logits/rejected": -1.7538572549819946, + "logps/chosen": -142.32986450195312, + "logps/rejected": -412.5902099609375, + "loss": 0.1844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0911407470703125, + "rewards/margins": 1.9375641345977783, + "rewards/rejected": -0.846423327922821, + "step": 10008 + }, + { + "epoch": 0.58, + "learning_rate": 3.9164218780224424e-08, + "logits/chosen": -2.029139757156372, + "logits/rejected": -2.0328238010406494, + "logps/chosen": -15.652585983276367, + "logps/rejected": -155.40325927734375, + "loss": 0.5349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02556142769753933, + "rewards/margins": 0.72346031665802, + "rewards/rejected": -0.6978988647460938, + "step": 10009 + }, + { + "epoch": 0.58, + "learning_rate": 3.915501891275097e-08, + "logits/chosen": -2.039532423019409, + "logits/rejected": -2.057821035385132, + "logps/chosen": -208.08969116210938, + "logps/rejected": -240.82281494140625, + "loss": 0.1828, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9762696027755737, + "rewards/margins": 1.1766984462738037, + "rewards/rejected": 0.7995712161064148, + "step": 10010 + }, + { + "epoch": 0.58, + "learning_rate": 3.914581943054431e-08, + "logits/chosen": -2.0474720001220703, + "logits/rejected": -2.0451626777648926, + "logps/chosen": -0.0025312588550150394, + "logps/rejected": -253.99295043945312, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011354011803632602, + "rewards/margins": 7.051310062408447, + "rewards/rejected": -7.0514235496521, + "step": 10011 + }, + { + "epoch": 0.58, + "learning_rate": 3.9136620333931225e-08, + "logits/chosen": -1.9239944219589233, + "logits/rejected": -1.9212403297424316, + "logps/chosen": -4.7391815185546875, + "logps/rejected": -61.219085693359375, + "loss": 0.5362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10677080601453781, + "rewards/margins": 0.5767558217048645, + "rewards/rejected": -0.6835266351699829, + "step": 10012 + }, + { + "epoch": 0.58, + "learning_rate": 3.912742162323854e-08, + "logits/chosen": -2.186138153076172, + "logits/rejected": -2.175650119781494, + "logps/chosen": -2.872912591556087e-05, + "logps/rejected": -103.26145935058594, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1456799004226923e-07, + "rewards/margins": 2.8512120246887207, + "rewards/rejected": -2.8512122631073, + "step": 10013 + }, + { + "epoch": 0.58, + "learning_rate": 3.9118223298793005e-08, + "logits/chosen": -2.000232696533203, + "logits/rejected": -1.9891142845153809, + "logps/chosen": -55.70509338378906, + "logps/rejected": -116.93467712402344, + "loss": 0.5073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4504654109477997, + "rewards/margins": 1.09912109375, + "rewards/rejected": -1.549586534500122, + "step": 10014 + }, + { + "epoch": 0.58, + "learning_rate": 3.910902536092142e-08, + "logits/chosen": -1.8624478578567505, + "logits/rejected": -1.841583251953125, + "logps/chosen": -6.704341888427734, + "logps/rejected": -240.8663330078125, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11611838638782501, + "rewards/margins": 5.0606465339660645, + "rewards/rejected": -4.944528102874756, + "step": 10015 + }, + { + "epoch": 0.58, + "learning_rate": 3.909982780995053e-08, + "logits/chosen": -2.064419984817505, + "logits/rejected": -2.073653221130371, + "logps/chosen": -233.88229370117188, + "logps/rejected": -322.8736572265625, + "loss": 0.1464, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.652587890625, + "rewards/margins": 1.5061218738555908, + "rewards/rejected": 0.14646606147289276, + "step": 10016 + }, + { + "epoch": 0.58, + "learning_rate": 3.909063064620707e-08, + "logits/chosen": -1.818966269493103, + "logits/rejected": -1.8004000186920166, + "logps/chosen": -185.57794189453125, + "logps/rejected": -270.4284362792969, + "loss": 0.185, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.852893114089966, + "rewards/margins": 0.9318450689315796, + "rewards/rejected": 1.9210480451583862, + "step": 10017 + }, + { + "epoch": 0.58, + "learning_rate": 3.908143387001777e-08, + "logits/chosen": -1.9817343950271606, + "logits/rejected": -1.9358099699020386, + "logps/chosen": -167.01303100585938, + "logps/rejected": -258.4443664550781, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4356597661972046, + "rewards/margins": 0.8282317519187927, + "rewards/rejected": 0.6074280142784119, + "step": 10018 + }, + { + "epoch": 0.58, + "learning_rate": 3.9072237481709345e-08, + "logits/chosen": -1.9803619384765625, + "logits/rejected": -1.9770686626434326, + "logps/chosen": -0.6973126530647278, + "logps/rejected": -156.49490356445312, + "loss": 0.3901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03177960589528084, + "rewards/margins": 2.575404405593872, + "rewards/rejected": -2.6071839332580566, + "step": 10019 + }, + { + "epoch": 0.58, + "learning_rate": 3.90630414816085e-08, + "logits/chosen": -1.7922450304031372, + "logits/rejected": -1.8279114961624146, + "logps/chosen": -170.20428466796875, + "logps/rejected": -177.33970642089844, + "loss": 0.2215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.905310034751892, + "rewards/margins": 0.7078063488006592, + "rewards/rejected": 1.197503685951233, + "step": 10020 + }, + { + "epoch": 0.58, + "learning_rate": 3.905384587004193e-08, + "logits/chosen": -2.0017642974853516, + "logits/rejected": -2.004394769668579, + "logps/chosen": -5.5132293701171875, + "logps/rejected": -37.36354446411133, + "loss": 0.5515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35795697569847107, + "rewards/margins": 0.2042330652475357, + "rewards/rejected": 0.15372391045093536, + "step": 10021 + }, + { + "epoch": 0.58, + "learning_rate": 3.9044650647336285e-08, + "logits/chosen": -1.9058982133865356, + "logits/rejected": -1.9049996137619019, + "logps/chosen": -33.79283905029297, + "logps/rejected": -160.52081298828125, + "loss": 0.4424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2550319731235504, + "rewards/margins": 1.1016658544540405, + "rewards/rejected": -0.8466339111328125, + "step": 10022 + }, + { + "epoch": 0.58, + "learning_rate": 3.9035455813818246e-08, + "logits/chosen": -1.8737448453903198, + "logits/rejected": -1.7627815008163452, + "logps/chosen": -240.8826904296875, + "logps/rejected": -461.46875, + "loss": 0.1106, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.304766893386841, + "rewards/margins": 1.9182281494140625, + "rewards/rejected": 0.38653871417045593, + "step": 10023 + }, + { + "epoch": 0.58, + "learning_rate": 3.902626136981444e-08, + "logits/chosen": -1.795689582824707, + "logits/rejected": -1.755137324333191, + "logps/chosen": -189.13729858398438, + "logps/rejected": -307.193115234375, + "loss": 0.0554, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5278046131134033, + "rewards/margins": 3.1689577102661133, + "rewards/rejected": -0.6411529779434204, + "step": 10024 + }, + { + "epoch": 0.58, + "learning_rate": 3.901706731565152e-08, + "logits/chosen": -1.887749195098877, + "logits/rejected": -1.8894932270050049, + "logps/chosen": -29.06641387939453, + "logps/rejected": -112.0625228881836, + "loss": 0.2443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6474601626396179, + "rewards/margins": 2.353781223297119, + "rewards/rejected": -1.7063210010528564, + "step": 10025 + }, + { + "epoch": 0.58, + "learning_rate": 3.9007873651656086e-08, + "logits/chosen": -1.9112298488616943, + "logits/rejected": -1.8873716592788696, + "logps/chosen": -325.64666748046875, + "logps/rejected": -498.3071594238281, + "loss": 0.075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.229852318763733, + "rewards/margins": 2.9179201126098633, + "rewards/rejected": -1.6880676746368408, + "step": 10026 + }, + { + "epoch": 0.58, + "learning_rate": 3.899868037815475e-08, + "logits/chosen": -1.921106219291687, + "logits/rejected": -1.9157488346099854, + "logps/chosen": -21.888492584228516, + "logps/rejected": -302.4859313964844, + "loss": 0.105, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4057422876358032, + "rewards/margins": 4.226866722106934, + "rewards/rejected": -2.821124315261841, + "step": 10027 + }, + { + "epoch": 0.58, + "learning_rate": 3.89894874954741e-08, + "logits/chosen": -1.8456110954284668, + "logits/rejected": -1.8456997871398926, + "logps/chosen": -226.53294372558594, + "logps/rejected": -271.01019287109375, + "loss": 0.3849, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.7775161266326904, + "rewards/margins": -0.09374547004699707, + "rewards/rejected": 2.8712615966796875, + "step": 10028 + }, + { + "epoch": 0.58, + "learning_rate": 3.8980295003940695e-08, + "logits/chosen": -1.9311938285827637, + "logits/rejected": -1.912075161933899, + "logps/chosen": -1.1455217599868774, + "logps/rejected": -153.59117126464844, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.044641781598329544, + "rewards/margins": 1.7164846658706665, + "rewards/rejected": -1.6718429327011108, + "step": 10029 + }, + { + "epoch": 0.58, + "learning_rate": 3.8971102903881145e-08, + "logits/chosen": -1.9929720163345337, + "logits/rejected": -1.985133409500122, + "logps/chosen": -7.479220390319824, + "logps/rejected": -194.66372680664062, + "loss": 0.3815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1393657773733139, + "rewards/margins": 0.9919933676719666, + "rewards/rejected": -0.8526275753974915, + "step": 10030 + }, + { + "epoch": 0.58, + "learning_rate": 3.8961911195621945e-08, + "logits/chosen": -1.8469297885894775, + "logits/rejected": -1.848240852355957, + "logps/chosen": -17.304399490356445, + "logps/rejected": -158.8318328857422, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6247782111167908, + "rewards/margins": 1.9520165920257568, + "rewards/rejected": -1.3272384405136108, + "step": 10031 + }, + { + "epoch": 0.58, + "learning_rate": 3.8952719879489666e-08, + "logits/chosen": -1.997262716293335, + "logits/rejected": -1.98056960105896, + "logps/chosen": -3.851123332977295, + "logps/rejected": -175.99905395507812, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09174628555774689, + "rewards/margins": 4.510499477386475, + "rewards/rejected": -4.418753147125244, + "step": 10032 + }, + { + "epoch": 0.58, + "learning_rate": 3.894352895581081e-08, + "logits/chosen": -1.9119912385940552, + "logits/rejected": -1.9207394123077393, + "logps/chosen": -198.48141479492188, + "logps/rejected": -461.18035888671875, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5895721912384033, + "rewards/margins": 4.991930961608887, + "rewards/rejected": -2.4023590087890625, + "step": 10033 + }, + { + "epoch": 0.58, + "learning_rate": 3.8934338424911886e-08, + "logits/chosen": -2.07305908203125, + "logits/rejected": -2.0538368225097656, + "logps/chosen": -6.091433169785887e-05, + "logps/rejected": -290.4036865234375, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9594459145273504e-08, + "rewards/margins": 8.830352783203125, + "rewards/rejected": -8.830352783203125, + "step": 10034 + }, + { + "epoch": 0.58, + "learning_rate": 3.8925148287119394e-08, + "logits/chosen": -2.0646140575408936, + "logits/rejected": -2.0645885467529297, + "logps/chosen": -1.0800679922103882, + "logps/rejected": -148.33462524414062, + "loss": 0.3629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03641747310757637, + "rewards/margins": 3.3049261569976807, + "rewards/rejected": -3.341343641281128, + "step": 10035 + }, + { + "epoch": 0.58, + "learning_rate": 3.891595854275981e-08, + "logits/chosen": -2.061948299407959, + "logits/rejected": -2.0647406578063965, + "logps/chosen": -5.429759979248047, + "logps/rejected": -81.56748962402344, + "loss": 0.5496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11638083308935165, + "rewards/margins": 0.4447431266307831, + "rewards/rejected": -0.32836228609085083, + "step": 10036 + }, + { + "epoch": 0.58, + "learning_rate": 3.890676919215959e-08, + "logits/chosen": -2.037283182144165, + "logits/rejected": -2.0422959327697754, + "logps/chosen": -26.3608341217041, + "logps/rejected": -303.6277770996094, + "loss": 0.4568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39976978302001953, + "rewards/margins": 4.311085224151611, + "rewards/rejected": -4.710855007171631, + "step": 10037 + }, + { + "epoch": 0.58, + "learning_rate": 3.88975802356452e-08, + "logits/chosen": -1.994560718536377, + "logits/rejected": -1.9859124422073364, + "logps/chosen": -86.3941421508789, + "logps/rejected": -217.93460083007812, + "loss": 0.1702, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7307251691818237, + "rewards/margins": 1.1938278675079346, + "rewards/rejected": 0.5368973016738892, + "step": 10038 + }, + { + "epoch": 0.58, + "learning_rate": 3.888839167354305e-08, + "logits/chosen": -2.067366361618042, + "logits/rejected": -2.067037582397461, + "logps/chosen": -26.265165328979492, + "logps/rejected": -227.97970581054688, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14429035782814026, + "rewards/margins": 4.512895584106445, + "rewards/rejected": -4.368605136871338, + "step": 10039 + }, + { + "epoch": 0.58, + "learning_rate": 3.8879203506179606e-08, + "logits/chosen": -1.9359519481658936, + "logits/rejected": -1.8861647844314575, + "logps/chosen": -267.9344482421875, + "logps/rejected": -426.0595703125, + "loss": 0.0445, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.982870578765869, + "rewards/margins": 3.033435106277466, + "rewards/rejected": -0.05056457594037056, + "step": 10040 + }, + { + "epoch": 0.58, + "learning_rate": 3.8870015733881235e-08, + "logits/chosen": -2.052123546600342, + "logits/rejected": -2.0558595657348633, + "logps/chosen": -8.012735366821289, + "logps/rejected": -191.3485565185547, + "loss": 0.3024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2945553958415985, + "rewards/margins": 2.720057487487793, + "rewards/rejected": -2.425502061843872, + "step": 10041 + }, + { + "epoch": 0.58, + "learning_rate": 3.886082835697435e-08, + "logits/chosen": -2.043038845062256, + "logits/rejected": -2.0099105834960938, + "logps/chosen": -162.9881591796875, + "logps/rejected": -309.6836853027344, + "loss": 0.0873, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.106894016265869, + "rewards/margins": 2.1713502407073975, + "rewards/rejected": -0.06445617973804474, + "step": 10042 + }, + { + "epoch": 0.58, + "learning_rate": 3.8851641375785324e-08, + "logits/chosen": -1.8146159648895264, + "logits/rejected": -1.8149203062057495, + "logps/chosen": -1.057975172996521, + "logps/rejected": -30.266948699951172, + "loss": 0.6393, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.046878229826688766, + "rewards/margins": -0.013717472553253174, + "rewards/rejected": 0.06059570237994194, + "step": 10043 + }, + { + "epoch": 0.58, + "learning_rate": 3.884245479064054e-08, + "logits/chosen": -1.8011772632598877, + "logits/rejected": -1.8037875890731812, + "logps/chosen": -40.51431655883789, + "logps/rejected": -218.44174194335938, + "loss": 0.3701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7903679013252258, + "rewards/margins": 0.7735649347305298, + "rewards/rejected": 0.01680297963321209, + "step": 10044 + }, + { + "epoch": 0.58, + "learning_rate": 3.883326860186633e-08, + "logits/chosen": -1.9664567708969116, + "logits/rejected": -1.9531196355819702, + "logps/chosen": -56.26460266113281, + "logps/rejected": -183.55694580078125, + "loss": 0.2442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5122928619384766, + "rewards/margins": 2.2158145904541016, + "rewards/rejected": -1.703521728515625, + "step": 10045 + }, + { + "epoch": 0.58, + "learning_rate": 3.882408280978906e-08, + "logits/chosen": -1.9616498947143555, + "logits/rejected": -1.9574012756347656, + "logps/chosen": -18.348278045654297, + "logps/rejected": -146.72274780273438, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6507072448730469, + "rewards/margins": 1.3827720880508423, + "rewards/rejected": -0.7320648431777954, + "step": 10046 + }, + { + "epoch": 0.58, + "learning_rate": 3.881489741473501e-08, + "logits/chosen": -1.8469328880310059, + "logits/rejected": -1.8491522073745728, + "logps/chosen": -33.64451217651367, + "logps/rejected": -208.80615234375, + "loss": 0.2722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049890901893377304, + "rewards/margins": 4.010463714599609, + "rewards/rejected": -3.9605729579925537, + "step": 10047 + }, + { + "epoch": 0.58, + "learning_rate": 3.8805712417030535e-08, + "logits/chosen": -1.746616005897522, + "logits/rejected": -1.740049123764038, + "logps/chosen": -37.17242431640625, + "logps/rejected": -186.3779296875, + "loss": 0.2473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5854637026786804, + "rewards/margins": 1.9912376403808594, + "rewards/rejected": -1.4057739973068237, + "step": 10048 + }, + { + "epoch": 0.58, + "learning_rate": 3.87965278170019e-08, + "logits/chosen": -2.011577844619751, + "logits/rejected": -2.010571002960205, + "logps/chosen": -8.5350980758667, + "logps/rejected": -95.08511352539062, + "loss": 0.4634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10960831493139267, + "rewards/margins": 1.4097856283187866, + "rewards/rejected": -1.5193939208984375, + "step": 10049 + }, + { + "epoch": 0.58, + "learning_rate": 3.878734361497539e-08, + "logits/chosen": -1.7571760416030884, + "logits/rejected": -1.7537341117858887, + "logps/chosen": -9.974506378173828, + "logps/rejected": -96.08635711669922, + "loss": 0.6812, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06404886394739151, + "rewards/margins": -0.1395224630832672, + "rewards/rejected": 0.20357131958007812, + "step": 10050 + }, + { + "epoch": 0.58, + "learning_rate": 3.877815981127731e-08, + "logits/chosen": -2.0463106632232666, + "logits/rejected": -2.0390563011169434, + "logps/chosen": -85.95713806152344, + "logps/rejected": -230.98768615722656, + "loss": 0.6457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.631098210811615, + "rewards/margins": 0.5415931344032288, + "rewards/rejected": -1.1726913452148438, + "step": 10051 + }, + { + "epoch": 0.58, + "learning_rate": 3.876897640623386e-08, + "logits/chosen": -1.9900667667388916, + "logits/rejected": -1.9942996501922607, + "logps/chosen": -0.00012767090811394155, + "logps/rejected": -98.70769500732422, + "loss": 0.3633, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6355443171487423e-06, + "rewards/margins": 2.557304859161377, + "rewards/rejected": -2.5573012828826904, + "step": 10052 + }, + { + "epoch": 0.59, + "learning_rate": 3.8759793400171314e-08, + "logits/chosen": -1.8401740789413452, + "logits/rejected": -1.7916585206985474, + "logps/chosen": -217.4654541015625, + "logps/rejected": -440.75616455078125, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5337631702423096, + "rewards/margins": 2.214207410812378, + "rewards/rejected": 0.3195556700229645, + "step": 10053 + }, + { + "epoch": 0.59, + "learning_rate": 3.875061079341589e-08, + "logits/chosen": -1.9556884765625, + "logits/rejected": -1.9528714418411255, + "logps/chosen": -10.125252723693848, + "logps/rejected": -187.3168487548828, + "loss": 0.4277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037505246698856354, + "rewards/margins": 1.688936710357666, + "rewards/rejected": -1.7264419794082642, + "step": 10054 + }, + { + "epoch": 0.59, + "learning_rate": 3.87414285862938e-08, + "logits/chosen": -1.9642211198806763, + "logits/rejected": -1.9186395406723022, + "logps/chosen": -144.57305908203125, + "logps/rejected": -288.9953308105469, + "loss": 0.2197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7628815174102783, + "rewards/margins": 1.0151764154434204, + "rewards/rejected": 0.7477051019668579, + "step": 10055 + }, + { + "epoch": 0.59, + "learning_rate": 3.873224677913123e-08, + "logits/chosen": -2.002960443496704, + "logits/rejected": -2.0040781497955322, + "logps/chosen": -0.6828334331512451, + "logps/rejected": -83.1965103149414, + "loss": 0.4015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08092281967401505, + "rewards/margins": 1.3921709060668945, + "rewards/rejected": -1.3112480640411377, + "step": 10056 + }, + { + "epoch": 0.59, + "learning_rate": 3.8723065372254383e-08, + "logits/chosen": -1.9383915662765503, + "logits/rejected": -1.9349488019943237, + "logps/chosen": -120.64459228515625, + "logps/rejected": -218.07667541503906, + "loss": 0.2524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5325172543525696, + "rewards/margins": 2.7887306213378906, + "rewards/rejected": -2.256213426589966, + "step": 10057 + }, + { + "epoch": 0.59, + "learning_rate": 3.8713884365989405e-08, + "logits/chosen": -2.022334575653076, + "logits/rejected": -1.9936505556106567, + "logps/chosen": -159.05032348632812, + "logps/rejected": -308.8446960449219, + "loss": 0.1321, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.227947950363159, + "rewards/margins": 1.3366851806640625, + "rewards/rejected": 0.8912628293037415, + "step": 10058 + }, + { + "epoch": 0.59, + "learning_rate": 3.870470376066247e-08, + "logits/chosen": -1.9558550119400024, + "logits/rejected": -1.953796148300171, + "logps/chosen": -36.18828201293945, + "logps/rejected": -211.44761657714844, + "loss": 0.4665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04801902920007706, + "rewards/margins": 1.3252331018447876, + "rewards/rejected": -1.2772140502929688, + "step": 10059 + }, + { + "epoch": 0.59, + "learning_rate": 3.86955235565997e-08, + "logits/chosen": -1.8336243629455566, + "logits/rejected": -1.8434070348739624, + "logps/chosen": -2.71794997388497e-05, + "logps/rejected": -171.45254516601562, + "loss": 0.357, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.437225579247752e-07, + "rewards/margins": 3.133704423904419, + "rewards/rejected": -3.1337037086486816, + "step": 10060 + }, + { + "epoch": 0.59, + "learning_rate": 3.868634375412724e-08, + "logits/chosen": -1.960546612739563, + "logits/rejected": -1.9389652013778687, + "logps/chosen": -216.55702209472656, + "logps/rejected": -362.92657470703125, + "loss": 0.1007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.910121202468872, + "rewards/margins": 2.4651384353637695, + "rewards/rejected": -0.5550171136856079, + "step": 10061 + }, + { + "epoch": 0.59, + "learning_rate": 3.867716435357118e-08, + "logits/chosen": -1.8715996742248535, + "logits/rejected": -1.8636102676391602, + "logps/chosen": -38.188480377197266, + "logps/rejected": -178.435546875, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11889686435461044, + "rewards/margins": 2.555560827255249, + "rewards/rejected": -2.436663866043091, + "step": 10062 + }, + { + "epoch": 0.59, + "learning_rate": 3.866798535525764e-08, + "logits/chosen": -1.784129023551941, + "logits/rejected": -1.7720965147018433, + "logps/chosen": -276.0010986328125, + "logps/rejected": -455.7509765625, + "loss": 0.2703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.490142822265625, + "rewards/margins": 1.398950219154358, + "rewards/rejected": -0.9088073968887329, + "step": 10063 + }, + { + "epoch": 0.59, + "learning_rate": 3.865880675951267e-08, + "logits/chosen": -2.0658655166625977, + "logits/rejected": -2.0672097206115723, + "logps/chosen": -0.03147660195827484, + "logps/rejected": -50.14527893066406, + "loss": 0.5655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014043179107829928, + "rewards/margins": 0.5676435232162476, + "rewards/rejected": -0.5662391781806946, + "step": 10064 + }, + { + "epoch": 0.59, + "learning_rate": 3.864962856666239e-08, + "logits/chosen": -1.798724889755249, + "logits/rejected": -1.8237102031707764, + "logps/chosen": -190.8129119873047, + "logps/rejected": -260.38153076171875, + "loss": 0.1243, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2003707885742188, + "rewards/margins": 1.649195909500122, + "rewards/rejected": -0.44882509112358093, + "step": 10065 + }, + { + "epoch": 0.59, + "learning_rate": 3.864045077703279e-08, + "logits/chosen": -1.8211767673492432, + "logits/rejected": -1.7928022146224976, + "logps/chosen": -135.27549743652344, + "logps/rejected": -299.2267761230469, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.486825555562973, + "rewards/margins": 1.0277283191680908, + "rewards/rejected": -0.5409027338027954, + "step": 10066 + }, + { + "epoch": 0.59, + "learning_rate": 3.863127339094997e-08, + "logits/chosen": -1.8572279214859009, + "logits/rejected": -1.8294334411621094, + "logps/chosen": -224.3787841796875, + "logps/rejected": -462.54095458984375, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3275543451309204, + "rewards/margins": 3.9101595878601074, + "rewards/rejected": -2.5826051235198975, + "step": 10067 + }, + { + "epoch": 0.59, + "learning_rate": 3.8622096408739915e-08, + "logits/chosen": -1.8373022079467773, + "logits/rejected": -1.8398714065551758, + "logps/chosen": -58.024993896484375, + "logps/rejected": -203.3367919921875, + "loss": 0.3813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6802864074707031, + "rewards/margins": 0.6585105657577515, + "rewards/rejected": 0.02177581749856472, + "step": 10068 + }, + { + "epoch": 0.59, + "learning_rate": 3.861291983072866e-08, + "logits/chosen": -1.9260112047195435, + "logits/rejected": -1.926151156425476, + "logps/chosen": -20.9801025390625, + "logps/rejected": -55.75150680541992, + "loss": 0.5995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018532181158661842, + "rewards/margins": 0.42319947481155396, + "rewards/rejected": -0.44173166155815125, + "step": 10069 + }, + { + "epoch": 0.59, + "learning_rate": 3.8603743657242186e-08, + "logits/chosen": -1.760331153869629, + "logits/rejected": -1.7438615560531616, + "logps/chosen": -0.0028797052800655365, + "logps/rejected": -209.69830322265625, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003812998184002936, + "rewards/margins": 5.135453701019287, + "rewards/rejected": -5.135072231292725, + "step": 10070 + }, + { + "epoch": 0.59, + "learning_rate": 3.859456788860647e-08, + "logits/chosen": -1.9410275220870972, + "logits/rejected": -1.9421120882034302, + "logps/chosen": -11.411843299865723, + "logps/rejected": -247.72476196289062, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32259827852249146, + "rewards/margins": 1.1516633033752441, + "rewards/rejected": -0.8290649652481079, + "step": 10071 + }, + { + "epoch": 0.59, + "learning_rate": 3.858539252514753e-08, + "logits/chosen": -1.8515275716781616, + "logits/rejected": -1.840836524963379, + "logps/chosen": -33.44151306152344, + "logps/rejected": -143.65713500976562, + "loss": 0.3356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032018281519412994, + "rewards/margins": 1.9394019842147827, + "rewards/rejected": -1.9714202880859375, + "step": 10072 + }, + { + "epoch": 0.59, + "learning_rate": 3.8576217567191235e-08, + "logits/chosen": -2.004544258117676, + "logits/rejected": -1.9400246143341064, + "logps/chosen": -221.7919921875, + "logps/rejected": -501.23046875, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.472830295562744, + "rewards/margins": 6.770303726196289, + "rewards/rejected": -4.297473430633545, + "step": 10073 + }, + { + "epoch": 0.59, + "learning_rate": 3.85670430150636e-08, + "logits/chosen": -1.8522799015045166, + "logits/rejected": -1.855674386024475, + "logps/chosen": -16.948680877685547, + "logps/rejected": -165.1783447265625, + "loss": 0.3939, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06632614135742188, + "rewards/margins": 2.432750701904297, + "rewards/rejected": -2.4990768432617188, + "step": 10074 + }, + { + "epoch": 0.59, + "learning_rate": 3.8557868869090523e-08, + "logits/chosen": -1.7793573141098022, + "logits/rejected": -1.775064468383789, + "logps/chosen": -50.84803771972656, + "logps/rejected": -297.6633605957031, + "loss": 0.0514, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7609299421310425, + "rewards/margins": 4.953132629394531, + "rewards/rejected": -3.1922028064727783, + "step": 10075 + }, + { + "epoch": 0.59, + "learning_rate": 3.8548695129597916e-08, + "logits/chosen": -1.9275906085968018, + "logits/rejected": -1.9237250089645386, + "logps/chosen": -34.68573760986328, + "logps/rejected": -146.96092224121094, + "loss": 0.1984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2098690271377563, + "rewards/margins": 1.7439647912979126, + "rewards/rejected": -0.5340957641601562, + "step": 10076 + }, + { + "epoch": 0.59, + "learning_rate": 3.8539521796911654e-08, + "logits/chosen": -2.0082643032073975, + "logits/rejected": -2.009758949279785, + "logps/chosen": -1.2590609788894653, + "logps/rejected": -108.5008773803711, + "loss": 0.4335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21946506202220917, + "rewards/margins": 1.096623420715332, + "rewards/rejected": -0.8771583437919617, + "step": 10077 + }, + { + "epoch": 0.59, + "learning_rate": 3.853034887135766e-08, + "logits/chosen": -2.0627071857452393, + "logits/rejected": -2.0548338890075684, + "logps/chosen": -23.333948135375977, + "logps/rejected": -168.09014892578125, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2656934857368469, + "rewards/margins": 2.3269636631011963, + "rewards/rejected": -2.061270236968994, + "step": 10078 + }, + { + "epoch": 0.59, + "learning_rate": 3.8521176353261756e-08, + "logits/chosen": -1.982411503791809, + "logits/rejected": -1.9965014457702637, + "logps/chosen": -257.48681640625, + "logps/rejected": -304.19647216796875, + "loss": 0.0791, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.596435546875, + "rewards/margins": 2.250457763671875, + "rewards/rejected": 0.345977783203125, + "step": 10079 + }, + { + "epoch": 0.59, + "learning_rate": 3.851200424294984e-08, + "logits/chosen": -1.776611328125, + "logits/rejected": -1.7316398620605469, + "logps/chosen": -254.74356079101562, + "logps/rejected": -460.806396484375, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5719146728515625, + "rewards/margins": 3.123483180999756, + "rewards/rejected": 0.4484314024448395, + "step": 10080 + }, + { + "epoch": 0.59, + "learning_rate": 3.850283254074771e-08, + "logits/chosen": -2.0684759616851807, + "logits/rejected": -2.057929277420044, + "logps/chosen": -113.01278686523438, + "logps/rejected": -250.0555419921875, + "loss": 0.1751, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.807196021080017, + "rewards/margins": 1.241455078125, + "rewards/rejected": 0.5657410025596619, + "step": 10081 + }, + { + "epoch": 0.59, + "learning_rate": 3.849366124698121e-08, + "logits/chosen": -1.6960432529449463, + "logits/rejected": -1.6504820585250854, + "logps/chosen": -195.48834228515625, + "logps/rejected": -453.7593688964844, + "loss": 0.0704, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3785247802734375, + "rewards/margins": 2.774658203125, + "rewards/rejected": -1.3961334228515625, + "step": 10082 + }, + { + "epoch": 0.59, + "learning_rate": 3.848449036197615e-08, + "logits/chosen": -1.8616644144058228, + "logits/rejected": -1.8675541877746582, + "logps/chosen": -1.5191608667373657, + "logps/rejected": -171.45779418945312, + "loss": 0.3894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0032325030770152807, + "rewards/margins": 2.3856940269470215, + "rewards/rejected": -2.3824615478515625, + "step": 10083 + }, + { + "epoch": 0.59, + "learning_rate": 3.847531988605832e-08, + "logits/chosen": -1.8784793615341187, + "logits/rejected": -1.8862230777740479, + "logps/chosen": -274.1051025390625, + "logps/rejected": -400.2573547363281, + "loss": 0.0665, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.296240210533142, + "rewards/margins": 3.1371397972106934, + "rewards/rejected": -1.8408997058868408, + "step": 10084 + }, + { + "epoch": 0.59, + "learning_rate": 3.84661498195535e-08, + "logits/chosen": -1.963680624961853, + "logits/rejected": -1.980530023574829, + "logps/chosen": -156.0032958984375, + "logps/rejected": -209.24069213867188, + "loss": 0.1849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7429474592208862, + "rewards/margins": 1.1161606311798096, + "rewards/rejected": 0.6267868280410767, + "step": 10085 + }, + { + "epoch": 0.59, + "learning_rate": 3.8456980162787456e-08, + "logits/chosen": -2.155438184738159, + "logits/rejected": -2.1510438919067383, + "logps/chosen": -13.028308868408203, + "logps/rejected": -258.6050109863281, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05744647979736328, + "rewards/margins": 3.996142864227295, + "rewards/rejected": -3.9386963844299316, + "step": 10086 + }, + { + "epoch": 0.59, + "learning_rate": 3.844781091608594e-08, + "logits/chosen": -2.066535472869873, + "logits/rejected": -2.0557446479797363, + "logps/chosen": -64.19583129882812, + "logps/rejected": -236.63279724121094, + "loss": 0.3501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.793353259563446, + "rewards/margins": 0.8520156741142273, + "rewards/rejected": -0.05866241455078125, + "step": 10087 + }, + { + "epoch": 0.59, + "learning_rate": 3.843864207977469e-08, + "logits/chosen": -1.9198641777038574, + "logits/rejected": -1.9011335372924805, + "logps/chosen": -65.24884796142578, + "logps/rejected": -210.38949584960938, + "loss": 0.1874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7941520810127258, + "rewards/margins": 2.8074088096618652, + "rewards/rejected": -2.013256788253784, + "step": 10088 + }, + { + "epoch": 0.59, + "learning_rate": 3.842947365417942e-08, + "logits/chosen": -2.156691551208496, + "logits/rejected": -2.1491036415100098, + "logps/chosen": -121.05870056152344, + "logps/rejected": -327.1751403808594, + "loss": 0.1657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6293746829032898, + "rewards/margins": 4.375767707824707, + "rewards/rejected": -3.7463929653167725, + "step": 10089 + }, + { + "epoch": 0.59, + "learning_rate": 3.842030563962583e-08, + "logits/chosen": -1.8193976879119873, + "logits/rejected": -1.8169952630996704, + "logps/chosen": -0.00037450582021847367, + "logps/rejected": -145.28369140625, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.674518352956511e-05, + "rewards/margins": 1.4621843099594116, + "rewards/rejected": -1.4622009992599487, + "step": 10090 + }, + { + "epoch": 0.59, + "learning_rate": 3.841113803643966e-08, + "logits/chosen": -2.1138415336608887, + "logits/rejected": -2.1095101833343506, + "logps/chosen": -56.39899444580078, + "logps/rejected": -222.05657958984375, + "loss": 0.2187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6193878054618835, + "rewards/margins": 2.280038356781006, + "rewards/rejected": -1.660650610923767, + "step": 10091 + }, + { + "epoch": 0.59, + "learning_rate": 3.8401970844946526e-08, + "logits/chosen": -1.9741789102554321, + "logits/rejected": -1.9749327898025513, + "logps/chosen": -0.0013818240258842707, + "logps/rejected": -162.45974731445312, + "loss": 0.4098, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0400107031455263e-05, + "rewards/margins": 2.1231651306152344, + "rewards/rejected": -2.123225450515747, + "step": 10092 + }, + { + "epoch": 0.59, + "learning_rate": 3.8392804065472135e-08, + "logits/chosen": -2.146357297897339, + "logits/rejected": -2.139606237411499, + "logps/chosen": -0.6606724262237549, + "logps/rejected": -138.7119598388672, + "loss": 0.452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02322360686957836, + "rewards/margins": 1.7251681089401245, + "rewards/rejected": -1.7483917474746704, + "step": 10093 + }, + { + "epoch": 0.59, + "learning_rate": 3.838363769834211e-08, + "logits/chosen": -1.8136413097381592, + "logits/rejected": -1.7981313467025757, + "logps/chosen": -17.664398193359375, + "logps/rejected": -338.5516357421875, + "loss": 0.2332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40473100543022156, + "rewards/margins": 6.476767539978027, + "rewards/rejected": -6.0720367431640625, + "step": 10094 + }, + { + "epoch": 0.59, + "learning_rate": 3.837447174388211e-08, + "logits/chosen": -1.9859827756881714, + "logits/rejected": -1.9821453094482422, + "logps/chosen": -38.774173736572266, + "logps/rejected": -199.12796020507812, + "loss": 0.3881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10226020961999893, + "rewards/margins": 2.440791130065918, + "rewards/rejected": -2.543051242828369, + "step": 10095 + }, + { + "epoch": 0.59, + "learning_rate": 3.836530620241773e-08, + "logits/chosen": -2.042466402053833, + "logits/rejected": -2.0612385272979736, + "logps/chosen": -219.6119384765625, + "logps/rejected": -285.3870849609375, + "loss": 0.1266, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2317841053009033, + "rewards/margins": 1.7762360572814941, + "rewards/rejected": 0.45554810762405396, + "step": 10096 + }, + { + "epoch": 0.59, + "learning_rate": 3.83561410742746e-08, + "logits/chosen": -1.9263315200805664, + "logits/rejected": -1.9206368923187256, + "logps/chosen": -4.564873218536377, + "logps/rejected": -134.0438232421875, + "loss": 0.3973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029875516891479492, + "rewards/margins": 2.205631732940674, + "rewards/rejected": -2.2355072498321533, + "step": 10097 + }, + { + "epoch": 0.59, + "learning_rate": 3.834697635977827e-08, + "logits/chosen": -1.9663395881652832, + "logits/rejected": -1.9578163623809814, + "logps/chosen": -25.856117248535156, + "logps/rejected": -246.5884246826172, + "loss": 0.1948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6412284970283508, + "rewards/margins": 6.098615646362305, + "rewards/rejected": -5.4573869705200195, + "step": 10098 + }, + { + "epoch": 0.59, + "learning_rate": 3.833781205925436e-08, + "logits/chosen": -1.8282777070999146, + "logits/rejected": -1.8348963260650635, + "logps/chosen": -157.41476440429688, + "logps/rejected": -289.58917236328125, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3792679011821747, + "rewards/margins": 1.2165237665176392, + "rewards/rejected": -0.8372558951377869, + "step": 10099 + }, + { + "epoch": 0.59, + "learning_rate": 3.83286481730284e-08, + "logits/chosen": -1.9050053358078003, + "logits/rejected": -1.8943804502487183, + "logps/chosen": -181.66636657714844, + "logps/rejected": -444.57598876953125, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8309921026229858, + "rewards/margins": 4.1254777908325195, + "rewards/rejected": -2.294485569000244, + "step": 10100 + }, + { + "epoch": 0.59, + "learning_rate": 3.831948470142596e-08, + "logits/chosen": -1.8584089279174805, + "logits/rejected": -1.8568027019500732, + "logps/chosen": -141.06674194335938, + "logps/rejected": -229.75, + "loss": 0.5194, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8965698480606079, + "rewards/margins": -0.3534972667694092, + "rewards/rejected": 1.250067114830017, + "step": 10101 + }, + { + "epoch": 0.59, + "learning_rate": 3.831032164477255e-08, + "logits/chosen": -1.844942569732666, + "logits/rejected": -1.8477861881256104, + "logps/chosen": -203.16510009765625, + "logps/rejected": -303.6959228515625, + "loss": 0.0639, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.602870225906372, + "rewards/margins": 2.145430088043213, + "rewards/rejected": 0.45744019746780396, + "step": 10102 + }, + { + "epoch": 0.59, + "learning_rate": 3.8301159003393705e-08, + "logits/chosen": -1.93662691116333, + "logits/rejected": -1.929653286933899, + "logps/chosen": -222.8188018798828, + "logps/rejected": -225.23739624023438, + "loss": 0.1599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.687831163406372, + "rewards/margins": 1.6277694702148438, + "rewards/rejected": 0.06006164476275444, + "step": 10103 + }, + { + "epoch": 0.59, + "learning_rate": 3.82919967776149e-08, + "logits/chosen": -2.034808874130249, + "logits/rejected": -2.027764081954956, + "logps/chosen": -33.39186096191406, + "logps/rejected": -285.28851318359375, + "loss": 0.4639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08018646389245987, + "rewards/margins": 2.092521905899048, + "rewards/rejected": -2.17270827293396, + "step": 10104 + }, + { + "epoch": 0.59, + "learning_rate": 3.828283496776166e-08, + "logits/chosen": -1.8677787780761719, + "logits/rejected": -1.954118013381958, + "logps/chosen": -165.93621826171875, + "logps/rejected": -187.23794555664062, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9761520624160767, + "rewards/margins": 0.1823974847793579, + "rewards/rejected": 1.7937545776367188, + "step": 10105 + }, + { + "epoch": 0.59, + "learning_rate": 3.827367357415942e-08, + "logits/chosen": -1.9118750095367432, + "logits/rejected": -1.9093999862670898, + "logps/chosen": -1.320674180984497, + "logps/rejected": -149.43994140625, + "loss": 0.3245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11947216838598251, + "rewards/margins": 3.3291425704956055, + "rewards/rejected": -3.209670305252075, + "step": 10106 + }, + { + "epoch": 0.59, + "learning_rate": 3.8264512597133666e-08, + "logits/chosen": -2.0996029376983643, + "logits/rejected": -2.0946733951568604, + "logps/chosen": -42.9906120300293, + "logps/rejected": -214.24172973632812, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18112297356128693, + "rewards/margins": 2.312260150909424, + "rewards/rejected": -2.1311371326446533, + "step": 10107 + }, + { + "epoch": 0.59, + "learning_rate": 3.825535203700982e-08, + "logits/chosen": -1.9176807403564453, + "logits/rejected": -2.0033934116363525, + "logps/chosen": -319.28826904296875, + "logps/rejected": -280.81463623046875, + "loss": 0.0466, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.557821750640869, + "rewards/margins": 2.3650331497192383, + "rewards/rejected": 1.1927887201309204, + "step": 10108 + }, + { + "epoch": 0.59, + "learning_rate": 3.824619189411333e-08, + "logits/chosen": -1.8361871242523193, + "logits/rejected": -1.8815208673477173, + "logps/chosen": -187.84979248046875, + "logps/rejected": -241.27207946777344, + "loss": 0.1042, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.09806227684021, + "rewards/margins": 1.6629350185394287, + "rewards/rejected": 0.43512725830078125, + "step": 10109 + }, + { + "epoch": 0.59, + "learning_rate": 3.8237032168769585e-08, + "logits/chosen": -2.0854263305664062, + "logits/rejected": -2.077240467071533, + "logps/chosen": -0.0008469083695672452, + "logps/rejected": -89.09970092773438, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001238342229044065, + "rewards/margins": 0.837469756603241, + "rewards/rejected": -0.8373458981513977, + "step": 10110 + }, + { + "epoch": 0.59, + "learning_rate": 3.8227872861303985e-08, + "logits/chosen": -2.0031516551971436, + "logits/rejected": -1.9952789545059204, + "logps/chosen": -7.501269340515137, + "logps/rejected": -103.11734008789062, + "loss": 0.5857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3249034881591797, + "rewards/margins": 0.13497428596019745, + "rewards/rejected": 0.18992920219898224, + "step": 10111 + }, + { + "epoch": 0.59, + "learning_rate": 3.821871397204196e-08, + "logits/chosen": -2.0719094276428223, + "logits/rejected": -2.0610857009887695, + "logps/chosen": -64.2105941772461, + "logps/rejected": -298.070068359375, + "loss": 0.4487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5989250540733337, + "rewards/margins": 3.277719736099243, + "rewards/rejected": -3.8766448497772217, + "step": 10112 + }, + { + "epoch": 0.59, + "learning_rate": 3.820955550130881e-08, + "logits/chosen": -2.0002388954162598, + "logits/rejected": -2.0006325244903564, + "logps/chosen": -22.833927154541016, + "logps/rejected": -143.61181640625, + "loss": 0.3088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2747173309326172, + "rewards/margins": 2.487304449081421, + "rewards/rejected": -2.2125871181488037, + "step": 10113 + }, + { + "epoch": 0.59, + "learning_rate": 3.820039744942994e-08, + "logits/chosen": -1.9362622499465942, + "logits/rejected": -1.9568257331848145, + "logps/chosen": -264.20758056640625, + "logps/rejected": -393.7753601074219, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9298462867736816, + "rewards/margins": 2.639975070953369, + "rewards/rejected": 0.2898712158203125, + "step": 10114 + }, + { + "epoch": 0.59, + "learning_rate": 3.819123981673066e-08, + "logits/chosen": -2.0592100620269775, + "logits/rejected": -2.040837287902832, + "logps/chosen": -40.63263702392578, + "logps/rejected": -189.6774444580078, + "loss": 0.2733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36781617999076843, + "rewards/margins": 2.951319932937622, + "rewards/rejected": -2.5835037231445312, + "step": 10115 + }, + { + "epoch": 0.59, + "learning_rate": 3.818208260353632e-08, + "logits/chosen": -1.863946795463562, + "logits/rejected": -1.8525363206863403, + "logps/chosen": -69.61895751953125, + "logps/rejected": -182.22186279296875, + "loss": 0.4075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2740821838378906, + "rewards/margins": 1.1392982006072998, + "rewards/rejected": -0.865216076374054, + "step": 10116 + }, + { + "epoch": 0.59, + "learning_rate": 3.8172925810172204e-08, + "logits/chosen": -2.0643539428710938, + "logits/rejected": -2.055717706680298, + "logps/chosen": -0.0007361018797382712, + "logps/rejected": -140.60687255859375, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.94974951329641e-05, + "rewards/margins": 2.8673934936523438, + "rewards/rejected": -2.8674628734588623, + "step": 10117 + }, + { + "epoch": 0.59, + "learning_rate": 3.8163769436963624e-08, + "logits/chosen": -1.8815675973892212, + "logits/rejected": -1.8786845207214355, + "logps/chosen": -178.1145477294922, + "logps/rejected": -420.414794921875, + "loss": 0.0358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.947773814201355, + "rewards/margins": 4.1841139793396, + "rewards/rejected": -2.236340284347534, + "step": 10118 + }, + { + "epoch": 0.59, + "learning_rate": 3.815461348423584e-08, + "logits/chosen": -1.9131566286087036, + "logits/rejected": -1.9119236469268799, + "logps/chosen": -16.858108520507812, + "logps/rejected": -254.4502716064453, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10493335872888565, + "rewards/margins": 5.6219024658203125, + "rewards/rejected": -5.516969203948975, + "step": 10119 + }, + { + "epoch": 0.59, + "learning_rate": 3.814545795231413e-08, + "logits/chosen": -1.8736833333969116, + "logits/rejected": -1.8630443811416626, + "logps/chosen": -227.35731506347656, + "logps/rejected": -441.28857421875, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.604327440261841, + "rewards/margins": 5.092556953430176, + "rewards/rejected": -2.488229513168335, + "step": 10120 + }, + { + "epoch": 0.59, + "learning_rate": 3.8136302841523734e-08, + "logits/chosen": -1.9111664295196533, + "logits/rejected": -1.9099509716033936, + "logps/chosen": -39.25748825073242, + "logps/rejected": -166.5486297607422, + "loss": 0.2497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9620029330253601, + "rewards/margins": 1.9153003692626953, + "rewards/rejected": -0.9532974362373352, + "step": 10121 + }, + { + "epoch": 0.59, + "learning_rate": 3.812714815218991e-08, + "logits/chosen": -2.0828287601470947, + "logits/rejected": -2.072910785675049, + "logps/chosen": -16.725507736206055, + "logps/rejected": -172.75753784179688, + "loss": 0.1947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9309927225112915, + "rewards/margins": 2.388796091079712, + "rewards/rejected": -1.4578033685684204, + "step": 10122 + }, + { + "epoch": 0.59, + "learning_rate": 3.811799388463785e-08, + "logits/chosen": -1.805623173713684, + "logits/rejected": -1.7974389791488647, + "logps/chosen": -230.349853515625, + "logps/rejected": -302.097900390625, + "loss": 0.2533, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.711752414703369, + "rewards/margins": 0.4302337169647217, + "rewards/rejected": 3.2815186977386475, + "step": 10123 + }, + { + "epoch": 0.59, + "learning_rate": 3.810884003919277e-08, + "logits/chosen": -1.9717183113098145, + "logits/rejected": -1.9608142375946045, + "logps/chosen": -5.255392074584961, + "logps/rejected": -138.83177185058594, + "loss": 0.4433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16207380592823029, + "rewards/margins": 1.1241053342819214, + "rewards/rejected": -0.9620315432548523, + "step": 10124 + }, + { + "epoch": 0.59, + "learning_rate": 3.809968661617986e-08, + "logits/chosen": -1.9285427331924438, + "logits/rejected": -1.921905279159546, + "logps/chosen": -7.875600337982178, + "logps/rejected": -89.86111450195312, + "loss": 0.6763, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08746514469385147, + "rewards/margins": -0.07497229427099228, + "rewards/rejected": 0.16243743896484375, + "step": 10125 + }, + { + "epoch": 0.59, + "learning_rate": 3.809053361592429e-08, + "logits/chosen": -1.8869118690490723, + "logits/rejected": -1.8608421087265015, + "logps/chosen": -276.6785888671875, + "logps/rejected": -495.12152099609375, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1177825927734375, + "rewards/margins": 4.5555572509765625, + "rewards/rejected": -2.437774658203125, + "step": 10126 + }, + { + "epoch": 0.59, + "learning_rate": 3.8081381038751216e-08, + "logits/chosen": -1.9825949668884277, + "logits/rejected": -1.9887455701828003, + "logps/chosen": -121.62347412109375, + "logps/rejected": -334.8453369140625, + "loss": 0.0944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3514999151229858, + "rewards/margins": 3.1960830688476562, + "rewards/rejected": -1.8445831537246704, + "step": 10127 + }, + { + "epoch": 0.59, + "learning_rate": 3.80722288849858e-08, + "logits/chosen": -1.9363638162612915, + "logits/rejected": -1.946818232536316, + "logps/chosen": -6.372881889343262, + "logps/rejected": -168.1190948486328, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18512506783008575, + "rewards/margins": 3.777045726776123, + "rewards/rejected": -3.5919206142425537, + "step": 10128 + }, + { + "epoch": 0.59, + "learning_rate": 3.8063077154953146e-08, + "logits/chosen": -1.9323220252990723, + "logits/rejected": -1.9206808805465698, + "logps/chosen": -187.35073852539062, + "logps/rejected": -336.4109802246094, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3051788806915283, + "rewards/margins": 2.69586181640625, + "rewards/rejected": -0.39068299531936646, + "step": 10129 + }, + { + "epoch": 0.59, + "learning_rate": 3.805392584897839e-08, + "logits/chosen": -2.09733510017395, + "logits/rejected": -2.072563648223877, + "logps/chosen": -231.13580322265625, + "logps/rejected": -413.3100280761719, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.006918430328369, + "rewards/margins": 4.105847358703613, + "rewards/rejected": -2.098928928375244, + "step": 10130 + }, + { + "epoch": 0.59, + "learning_rate": 3.804477496738661e-08, + "logits/chosen": -1.841122031211853, + "logits/rejected": -1.8764424324035645, + "logps/chosen": -231.75628662109375, + "logps/rejected": -639.061279296875, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9207489490509033, + "rewards/margins": 9.42334270477295, + "rewards/rejected": -7.502593994140625, + "step": 10131 + }, + { + "epoch": 0.59, + "learning_rate": 3.8035624510502896e-08, + "logits/chosen": -2.0383942127227783, + "logits/rejected": -2.0688674449920654, + "logps/chosen": -162.38233947753906, + "logps/rejected": -399.23968505859375, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.148420810699463, + "rewards/margins": 4.865312576293945, + "rewards/rejected": -2.7168915271759033, + "step": 10132 + }, + { + "epoch": 0.59, + "learning_rate": 3.802647447865236e-08, + "logits/chosen": -1.7518750429153442, + "logits/rejected": -1.7499679327011108, + "logps/chosen": -36.3414306640625, + "logps/rejected": -131.15599060058594, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8913406729698181, + "rewards/margins": 1.0776886940002441, + "rewards/rejected": -0.18634796142578125, + "step": 10133 + }, + { + "epoch": 0.59, + "learning_rate": 3.801732487215998e-08, + "logits/chosen": -1.897284984588623, + "logits/rejected": -1.8972992897033691, + "logps/chosen": -13.169086456298828, + "logps/rejected": -119.0150146484375, + "loss": 0.4535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07140465080738068, + "rewards/margins": 1.583828330039978, + "rewards/rejected": -1.6552330255508423, + "step": 10134 + }, + { + "epoch": 0.59, + "learning_rate": 3.800817569135086e-08, + "logits/chosen": -2.005993604660034, + "logits/rejected": -2.0025336742401123, + "logps/chosen": -44.54833984375, + "logps/rejected": -170.039306640625, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31216737627983093, + "rewards/margins": 2.322659492492676, + "rewards/rejected": -2.0104920864105225, + "step": 10135 + }, + { + "epoch": 0.59, + "learning_rate": 3.799902693654998e-08, + "logits/chosen": -1.6974462270736694, + "logits/rejected": -1.6839165687561035, + "logps/chosen": -193.87637329101562, + "logps/rejected": -572.962158203125, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.469244360923767, + "rewards/margins": 5.797418594360352, + "rewards/rejected": -4.328174114227295, + "step": 10136 + }, + { + "epoch": 0.59, + "learning_rate": 3.7989878608082384e-08, + "logits/chosen": -2.0733747482299805, + "logits/rejected": -2.0746662616729736, + "logps/chosen": -0.617168128490448, + "logps/rejected": -118.2895736694336, + "loss": 0.3645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014271008782088757, + "rewards/margins": 2.612220287322998, + "rewards/rejected": -2.597949266433716, + "step": 10137 + }, + { + "epoch": 0.59, + "learning_rate": 3.798073070627303e-08, + "logits/chosen": -1.829213261604309, + "logits/rejected": -1.876550555229187, + "logps/chosen": -321.18994140625, + "logps/rejected": -395.8548889160156, + "loss": 0.0647, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0181092023849487, + "rewards/margins": 3.324990749359131, + "rewards/rejected": -2.3068816661834717, + "step": 10138 + }, + { + "epoch": 0.59, + "learning_rate": 3.7971583231446936e-08, + "logits/chosen": -1.9249173402786255, + "logits/rejected": -1.9045674800872803, + "logps/chosen": -57.01957702636719, + "logps/rejected": -180.96844482421875, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1927940398454666, + "rewards/margins": 1.316231608390808, + "rewards/rejected": -1.123437523841858, + "step": 10139 + }, + { + "epoch": 0.59, + "learning_rate": 3.796243618392903e-08, + "logits/chosen": -2.0285353660583496, + "logits/rejected": -2.0253984928131104, + "logps/chosen": -18.837570190429688, + "logps/rejected": -117.94470977783203, + "loss": 0.3664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6616876721382141, + "rewards/margins": 1.1803690195083618, + "rewards/rejected": -0.5186813473701477, + "step": 10140 + }, + { + "epoch": 0.59, + "learning_rate": 3.795328956404428e-08, + "logits/chosen": -2.0425057411193848, + "logits/rejected": -1.9820375442504883, + "logps/chosen": -231.468994140625, + "logps/rejected": -440.37139892578125, + "loss": 0.0585, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.18389892578125, + "rewards/margins": 2.9475340843200684, + "rewards/rejected": -0.7636352777481079, + "step": 10141 + }, + { + "epoch": 0.59, + "learning_rate": 3.7944143372117596e-08, + "logits/chosen": -2.017254114151001, + "logits/rejected": -2.009791135787964, + "logps/chosen": -2.96122407913208, + "logps/rejected": -240.91294860839844, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022562289610505104, + "rewards/margins": 3.0006415843963623, + "rewards/rejected": -2.978079319000244, + "step": 10142 + }, + { + "epoch": 0.59, + "learning_rate": 3.7934997608473924e-08, + "logits/chosen": -1.788187026977539, + "logits/rejected": -1.8030614852905273, + "logps/chosen": -227.97906494140625, + "logps/rejected": -368.7792053222656, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2353851795196533, + "rewards/margins": 4.59027099609375, + "rewards/rejected": -2.3548858165740967, + "step": 10143 + }, + { + "epoch": 0.59, + "learning_rate": 3.792585227343814e-08, + "logits/chosen": -2.0964834690093994, + "logits/rejected": -2.117616891860962, + "logps/chosen": -198.23519897460938, + "logps/rejected": -387.6079406738281, + "loss": 0.0675, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5695708990097046, + "rewards/margins": 2.6929931640625, + "rewards/rejected": -1.1234222650527954, + "step": 10144 + }, + { + "epoch": 0.59, + "learning_rate": 3.7916707367335164e-08, + "logits/chosen": -1.8402957916259766, + "logits/rejected": -1.805862307548523, + "logps/chosen": -241.72024536132812, + "logps/rejected": -341.8778381347656, + "loss": 0.2312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8150909543037415, + "rewards/margins": 1.0497039556503296, + "rewards/rejected": -0.23461304605007172, + "step": 10145 + }, + { + "epoch": 0.59, + "learning_rate": 3.790756289048983e-08, + "logits/chosen": -1.9993785619735718, + "logits/rejected": -2.0000524520874023, + "logps/chosen": -63.883018493652344, + "logps/rejected": -301.7362060546875, + "loss": 0.3048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11734962463378906, + "rewards/margins": 3.3252651691436768, + "rewards/rejected": -3.442614793777466, + "step": 10146 + }, + { + "epoch": 0.59, + "learning_rate": 3.789841884322703e-08, + "logits/chosen": -1.7494208812713623, + "logits/rejected": -1.7186775207519531, + "logps/chosen": -153.30587768554688, + "logps/rejected": -347.43914794921875, + "loss": 0.1425, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.45110023021698, + "rewards/margins": 1.5081497430801392, + "rewards/rejected": -0.05704956129193306, + "step": 10147 + }, + { + "epoch": 0.59, + "learning_rate": 3.7889275225871574e-08, + "logits/chosen": -1.994476079940796, + "logits/rejected": -1.9935582876205444, + "logps/chosen": -13.500052452087402, + "logps/rejected": -70.73533630371094, + "loss": 0.51, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024999333545565605, + "rewards/margins": 0.8399170637130737, + "rewards/rejected": -0.8149177432060242, + "step": 10148 + }, + { + "epoch": 0.59, + "learning_rate": 3.788013203874831e-08, + "logits/chosen": -2.0622611045837402, + "logits/rejected": -2.057562828063965, + "logps/chosen": -23.939884185791016, + "logps/rejected": -121.584716796875, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2543690204620361, + "rewards/margins": 3.0840885639190674, + "rewards/rejected": -1.8297195434570312, + "step": 10149 + }, + { + "epoch": 0.59, + "learning_rate": 3.7870989282182034e-08, + "logits/chosen": -1.876078486442566, + "logits/rejected": -1.923014760017395, + "logps/chosen": -205.00888061523438, + "logps/rejected": -441.66448974609375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8369125127792358, + "rewards/margins": 4.2998762130737305, + "rewards/rejected": -2.462963819503784, + "step": 10150 + }, + { + "epoch": 0.59, + "learning_rate": 3.786184695649753e-08, + "logits/chosen": -1.8439140319824219, + "logits/rejected": -1.8609848022460938, + "logps/chosen": -218.21292114257812, + "logps/rejected": -421.22833251953125, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7761048078536987, + "rewards/margins": 1.9370423555374146, + "rewards/rejected": -0.16093750298023224, + "step": 10151 + }, + { + "epoch": 0.59, + "learning_rate": 3.7852705062019643e-08, + "logits/chosen": -2.031400680541992, + "logits/rejected": -2.0218846797943115, + "logps/chosen": -211.9312744140625, + "logps/rejected": -265.4541320800781, + "loss": 0.2245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.887110948562622, + "rewards/margins": 0.7895249128341675, + "rewards/rejected": 1.0975860357284546, + "step": 10152 + }, + { + "epoch": 0.59, + "learning_rate": 3.7843563599073046e-08, + "logits/chosen": -1.5301827192306519, + "logits/rejected": -1.5286329984664917, + "logps/chosen": -30.815654754638672, + "logps/rejected": -171.03030395507812, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18641071021556854, + "rewards/margins": 3.8420960903167725, + "rewards/rejected": -4.028506755828857, + "step": 10153 + }, + { + "epoch": 0.59, + "learning_rate": 3.783442256798257e-08, + "logits/chosen": -2.017052173614502, + "logits/rejected": -2.0211987495422363, + "logps/chosen": -37.51097106933594, + "logps/rejected": -247.02853393554688, + "loss": 0.0948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.377192735671997, + "rewards/margins": 3.6502702236175537, + "rewards/rejected": -2.2730774879455566, + "step": 10154 + }, + { + "epoch": 0.59, + "learning_rate": 3.7825281969072875e-08, + "logits/chosen": -2.008582830429077, + "logits/rejected": -2.0021257400512695, + "logps/chosen": -0.00018965438357554376, + "logps/rejected": -75.1998519897461, + "loss": 0.4172, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.380620470736176e-05, + "rewards/margins": 1.7803902626037598, + "rewards/rejected": -1.78032648563385, + "step": 10155 + }, + { + "epoch": 0.59, + "learning_rate": 3.7816141802668744e-08, + "logits/chosen": -1.9398330450057983, + "logits/rejected": -1.9356690645217896, + "logps/chosen": -1.9341175556182861, + "logps/rejected": -96.30523681640625, + "loss": 0.471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2597031891345978, + "rewards/margins": 0.753145694732666, + "rewards/rejected": -0.4934425354003906, + "step": 10156 + }, + { + "epoch": 0.59, + "learning_rate": 3.780700206909484e-08, + "logits/chosen": -2.0245361328125, + "logits/rejected": -2.0247700214385986, + "logps/chosen": -0.08031149953603745, + "logps/rejected": -80.27653503417969, + "loss": 0.6138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002392715308815241, + "rewards/margins": 0.3338942229747772, + "rewards/rejected": -0.3362869322299957, + "step": 10157 + }, + { + "epoch": 0.59, + "learning_rate": 3.779786276867588e-08, + "logits/chosen": -2.038926124572754, + "logits/rejected": -2.0367040634155273, + "logps/chosen": -80.15557098388672, + "logps/rejected": -328.69140625, + "loss": 0.072, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5863220691680908, + "rewards/margins": 5.97177791595459, + "rewards/rejected": -4.38545560836792, + "step": 10158 + }, + { + "epoch": 0.59, + "learning_rate": 3.7788723901736515e-08, + "logits/chosen": -1.83739173412323, + "logits/rejected": -1.8462437391281128, + "logps/chosen": -248.31594848632812, + "logps/rejected": -397.92071533203125, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2714996337890625, + "rewards/margins": 4.9124603271484375, + "rewards/rejected": -1.640960693359375, + "step": 10159 + }, + { + "epoch": 0.59, + "learning_rate": 3.777958546860142e-08, + "logits/chosen": -1.7220722436904907, + "logits/rejected": -1.694905161857605, + "logps/chosen": -145.49673461914062, + "logps/rejected": -207.4798126220703, + "loss": 0.1287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1819610595703125, + "rewards/margins": 1.7253174781799316, + "rewards/rejected": -0.5433563590049744, + "step": 10160 + }, + { + "epoch": 0.59, + "learning_rate": 3.77704474695952e-08, + "logits/chosen": -2.047287702560425, + "logits/rejected": -2.0469393730163574, + "logps/chosen": -207.7064666748047, + "logps/rejected": -384.9275207519531, + "loss": 0.0708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.597712755203247, + "rewards/margins": 2.7127671241760254, + "rewards/rejected": -1.1150543689727783, + "step": 10161 + }, + { + "epoch": 0.59, + "learning_rate": 3.776130990504253e-08, + "logits/chosen": -2.066038131713867, + "logits/rejected": -2.053471326828003, + "logps/chosen": -0.827248215675354, + "logps/rejected": -115.66411590576172, + "loss": 0.3719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0148392915725708, + "rewards/margins": 2.271000862121582, + "rewards/rejected": -2.2561614513397217, + "step": 10162 + }, + { + "epoch": 0.59, + "learning_rate": 3.775217277526798e-08, + "logits/chosen": -2.098372220993042, + "logits/rejected": -2.078885316848755, + "logps/chosen": -11.751801490783691, + "logps/rejected": -282.6864013671875, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14985179901123047, + "rewards/margins": 4.720782279968262, + "rewards/rejected": -4.570930480957031, + "step": 10163 + }, + { + "epoch": 0.59, + "learning_rate": 3.774303608059618e-08, + "logits/chosen": -2.0151333808898926, + "logits/rejected": -2.0087294578552246, + "logps/chosen": -23.66830062866211, + "logps/rejected": -320.7672424316406, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2721754014492035, + "rewards/margins": 8.650362014770508, + "rewards/rejected": -8.378186225891113, + "step": 10164 + }, + { + "epoch": 0.59, + "learning_rate": 3.7733899821351686e-08, + "logits/chosen": -2.047891139984131, + "logits/rejected": -2.0266640186309814, + "logps/chosen": -0.00014483316044788808, + "logps/rejected": -166.36871337890625, + "loss": 0.3636, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7651387679507025e-05, + "rewards/margins": 3.234804630279541, + "rewards/rejected": -3.2347869873046875, + "step": 10165 + }, + { + "epoch": 0.59, + "learning_rate": 3.772476399785907e-08, + "logits/chosen": -2.02300763130188, + "logits/rejected": -2.008756160736084, + "logps/chosen": -89.09056091308594, + "logps/rejected": -476.8936767578125, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7913620471954346, + "rewards/margins": 6.165724754333496, + "rewards/rejected": -4.374362468719482, + "step": 10166 + }, + { + "epoch": 0.59, + "learning_rate": 3.771562861044288e-08, + "logits/chosen": -2.0815343856811523, + "logits/rejected": -2.0771355628967285, + "logps/chosen": -118.76705932617188, + "logps/rejected": -240.7632598876953, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3747711181640625, + "rewards/margins": 1.24432373046875, + "rewards/rejected": -0.8695526123046875, + "step": 10167 + }, + { + "epoch": 0.59, + "learning_rate": 3.770649365942766e-08, + "logits/chosen": -1.5795530080795288, + "logits/rejected": -1.567996859550476, + "logps/chosen": -160.77410888671875, + "logps/rejected": -316.173583984375, + "loss": 0.1473, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.480670213699341, + "rewards/margins": 1.288598656654358, + "rewards/rejected": 1.192071557044983, + "step": 10168 + }, + { + "epoch": 0.59, + "learning_rate": 3.7697359145137906e-08, + "logits/chosen": -2.0116729736328125, + "logits/rejected": -2.006673812866211, + "logps/chosen": -0.10123804211616516, + "logps/rejected": -88.17570495605469, + "loss": 0.5056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05957348272204399, + "rewards/margins": 0.8644394874572754, + "rewards/rejected": -0.8048660159111023, + "step": 10169 + }, + { + "epoch": 0.59, + "learning_rate": 3.768822506789814e-08, + "logits/chosen": -1.9669138193130493, + "logits/rejected": -1.9676952362060547, + "logps/chosen": -3.7162561416625977, + "logps/rejected": -146.47108459472656, + "loss": 0.4532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10916276276111603, + "rewards/margins": 1.3352080583572388, + "rewards/rejected": -1.2260452508926392, + "step": 10170 + }, + { + "epoch": 0.59, + "learning_rate": 3.767909142803284e-08, + "logits/chosen": -1.9259761571884155, + "logits/rejected": -1.9144446849822998, + "logps/chosen": -224.7808837890625, + "logps/rejected": -443.00848388671875, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.419633626937866, + "rewards/margins": 6.25859260559082, + "rewards/rejected": -3.838958740234375, + "step": 10171 + }, + { + "epoch": 0.59, + "learning_rate": 3.766995822586647e-08, + "logits/chosen": -1.909000277519226, + "logits/rejected": -1.918945074081421, + "logps/chosen": -254.96588134765625, + "logps/rejected": -501.947998046875, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.644274950027466, + "rewards/margins": 2.9425294399261475, + "rewards/rejected": -0.2982544004917145, + "step": 10172 + }, + { + "epoch": 0.59, + "learning_rate": 3.7660825461723525e-08, + "logits/chosen": -1.8476142883300781, + "logits/rejected": -1.846653699874878, + "logps/chosen": -20.28876304626465, + "logps/rejected": -305.9609680175781, + "loss": 0.4198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2030738890171051, + "rewards/margins": 2.5605368614196777, + "rewards/rejected": -2.76361083984375, + "step": 10173 + }, + { + "epoch": 0.59, + "learning_rate": 3.765169313592839e-08, + "logits/chosen": -1.8795732259750366, + "logits/rejected": -1.8724087476730347, + "logps/chosen": -305.77105712890625, + "logps/rejected": -456.97021484375, + "loss": 0.2079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.99853515625, + "rewards/margins": 1.24383544921875, + "rewards/rejected": 0.75469970703125, + "step": 10174 + }, + { + "epoch": 0.59, + "learning_rate": 3.764256124880555e-08, + "logits/chosen": -1.859800934791565, + "logits/rejected": -1.8577526807785034, + "logps/chosen": -63.43647766113281, + "logps/rejected": -143.19842529296875, + "loss": 0.3304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.73699951171875, + "rewards/margins": 1.0889450311660767, + "rewards/rejected": -0.3519454896450043, + "step": 10175 + }, + { + "epoch": 0.59, + "learning_rate": 3.763342980067934e-08, + "logits/chosen": -1.7322282791137695, + "logits/rejected": -1.8401916027069092, + "logps/chosen": -192.76153564453125, + "logps/rejected": -296.0658264160156, + "loss": 0.1126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3919464349746704, + "rewards/margins": 1.9356567859649658, + "rewards/rejected": -0.5437103509902954, + "step": 10176 + }, + { + "epoch": 0.59, + "learning_rate": 3.7624298791874224e-08, + "logits/chosen": -1.868553638458252, + "logits/rejected": -1.8667243719100952, + "logps/chosen": -2.104766368865967, + "logps/rejected": -26.10689353942871, + "loss": 0.7433, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05500531196594238, + "rewards/margins": -0.23279309272766113, + "rewards/rejected": 0.17778778076171875, + "step": 10177 + }, + { + "epoch": 0.59, + "learning_rate": 3.761516822271454e-08, + "logits/chosen": -2.0430822372436523, + "logits/rejected": -2.058861494064331, + "logps/chosen": -221.44967651367188, + "logps/rejected": -246.36990356445312, + "loss": 0.2834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.027583360671997, + "rewards/margins": 1.0480422973632812, + "rewards/rejected": -0.02045898512005806, + "step": 10178 + }, + { + "epoch": 0.59, + "learning_rate": 3.7606038093524665e-08, + "logits/chosen": -2.0103907585144043, + "logits/rejected": -2.0055627822875977, + "logps/chosen": -17.466571807861328, + "logps/rejected": -102.05900573730469, + "loss": 0.5587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33934783935546875, + "rewards/margins": 0.2505546510219574, + "rewards/rejected": 0.08879318088293076, + "step": 10179 + }, + { + "epoch": 0.59, + "learning_rate": 3.7596908404628935e-08, + "logits/chosen": -1.9657578468322754, + "logits/rejected": -1.955969214439392, + "logps/chosen": -95.22650909423828, + "logps/rejected": -390.804443359375, + "loss": 0.1942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4861869812011719, + "rewards/margins": 1.0070793628692627, + "rewards/rejected": 0.47910767793655396, + "step": 10180 + }, + { + "epoch": 0.59, + "learning_rate": 3.7587779156351705e-08, + "logits/chosen": -2.1291379928588867, + "logits/rejected": -2.1171019077301025, + "logps/chosen": -7.001639366149902, + "logps/rejected": -234.00750732421875, + "loss": 0.3564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03735242038965225, + "rewards/margins": 4.256600379943848, + "rewards/rejected": -4.293952941894531, + "step": 10181 + }, + { + "epoch": 0.59, + "learning_rate": 3.757865034901726e-08, + "logits/chosen": -1.849942684173584, + "logits/rejected": -1.8513308763504028, + "logps/chosen": -0.00010835842113010585, + "logps/rejected": -121.67349243164062, + "loss": 0.4665, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.066164249001304e-06, + "rewards/margins": 1.272610068321228, + "rewards/rejected": -1.2726150751113892, + "step": 10182 + }, + { + "epoch": 0.59, + "learning_rate": 3.756952198294992e-08, + "logits/chosen": -1.6588852405548096, + "logits/rejected": -1.6667550802230835, + "logps/chosen": -7.080897921696305e-05, + "logps/rejected": -215.00157165527344, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2172375793161336e-06, + "rewards/margins": 3.496030569076538, + "rewards/rejected": -3.49603271484375, + "step": 10183 + }, + { + "epoch": 0.59, + "learning_rate": 3.756039405847396e-08, + "logits/chosen": -1.7945352792739868, + "logits/rejected": -1.7893925905227661, + "logps/chosen": -54.306182861328125, + "logps/rejected": -261.4831848144531, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.100311279296875, + "rewards/margins": 0.8891937136650085, + "rewards/rejected": 0.21111755073070526, + "step": 10184 + }, + { + "epoch": 0.59, + "learning_rate": 3.755126657591365e-08, + "logits/chosen": -1.8641210794448853, + "logits/rejected": -1.8413196802139282, + "logps/chosen": -247.19210815429688, + "logps/rejected": -408.5768737792969, + "loss": 0.2124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9762725830078125, + "rewards/margins": 2.123443603515625, + "rewards/rejected": -1.1471710205078125, + "step": 10185 + }, + { + "epoch": 0.59, + "learning_rate": 3.754213953559325e-08, + "logits/chosen": -1.8072140216827393, + "logits/rejected": -1.7856059074401855, + "logps/chosen": -150.3249053955078, + "logps/rejected": -330.3926086425781, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.874925374984741, + "rewards/margins": 1.8243149518966675, + "rewards/rejected": 1.0506104230880737, + "step": 10186 + }, + { + "epoch": 0.59, + "learning_rate": 3.7533012937836997e-08, + "logits/chosen": -1.8267935514450073, + "logits/rejected": -1.8246949911117554, + "logps/chosen": -38.62096405029297, + "logps/rejected": -298.5620422363281, + "loss": 0.1605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9842140078544617, + "rewards/margins": 3.219745635986328, + "rewards/rejected": -2.2355315685272217, + "step": 10187 + }, + { + "epoch": 0.59, + "learning_rate": 3.7523886782969095e-08, + "logits/chosen": -1.9500740766525269, + "logits/rejected": -1.9407082796096802, + "logps/chosen": -26.58519172668457, + "logps/rejected": -163.56910705566406, + "loss": 0.2363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.627083420753479, + "rewards/margins": 2.4288597106933594, + "rewards/rejected": -1.8017761707305908, + "step": 10188 + }, + { + "epoch": 0.59, + "learning_rate": 3.7514761071313764e-08, + "logits/chosen": -1.866408109664917, + "logits/rejected": -1.8661110401153564, + "logps/chosen": -30.521726608276367, + "logps/rejected": -172.09063720703125, + "loss": 0.4896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2806234359741211, + "rewards/margins": 0.5529882907867432, + "rewards/rejected": -0.2723648250102997, + "step": 10189 + }, + { + "epoch": 0.59, + "learning_rate": 3.750563580319519e-08, + "logits/chosen": -1.7537153959274292, + "logits/rejected": -1.7184172868728638, + "logps/chosen": -331.7414245605469, + "logps/rejected": -515.700927734375, + "loss": 0.1078, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.566397190093994, + "rewards/margins": 1.5395478010177612, + "rewards/rejected": 1.026849389076233, + "step": 10190 + }, + { + "epoch": 0.59, + "learning_rate": 3.749651097893756e-08, + "logits/chosen": -1.9917324781417847, + "logits/rejected": -1.9908556938171387, + "logps/chosen": -25.751832962036133, + "logps/rejected": -176.78004455566406, + "loss": 0.1928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.788124680519104, + "rewards/margins": 2.1787664890289307, + "rewards/rejected": -1.3906418085098267, + "step": 10191 + }, + { + "epoch": 0.59, + "learning_rate": 3.748738659886501e-08, + "logits/chosen": -2.0297210216522217, + "logits/rejected": -2.0243356227874756, + "logps/chosen": -9.4537353515625, + "logps/rejected": -170.44664001464844, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25892725586891174, + "rewards/margins": 1.5544655323028564, + "rewards/rejected": -1.295538306236267, + "step": 10192 + }, + { + "epoch": 0.59, + "learning_rate": 3.7478262663301685e-08, + "logits/chosen": -1.9513702392578125, + "logits/rejected": -1.948626160621643, + "logps/chosen": -8.07514476776123, + "logps/rejected": -246.31312561035156, + "loss": 0.2489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2869986593723297, + "rewards/margins": 3.842270612716675, + "rewards/rejected": -3.555271863937378, + "step": 10193 + }, + { + "epoch": 0.59, + "learning_rate": 3.7469139172571744e-08, + "logits/chosen": -1.9737522602081299, + "logits/rejected": -1.9832091331481934, + "logps/chosen": -49.91645812988281, + "logps/rejected": -153.87120056152344, + "loss": 0.3302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6271473169326782, + "rewards/margins": 1.0512027740478516, + "rewards/rejected": -0.4240554869174957, + "step": 10194 + }, + { + "epoch": 0.59, + "learning_rate": 3.746001612699925e-08, + "logits/chosen": -1.9165308475494385, + "logits/rejected": -1.9108914136886597, + "logps/chosen": -0.24392879009246826, + "logps/rejected": -145.54104614257812, + "loss": 0.4163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00040830971556715667, + "rewards/margins": 1.602958083152771, + "rewards/rejected": -1.6025497913360596, + "step": 10195 + }, + { + "epoch": 0.59, + "learning_rate": 3.7450893526908344e-08, + "logits/chosen": -1.7351266145706177, + "logits/rejected": -1.6962841749191284, + "logps/chosen": -144.71022033691406, + "logps/rejected": -299.2125549316406, + "loss": 0.2249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46349793672561646, + "rewards/margins": 2.9696593284606934, + "rewards/rejected": -2.5061614513397217, + "step": 10196 + }, + { + "epoch": 0.59, + "learning_rate": 3.744177137262307e-08, + "logits/chosen": -1.97890305519104, + "logits/rejected": -1.9315459728240967, + "logps/chosen": -155.169677734375, + "logps/rejected": -279.15252685546875, + "loss": 0.2083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3376343250274658, + "rewards/margins": 1.0188568830490112, + "rewards/rejected": 0.318777471780777, + "step": 10197 + }, + { + "epoch": 0.59, + "learning_rate": 3.743264966446752e-08, + "logits/chosen": -2.04911208152771, + "logits/rejected": -2.0393941402435303, + "logps/chosen": -63.34961700439453, + "logps/rejected": -248.63485717773438, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4871627986431122, + "rewards/margins": 1.8362351655960083, + "rewards/rejected": -1.3490723371505737, + "step": 10198 + }, + { + "epoch": 0.59, + "learning_rate": 3.7423528402765715e-08, + "logits/chosen": -1.9492231607437134, + "logits/rejected": -1.9947630167007446, + "logps/chosen": -123.92092895507812, + "logps/rejected": -193.0842742919922, + "loss": 0.1392, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0143678188323975, + "rewards/margins": 1.5987626314163208, + "rewards/rejected": 0.4156051576137543, + "step": 10199 + }, + { + "epoch": 0.59, + "learning_rate": 3.741440758784171e-08, + "logits/chosen": -1.862630844116211, + "logits/rejected": -1.867458462715149, + "logps/chosen": -243.62954711914062, + "logps/rejected": -378.596435546875, + "loss": 0.1526, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4816925525665283, + "rewards/margins": 1.3393677473068237, + "rewards/rejected": 0.14232483506202698, + "step": 10200 + }, + { + "epoch": 0.59, + "learning_rate": 3.74052872200195e-08, + "logits/chosen": -1.7725321054458618, + "logits/rejected": -1.7859392166137695, + "logps/chosen": -142.7683868408203, + "logps/rejected": -338.22161865234375, + "loss": 0.1344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6579177975654602, + "rewards/margins": 3.089155673980713, + "rewards/rejected": -2.4312379360198975, + "step": 10201 + }, + { + "epoch": 0.59, + "learning_rate": 3.7396167299623105e-08, + "logits/chosen": -1.8342043161392212, + "logits/rejected": -1.837260365486145, + "logps/chosen": -197.4280548095703, + "logps/rejected": -407.16986083984375, + "loss": 0.0987, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4962480068206787, + "rewards/margins": 1.658238410949707, + "rewards/rejected": 0.8380096554756165, + "step": 10202 + }, + { + "epoch": 0.59, + "learning_rate": 3.738704782697649e-08, + "logits/chosen": -1.8522449731826782, + "logits/rejected": -1.8461483716964722, + "logps/chosen": -25.609912872314453, + "logps/rejected": -152.7279052734375, + "loss": 0.2803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4104408323764801, + "rewards/margins": 3.5772926807403564, + "rewards/rejected": -3.166851758956909, + "step": 10203 + }, + { + "epoch": 0.59, + "learning_rate": 3.7377928802403644e-08, + "logits/chosen": -1.997399926185608, + "logits/rejected": -1.9921172857284546, + "logps/chosen": -2.3662497997283936, + "logps/rejected": -179.05674743652344, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02463223971426487, + "rewards/margins": 4.800036907196045, + "rewards/rejected": -4.775404453277588, + "step": 10204 + }, + { + "epoch": 0.59, + "learning_rate": 3.73688102262285e-08, + "logits/chosen": -1.891585111618042, + "logits/rejected": -1.9006363153457642, + "logps/chosen": -262.65594482421875, + "logps/rejected": -433.53564453125, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.305401563644409, + "rewards/margins": 5.474578857421875, + "rewards/rejected": -3.169177293777466, + "step": 10205 + }, + { + "epoch": 0.59, + "learning_rate": 3.7359692098775006e-08, + "logits/chosen": -1.8901495933532715, + "logits/rejected": -1.8702408075332642, + "logps/chosen": -163.34429931640625, + "logps/rejected": -252.40692138671875, + "loss": 0.0853, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.332122802734375, + "rewards/margins": 2.4430909156799316, + "rewards/rejected": -0.11096801608800888, + "step": 10206 + }, + { + "epoch": 0.59, + "learning_rate": 3.735057442036707e-08, + "logits/chosen": -1.8617088794708252, + "logits/rejected": -1.8565292358398438, + "logps/chosen": -0.0012561870971694589, + "logps/rejected": -231.31866455078125, + "loss": 0.3788, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.96571034193039e-05, + "rewards/margins": 2.6212077140808105, + "rewards/rejected": -2.621307373046875, + "step": 10207 + }, + { + "epoch": 0.59, + "learning_rate": 3.7341457191328616e-08, + "logits/chosen": -1.9790445566177368, + "logits/rejected": -1.9494823217391968, + "logps/chosen": -275.90386962890625, + "logps/rejected": -457.1488037109375, + "loss": 0.0844, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.397479295730591, + "rewards/margins": 2.19815993309021, + "rewards/rejected": 0.19931946694850922, + "step": 10208 + }, + { + "epoch": 0.59, + "learning_rate": 3.733234041198352e-08, + "logits/chosen": -1.7234729528427124, + "logits/rejected": -1.7268024682998657, + "logps/chosen": -6.957574844360352, + "logps/rejected": -85.8538818359375, + "loss": 0.3878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0591367743909359, + "rewards/margins": 1.5164655447006226, + "rewards/rejected": -1.4573287963867188, + "step": 10209 + }, + { + "epoch": 0.59, + "learning_rate": 3.732322408265566e-08, + "logits/chosen": -2.081205129623413, + "logits/rejected": -2.0760514736175537, + "logps/chosen": -13.847362518310547, + "logps/rejected": -150.448486328125, + "loss": 0.4272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.268149197101593, + "rewards/margins": 1.2537083625793457, + "rewards/rejected": -0.9855591058731079, + "step": 10210 + }, + { + "epoch": 0.59, + "learning_rate": 3.731410820366888e-08, + "logits/chosen": -2.1795716285705566, + "logits/rejected": -2.1655521392822266, + "logps/chosen": -0.007763276807963848, + "logps/rejected": -141.6786346435547, + "loss": 0.5466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007078853086568415, + "rewards/margins": 0.7086930871009827, + "rewards/rejected": -0.7094009518623352, + "step": 10211 + }, + { + "epoch": 0.59, + "learning_rate": 3.730499277534702e-08, + "logits/chosen": -1.7575234174728394, + "logits/rejected": -1.7614002227783203, + "logps/chosen": -0.33530503511428833, + "logps/rejected": -77.50540924072266, + "loss": 0.4933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020152106881141663, + "rewards/margins": 0.9703325629234314, + "rewards/rejected": -0.9904846549034119, + "step": 10212 + }, + { + "epoch": 0.59, + "learning_rate": 3.729587779801394e-08, + "logits/chosen": -2.0363993644714355, + "logits/rejected": -2.040790319442749, + "logps/chosen": -0.012102779932320118, + "logps/rejected": -156.49395751953125, + "loss": 0.3658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006514367996715009, + "rewards/margins": 3.2058558464050293, + "rewards/rejected": -3.2065072059631348, + "step": 10213 + }, + { + "epoch": 0.59, + "learning_rate": 3.7286763271993405e-08, + "logits/chosen": -1.8887299299240112, + "logits/rejected": -1.8743864297866821, + "logps/chosen": -129.5115966796875, + "logps/rejected": -270.5440368652344, + "loss": 0.2581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8913986086845398, + "rewards/margins": 0.9900069832801819, + "rewards/rejected": -0.09860839694738388, + "step": 10214 + }, + { + "epoch": 0.59, + "learning_rate": 3.727764919760925e-08, + "logits/chosen": -1.8368364572525024, + "logits/rejected": -1.834251880645752, + "logps/chosen": -15.939384460449219, + "logps/rejected": -189.92098999023438, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30177995562553406, + "rewards/margins": 2.2204108238220215, + "rewards/rejected": -1.9186309576034546, + "step": 10215 + }, + { + "epoch": 0.59, + "learning_rate": 3.726853557518519e-08, + "logits/chosen": -1.8126227855682373, + "logits/rejected": -1.8177111148834229, + "logps/chosen": -40.54636001586914, + "logps/rejected": -252.81626892089844, + "loss": 0.1951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.727289617061615, + "rewards/margins": 2.120298147201538, + "rewards/rejected": -1.3930084705352783, + "step": 10216 + }, + { + "epoch": 0.59, + "learning_rate": 3.7259422405045046e-08, + "logits/chosen": -1.885104775428772, + "logits/rejected": -1.8833250999450684, + "logps/chosen": -15.754680633544922, + "logps/rejected": -287.596435546875, + "loss": 0.1512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7337280511856079, + "rewards/margins": 5.960833549499512, + "rewards/rejected": -5.227105617523193, + "step": 10217 + }, + { + "epoch": 0.59, + "learning_rate": 3.725030968751254e-08, + "logits/chosen": -1.6911101341247559, + "logits/rejected": -1.6656502485275269, + "logps/chosen": -216.0531768798828, + "logps/rejected": -457.96044921875, + "loss": 0.1053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5948044061660767, + "rewards/margins": 3.5889816284179688, + "rewards/rejected": -1.994177222251892, + "step": 10218 + }, + { + "epoch": 0.59, + "learning_rate": 3.7241197422911397e-08, + "logits/chosen": -1.8535780906677246, + "logits/rejected": -1.8564870357513428, + "logps/chosen": -147.6851348876953, + "logps/rejected": -218.93116760253906, + "loss": 0.7341, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3509781062602997, + "rewards/margins": -0.6231292486190796, + "rewards/rejected": 0.9741073846817017, + "step": 10219 + }, + { + "epoch": 0.59, + "learning_rate": 3.723208561156533e-08, + "logits/chosen": -1.8396357297897339, + "logits/rejected": -1.8162862062454224, + "logps/chosen": -202.74668884277344, + "logps/rejected": -340.5027770996094, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.206538438796997, + "rewards/margins": 3.5706100463867188, + "rewards/rejected": -0.36407166719436646, + "step": 10220 + }, + { + "epoch": 0.59, + "learning_rate": 3.7222974253798044e-08, + "logits/chosen": -1.783972144126892, + "logits/rejected": -1.7924199104309082, + "logps/chosen": -203.63491821289062, + "logps/rejected": -248.81585693359375, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.662145972251892, + "rewards/margins": 0.1961822509765625, + "rewards/rejected": 1.4659637212753296, + "step": 10221 + }, + { + "epoch": 0.59, + "learning_rate": 3.72138633499332e-08, + "logits/chosen": -1.7852226495742798, + "logits/rejected": -1.7546029090881348, + "logps/chosen": -252.24415588378906, + "logps/rejected": -432.6805419921875, + "loss": 0.1319, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4137375354766846, + "rewards/margins": 1.415480136871338, + "rewards/rejected": 0.9982574582099915, + "step": 10222 + }, + { + "epoch": 0.59, + "learning_rate": 3.7204752900294486e-08, + "logits/chosen": -1.7759013175964355, + "logits/rejected": -1.7759184837341309, + "logps/chosen": -168.49810791015625, + "logps/rejected": -259.88079833984375, + "loss": 0.2218, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0911102294921875, + "rewards/margins": 0.9695037603378296, + "rewards/rejected": 0.12160644680261612, + "step": 10223 + }, + { + "epoch": 0.59, + "learning_rate": 3.719564290520552e-08, + "logits/chosen": -1.9268529415130615, + "logits/rejected": -1.927883505821228, + "logps/chosen": -157.680419921875, + "logps/rejected": -461.1138916015625, + "loss": 0.0547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7891570925712585, + "rewards/margins": 5.748388767242432, + "rewards/rejected": -4.959231853485107, + "step": 10224 + }, + { + "epoch": 0.6, + "learning_rate": 3.718653336498997e-08, + "logits/chosen": -2.107147693634033, + "logits/rejected": -2.088698625564575, + "logps/chosen": -6.639782804995775e-05, + "logps/rejected": -139.03689575195312, + "loss": 0.3867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5139696643018397e-06, + "rewards/margins": 2.441192626953125, + "rewards/rejected": -2.4411911964416504, + "step": 10225 + }, + { + "epoch": 0.6, + "learning_rate": 3.7177424279971414e-08, + "logits/chosen": -1.851212739944458, + "logits/rejected": -1.8306152820587158, + "logps/chosen": -90.9027328491211, + "logps/rejected": -360.6139831542969, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2189712524414062, + "rewards/margins": 7.494389533996582, + "rewards/rejected": -6.275418281555176, + "step": 10226 + }, + { + "epoch": 0.6, + "learning_rate": 3.716831565047349e-08, + "logits/chosen": -1.9526898860931396, + "logits/rejected": -1.9580296277999878, + "logps/chosen": -15.988173484802246, + "logps/rejected": -142.5526123046875, + "loss": 0.2017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8050810694694519, + "rewards/margins": 2.88411021232605, + "rewards/rejected": -2.079029083251953, + "step": 10227 + }, + { + "epoch": 0.6, + "learning_rate": 3.7159207476819746e-08, + "logits/chosen": -1.9487782716751099, + "logits/rejected": -2.0185515880584717, + "logps/chosen": -248.94223022460938, + "logps/rejected": -357.9992980957031, + "loss": 0.1405, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9059418439865112, + "rewards/margins": 1.39190673828125, + "rewards/rejected": 0.5140350461006165, + "step": 10228 + }, + { + "epoch": 0.6, + "learning_rate": 3.715009975933377e-08, + "logits/chosen": -1.9045565128326416, + "logits/rejected": -1.9075666666030884, + "logps/chosen": -179.83274841308594, + "logps/rejected": -249.01119995117188, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9771606922149658, + "rewards/margins": 1.0603241920471191, + "rewards/rejected": 0.9168365597724915, + "step": 10229 + }, + { + "epoch": 0.6, + "learning_rate": 3.7140992498339095e-08, + "logits/chosen": -2.008915424346924, + "logits/rejected": -2.000544548034668, + "logps/chosen": -5.531234637601301e-05, + "logps/rejected": -271.9703369140625, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.536721563563333e-07, + "rewards/margins": 6.862986087799072, + "rewards/rejected": -6.862985134124756, + "step": 10230 + }, + { + "epoch": 0.6, + "learning_rate": 3.713188569415928e-08, + "logits/chosen": -2.0245983600616455, + "logits/rejected": -2.0122036933898926, + "logps/chosen": -36.117794036865234, + "logps/rejected": -323.052978515625, + "loss": 0.135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6482502222061157, + "rewards/margins": 3.5649585723876953, + "rewards/rejected": -2.916708469390869, + "step": 10231 + }, + { + "epoch": 0.6, + "learning_rate": 3.712277934711782e-08, + "logits/chosen": -1.9902067184448242, + "logits/rejected": -1.99191153049469, + "logps/chosen": -0.00046720518730580807, + "logps/rejected": -71.50409698486328, + "loss": 0.7898, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.705287599586882e-05, + "rewards/margins": -0.3551136553287506, + "rewards/rejected": 0.3550766110420227, + "step": 10232 + }, + { + "epoch": 0.6, + "learning_rate": 3.711367345753822e-08, + "logits/chosen": -1.807465672492981, + "logits/rejected": -1.8532692193984985, + "logps/chosen": -351.0992431640625, + "logps/rejected": -285.1767272949219, + "loss": 0.2191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1507446765899658, + "rewards/margins": 1.3398346900939941, + "rewards/rejected": -0.18908996880054474, + "step": 10233 + }, + { + "epoch": 0.6, + "learning_rate": 3.7104568025744e-08, + "logits/chosen": -2.0706217288970947, + "logits/rejected": -2.064336061477661, + "logps/chosen": -10.91175651550293, + "logps/rejected": -170.42172241210938, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25067779421806335, + "rewards/margins": 3.272265911102295, + "rewards/rejected": -3.021588087081909, + "step": 10234 + }, + { + "epoch": 0.6, + "learning_rate": 3.7095463052058567e-08, + "logits/chosen": -1.9814866781234741, + "logits/rejected": -1.9734512567520142, + "logps/chosen": -1.3199496269226074, + "logps/rejected": -431.8524169921875, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003865838225465268, + "rewards/margins": 7.068857669830322, + "rewards/rejected": -7.069244384765625, + "step": 10235 + }, + { + "epoch": 0.6, + "learning_rate": 3.708635853680545e-08, + "logits/chosen": -1.8198391199111938, + "logits/rejected": -1.8761197328567505, + "logps/chosen": -222.59495544433594, + "logps/rejected": -324.8838195800781, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.838095188140869, + "rewards/margins": 3.90049147605896, + "rewards/rejected": -1.0623962879180908, + "step": 10236 + }, + { + "epoch": 0.6, + "learning_rate": 3.7077254480308004e-08, + "logits/chosen": -2.0138442516326904, + "logits/rejected": -2.017169713973999, + "logps/chosen": -52.85027313232422, + "logps/rejected": -357.2196044921875, + "loss": 0.1147, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0436798334121704, + "rewards/margins": 4.690548896789551, + "rewards/rejected": -3.646868944168091, + "step": 10237 + }, + { + "epoch": 0.6, + "learning_rate": 3.706815088288972e-08, + "logits/chosen": -1.977291226387024, + "logits/rejected": -1.9563519954681396, + "logps/chosen": -193.9882049560547, + "logps/rejected": -339.7117004394531, + "loss": 0.6289, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1938400268554688, + "rewards/margins": 2.3769822120666504, + "rewards/rejected": -3.570822238922119, + "step": 10238 + }, + { + "epoch": 0.6, + "learning_rate": 3.705904774487396e-08, + "logits/chosen": -2.06325626373291, + "logits/rejected": -2.0585670471191406, + "logps/chosen": -52.517677307128906, + "logps/rejected": -180.14926147460938, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0155013799667358, + "rewards/margins": 1.0325775146484375, + "rewards/rejected": -0.01707611046731472, + "step": 10239 + }, + { + "epoch": 0.6, + "learning_rate": 3.704994506658413e-08, + "logits/chosen": -1.846161127090454, + "logits/rejected": -1.8447421789169312, + "logps/chosen": -4.282996654510498, + "logps/rejected": -131.5091094970703, + "loss": 0.7202, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.031394243240356445, + "rewards/margins": -0.10118947178125381, + "rewards/rejected": 0.06979522854089737, + "step": 10240 + }, + { + "epoch": 0.6, + "learning_rate": 3.704084284834359e-08, + "logits/chosen": -2.080737829208374, + "logits/rejected": -2.0898220539093018, + "logps/chosen": -230.42019653320312, + "logps/rejected": -356.714599609375, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.26615309715271, + "rewards/margins": 3.0693695545196533, + "rewards/rejected": 0.19678345322608948, + "step": 10241 + }, + { + "epoch": 0.6, + "learning_rate": 3.7031741090475716e-08, + "logits/chosen": -1.8626084327697754, + "logits/rejected": -1.8360874652862549, + "logps/chosen": -179.33641052246094, + "logps/rejected": -338.2666320800781, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8092727661132812, + "rewards/margins": 3.675074815750122, + "rewards/rejected": -1.8658020496368408, + "step": 10242 + }, + { + "epoch": 0.6, + "learning_rate": 3.702263979330382e-08, + "logits/chosen": -1.792138934135437, + "logits/rejected": -1.8262792825698853, + "logps/chosen": -261.17840576171875, + "logps/rejected": -259.4134521484375, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9438111782073975, + "rewards/margins": 1.6375367641448975, + "rewards/rejected": 1.3062744140625, + "step": 10243 + }, + { + "epoch": 0.6, + "learning_rate": 3.701353895715125e-08, + "logits/chosen": -1.9795738458633423, + "logits/rejected": -1.973103404045105, + "logps/chosen": -149.74960327148438, + "logps/rejected": -235.12835693359375, + "loss": 0.1649, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5394150018692017, + "rewards/margins": 2.05413818359375, + "rewards/rejected": -0.5147232413291931, + "step": 10244 + }, + { + "epoch": 0.6, + "learning_rate": 3.7004438582341285e-08, + "logits/chosen": -1.8018296957015991, + "logits/rejected": -1.7977863550186157, + "logps/chosen": -35.148597717285156, + "logps/rejected": -191.86883544921875, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1587802916765213, + "rewards/margins": 3.383350133895874, + "rewards/rejected": -3.224569797515869, + "step": 10245 + }, + { + "epoch": 0.6, + "learning_rate": 3.699533866919724e-08, + "logits/chosen": -1.9812300205230713, + "logits/rejected": -1.9701682329177856, + "logps/chosen": -90.3257064819336, + "logps/rejected": -408.33984375, + "loss": 0.1356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5753677487373352, + "rewards/margins": 4.579649448394775, + "rewards/rejected": -4.004281520843506, + "step": 10246 + }, + { + "epoch": 0.6, + "learning_rate": 3.698623921804238e-08, + "logits/chosen": -1.7546913623809814, + "logits/rejected": -1.7524693012237549, + "logps/chosen": -220.89434814453125, + "logps/rejected": -322.17889404296875, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6862869262695312, + "rewards/margins": 2.5145492553710938, + "rewards/rejected": -0.8282623291015625, + "step": 10247 + }, + { + "epoch": 0.6, + "learning_rate": 3.6977140229199955e-08, + "logits/chosen": -1.7748600244522095, + "logits/rejected": -1.784934639930725, + "logps/chosen": -0.0064047882333397865, + "logps/rejected": -497.96881103515625, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002502288843970746, + "rewards/margins": 13.39352798461914, + "rewards/rejected": -13.393777847290039, + "step": 10248 + }, + { + "epoch": 0.6, + "learning_rate": 3.696804170299321e-08, + "logits/chosen": -1.873685598373413, + "logits/rejected": -1.8468822240829468, + "logps/chosen": -127.42919158935547, + "logps/rejected": -338.6102294921875, + "loss": 0.5366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6551139950752258, + "rewards/margins": 0.8200141787528992, + "rewards/rejected": -1.475128173828125, + "step": 10249 + }, + { + "epoch": 0.6, + "learning_rate": 3.6958943639745375e-08, + "logits/chosen": -1.9867942333221436, + "logits/rejected": -1.9875993728637695, + "logps/chosen": -13.627205848693848, + "logps/rejected": -189.2787322998047, + "loss": 0.4248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015762710943818092, + "rewards/margins": 1.9407657384872437, + "rewards/rejected": -1.9250030517578125, + "step": 10250 + }, + { + "epoch": 0.6, + "learning_rate": 3.694984603977964e-08, + "logits/chosen": -1.9531835317611694, + "logits/rejected": -1.9517128467559814, + "logps/chosen": -0.00010406746878288686, + "logps/rejected": -121.32062530517578, + "loss": 0.4913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001907215191749856, + "rewards/margins": 1.0774681568145752, + "rewards/rejected": -1.077277421951294, + "step": 10251 + }, + { + "epoch": 0.6, + "learning_rate": 3.694074890341923e-08, + "logits/chosen": -1.9528841972351074, + "logits/rejected": -1.9555469751358032, + "logps/chosen": -12.125024795532227, + "logps/rejected": -78.9532470703125, + "loss": 0.6064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1124994307756424, + "rewards/margins": 0.2827066481113434, + "rewards/rejected": -0.170207217335701, + "step": 10252 + }, + { + "epoch": 0.6, + "learning_rate": 3.693165223098728e-08, + "logits/chosen": -1.7159568071365356, + "logits/rejected": -1.6865228414535522, + "logps/chosen": -302.447998046875, + "logps/rejected": -465.9191589355469, + "loss": 0.117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.089385986328125, + "rewards/margins": 1.8486602306365967, + "rewards/rejected": 0.24072571098804474, + "step": 10253 + }, + { + "epoch": 0.6, + "learning_rate": 3.6922556022806954e-08, + "logits/chosen": -1.647750735282898, + "logits/rejected": -1.6468831300735474, + "logps/chosen": -3.3086209297180176, + "logps/rejected": -184.35667419433594, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16996203362941742, + "rewards/margins": 3.269646167755127, + "rewards/rejected": -3.099684238433838, + "step": 10254 + }, + { + "epoch": 0.6, + "learning_rate": 3.6913460279201444e-08, + "logits/chosen": -2.0364487171173096, + "logits/rejected": -2.0339083671569824, + "logps/chosen": -10.462705612182617, + "logps/rejected": -190.4049072265625, + "loss": 0.321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.086389921605587, + "rewards/margins": 3.138369083404541, + "rewards/rejected": -3.0519790649414062, + "step": 10255 + }, + { + "epoch": 0.6, + "learning_rate": 3.6904365000493805e-08, + "logits/chosen": -2.0124666690826416, + "logits/rejected": -1.9888415336608887, + "logps/chosen": -85.79441833496094, + "logps/rejected": -442.63238525390625, + "loss": 0.2162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.517303466796875, + "rewards/margins": 4.628304958343506, + "rewards/rejected": -4.111001491546631, + "step": 10256 + }, + { + "epoch": 0.6, + "learning_rate": 3.6895270187007213e-08, + "logits/chosen": -1.781903862953186, + "logits/rejected": -1.741978645324707, + "logps/chosen": -145.58888244628906, + "logps/rejected": -310.7219543457031, + "loss": 0.0862, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0730316638946533, + "rewards/margins": 2.4498536586761475, + "rewards/rejected": -0.376821905374527, + "step": 10257 + }, + { + "epoch": 0.6, + "learning_rate": 3.6886175839064686e-08, + "logits/chosen": -1.9886635541915894, + "logits/rejected": -1.989087700843811, + "logps/chosen": -38.8929328918457, + "logps/rejected": -248.93399047851562, + "loss": 0.2138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5349521636962891, + "rewards/margins": 4.780673503875732, + "rewards/rejected": -4.245721340179443, + "step": 10258 + }, + { + "epoch": 0.6, + "learning_rate": 3.687708195698937e-08, + "logits/chosen": -1.840116262435913, + "logits/rejected": -1.8382819890975952, + "logps/chosen": -201.35032653808594, + "logps/rejected": -303.6446838378906, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.382071018218994, + "rewards/margins": 4.133648872375488, + "rewards/rejected": -1.7515777349472046, + "step": 10259 + }, + { + "epoch": 0.6, + "learning_rate": 3.686798854110428e-08, + "logits/chosen": -1.8773497343063354, + "logits/rejected": -1.9026029109954834, + "logps/chosen": -289.56915283203125, + "logps/rejected": -343.2902526855469, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.15533447265625, + "rewards/margins": 3.351733446121216, + "rewards/rejected": -0.19639892876148224, + "step": 10260 + }, + { + "epoch": 0.6, + "learning_rate": 3.685889559173248e-08, + "logits/chosen": -1.986147403717041, + "logits/rejected": -1.984363317489624, + "logps/chosen": -7.090595722198486, + "logps/rejected": -78.56221008300781, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41350695490837097, + "rewards/margins": 0.9683181047439575, + "rewards/rejected": -0.5548111200332642, + "step": 10261 + }, + { + "epoch": 0.6, + "learning_rate": 3.684980310919699e-08, + "logits/chosen": -1.9980522394180298, + "logits/rejected": -1.9926564693450928, + "logps/chosen": -19.547792434692383, + "logps/rejected": -119.83084106445312, + "loss": 1.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.33635413646698, + "rewards/margins": 0.2892714738845825, + "rewards/rejected": -1.6256256103515625, + "step": 10262 + }, + { + "epoch": 0.6, + "learning_rate": 3.684071109382081e-08, + "logits/chosen": -1.7724570035934448, + "logits/rejected": -1.7776073217391968, + "logps/chosen": -237.80441284179688, + "logps/rejected": -364.12481689453125, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.063119649887085, + "rewards/margins": 3.06632399559021, + "rewards/rejected": -0.003204345703125, + "step": 10263 + }, + { + "epoch": 0.6, + "learning_rate": 3.6831619545926945e-08, + "logits/chosen": -2.0930566787719727, + "logits/rejected": -2.08909273147583, + "logps/chosen": -33.8555908203125, + "logps/rejected": -266.65277099609375, + "loss": 0.2399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.166157528758049, + "rewards/margins": 4.41018533706665, + "rewards/rejected": -4.244027614593506, + "step": 10264 + }, + { + "epoch": 0.6, + "learning_rate": 3.6822528465838376e-08, + "logits/chosen": -2.1069257259368896, + "logits/rejected": -2.1015169620513916, + "logps/chosen": -4.203479766845703, + "logps/rejected": -205.00164794921875, + "loss": 0.4429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0057232859544456005, + "rewards/margins": 1.3446232080459595, + "rewards/rejected": -1.3503464460372925, + "step": 10265 + }, + { + "epoch": 0.6, + "learning_rate": 3.6813437853878036e-08, + "logits/chosen": -2.0764098167419434, + "logits/rejected": -2.0818657875061035, + "logps/chosen": -11.218073844909668, + "logps/rejected": -201.1988983154297, + "loss": 0.1152, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4087624549865723, + "rewards/margins": 3.5730888843536377, + "rewards/rejected": -2.1643264293670654, + "step": 10266 + }, + { + "epoch": 0.6, + "learning_rate": 3.68043477103689e-08, + "logits/chosen": -1.9028453826904297, + "logits/rejected": -1.9018893241882324, + "logps/chosen": -9.810746996663511e-05, + "logps/rejected": -145.19525146484375, + "loss": 0.3821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.983351648959797e-05, + "rewards/margins": 2.4729461669921875, + "rewards/rejected": -2.472926378250122, + "step": 10267 + }, + { + "epoch": 0.6, + "learning_rate": 3.679525803563387e-08, + "logits/chosen": -1.8399020433425903, + "logits/rejected": -1.8452426195144653, + "logps/chosen": -231.62234497070312, + "logps/rejected": -354.5044250488281, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8068970441818237, + "rewards/margins": 3.632748603820801, + "rewards/rejected": -1.8258514404296875, + "step": 10268 + }, + { + "epoch": 0.6, + "learning_rate": 3.6786168829995875e-08, + "logits/chosen": -1.8595459461212158, + "logits/rejected": -1.8956308364868164, + "logps/chosen": -234.02793884277344, + "logps/rejected": -422.4547119140625, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2533462047576904, + "rewards/margins": 5.1418776512146, + "rewards/rejected": -2.888531446456909, + "step": 10269 + }, + { + "epoch": 0.6, + "learning_rate": 3.6777080093777795e-08, + "logits/chosen": -2.109807252883911, + "logits/rejected": -2.112499713897705, + "logps/chosen": -0.023355882614850998, + "logps/rejected": -137.79017639160156, + "loss": 0.5351, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006166281527839601, + "rewards/margins": 0.6656813025474548, + "rewards/rejected": -0.6662979125976562, + "step": 10270 + }, + { + "epoch": 0.6, + "learning_rate": 3.6767991827302515e-08, + "logits/chosen": -1.9049650430679321, + "logits/rejected": -1.9067702293395996, + "logps/chosen": -40.8188591003418, + "logps/rejected": -202.6300506591797, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21013717353343964, + "rewards/margins": 2.2195212841033936, + "rewards/rejected": -2.0093841552734375, + "step": 10271 + }, + { + "epoch": 0.6, + "learning_rate": 3.675890403089289e-08, + "logits/chosen": -1.907101035118103, + "logits/rejected": -1.8792141675949097, + "logps/chosen": -110.90602111816406, + "logps/rejected": -209.63197326660156, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9098602533340454, + "rewards/margins": 0.9002929925918579, + "rewards/rejected": 0.0095672607421875, + "step": 10272 + }, + { + "epoch": 0.6, + "learning_rate": 3.674981670487174e-08, + "logits/chosen": -1.794947862625122, + "logits/rejected": -1.7960870265960693, + "logps/chosen": -31.224328994750977, + "logps/rejected": -138.01583862304688, + "loss": 0.1751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8941370248794556, + "rewards/margins": 2.9618191719055176, + "rewards/rejected": -2.0676820278167725, + "step": 10273 + }, + { + "epoch": 0.6, + "learning_rate": 3.6740729849561944e-08, + "logits/chosen": -2.035475969314575, + "logits/rejected": -2.016505241394043, + "logps/chosen": -229.79434204101562, + "logps/rejected": -305.775390625, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5802841186523438, + "rewards/margins": 0.33328092098236084, + "rewards/rejected": 1.247003197669983, + "step": 10274 + }, + { + "epoch": 0.6, + "learning_rate": 3.673164346528625e-08, + "logits/chosen": -1.850455641746521, + "logits/rejected": -1.8507400751113892, + "logps/chosen": -7.649603843688965, + "logps/rejected": -87.5745620727539, + "loss": 0.3511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24131326377391815, + "rewards/margins": 1.4053078889846802, + "rewards/rejected": -1.1639946699142456, + "step": 10275 + }, + { + "epoch": 0.6, + "learning_rate": 3.672255755236753e-08, + "logits/chosen": -1.8229326009750366, + "logits/rejected": -1.819563388824463, + "logps/chosen": -5.221260653343052e-05, + "logps/rejected": -140.6297149658203, + "loss": 0.4234, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0013129667640897e-06, + "rewards/margins": 1.720995545387268, + "rewards/rejected": -1.7209945917129517, + "step": 10276 + }, + { + "epoch": 0.6, + "learning_rate": 3.671347211112847e-08, + "logits/chosen": -1.8680683374404907, + "logits/rejected": -1.872373104095459, + "logps/chosen": -0.002043686341494322, + "logps/rejected": -121.12136840820312, + "loss": 0.4053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010293239029124379, + "rewards/margins": 1.5243791341781616, + "rewards/rejected": -1.524482011795044, + "step": 10277 + }, + { + "epoch": 0.6, + "learning_rate": 3.67043871418919e-08, + "logits/chosen": -2.070878744125366, + "logits/rejected": -2.0727860927581787, + "logps/chosen": -116.18810272216797, + "logps/rejected": -477.85284423828125, + "loss": 0.0524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.339911699295044, + "rewards/margins": 3.1184091567993164, + "rewards/rejected": -1.778497338294983, + "step": 10278 + }, + { + "epoch": 0.6, + "learning_rate": 3.6695302644980506e-08, + "logits/chosen": -1.7663428783416748, + "logits/rejected": -1.7618873119354248, + "logps/chosen": -41.62723922729492, + "logps/rejected": -158.28683471679688, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11432037502527237, + "rewards/margins": 1.9899688959121704, + "rewards/rejected": -1.8756484985351562, + "step": 10279 + }, + { + "epoch": 0.6, + "learning_rate": 3.668621862071707e-08, + "logits/chosen": -1.9242308139801025, + "logits/rejected": -1.9732283353805542, + "logps/chosen": -145.26353454589844, + "logps/rejected": -363.8757629394531, + "loss": 0.1896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.380574107170105, + "rewards/margins": 1.1221694946289062, + "rewards/rejected": 0.25840455293655396, + "step": 10280 + }, + { + "epoch": 0.6, + "learning_rate": 3.667713506942426e-08, + "logits/chosen": -1.9186561107635498, + "logits/rejected": -2.020698070526123, + "logps/chosen": -280.2876281738281, + "logps/rejected": -301.24224853515625, + "loss": 0.0581, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.537558078765869, + "rewards/margins": 2.206320285797119, + "rewards/rejected": 0.33123779296875, + "step": 10281 + }, + { + "epoch": 0.6, + "learning_rate": 3.666805199142479e-08, + "logits/chosen": -1.9949171543121338, + "logits/rejected": -1.9940587282180786, + "logps/chosen": -3.902965545654297, + "logps/rejected": -154.038818359375, + "loss": 0.5537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09171595424413681, + "rewards/margins": 0.40685880184173584, + "rewards/rejected": -0.31514284014701843, + "step": 10282 + }, + { + "epoch": 0.6, + "learning_rate": 3.6658969387041313e-08, + "logits/chosen": -1.8719871044158936, + "logits/rejected": -1.8683040142059326, + "logps/chosen": -40.90446472167969, + "logps/rejected": -274.2362060546875, + "loss": 0.309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09793205559253693, + "rewards/margins": 2.973623275756836, + "rewards/rejected": -2.8756911754608154, + "step": 10283 + }, + { + "epoch": 0.6, + "learning_rate": 3.6649887256596516e-08, + "logits/chosen": -1.690200924873352, + "logits/rejected": -1.68381667137146, + "logps/chosen": -234.30299377441406, + "logps/rejected": -418.2616882324219, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.446009874343872, + "rewards/margins": 2.5610275268554688, + "rewards/rejected": -0.11501770466566086, + "step": 10284 + }, + { + "epoch": 0.6, + "learning_rate": 3.6640805600413016e-08, + "logits/chosen": -1.9014179706573486, + "logits/rejected": -1.8939130306243896, + "logps/chosen": -262.5780944824219, + "logps/rejected": -508.05731201171875, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0269134044647217, + "rewards/margins": 5.941977024078369, + "rewards/rejected": -2.9150636196136475, + "step": 10285 + }, + { + "epoch": 0.6, + "learning_rate": 3.663172441881346e-08, + "logits/chosen": -1.9752064943313599, + "logits/rejected": -1.960524559020996, + "logps/chosen": -16.573652267456055, + "logps/rejected": -241.22561645507812, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03178444132208824, + "rewards/margins": 4.763524532318115, + "rewards/rejected": -4.7317399978637695, + "step": 10286 + }, + { + "epoch": 0.6, + "learning_rate": 3.662264371212043e-08, + "logits/chosen": -1.9499505758285522, + "logits/rejected": -1.9479666948318481, + "logps/chosen": -149.20257568359375, + "logps/rejected": -279.78924560546875, + "loss": 0.1831, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0508453845977783, + "rewards/margins": 0.9982757568359375, + "rewards/rejected": 1.0525696277618408, + "step": 10287 + }, + { + "epoch": 0.6, + "learning_rate": 3.661356348065654e-08, + "logits/chosen": -2.020878314971924, + "logits/rejected": -2.018665313720703, + "logps/chosen": -29.656591415405273, + "logps/rejected": -249.76092529296875, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06202239915728569, + "rewards/margins": 2.4679062366485596, + "rewards/rejected": -2.529928684234619, + "step": 10288 + }, + { + "epoch": 0.6, + "learning_rate": 3.660448372474435e-08, + "logits/chosen": -1.962491512298584, + "logits/rejected": -1.9632654190063477, + "logps/chosen": -45.686912536621094, + "logps/rejected": -100.78639221191406, + "loss": 0.4546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8010025024414062, + "rewards/margins": 0.26128846406936646, + "rewards/rejected": 0.5397140383720398, + "step": 10289 + }, + { + "epoch": 0.6, + "learning_rate": 3.6595404444706426e-08, + "logits/chosen": -1.7457616329193115, + "logits/rejected": -1.6889704465866089, + "logps/chosen": -193.63092041015625, + "logps/rejected": -550.4764404296875, + "loss": 0.1752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2529144287109375, + "rewards/margins": 1.9443085193634033, + "rewards/rejected": -0.691394031047821, + "step": 10290 + }, + { + "epoch": 0.6, + "learning_rate": 3.65863256408653e-08, + "logits/chosen": -2.0118579864501953, + "logits/rejected": -2.0099551677703857, + "logps/chosen": -4.8407111167907715, + "logps/rejected": -128.09849548339844, + "loss": 0.4817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2755972445011139, + "rewards/margins": 0.7204031944274902, + "rewards/rejected": -0.44480592012405396, + "step": 10291 + }, + { + "epoch": 0.6, + "learning_rate": 3.657724731354351e-08, + "logits/chosen": -1.601555347442627, + "logits/rejected": -1.5847681760787964, + "logps/chosen": -250.03121948242188, + "logps/rejected": -306.1775207519531, + "loss": 0.1943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8729339838027954, + "rewards/margins": 1.1310088634490967, + "rewards/rejected": 0.741925060749054, + "step": 10292 + }, + { + "epoch": 0.6, + "learning_rate": 3.6568169463063546e-08, + "logits/chosen": -1.578485369682312, + "logits/rejected": -1.5836868286132812, + "logps/chosen": -5.754358768463135, + "logps/rejected": -122.82621765136719, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42611098289489746, + "rewards/margins": 2.481236457824707, + "rewards/rejected": -2.0551254749298096, + "step": 10293 + }, + { + "epoch": 0.6, + "learning_rate": 3.655909208974789e-08, + "logits/chosen": -1.7491637468338013, + "logits/rejected": -1.766473650932312, + "logps/chosen": -131.6271209716797, + "logps/rejected": -226.0691680908203, + "loss": 0.1852, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4718750715255737, + "rewards/margins": 1.033888339996338, + "rewards/rejected": 0.4379867613315582, + "step": 10294 + }, + { + "epoch": 0.6, + "learning_rate": 3.655001519391906e-08, + "logits/chosen": -2.0525004863739014, + "logits/rejected": -2.043055534362793, + "logps/chosen": -4.780004978179932, + "logps/rejected": -151.56788635253906, + "loss": 0.4159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12689076364040375, + "rewards/margins": 1.471979022026062, + "rewards/rejected": -1.345088243484497, + "step": 10295 + }, + { + "epoch": 0.6, + "learning_rate": 3.6540938775899454e-08, + "logits/chosen": -1.9281020164489746, + "logits/rejected": -1.9290035963058472, + "logps/chosen": -146.14862060546875, + "logps/rejected": -262.16986083984375, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8862030506134033, + "rewards/margins": 3.7516815662384033, + "rewards/rejected": 0.134521484375, + "step": 10296 + }, + { + "epoch": 0.6, + "learning_rate": 3.653186283601157e-08, + "logits/chosen": -1.7196587324142456, + "logits/rejected": -1.6936606168746948, + "logps/chosen": -208.150390625, + "logps/rejected": -447.0074157714844, + "loss": 0.1014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.46684730052948, + "rewards/margins": 3.2190048694610596, + "rewards/rejected": -1.7521575689315796, + "step": 10297 + }, + { + "epoch": 0.6, + "learning_rate": 3.652278737457776e-08, + "logits/chosen": -1.871231198310852, + "logits/rejected": -1.8550466299057007, + "logps/chosen": -44.072792053222656, + "logps/rejected": -294.1244812011719, + "loss": 0.2567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27894097566604614, + "rewards/margins": 3.438135862350464, + "rewards/rejected": -3.1591949462890625, + "step": 10298 + }, + { + "epoch": 0.6, + "learning_rate": 3.651371239192049e-08, + "logits/chosen": -1.857158899307251, + "logits/rejected": -1.8563660383224487, + "logps/chosen": -48.27188491821289, + "logps/rejected": -160.8619842529297, + "loss": 0.246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43507805466651917, + "rewards/margins": 1.9737300872802734, + "rewards/rejected": -1.5386520624160767, + "step": 10299 + }, + { + "epoch": 0.6, + "learning_rate": 3.6504637888362115e-08, + "logits/chosen": -1.790953516960144, + "logits/rejected": -1.7954533100128174, + "logps/chosen": -0.42057111859321594, + "logps/rejected": -92.15571594238281, + "loss": 0.3969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015049082227051258, + "rewards/margins": 1.784194827079773, + "rewards/rejected": -1.7992439270019531, + "step": 10300 + }, + { + "epoch": 0.6, + "learning_rate": 3.649556386422503e-08, + "logits/chosen": -1.8168503046035767, + "logits/rejected": -1.8131542205810547, + "logps/chosen": -26.468294143676758, + "logps/rejected": -205.96585083007812, + "loss": 0.3598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10773219913244247, + "rewards/margins": 2.3566734790802, + "rewards/rejected": -2.24894118309021, + "step": 10301 + }, + { + "epoch": 0.6, + "learning_rate": 3.648649031983156e-08, + "logits/chosen": -2.1034913063049316, + "logits/rejected": -2.0848586559295654, + "logps/chosen": -60.74195861816406, + "logps/rejected": -262.4620056152344, + "loss": 0.2241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3520255982875824, + "rewards/margins": 4.087313175201416, + "rewards/rejected": -3.7352874279022217, + "step": 10302 + }, + { + "epoch": 0.6, + "learning_rate": 3.647741725550406e-08, + "logits/chosen": -1.945837140083313, + "logits/rejected": -1.9803608655929565, + "logps/chosen": -301.77069091796875, + "logps/rejected": -349.2334289550781, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1392700672149658, + "rewards/margins": 0.5603241324424744, + "rewards/rejected": 0.5789459347724915, + "step": 10303 + }, + { + "epoch": 0.6, + "learning_rate": 3.646834467156484e-08, + "logits/chosen": -1.848889946937561, + "logits/rejected": -1.8493965864181519, + "logps/chosen": -16.147294998168945, + "logps/rejected": -210.5931854248047, + "loss": 0.2538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4017972946166992, + "rewards/margins": 4.471424579620361, + "rewards/rejected": -4.069627285003662, + "step": 10304 + }, + { + "epoch": 0.6, + "learning_rate": 3.645927256833622e-08, + "logits/chosen": -1.9559022188186646, + "logits/rejected": -1.956360101699829, + "logps/chosen": -66.53865051269531, + "logps/rejected": -282.4476623535156, + "loss": 0.1436, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1135529279708862, + "rewards/margins": 5.530710220336914, + "rewards/rejected": -4.417157173156738, + "step": 10305 + }, + { + "epoch": 0.6, + "learning_rate": 3.6450200946140464e-08, + "logits/chosen": -2.085291624069214, + "logits/rejected": -2.0740115642547607, + "logps/chosen": -9.906010382110253e-05, + "logps/rejected": -179.71664428710938, + "loss": 0.3771, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.193234877267969e-06, + "rewards/margins": 2.568086862564087, + "rewards/rejected": -2.568084716796875, + "step": 10306 + }, + { + "epoch": 0.6, + "learning_rate": 3.644112980529985e-08, + "logits/chosen": -1.8214523792266846, + "logits/rejected": -1.8149689435958862, + "logps/chosen": -30.242712020874023, + "logps/rejected": -97.64547729492188, + "loss": 0.5307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31102657318115234, + "rewards/margins": 0.3480718731880188, + "rewards/rejected": -0.03704528883099556, + "step": 10307 + }, + { + "epoch": 0.6, + "learning_rate": 3.643205914613663e-08, + "logits/chosen": -1.660165548324585, + "logits/rejected": -1.659666895866394, + "logps/chosen": -205.76124572753906, + "logps/rejected": -345.94769287109375, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.355943441390991, + "rewards/margins": 3.2672104835510254, + "rewards/rejected": -0.911267101764679, + "step": 10308 + }, + { + "epoch": 0.6, + "learning_rate": 3.6422988968973045e-08, + "logits/chosen": -1.8143210411071777, + "logits/rejected": -1.8286778926849365, + "logps/chosen": -226.17727661132812, + "logps/rejected": -329.1521301269531, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.598773241043091, + "rewards/margins": 3.1925415992736816, + "rewards/rejected": -0.593768298625946, + "step": 10309 + }, + { + "epoch": 0.6, + "learning_rate": 3.641391927413129e-08, + "logits/chosen": -2.0681464672088623, + "logits/rejected": -2.0632591247558594, + "logps/chosen": -0.0001016830065054819, + "logps/rejected": -124.5103988647461, + "loss": 0.4011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3946897752248333e-06, + "rewards/margins": 2.158804416656494, + "rewards/rejected": -2.1588058471679688, + "step": 10310 + }, + { + "epoch": 0.6, + "learning_rate": 3.640485006193359e-08, + "logits/chosen": -1.8820455074310303, + "logits/rejected": -1.871726632118225, + "logps/chosen": -197.7834014892578, + "logps/rejected": -306.58856201171875, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2142837047576904, + "rewards/margins": 3.7492873668670654, + "rewards/rejected": -1.535003662109375, + "step": 10311 + }, + { + "epoch": 0.6, + "learning_rate": 3.639578133270211e-08, + "logits/chosen": -2.038649082183838, + "logits/rejected": -2.0386877059936523, + "logps/chosen": -9.556740760803223, + "logps/rejected": -101.03384399414062, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38473770022392273, + "rewards/margins": 1.9059687852859497, + "rewards/rejected": -1.5212310552597046, + "step": 10312 + }, + { + "epoch": 0.6, + "learning_rate": 3.6386713086759035e-08, + "logits/chosen": -1.965276837348938, + "logits/rejected": -1.9561723470687866, + "logps/chosen": -0.01614420674741268, + "logps/rejected": -230.85647583007812, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014783391961827874, + "rewards/margins": 3.8326478004455566, + "rewards/rejected": -3.8341262340545654, + "step": 10313 + }, + { + "epoch": 0.6, + "learning_rate": 3.637764532442649e-08, + "logits/chosen": -2.0044991970062256, + "logits/rejected": -2.0035459995269775, + "logps/chosen": -0.0027972841635346413, + "logps/rejected": -127.58694458007812, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00014938903041183949, + "rewards/margins": 3.0276427268981934, + "rewards/rejected": -3.0274932384490967, + "step": 10314 + }, + { + "epoch": 0.6, + "learning_rate": 3.6368578046026597e-08, + "logits/chosen": -1.950347900390625, + "logits/rejected": -1.949794054031372, + "logps/chosen": -35.08012008666992, + "logps/rejected": -129.8706512451172, + "loss": 0.7562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36489906907081604, + "rewards/margins": 0.14505121111869812, + "rewards/rejected": -0.5099502801895142, + "step": 10315 + }, + { + "epoch": 0.6, + "learning_rate": 3.635951125188153e-08, + "logits/chosen": -2.088097095489502, + "logits/rejected": -2.0756406784057617, + "logps/chosen": -25.200468063354492, + "logps/rejected": -361.555419921875, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0011659621959552169, + "rewards/margins": 6.709848403930664, + "rewards/rejected": -6.708682537078857, + "step": 10316 + }, + { + "epoch": 0.6, + "learning_rate": 3.635044494231331e-08, + "logits/chosen": -2.0829501152038574, + "logits/rejected": -2.0883429050445557, + "logps/chosen": -6.876295566558838, + "logps/rejected": -59.131019592285156, + "loss": 0.7488, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04992861673235893, + "rewards/margins": -0.3010959029197693, + "rewards/rejected": 0.25116729736328125, + "step": 10317 + }, + { + "epoch": 0.6, + "learning_rate": 3.634137911764409e-08, + "logits/chosen": -1.8189380168914795, + "logits/rejected": -1.7924808263778687, + "logps/chosen": -148.09185791015625, + "logps/rejected": -284.2751770019531, + "loss": 0.4083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2751480042934418, + "rewards/margins": 0.18363799154758453, + "rewards/rejected": 0.09151001274585724, + "step": 10318 + }, + { + "epoch": 0.6, + "learning_rate": 3.633231377819586e-08, + "logits/chosen": -1.9821802377700806, + "logits/rejected": -1.9653441905975342, + "logps/chosen": -55.40826416015625, + "logps/rejected": -314.1265869140625, + "loss": 0.0893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.482153296470642, + "rewards/margins": 7.718992710113525, + "rewards/rejected": -6.236839294433594, + "step": 10319 + }, + { + "epoch": 0.6, + "learning_rate": 3.6323248924290717e-08, + "logits/chosen": -1.8773103952407837, + "logits/rejected": -1.9316520690917969, + "logps/chosen": -310.89727783203125, + "logps/rejected": -467.2872619628906, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.045074462890625, + "rewards/margins": 3.459372043609619, + "rewards/rejected": -0.414297491312027, + "step": 10320 + }, + { + "epoch": 0.6, + "learning_rate": 3.631418455625067e-08, + "logits/chosen": -1.8058018684387207, + "logits/rejected": -1.7871617078781128, + "logps/chosen": -210.21292114257812, + "logps/rejected": -485.668701171875, + "loss": 0.1273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.138706922531128, + "rewards/margins": 1.7113265991210938, + "rewards/rejected": 0.42738038301467896, + "step": 10321 + }, + { + "epoch": 0.6, + "learning_rate": 3.630512067439774e-08, + "logits/chosen": -2.011129140853882, + "logits/rejected": -2.003147840499878, + "logps/chosen": -88.98223114013672, + "logps/rejected": -268.35675048828125, + "loss": 0.0968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9930229187011719, + "rewards/margins": 3.2203330993652344, + "rewards/rejected": -2.2273101806640625, + "step": 10322 + }, + { + "epoch": 0.6, + "learning_rate": 3.6296057279053904e-08, + "logits/chosen": -2.0489699840545654, + "logits/rejected": -1.9920456409454346, + "logps/chosen": -281.6197509765625, + "logps/rejected": -425.47900390625, + "loss": 0.1145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4510865211486816, + "rewards/margins": 1.781494140625, + "rewards/rejected": 0.6695923209190369, + "step": 10323 + }, + { + "epoch": 0.6, + "learning_rate": 3.628699437054115e-08, + "logits/chosen": -1.687259554862976, + "logits/rejected": -1.7127819061279297, + "logps/chosen": -238.8495330810547, + "logps/rejected": -337.8122253417969, + "loss": 0.1279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2424392700195312, + "rewards/margins": 1.410386562347412, + "rewards/rejected": 0.8320526480674744, + "step": 10324 + }, + { + "epoch": 0.6, + "learning_rate": 3.627793194918143e-08, + "logits/chosen": -1.9675896167755127, + "logits/rejected": -1.9673928022384644, + "logps/chosen": -13.204375267028809, + "logps/rejected": -85.01879119873047, + "loss": 0.3934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029730796813964844, + "rewards/margins": 1.8590596914291382, + "rewards/rejected": -1.8293288946151733, + "step": 10325 + }, + { + "epoch": 0.6, + "learning_rate": 3.626887001529669e-08, + "logits/chosen": -2.021212100982666, + "logits/rejected": -2.0210440158843994, + "logps/chosen": -21.020479202270508, + "logps/rejected": -158.88192749023438, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8287267684936523, + "rewards/margins": 1.0525716543197632, + "rewards/rejected": -0.22384491562843323, + "step": 10326 + }, + { + "epoch": 0.6, + "learning_rate": 3.625980856920886e-08, + "logits/chosen": -1.9944851398468018, + "logits/rejected": -1.990970492362976, + "logps/chosen": -4.574784278869629, + "logps/rejected": -149.95755004882812, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07681941986083984, + "rewards/margins": 1.988374948501587, + "rewards/rejected": -1.911555528640747, + "step": 10327 + }, + { + "epoch": 0.6, + "learning_rate": 3.625074761123984e-08, + "logits/chosen": -1.9994385242462158, + "logits/rejected": -1.9904834032058716, + "logps/chosen": -3.778903919737786e-05, + "logps/rejected": -333.22247314453125, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6715798614750383e-06, + "rewards/margins": 7.4261393547058105, + "rewards/rejected": -7.426135540008545, + "step": 10328 + }, + { + "epoch": 0.6, + "learning_rate": 3.624168714171151e-08, + "logits/chosen": -2.0967507362365723, + "logits/rejected": -2.0925633907318115, + "logps/chosen": -68.31556701660156, + "logps/rejected": -406.078369140625, + "loss": 0.1832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7046661376953125, + "rewards/margins": 5.8941192626953125, + "rewards/rejected": -5.189453125, + "step": 10329 + }, + { + "epoch": 0.6, + "learning_rate": 3.6232627160945764e-08, + "logits/chosen": -1.8574007749557495, + "logits/rejected": -1.863900065422058, + "logps/chosen": -61.626792907714844, + "logps/rejected": -209.28073120117188, + "loss": 0.7096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0051029920578003, + "rewards/margins": 2.3160548210144043, + "rewards/rejected": -3.321157932281494, + "step": 10330 + }, + { + "epoch": 0.6, + "learning_rate": 3.6223567669264434e-08, + "logits/chosen": -1.8343545198440552, + "logits/rejected": -1.747967004776001, + "logps/chosen": -263.9884033203125, + "logps/rejected": -846.5136108398438, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.283361792564392, + "rewards/margins": 5.865606784820557, + "rewards/rejected": -4.582244873046875, + "step": 10331 + }, + { + "epoch": 0.6, + "learning_rate": 3.6214508666989377e-08, + "logits/chosen": -2.0085206031799316, + "logits/rejected": -2.001612663269043, + "logps/chosen": -19.236888885498047, + "logps/rejected": -229.3619842529297, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28553277254104614, + "rewards/margins": 3.7107157707214355, + "rewards/rejected": -3.425183057785034, + "step": 10332 + }, + { + "epoch": 0.6, + "learning_rate": 3.620545015444239e-08, + "logits/chosen": -2.000908136367798, + "logits/rejected": -1.9952833652496338, + "logps/chosen": -0.013793554157018661, + "logps/rejected": -86.8880844116211, + "loss": 0.662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006644135806709528, + "rewards/margins": 0.12230881303548813, + "rewards/rejected": -0.11566467583179474, + "step": 10333 + }, + { + "epoch": 0.6, + "learning_rate": 3.619639213194529e-08, + "logits/chosen": -1.9878814220428467, + "logits/rejected": -1.9880788326263428, + "logps/chosen": -41.14512252807617, + "logps/rejected": -245.2754669189453, + "loss": 0.3533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05602912977337837, + "rewards/margins": 2.7152676582336426, + "rewards/rejected": -2.771296739578247, + "step": 10334 + }, + { + "epoch": 0.6, + "learning_rate": 3.618733459981988e-08, + "logits/chosen": -1.980050802230835, + "logits/rejected": -1.9468737840652466, + "logps/chosen": -155.7179412841797, + "logps/rejected": -321.65283203125, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0026992559432983, + "rewards/margins": 0.8520614504814148, + "rewards/rejected": 0.15063782036304474, + "step": 10335 + }, + { + "epoch": 0.6, + "learning_rate": 3.617827755838789e-08, + "logits/chosen": -2.0340447425842285, + "logits/rejected": -2.035221815109253, + "logps/chosen": -0.010421107523143291, + "logps/rejected": -328.6388854980469, + "loss": 0.331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001012252178043127, + "rewards/margins": 6.867896556854248, + "rewards/rejected": -6.868908882141113, + "step": 10336 + }, + { + "epoch": 0.6, + "learning_rate": 3.6169221007971115e-08, + "logits/chosen": -1.9339532852172852, + "logits/rejected": -1.9276150465011597, + "logps/chosen": -22.1107120513916, + "logps/rejected": -205.49078369140625, + "loss": 0.2276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8009946942329407, + "rewards/margins": 2.2054288387298584, + "rewards/rejected": -1.4044342041015625, + "step": 10337 + }, + { + "epoch": 0.6, + "learning_rate": 3.6160164948891234e-08, + "logits/chosen": -1.9408844709396362, + "logits/rejected": -1.9455277919769287, + "logps/chosen": -0.10353077948093414, + "logps/rejected": -204.06536865234375, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0016432770062237978, + "rewards/margins": 3.082437038421631, + "rewards/rejected": -3.080793857574463, + "step": 10338 + }, + { + "epoch": 0.6, + "learning_rate": 3.6151109381470025e-08, + "logits/chosen": -2.077385187149048, + "logits/rejected": -2.0589709281921387, + "logps/chosen": -0.006056877318769693, + "logps/rejected": -210.13751220703125, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005577869014814496, + "rewards/margins": 3.6866796016693115, + "rewards/rejected": -3.687237501144409, + "step": 10339 + }, + { + "epoch": 0.6, + "learning_rate": 3.6142054306029124e-08, + "logits/chosen": -2.074516534805298, + "logits/rejected": -2.058318614959717, + "logps/chosen": -220.98443603515625, + "logps/rejected": -292.947021484375, + "loss": 0.1508, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.01957106590271, + "rewards/margins": 1.5501099824905396, + "rewards/rejected": 0.469461053609848, + "step": 10340 + }, + { + "epoch": 0.6, + "learning_rate": 3.6132999722890256e-08, + "logits/chosen": -2.1036276817321777, + "logits/rejected": -2.1014907360076904, + "logps/chosen": -3.8308446407318115, + "logps/rejected": -173.31149291992188, + "loss": 0.5231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05805337429046631, + "rewards/margins": 0.7164518237113953, + "rewards/rejected": -0.658398449420929, + "step": 10341 + }, + { + "epoch": 0.6, + "learning_rate": 3.6123945632375064e-08, + "logits/chosen": -2.184849500656128, + "logits/rejected": -2.18232798576355, + "logps/chosen": -45.08854675292969, + "logps/rejected": -288.724609375, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6367382407188416, + "rewards/margins": 5.297364711761475, + "rewards/rejected": -4.660626411437988, + "step": 10342 + }, + { + "epoch": 0.6, + "learning_rate": 3.61148920348052e-08, + "logits/chosen": -1.9391605854034424, + "logits/rejected": -1.940556526184082, + "logps/chosen": -8.018254280090332, + "logps/rejected": -105.04714965820312, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3002574145793915, + "rewards/margins": 1.9245586395263672, + "rewards/rejected": -1.6243011951446533, + "step": 10343 + }, + { + "epoch": 0.6, + "learning_rate": 3.610583893050229e-08, + "logits/chosen": -1.9712231159210205, + "logits/rejected": -2.0059523582458496, + "logps/chosen": -195.6573486328125, + "logps/rejected": -402.7364807128906, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6821014881134033, + "rewards/margins": 3.28360915184021, + "rewards/rejected": -0.6015076041221619, + "step": 10344 + }, + { + "epoch": 0.6, + "learning_rate": 3.6096786319787946e-08, + "logits/chosen": -1.9534578323364258, + "logits/rejected": -1.9241485595703125, + "logps/chosen": -319.1060485839844, + "logps/rejected": -528.5215454101562, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.969412326812744, + "rewards/margins": 3.8085358142852783, + "rewards/rejected": -0.839123547077179, + "step": 10345 + }, + { + "epoch": 0.6, + "learning_rate": 3.6087734202983756e-08, + "logits/chosen": -1.8681018352508545, + "logits/rejected": -1.8588531017303467, + "logps/chosen": -43.95297622680664, + "logps/rejected": -252.41409301757812, + "loss": 0.2623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2588123381137848, + "rewards/margins": 2.405125856399536, + "rewards/rejected": -2.146313428878784, + "step": 10346 + }, + { + "epoch": 0.6, + "learning_rate": 3.607868258041131e-08, + "logits/chosen": -2.1686782836914062, + "logits/rejected": -2.159332275390625, + "logps/chosen": -61.53504943847656, + "logps/rejected": -191.67108154296875, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7749878168106079, + "rewards/margins": 2.436598300933838, + "rewards/rejected": -1.66161048412323, + "step": 10347 + }, + { + "epoch": 0.6, + "learning_rate": 3.606963145239214e-08, + "logits/chosen": -1.8823410272598267, + "logits/rejected": -1.886423110961914, + "logps/chosen": -225.60227966308594, + "logps/rejected": -261.2563781738281, + "loss": 0.1967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9838837385177612, + "rewards/margins": 0.8478943109512329, + "rewards/rejected": 1.1359894275665283, + "step": 10348 + }, + { + "epoch": 0.6, + "learning_rate": 3.606058081924782e-08, + "logits/chosen": -1.763891577720642, + "logits/rejected": -1.7623111009597778, + "logps/chosen": -8.967952728271484, + "logps/rejected": -104.44047546386719, + "loss": 0.391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.052797701209783554, + "rewards/margins": 2.0982773303985596, + "rewards/rejected": -2.0454795360565186, + "step": 10349 + }, + { + "epoch": 0.6, + "learning_rate": 3.6051530681299834e-08, + "logits/chosen": -1.7971549034118652, + "logits/rejected": -1.7890825271606445, + "logps/chosen": -198.7515869140625, + "logps/rejected": -407.9437561035156, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.167804002761841, + "rewards/margins": 2.733102560043335, + "rewards/rejected": -0.5652984976768494, + "step": 10350 + }, + { + "epoch": 0.6, + "learning_rate": 3.6042481038869724e-08, + "logits/chosen": -1.873268961906433, + "logits/rejected": -1.8647416830062866, + "logps/chosen": -50.75381851196289, + "logps/rejected": -168.44818115234375, + "loss": 0.2465, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1905399560928345, + "rewards/margins": 1.1852115392684937, + "rewards/rejected": 0.005328369326889515, + "step": 10351 + }, + { + "epoch": 0.6, + "learning_rate": 3.603343189227895e-08, + "logits/chosen": -1.8740984201431274, + "logits/rejected": -1.9423305988311768, + "logps/chosen": -169.69528198242188, + "logps/rejected": -236.3227081298828, + "loss": 0.6703, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5560547113418579, + "rewards/margins": -0.8517853021621704, + "rewards/rejected": 1.4078400135040283, + "step": 10352 + }, + { + "epoch": 0.6, + "learning_rate": 3.6024383241849e-08, + "logits/chosen": -1.7944858074188232, + "logits/rejected": -1.7907241582870483, + "logps/chosen": -164.86700439453125, + "logps/rejected": -311.68359375, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.249220371246338, + "rewards/margins": 2.3950729370117188, + "rewards/rejected": -0.14585267007350922, + "step": 10353 + }, + { + "epoch": 0.6, + "learning_rate": 3.6015335087901313e-08, + "logits/chosen": -2.172935962677002, + "logits/rejected": -2.1644127368927, + "logps/chosen": -0.00090252939844504, + "logps/rejected": -241.47219848632812, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.99230741802603e-05, + "rewards/margins": 3.7734568119049072, + "rewards/rejected": -3.7734267711639404, + "step": 10354 + }, + { + "epoch": 0.6, + "learning_rate": 3.600628743075732e-08, + "logits/chosen": -1.9632091522216797, + "logits/rejected": -1.9674173593521118, + "logps/chosen": -24.2519474029541, + "logps/rejected": -171.11624145507812, + "loss": 0.6659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0561029426753521, + "rewards/margins": 0.08249568939208984, + "rewards/rejected": -0.13859863579273224, + "step": 10355 + }, + { + "epoch": 0.6, + "learning_rate": 3.599724027073847e-08, + "logits/chosen": -1.921866774559021, + "logits/rejected": -1.8983596563339233, + "logps/chosen": -156.44869995117188, + "logps/rejected": -326.37005615234375, + "loss": 0.1847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6210845708847046, + "rewards/margins": 1.5741851329803467, + "rewards/rejected": 0.04689941555261612, + "step": 10356 + }, + { + "epoch": 0.6, + "learning_rate": 3.598819360816612e-08, + "logits/chosen": -2.064685106277466, + "logits/rejected": -2.0602023601531982, + "logps/chosen": -86.02351379394531, + "logps/rejected": -258.5919189453125, + "loss": 0.4262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4836440980434418, + "rewards/margins": 3.835196018218994, + "rewards/rejected": -4.318840026855469, + "step": 10357 + }, + { + "epoch": 0.6, + "learning_rate": 3.59791474433617e-08, + "logits/chosen": -2.0741512775421143, + "logits/rejected": -2.0633504390716553, + "logps/chosen": -8.690079994266853e-05, + "logps/rejected": -184.391845703125, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.767746020206687e-07, + "rewards/margins": 4.956571578979492, + "rewards/rejected": -4.95657205581665, + "step": 10358 + }, + { + "epoch": 0.6, + "learning_rate": 3.5970101776646514e-08, + "logits/chosen": -2.0218355655670166, + "logits/rejected": -2.017141103744507, + "logps/chosen": -18.164981842041016, + "logps/rejected": -322.0279235839844, + "loss": 0.2612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25265198945999146, + "rewards/margins": 5.744263172149658, + "rewards/rejected": -5.491611003875732, + "step": 10359 + }, + { + "epoch": 0.6, + "learning_rate": 3.596105660834198e-08, + "logits/chosen": -1.9918203353881836, + "logits/rejected": -1.9807758331298828, + "logps/chosen": -4.468827724456787, + "logps/rejected": -195.98126220703125, + "loss": 0.3873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019509268924593925, + "rewards/margins": 1.5004862546920776, + "rewards/rejected": -1.480976939201355, + "step": 10360 + }, + { + "epoch": 0.6, + "learning_rate": 3.5952011938769345e-08, + "logits/chosen": -1.8720868825912476, + "logits/rejected": -1.8625839948654175, + "logps/chosen": -18.950559616088867, + "logps/rejected": -243.208740234375, + "loss": 0.3141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033649445977061987, + "rewards/margins": 4.111403942108154, + "rewards/rejected": -4.114768981933594, + "step": 10361 + }, + { + "epoch": 0.6, + "learning_rate": 3.594296776824999e-08, + "logits/chosen": -1.7955055236816406, + "logits/rejected": -1.7933262586593628, + "logps/chosen": -136.39735412597656, + "logps/rejected": -213.87545776367188, + "loss": 0.6238, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3691467344760895, + "rewards/margins": -0.07665404677391052, + "rewards/rejected": 0.44580078125, + "step": 10362 + }, + { + "epoch": 0.6, + "learning_rate": 3.593392409710516e-08, + "logits/chosen": -1.954500436782837, + "logits/rejected": -1.9600900411605835, + "logps/chosen": -4.801462173461914, + "logps/rejected": -53.5733528137207, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1305646002292633, + "rewards/margins": 2.0252022743225098, + "rewards/rejected": -1.8946377038955688, + "step": 10363 + }, + { + "epoch": 0.6, + "learning_rate": 3.5924880925656174e-08, + "logits/chosen": -1.8107428550720215, + "logits/rejected": -1.801658034324646, + "logps/chosen": -45.00013732910156, + "logps/rejected": -259.17254638671875, + "loss": 0.2072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5564178824424744, + "rewards/margins": 1.8371963500976562, + "rewards/rejected": -1.2807785272598267, + "step": 10364 + }, + { + "epoch": 0.6, + "learning_rate": 3.591583825422425e-08, + "logits/chosen": -1.6883392333984375, + "logits/rejected": -1.6747509241104126, + "logps/chosen": -227.20620727539062, + "logps/rejected": -358.1966552734375, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.441336154937744, + "rewards/margins": 2.901815891265869, + "rewards/rejected": 0.539520263671875, + "step": 10365 + }, + { + "epoch": 0.6, + "learning_rate": 3.590679608313065e-08, + "logits/chosen": -1.789853811264038, + "logits/rejected": -1.793258786201477, + "logps/chosen": -3.814658703049645e-05, + "logps/rejected": -223.1658477783203, + "loss": 0.425, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.960105227131862e-06, + "rewards/margins": 1.7827696800231934, + "rewards/rejected": -1.7827637195587158, + "step": 10366 + }, + { + "epoch": 0.6, + "learning_rate": 3.589775441269659e-08, + "logits/chosen": -1.946423053741455, + "logits/rejected": -1.9650717973709106, + "logps/chosen": -165.64404296875, + "logps/rejected": -246.9303741455078, + "loss": 0.0767, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4852646589279175, + "rewards/margins": 2.2180237770080566, + "rewards/rejected": -0.7327591180801392, + "step": 10367 + }, + { + "epoch": 0.6, + "learning_rate": 3.5888713243243274e-08, + "logits/chosen": -1.941763997077942, + "logits/rejected": -1.9454652070999146, + "logps/chosen": -11.686756134033203, + "logps/rejected": -94.24165344238281, + "loss": 0.6585, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06352481991052628, + "rewards/margins": -0.1621130108833313, + "rewards/rejected": 0.22563782334327698, + "step": 10368 + }, + { + "epoch": 0.6, + "learning_rate": 3.587967257509189e-08, + "logits/chosen": -1.977168083190918, + "logits/rejected": -1.924514651298523, + "logps/chosen": -176.5220184326172, + "logps/rejected": -276.05645751953125, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2729384899139404, + "rewards/margins": 2.0920729637145996, + "rewards/rejected": 0.18086548149585724, + "step": 10369 + }, + { + "epoch": 0.6, + "learning_rate": 3.587063240856361e-08, + "logits/chosen": -1.8193715810775757, + "logits/rejected": -1.7912918329238892, + "logps/chosen": -205.26605224609375, + "logps/rejected": -383.0990905761719, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6504485607147217, + "rewards/margins": 2.790841579437256, + "rewards/rejected": -0.14039306342601776, + "step": 10370 + }, + { + "epoch": 0.6, + "learning_rate": 3.586159274397957e-08, + "logits/chosen": -1.9705255031585693, + "logits/rejected": -1.9721230268478394, + "logps/chosen": -39.02354431152344, + "logps/rejected": -185.44003295898438, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2704235017299652, + "rewards/margins": 2.018406391143799, + "rewards/rejected": -1.7479828596115112, + "step": 10371 + }, + { + "epoch": 0.6, + "learning_rate": 3.585255358166093e-08, + "logits/chosen": -2.0165627002716064, + "logits/rejected": -2.0155749320983887, + "logps/chosen": -0.7683067917823792, + "logps/rejected": -256.1105651855469, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012790406122803688, + "rewards/margins": 6.942956447601318, + "rewards/rejected": -6.955746650695801, + "step": 10372 + }, + { + "epoch": 0.6, + "learning_rate": 3.5843514921928776e-08, + "logits/chosen": -1.950536847114563, + "logits/rejected": -1.952480673789978, + "logps/chosen": -48.1375732421875, + "logps/rejected": -193.2515869140625, + "loss": 0.3138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2223655730485916, + "rewards/margins": 2.1474432945251465, + "rewards/rejected": -1.9250777959823608, + "step": 10373 + }, + { + "epoch": 0.6, + "learning_rate": 3.583447676510423e-08, + "logits/chosen": -1.89821457862854, + "logits/rejected": -1.9024314880371094, + "logps/chosen": -15.61752700805664, + "logps/rejected": -113.43741607666016, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49783822894096375, + "rewards/margins": 1.8849903345108032, + "rewards/rejected": -1.387152075767517, + "step": 10374 + }, + { + "epoch": 0.6, + "learning_rate": 3.582543911150835e-08, + "logits/chosen": -1.9504079818725586, + "logits/rejected": -1.9168566465377808, + "logps/chosen": -242.5032958984375, + "logps/rejected": -526.1434326171875, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8852447271347046, + "rewards/margins": 3.9616546630859375, + "rewards/rejected": -2.0764100551605225, + "step": 10375 + }, + { + "epoch": 0.6, + "learning_rate": 3.581640196146219e-08, + "logits/chosen": -2.0816454887390137, + "logits/rejected": -2.0616917610168457, + "logps/chosen": -153.04637145996094, + "logps/rejected": -339.2259521484375, + "loss": 0.2074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.399287462234497, + "rewards/margins": 0.9952621459960938, + "rewards/rejected": 0.40402528643608093, + "step": 10376 + }, + { + "epoch": 0.6, + "learning_rate": 3.580736531528685e-08, + "logits/chosen": -2.0225043296813965, + "logits/rejected": -1.9404774904251099, + "logps/chosen": -239.28785705566406, + "logps/rejected": -476.50689697265625, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2632721662521362, + "rewards/margins": 0.8481354713439941, + "rewards/rejected": 0.4151367247104645, + "step": 10377 + }, + { + "epoch": 0.6, + "learning_rate": 3.579832917330329e-08, + "logits/chosen": -1.789315104484558, + "logits/rejected": -1.763953447341919, + "logps/chosen": -132.83900451660156, + "logps/rejected": -218.69955444335938, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8473861813545227, + "rewards/margins": 0.8414108157157898, + "rewards/rejected": 0.0059753418900072575, + "step": 10378 + }, + { + "epoch": 0.6, + "learning_rate": 3.578929353583257e-08, + "logits/chosen": -2.010436773300171, + "logits/rejected": -2.001232624053955, + "logps/chosen": -49.1549072265625, + "logps/rejected": -258.9853820800781, + "loss": 0.2719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5998870730400085, + "rewards/margins": 1.8687286376953125, + "rewards/rejected": -1.2688416242599487, + "step": 10379 + }, + { + "epoch": 0.6, + "learning_rate": 3.578025840319563e-08, + "logits/chosen": -1.9289674758911133, + "logits/rejected": -1.9289485216140747, + "logps/chosen": -227.50767517089844, + "logps/rejected": -433.8606872558594, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8928207159042358, + "rewards/margins": 3.523982286453247, + "rewards/rejected": -1.6311615705490112, + "step": 10380 + }, + { + "epoch": 0.6, + "learning_rate": 3.5771223775713486e-08, + "logits/chosen": -1.7218290567398071, + "logits/rejected": -1.7236251831054688, + "logps/chosen": -118.79177856445312, + "logps/rejected": -255.80703735351562, + "loss": 0.2124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.700177788734436, + "rewards/margins": 2.9104394912719727, + "rewards/rejected": -2.210261583328247, + "step": 10381 + }, + { + "epoch": 0.6, + "learning_rate": 3.576218965370706e-08, + "logits/chosen": -1.8346577882766724, + "logits/rejected": -1.8381012678146362, + "logps/chosen": -101.58468627929688, + "logps/rejected": -213.16946411132812, + "loss": 0.358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6550659537315369, + "rewards/margins": 0.9700287580490112, + "rewards/rejected": -0.314962774515152, + "step": 10382 + }, + { + "epoch": 0.6, + "learning_rate": 3.5753156037497314e-08, + "logits/chosen": -1.9587364196777344, + "logits/rejected": -1.9589698314666748, + "logps/chosen": -4.171901226043701, + "logps/rejected": -128.98001098632812, + "loss": 0.4965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13361290097236633, + "rewards/margins": 1.6533607244491577, + "rewards/rejected": -1.7869735956192017, + "step": 10383 + }, + { + "epoch": 0.6, + "learning_rate": 3.574412292740515e-08, + "logits/chosen": -1.9296576976776123, + "logits/rejected": -1.8770833015441895, + "logps/chosen": -239.58157348632812, + "logps/rejected": -633.2167358398438, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8592987060546875, + "rewards/margins": 5.502267360687256, + "rewards/rejected": -6.361566066741943, + "step": 10384 + }, + { + "epoch": 0.6, + "learning_rate": 3.5735090323751475e-08, + "logits/chosen": -1.9173970222473145, + "logits/rejected": -1.9603160619735718, + "logps/chosen": -139.60528564453125, + "logps/rejected": -302.09295654296875, + "loss": 0.1107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6962188482284546, + "rewards/margins": 2.076821804046631, + "rewards/rejected": -0.38060304522514343, + "step": 10385 + }, + { + "epoch": 0.6, + "learning_rate": 3.5726058226857165e-08, + "logits/chosen": -2.033569097518921, + "logits/rejected": -2.0296549797058105, + "logps/chosen": -18.605520248413086, + "logps/rejected": -79.43312072753906, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38064250349998474, + "rewards/margins": 0.98213791847229, + "rewards/rejected": -0.6014953851699829, + "step": 10386 + }, + { + "epoch": 0.6, + "learning_rate": 3.5717026637043093e-08, + "logits/chosen": -1.8067290782928467, + "logits/rejected": -1.7806757688522339, + "logps/chosen": -219.8759002685547, + "logps/rejected": -278.35382080078125, + "loss": 0.2221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.829885959625244, + "rewards/margins": 0.7264955043792725, + "rewards/rejected": 2.1033904552459717, + "step": 10387 + }, + { + "epoch": 0.6, + "learning_rate": 3.570799555463009e-08, + "logits/chosen": -1.8475005626678467, + "logits/rejected": -1.8581109046936035, + "logps/chosen": -43.90084457397461, + "logps/rejected": -340.0418701171875, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2785099148750305, + "rewards/margins": 6.9419074058532715, + "rewards/rejected": -6.663397312164307, + "step": 10388 + }, + { + "epoch": 0.6, + "learning_rate": 3.569896497993901e-08, + "logits/chosen": -2.00539231300354, + "logits/rejected": -1.9924031496047974, + "logps/chosen": -76.73930358886719, + "logps/rejected": -159.56781005859375, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36043092608451843, + "rewards/margins": 1.7726150751113892, + "rewards/rejected": -1.4121841192245483, + "step": 10389 + }, + { + "epoch": 0.6, + "learning_rate": 3.568993491329064e-08, + "logits/chosen": -1.91161048412323, + "logits/rejected": -1.894760012626648, + "logps/chosen": -166.7920684814453, + "logps/rejected": -359.6015625, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1747682094573975, + "rewards/margins": 2.925079584121704, + "rewards/rejected": -0.7503113150596619, + "step": 10390 + }, + { + "epoch": 0.6, + "learning_rate": 3.568090535500579e-08, + "logits/chosen": -2.0472702980041504, + "logits/rejected": -2.027944564819336, + "logps/chosen": -4.9470872909296304e-05, + "logps/rejected": -182.66099548339844, + "loss": 0.3766, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.264823706354946e-07, + "rewards/margins": 2.578315496444702, + "rewards/rejected": -2.5783157348632812, + "step": 10391 + }, + { + "epoch": 0.6, + "learning_rate": 3.5671876305405204e-08, + "logits/chosen": -1.9711148738861084, + "logits/rejected": -1.9313223361968994, + "logps/chosen": -141.09640502929688, + "logps/rejected": -204.91453552246094, + "loss": 0.1358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7302826642990112, + "rewards/margins": 2.037065267562866, + "rewards/rejected": -0.3067825436592102, + "step": 10392 + }, + { + "epoch": 0.6, + "learning_rate": 3.566284776480967e-08, + "logits/chosen": -1.9224785566329956, + "logits/rejected": -1.9142963886260986, + "logps/chosen": -4.36300688306801e-05, + "logps/rejected": -100.69947814941406, + "loss": 0.5415, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.815922238776693e-06, + "rewards/margins": 0.7212114930152893, + "rewards/rejected": -0.7212066650390625, + "step": 10393 + }, + { + "epoch": 0.6, + "learning_rate": 3.565381973353991e-08, + "logits/chosen": -1.8258099555969238, + "logits/rejected": -1.804614782333374, + "logps/chosen": -168.72164916992188, + "logps/rejected": -283.9542541503906, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7913055419921875, + "rewards/margins": 1.916540503501892, + "rewards/rejected": 0.8747650384902954, + "step": 10394 + }, + { + "epoch": 0.6, + "learning_rate": 3.564479221191664e-08, + "logits/chosen": -1.6030340194702148, + "logits/rejected": -1.6114803552627563, + "logps/chosen": -0.010390495881438255, + "logps/rejected": -371.4732971191406, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005121746216900647, + "rewards/margins": 5.70297908782959, + "rewards/rejected": -5.7034912109375, + "step": 10395 + }, + { + "epoch": 0.6, + "learning_rate": 3.5635765200260595e-08, + "logits/chosen": -1.8856267929077148, + "logits/rejected": -1.884448528289795, + "logps/chosen": -174.61817932128906, + "logps/rejected": -292.47705078125, + "loss": 0.0692, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6649796962738037, + "rewards/margins": 2.0135819911956787, + "rewards/rejected": 0.651397705078125, + "step": 10396 + }, + { + "epoch": 0.61, + "learning_rate": 3.56267386988924e-08, + "logits/chosen": -1.8107490539550781, + "logits/rejected": -1.8044116497039795, + "logps/chosen": -7.248273849487305, + "logps/rejected": -72.76971435546875, + "loss": 0.7169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1566542237997055, + "rewards/margins": 0.09231428802013397, + "rewards/rejected": -0.24896851181983948, + "step": 10397 + }, + { + "epoch": 0.61, + "learning_rate": 3.561771270813279e-08, + "logits/chosen": -1.869032621383667, + "logits/rejected": -1.850650668144226, + "logps/chosen": -249.51046752929688, + "logps/rejected": -421.5291748046875, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.312518358230591, + "rewards/margins": 3.598248243331909, + "rewards/rejected": -0.2857299745082855, + "step": 10398 + }, + { + "epoch": 0.61, + "learning_rate": 3.560868722830234e-08, + "logits/chosen": -1.8180444240570068, + "logits/rejected": -1.8315569162368774, + "logps/chosen": -211.8599395751953, + "logps/rejected": -615.2823486328125, + "loss": 0.0685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9134628176689148, + "rewards/margins": 12.783500671386719, + "rewards/rejected": -11.870038032531738, + "step": 10399 + }, + { + "epoch": 0.61, + "learning_rate": 3.559966225972175e-08, + "logits/chosen": -1.9953285455703735, + "logits/rejected": -1.9859309196472168, + "logps/chosen": -166.15878295898438, + "logps/rejected": -303.01519775390625, + "loss": 0.0934, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6174408197402954, + "rewards/margins": 2.0829436779022217, + "rewards/rejected": -0.46550294756889343, + "step": 10400 + }, + { + "epoch": 0.61, + "learning_rate": 3.559063780271156e-08, + "logits/chosen": -1.9573789834976196, + "logits/rejected": -1.9463895559310913, + "logps/chosen": -168.44644165039062, + "logps/rejected": -544.1474609375, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4250519275665283, + "rewards/margins": 9.598044395446777, + "rewards/rejected": -7.17299222946167, + "step": 10401 + }, + { + "epoch": 0.61, + "learning_rate": 3.5581613857592424e-08, + "logits/chosen": -1.790779709815979, + "logits/rejected": -1.7875512838363647, + "logps/chosen": -188.22201538085938, + "logps/rejected": -471.99981689453125, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.229296922683716, + "rewards/margins": 3.5787901878356934, + "rewards/rejected": -1.349493384361267, + "step": 10402 + }, + { + "epoch": 0.61, + "learning_rate": 3.557259042468488e-08, + "logits/chosen": -1.8244906663894653, + "logits/rejected": -1.8194959163665771, + "logps/chosen": -247.2694854736328, + "logps/rejected": -407.85308837890625, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2429091930389404, + "rewards/margins": 2.9704298973083496, + "rewards/rejected": -0.727520763874054, + "step": 10403 + }, + { + "epoch": 0.61, + "learning_rate": 3.556356750430951e-08, + "logits/chosen": -1.9288522005081177, + "logits/rejected": -1.9260425567626953, + "logps/chosen": -7.927260594442487e-05, + "logps/rejected": -205.2901153564453, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8595434312373982e-06, + "rewards/margins": 3.3473575115203857, + "rewards/rejected": -3.347355604171753, + "step": 10404 + }, + { + "epoch": 0.61, + "learning_rate": 3.555454509678682e-08, + "logits/chosen": -1.6757919788360596, + "logits/rejected": -1.668982744216919, + "logps/chosen": -7.9591264724731445, + "logps/rejected": -169.48385620117188, + "loss": 0.471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018491173163056374, + "rewards/margins": 0.7312444448471069, + "rewards/rejected": -0.7127532958984375, + "step": 10405 + }, + { + "epoch": 0.61, + "learning_rate": 3.5545523202437363e-08, + "logits/chosen": -1.9945708513259888, + "logits/rejected": -1.9918237924575806, + "logps/chosen": -2.351980209350586, + "logps/rejected": -136.71844482421875, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033231187611818314, + "rewards/margins": 2.4560365676879883, + "rewards/rejected": -2.4892678260803223, + "step": 10406 + }, + { + "epoch": 0.61, + "learning_rate": 3.5536501821581624e-08, + "logits/chosen": -2.010789155960083, + "logits/rejected": -2.019256830215454, + "logps/chosen": -74.11634826660156, + "logps/rejected": -258.54852294921875, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2319092750549316, + "rewards/margins": 4.879467964172363, + "rewards/rejected": -2.6475586891174316, + "step": 10407 + }, + { + "epoch": 0.61, + "learning_rate": 3.552748095454009e-08, + "logits/chosen": -1.9768015146255493, + "logits/rejected": -1.9740551710128784, + "logps/chosen": -167.51620483398438, + "logps/rejected": -347.4438171386719, + "loss": 1.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4582122564315796, + "rewards/margins": 0.12418222427368164, + "rewards/rejected": -1.5823944807052612, + "step": 10408 + }, + { + "epoch": 0.61, + "learning_rate": 3.5518460601633224e-08, + "logits/chosen": -1.8889198303222656, + "logits/rejected": -1.884735107421875, + "logps/chosen": -186.88531494140625, + "logps/rejected": -323.580810546875, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4322250485420227, + "rewards/margins": 0.7540176510810852, + "rewards/rejected": -0.3217926025390625, + "step": 10409 + }, + { + "epoch": 0.61, + "learning_rate": 3.5509440763181475e-08, + "logits/chosen": -2.0275118350982666, + "logits/rejected": -2.035637617111206, + "logps/chosen": -223.99453735351562, + "logps/rejected": -374.93707275390625, + "loss": 0.4651, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1374664306640625, + "rewards/margins": -0.08783876895904541, + "rewards/rejected": 1.225305199623108, + "step": 10410 + }, + { + "epoch": 0.61, + "learning_rate": 3.5500421439505265e-08, + "logits/chosen": -2.06036639213562, + "logits/rejected": -2.0523788928985596, + "logps/chosen": -44.09760284423828, + "logps/rejected": -107.88616943359375, + "loss": 1.2877, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.34666749835014343, + "rewards/margins": -1.6326645612716675, + "rewards/rejected": 1.2859970331192017, + "step": 10411 + }, + { + "epoch": 0.61, + "learning_rate": 3.5491402630925017e-08, + "logits/chosen": -1.914379596710205, + "logits/rejected": -1.913588523864746, + "logps/chosen": -35.126312255859375, + "logps/rejected": -143.80899047851562, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3836860656738281, + "rewards/margins": 1.0379493236541748, + "rewards/rejected": -0.6542633175849915, + "step": 10412 + }, + { + "epoch": 0.61, + "learning_rate": 3.548238433776111e-08, + "logits/chosen": -1.7456587553024292, + "logits/rejected": -1.7454930543899536, + "logps/chosen": -12.879039764404297, + "logps/rejected": -173.71730041503906, + "loss": 0.4429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1203712448477745, + "rewards/margins": 1.862403154373169, + "rewards/rejected": -1.9827743768692017, + "step": 10413 + }, + { + "epoch": 0.61, + "learning_rate": 3.5473366560333934e-08, + "logits/chosen": -1.7723511457443237, + "logits/rejected": -1.763109564781189, + "logps/chosen": -6.905760765075684, + "logps/rejected": -75.76837158203125, + "loss": 0.4903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.296617329120636, + "rewards/margins": 0.5951379537582397, + "rewards/rejected": -0.29852065443992615, + "step": 10414 + }, + { + "epoch": 0.61, + "learning_rate": 3.546434929896383e-08, + "logits/chosen": -1.8995728492736816, + "logits/rejected": -1.8728293180465698, + "logps/chosen": -232.07115173339844, + "logps/rejected": -462.0660400390625, + "loss": 0.084, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3825058937072754, + "rewards/margins": 1.7951645851135254, + "rewards/rejected": 1.58734130859375, + "step": 10415 + }, + { + "epoch": 0.61, + "learning_rate": 3.545533255397112e-08, + "logits/chosen": -1.9933538436889648, + "logits/rejected": -1.9863349199295044, + "logps/chosen": -24.44431495666504, + "logps/rejected": -127.56878662109375, + "loss": 0.4508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13191433250904083, + "rewards/margins": 1.7063310146331787, + "rewards/rejected": -1.8382453918457031, + "step": 10416 + }, + { + "epoch": 0.61, + "learning_rate": 3.544631632567619e-08, + "logits/chosen": -1.8083895444869995, + "logits/rejected": -1.812587022781372, + "logps/chosen": -15.777178764343262, + "logps/rejected": -197.80630493164062, + "loss": 0.5525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1014382392168045, + "rewards/margins": 0.4442101716995239, + "rewards/rejected": -0.3427719175815582, + "step": 10417 + }, + { + "epoch": 0.61, + "learning_rate": 3.5437300614399244e-08, + "logits/chosen": -1.8796355724334717, + "logits/rejected": -1.8841553926467896, + "logps/chosen": -53.250511169433594, + "logps/rejected": -107.81553649902344, + "loss": 0.6466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11048813164234161, + "rewards/margins": 0.35114288330078125, + "rewards/rejected": -0.46163102984428406, + "step": 10418 + }, + { + "epoch": 0.61, + "learning_rate": 3.5428285420460656e-08, + "logits/chosen": -1.7446914911270142, + "logits/rejected": -1.7238401174545288, + "logps/chosen": -6.544434290844947e-05, + "logps/rejected": -273.85821533203125, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.425132141885115e-06, + "rewards/margins": 6.50014066696167, + "rewards/rejected": -6.500134468078613, + "step": 10419 + }, + { + "epoch": 0.61, + "learning_rate": 3.541927074418061e-08, + "logits/chosen": -1.89593505859375, + "logits/rejected": -1.8604156970977783, + "logps/chosen": -184.00257873535156, + "logps/rejected": -379.3074645996094, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9391586184501648, + "rewards/margins": 2.5359573364257812, + "rewards/rejected": -1.5967987775802612, + "step": 10420 + }, + { + "epoch": 0.61, + "learning_rate": 3.541025658587943e-08, + "logits/chosen": -1.7352865934371948, + "logits/rejected": -1.7410715818405151, + "logps/chosen": -192.1689910888672, + "logps/rejected": -475.4410400390625, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5689407587051392, + "rewards/margins": 5.3134660720825195, + "rewards/rejected": -3.744525194168091, + "step": 10421 + }, + { + "epoch": 0.61, + "learning_rate": 3.5401242945877263e-08, + "logits/chosen": -1.9378060102462769, + "logits/rejected": -1.9297478199005127, + "logps/chosen": -131.14418029785156, + "logps/rejected": -505.4218444824219, + "loss": 0.0834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3187881708145142, + "rewards/margins": 6.7164411544799805, + "rewards/rejected": -5.397653102874756, + "step": 10422 + }, + { + "epoch": 0.61, + "learning_rate": 3.539222982449438e-08, + "logits/chosen": -1.6687132120132446, + "logits/rejected": -1.6476327180862427, + "logps/chosen": -227.84176635742188, + "logps/rejected": -327.4356994628906, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8987793922424316, + "rewards/margins": 2.972360372543335, + "rewards/rejected": -0.07358093559741974, + "step": 10423 + }, + { + "epoch": 0.61, + "learning_rate": 3.538321722205094e-08, + "logits/chosen": -1.8260924816131592, + "logits/rejected": -1.84780752658844, + "logps/chosen": -272.17523193359375, + "logps/rejected": -405.1857604980469, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.853906273841858, + "rewards/margins": 2.6892426013946533, + "rewards/rejected": -0.8353363275527954, + "step": 10424 + }, + { + "epoch": 0.61, + "learning_rate": 3.537420513886713e-08, + "logits/chosen": -1.9211738109588623, + "logits/rejected": -1.9192522764205933, + "logps/chosen": -30.75115966796875, + "logps/rejected": -166.67498779296875, + "loss": 0.7756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6871143579483032, + "rewards/margins": 0.3654903173446655, + "rewards/rejected": -1.0526046752929688, + "step": 10425 + }, + { + "epoch": 0.61, + "learning_rate": 3.5365193575263086e-08, + "logits/chosen": -1.9253919124603271, + "logits/rejected": -1.9285330772399902, + "logps/chosen": -186.0603485107422, + "logps/rejected": -462.42974853515625, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.834136962890625, + "rewards/margins": 3.693225145339966, + "rewards/rejected": -2.859088182449341, + "step": 10426 + }, + { + "epoch": 0.61, + "learning_rate": 3.535618253155896e-08, + "logits/chosen": -1.8824782371520996, + "logits/rejected": -1.8704555034637451, + "logps/chosen": -5.27620267868042, + "logps/rejected": -254.6741485595703, + "loss": 0.2334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18935780227184296, + "rewards/margins": 4.351607799530029, + "rewards/rejected": -4.16225004196167, + "step": 10427 + }, + { + "epoch": 0.61, + "learning_rate": 3.534717200807485e-08, + "logits/chosen": -1.6096224784851074, + "logits/rejected": -1.6189762353897095, + "logps/chosen": -346.7034606933594, + "logps/rejected": -648.4177856445312, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8623840808868408, + "rewards/margins": 6.40350341796875, + "rewards/rejected": -4.541119575500488, + "step": 10428 + }, + { + "epoch": 0.61, + "learning_rate": 3.533816200513089e-08, + "logits/chosen": -1.819331169128418, + "logits/rejected": -1.8068115711212158, + "logps/chosen": -17.027267456054688, + "logps/rejected": -171.80487060546875, + "loss": 0.4686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2730855941772461, + "rewards/margins": 2.377990245819092, + "rewards/rejected": -2.651075839996338, + "step": 10429 + }, + { + "epoch": 0.61, + "learning_rate": 3.5329152523047114e-08, + "logits/chosen": -1.7838304042816162, + "logits/rejected": -1.7866237163543701, + "logps/chosen": -129.07791137695312, + "logps/rejected": -320.19586181640625, + "loss": 0.1539, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0058029890060425, + "rewards/margins": 3.4735426902770996, + "rewards/rejected": -2.4677398204803467, + "step": 10430 + }, + { + "epoch": 0.61, + "learning_rate": 3.532014356214361e-08, + "logits/chosen": -2.083214044570923, + "logits/rejected": -2.0737526416778564, + "logps/chosen": -92.89671325683594, + "logps/rejected": -289.04473876953125, + "loss": 0.225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33653566241264343, + "rewards/margins": 2.0098938941955566, + "rewards/rejected": -1.6733582019805908, + "step": 10431 + }, + { + "epoch": 0.61, + "learning_rate": 3.531113512274041e-08, + "logits/chosen": -2.0438456535339355, + "logits/rejected": -2.042815685272217, + "logps/chosen": -3.7252230644226074, + "logps/rejected": -172.29635620117188, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11585364490747452, + "rewards/margins": 2.4112071990966797, + "rewards/rejected": -2.295353651046753, + "step": 10432 + }, + { + "epoch": 0.61, + "learning_rate": 3.5302127205157545e-08, + "logits/chosen": -1.9400743246078491, + "logits/rejected": -1.9388203620910645, + "logps/chosen": -6.2536163330078125, + "logps/rejected": -79.56246948242188, + "loss": 0.6246, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2797531187534332, + "rewards/margins": -0.0562286376953125, + "rewards/rejected": 0.3359817564487457, + "step": 10433 + }, + { + "epoch": 0.61, + "learning_rate": 3.529311980971501e-08, + "logits/chosen": -1.818755030632019, + "logits/rejected": -1.807928442955017, + "logps/chosen": -349.51904296875, + "logps/rejected": -476.2879638671875, + "loss": 0.0634, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.795080542564392, + "rewards/margins": 2.390951633453369, + "rewards/rejected": -0.5958709716796875, + "step": 10434 + }, + { + "epoch": 0.61, + "learning_rate": 3.528411293673281e-08, + "logits/chosen": -1.7760355472564697, + "logits/rejected": -1.7659822702407837, + "logps/chosen": -0.007932178676128387, + "logps/rejected": -200.11752319335938, + "loss": 0.345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0026898470241576433, + "rewards/margins": 3.359797477722168, + "rewards/rejected": -3.357107639312744, + "step": 10435 + }, + { + "epoch": 0.61, + "learning_rate": 3.5275106586530887e-08, + "logits/chosen": -2.000108003616333, + "logits/rejected": -2.0009765625, + "logps/chosen": -63.7340087890625, + "logps/rejected": -145.7982177734375, + "loss": 0.4324, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5346580743789673, + "rewards/margins": -0.004578351974487305, + "rewards/rejected": 1.5392364263534546, + "step": 10436 + }, + { + "epoch": 0.61, + "learning_rate": 3.52661007594292e-08, + "logits/chosen": -2.1470494270324707, + "logits/rejected": -2.141767740249634, + "logps/chosen": -1.394746323057916e-05, + "logps/rejected": -112.49728393554688, + "loss": 0.3903, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3112931185332855e-07, + "rewards/margins": 2.5005338191986084, + "rewards/rejected": -2.5005340576171875, + "step": 10437 + }, + { + "epoch": 0.61, + "learning_rate": 3.5257095455747715e-08, + "logits/chosen": -1.7783241271972656, + "logits/rejected": -1.8303295373916626, + "logps/chosen": -197.52316284179688, + "logps/rejected": -378.8880615234375, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.176321506500244, + "rewards/margins": 4.666348457336426, + "rewards/rejected": -2.4900269508361816, + "step": 10438 + }, + { + "epoch": 0.61, + "learning_rate": 3.5248090675806284e-08, + "logits/chosen": -2.1215898990631104, + "logits/rejected": -2.1148035526275635, + "logps/chosen": -18.68869972229004, + "logps/rejected": -134.34527587890625, + "loss": 0.5919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1923469603061676, + "rewards/margins": 0.34398725628852844, + "rewards/rejected": -0.536334216594696, + "step": 10439 + }, + { + "epoch": 0.61, + "learning_rate": 3.523908641992486e-08, + "logits/chosen": -1.6655776500701904, + "logits/rejected": -1.6066251993179321, + "logps/chosen": -191.6302490234375, + "logps/rejected": -389.1138000488281, + "loss": 0.1924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.146479845046997, + "rewards/margins": 1.5588730573654175, + "rewards/rejected": -0.412393182516098, + "step": 10440 + }, + { + "epoch": 0.61, + "learning_rate": 3.523008268842326e-08, + "logits/chosen": -1.8275396823883057, + "logits/rejected": -1.8151347637176514, + "logps/chosen": -45.09679412841797, + "logps/rejected": -343.0665283203125, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1330135315656662, + "rewards/margins": 5.949652194976807, + "rewards/rejected": -5.816638469696045, + "step": 10441 + }, + { + "epoch": 0.61, + "learning_rate": 3.52210794816214e-08, + "logits/chosen": -1.9051036834716797, + "logits/rejected": -1.902242660522461, + "logps/chosen": -30.47467613220215, + "logps/rejected": -140.26258850097656, + "loss": 0.4871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2733469009399414, + "rewards/margins": 0.6504572033882141, + "rewards/rejected": -0.3771103024482727, + "step": 10442 + }, + { + "epoch": 0.61, + "learning_rate": 3.5212076799839066e-08, + "logits/chosen": -1.9062798023223877, + "logits/rejected": -1.912123203277588, + "logps/chosen": -160.7898712158203, + "logps/rejected": -267.77264404296875, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9953094720840454, + "rewards/margins": 1.3695099353790283, + "rewards/rejected": 0.6257995963096619, + "step": 10443 + }, + { + "epoch": 0.61, + "learning_rate": 3.5203074643396113e-08, + "logits/chosen": -2.103985071182251, + "logits/rejected": -2.0875608921051025, + "logps/chosen": -20.292001724243164, + "logps/rejected": -274.01910400390625, + "loss": 0.1654, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0893805027008057, + "rewards/margins": 2.4256811141967773, + "rewards/rejected": -1.3363007307052612, + "step": 10444 + }, + { + "epoch": 0.61, + "learning_rate": 3.519407301261231e-08, + "logits/chosen": -1.9366472959518433, + "logits/rejected": -1.9359089136123657, + "logps/chosen": -9.158757209777832, + "logps/rejected": -110.91435241699219, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.226568803191185, + "rewards/margins": 0.8617929816246033, + "rewards/rejected": -0.6352241635322571, + "step": 10445 + }, + { + "epoch": 0.61, + "learning_rate": 3.518507190780748e-08, + "logits/chosen": -1.7918496131896973, + "logits/rejected": -1.7754614353179932, + "logps/chosen": -211.9114990234375, + "logps/rejected": -287.4007873535156, + "loss": 0.2308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5192794799804688, + "rewards/margins": 1.148738145828247, + "rewards/rejected": 0.37054139375686646, + "step": 10446 + }, + { + "epoch": 0.61, + "learning_rate": 3.5176071329301346e-08, + "logits/chosen": -1.738474726676941, + "logits/rejected": -1.7643638849258423, + "logps/chosen": -179.4429931640625, + "logps/rejected": -174.1560516357422, + "loss": 0.4026, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.1423981189727783, + "rewards/margins": -0.04136049747467041, + "rewards/rejected": 1.1837586164474487, + "step": 10447 + }, + { + "epoch": 0.61, + "learning_rate": 3.5167071277413686e-08, + "logits/chosen": -2.027254104614258, + "logits/rejected": -2.0172336101531982, + "logps/chosen": -13.210792541503906, + "logps/rejected": -126.44483184814453, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2350522130727768, + "rewards/margins": 1.4230595827102661, + "rewards/rejected": -1.1880073547363281, + "step": 10448 + }, + { + "epoch": 0.61, + "learning_rate": 3.51580717524642e-08, + "logits/chosen": -1.943722128868103, + "logits/rejected": -1.939794659614563, + "logps/chosen": -23.63907814025879, + "logps/rejected": -186.01443481445312, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2424451857805252, + "rewards/margins": 3.6465864181518555, + "rewards/rejected": -3.4041411876678467, + "step": 10449 + }, + { + "epoch": 0.61, + "learning_rate": 3.514907275477261e-08, + "logits/chosen": -1.9358805418014526, + "logits/rejected": -1.9336833953857422, + "logps/chosen": -8.873085021972656, + "logps/rejected": -221.03872680664062, + "loss": 0.3972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1229001060128212, + "rewards/margins": 1.401763677597046, + "rewards/rejected": -1.278863549232483, + "step": 10450 + }, + { + "epoch": 0.61, + "learning_rate": 3.51400742846586e-08, + "logits/chosen": -1.81658136844635, + "logits/rejected": -1.833922266960144, + "logps/chosen": -248.7161865234375, + "logps/rejected": -446.3631591796875, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7827820181846619, + "rewards/margins": 7.851898193359375, + "rewards/rejected": -7.069116115570068, + "step": 10451 + }, + { + "epoch": 0.61, + "learning_rate": 3.513107634244183e-08, + "logits/chosen": -1.6856626272201538, + "logits/rejected": -1.6532264947891235, + "logps/chosen": -264.75396728515625, + "logps/rejected": -454.2325744628906, + "loss": 0.0701, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1628479957580566, + "rewards/margins": 2.7908754348754883, + "rewards/rejected": -0.6280273795127869, + "step": 10452 + }, + { + "epoch": 0.61, + "learning_rate": 3.5122078928441966e-08, + "logits/chosen": -1.7832727432250977, + "logits/rejected": -1.7791869640350342, + "logps/chosen": -330.36981201171875, + "logps/rejected": -519.8196411132812, + "loss": 0.2007, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.064953565597534, + "rewards/margins": 0.9214293956756592, + "rewards/rejected": 1.143524169921875, + "step": 10453 + }, + { + "epoch": 0.61, + "learning_rate": 3.5113082042978635e-08, + "logits/chosen": -1.8609553575515747, + "logits/rejected": -1.8575764894485474, + "logps/chosen": -20.398569107055664, + "logps/rejected": -111.19975280761719, + "loss": 0.3827, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3612024486064911, + "rewards/margins": 1.253191590309143, + "rewards/rejected": -0.8919891715049744, + "step": 10454 + }, + { + "epoch": 0.61, + "learning_rate": 3.510408568637144e-08, + "logits/chosen": -1.920332670211792, + "logits/rejected": -1.9168078899383545, + "logps/chosen": -18.08565330505371, + "logps/rejected": -233.93194580078125, + "loss": 0.1943, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6443372964859009, + "rewards/margins": 3.3239941596984863, + "rewards/rejected": -2.679656982421875, + "step": 10455 + }, + { + "epoch": 0.61, + "learning_rate": 3.5095089858939983e-08, + "logits/chosen": -1.851313591003418, + "logits/rejected": -1.8170818090438843, + "logps/chosen": -146.5489959716797, + "logps/rejected": -259.58929443359375, + "loss": 0.1435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9019485712051392, + "rewards/margins": 1.5344345569610596, + "rewards/rejected": 0.367514044046402, + "step": 10456 + }, + { + "epoch": 0.61, + "learning_rate": 3.508609456100387e-08, + "logits/chosen": -2.005431890487671, + "logits/rejected": -1.969964623451233, + "logps/chosen": -222.00167846679688, + "logps/rejected": -346.3222351074219, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9061081409454346, + "rewards/margins": 3.1402604579925537, + "rewards/rejected": -0.23415222764015198, + "step": 10457 + }, + { + "epoch": 0.61, + "learning_rate": 3.50770997928826e-08, + "logits/chosen": -1.9001730680465698, + "logits/rejected": -1.874632477760315, + "logps/chosen": -177.33782958984375, + "logps/rejected": -278.55596923828125, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4797821044921875, + "rewards/margins": 1.4330413341522217, + "rewards/rejected": 1.0467407703399658, + "step": 10458 + }, + { + "epoch": 0.61, + "learning_rate": 3.506810555489577e-08, + "logits/chosen": -1.9810243844985962, + "logits/rejected": -1.9646953344345093, + "logps/chosen": -247.75656127929688, + "logps/rejected": -533.1126708984375, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9178069829940796, + "rewards/margins": 3.2581512928009033, + "rewards/rejected": -1.3403443098068237, + "step": 10459 + }, + { + "epoch": 0.61, + "learning_rate": 3.5059111847362836e-08, + "logits/chosen": -1.9378987550735474, + "logits/rejected": -1.9232206344604492, + "logps/chosen": -64.17066955566406, + "logps/rejected": -247.24522399902344, + "loss": 0.1043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.965557873249054, + "rewards/margins": 3.9581146240234375, + "rewards/rejected": -2.9925568103790283, + "step": 10460 + }, + { + "epoch": 0.61, + "learning_rate": 3.505011867060337e-08, + "logits/chosen": -2.09879469871521, + "logits/rejected": -2.065809488296509, + "logps/chosen": -119.99397277832031, + "logps/rejected": -244.83843994140625, + "loss": 0.2025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0334014892578125, + "rewards/margins": 1.1739470958709717, + "rewards/rejected": 0.859454333782196, + "step": 10461 + }, + { + "epoch": 0.61, + "learning_rate": 3.5041126024936785e-08, + "logits/chosen": -1.942392349243164, + "logits/rejected": -1.940041184425354, + "logps/chosen": -16.912635803222656, + "logps/rejected": -64.32721710205078, + "loss": 0.5252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015779495006427169, + "rewards/margins": 0.7672136425971985, + "rewards/rejected": -0.7687916159629822, + "step": 10462 + }, + { + "epoch": 0.61, + "learning_rate": 3.50321339106826e-08, + "logits/chosen": -1.9806795120239258, + "logits/rejected": -1.99240243434906, + "logps/chosen": -169.5135040283203, + "logps/rejected": -233.05264282226562, + "loss": 0.0625, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3794540166854858, + "rewards/margins": 2.9324874877929688, + "rewards/rejected": -1.553033471107483, + "step": 10463 + }, + { + "epoch": 0.61, + "learning_rate": 3.5023142328160205e-08, + "logits/chosen": -1.9847887754440308, + "logits/rejected": -1.9646062850952148, + "logps/chosen": -74.66156005859375, + "logps/rejected": -313.1489562988281, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11692352592945099, + "rewards/margins": 2.886183261871338, + "rewards/rejected": -2.7692596912384033, + "step": 10464 + }, + { + "epoch": 0.61, + "learning_rate": 3.501415127768908e-08, + "logits/chosen": -1.9685782194137573, + "logits/rejected": -1.9615678787231445, + "logps/chosen": -0.003828272456303239, + "logps/rejected": -240.98623657226562, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0313867935328744e-05, + "rewards/margins": 4.585068702697754, + "rewards/rejected": -4.585089206695557, + "step": 10465 + }, + { + "epoch": 0.61, + "learning_rate": 3.5005160759588594e-08, + "logits/chosen": -1.876976490020752, + "logits/rejected": -1.8766084909439087, + "logps/chosen": -0.770871102809906, + "logps/rejected": -85.60832214355469, + "loss": 0.5872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024844735860824585, + "rewards/margins": 0.44308358430862427, + "rewards/rejected": -0.4182388484477997, + "step": 10466 + }, + { + "epoch": 0.61, + "learning_rate": 3.4996170774178154e-08, + "logits/chosen": -1.6417617797851562, + "logits/rejected": -1.6585389375686646, + "logps/chosen": -172.51419067382812, + "logps/rejected": -308.794921875, + "loss": 0.114, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0451691150665283, + "rewards/margins": 1.471826195716858, + "rewards/rejected": 1.5733429193496704, + "step": 10467 + }, + { + "epoch": 0.61, + "learning_rate": 3.498718132177711e-08, + "logits/chosen": -1.9652966260910034, + "logits/rejected": -1.9165343046188354, + "logps/chosen": -334.4346923828125, + "logps/rejected": -442.7110595703125, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.602526903152466, + "rewards/margins": 4.560601711273193, + "rewards/rejected": -1.958074927330017, + "step": 10468 + }, + { + "epoch": 0.61, + "learning_rate": 3.4978192402704833e-08, + "logits/chosen": -2.028630495071411, + "logits/rejected": -2.0276474952697754, + "logps/chosen": -66.26156616210938, + "logps/rejected": -212.88995361328125, + "loss": 0.2769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1514633148908615, + "rewards/margins": 3.191420078277588, + "rewards/rejected": -3.03995680809021, + "step": 10469 + }, + { + "epoch": 0.61, + "learning_rate": 3.496920401728063e-08, + "logits/chosen": -1.9761765003204346, + "logits/rejected": -1.9620634317398071, + "logps/chosen": -159.82496643066406, + "logps/rejected": -285.5372314453125, + "loss": 0.2607, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6557892560958862, + "rewards/margins": 0.6315979957580566, + "rewards/rejected": 1.0241912603378296, + "step": 10470 + }, + { + "epoch": 0.61, + "learning_rate": 3.496021616582383e-08, + "logits/chosen": -1.8646488189697266, + "logits/rejected": -1.8653348684310913, + "logps/chosen": -37.551910400390625, + "logps/rejected": -183.28346252441406, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08952484279870987, + "rewards/margins": 1.8787293434143066, + "rewards/rejected": -1.789204478263855, + "step": 10471 + }, + { + "epoch": 0.61, + "learning_rate": 3.495122884865371e-08, + "logits/chosen": -2.1075351238250732, + "logits/rejected": -2.100203275680542, + "logps/chosen": -56.5577278137207, + "logps/rejected": -279.09466552734375, + "loss": 0.2626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13773231208324432, + "rewards/margins": 5.0119709968566895, + "rewards/rejected": -4.87423849105835, + "step": 10472 + }, + { + "epoch": 0.61, + "learning_rate": 3.494224206608957e-08, + "logits/chosen": -1.7322947978973389, + "logits/rejected": -1.7350720167160034, + "logps/chosen": -3.104250192642212, + "logps/rejected": -216.93031311035156, + "loss": 0.3801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017332648858428, + "rewards/margins": 3.075094699859619, + "rewards/rejected": -3.0577621459960938, + "step": 10473 + }, + { + "epoch": 0.61, + "learning_rate": 3.493325581845063e-08, + "logits/chosen": -1.8745157718658447, + "logits/rejected": -1.8915255069732666, + "logps/chosen": -169.0543212890625, + "logps/rejected": -258.74163818359375, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5530014038085938, + "rewards/margins": 1.9047927856445312, + "rewards/rejected": 0.6482086181640625, + "step": 10474 + }, + { + "epoch": 0.61, + "learning_rate": 3.492427010605615e-08, + "logits/chosen": -1.6897711753845215, + "logits/rejected": -1.6206529140472412, + "logps/chosen": -223.54881286621094, + "logps/rejected": -426.8244934082031, + "loss": 0.1233, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1598052978515625, + "rewards/margins": 1.833929419517517, + "rewards/rejected": 0.325875848531723, + "step": 10475 + }, + { + "epoch": 0.61, + "learning_rate": 3.491528492922534e-08, + "logits/chosen": -1.968235969543457, + "logits/rejected": -1.9568344354629517, + "logps/chosen": -37.841949462890625, + "logps/rejected": -164.6427764892578, + "loss": 0.1801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7527523040771484, + "rewards/margins": 3.371995210647583, + "rewards/rejected": -2.6192429065704346, + "step": 10476 + }, + { + "epoch": 0.61, + "learning_rate": 3.490630028827738e-08, + "logits/chosen": -1.884345293045044, + "logits/rejected": -1.8784846067428589, + "logps/chosen": -9.396288871765137, + "logps/rejected": -103.97039794921875, + "loss": 0.6519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21361227333545685, + "rewards/margins": 0.32653361558914185, + "rewards/rejected": -0.5401458740234375, + "step": 10477 + }, + { + "epoch": 0.61, + "learning_rate": 3.48973161835315e-08, + "logits/chosen": -2.018878698348999, + "logits/rejected": -2.007733106613159, + "logps/chosen": -36.21927261352539, + "logps/rejected": -274.90594482421875, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10769615322351456, + "rewards/margins": 5.997958183288574, + "rewards/rejected": -5.890262126922607, + "step": 10478 + }, + { + "epoch": 0.61, + "learning_rate": 3.488833261530679e-08, + "logits/chosen": -1.944655418395996, + "logits/rejected": -1.9461320638656616, + "logps/chosen": -250.03065490722656, + "logps/rejected": -323.6746520996094, + "loss": 0.4871, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.2124130725860596, + "rewards/margins": -0.47600555419921875, + "rewards/rejected": 3.6884186267852783, + "step": 10479 + }, + { + "epoch": 0.61, + "learning_rate": 3.487934958392245e-08, + "logits/chosen": -1.8460445404052734, + "logits/rejected": -1.8642480373382568, + "logps/chosen": -159.62551879882812, + "logps/rejected": -268.1749267578125, + "loss": 0.0946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0541900396347046, + "rewards/margins": 2.3074982166290283, + "rewards/rejected": -1.2533081769943237, + "step": 10480 + }, + { + "epoch": 0.61, + "learning_rate": 3.487036708969755e-08, + "logits/chosen": -1.8172714710235596, + "logits/rejected": -1.824519157409668, + "logps/chosen": -275.8095703125, + "logps/rejected": -353.43572998046875, + "loss": 0.1235, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9708465337753296, + "rewards/margins": 1.929968237876892, + "rewards/rejected": 0.0408782958984375, + "step": 10481 + }, + { + "epoch": 0.61, + "learning_rate": 3.486138513295125e-08, + "logits/chosen": -1.9606600999832153, + "logits/rejected": -1.9643510580062866, + "logps/chosen": -25.230152130126953, + "logps/rejected": -239.20350646972656, + "loss": 0.2594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47351208329200745, + "rewards/margins": 2.490736246109009, + "rewards/rejected": -2.017224073410034, + "step": 10482 + }, + { + "epoch": 0.61, + "learning_rate": 3.485240371400257e-08, + "logits/chosen": -1.7992597818374634, + "logits/rejected": -1.8043688535690308, + "logps/chosen": -1.5408172607421875, + "logps/rejected": -262.06494140625, + "loss": 0.3616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0013918876647949219, + "rewards/margins": 4.997446060180664, + "rewards/rejected": -4.996054172515869, + "step": 10483 + }, + { + "epoch": 0.61, + "learning_rate": 3.4843422833170636e-08, + "logits/chosen": -2.0708560943603516, + "logits/rejected": -2.064948558807373, + "logps/chosen": -0.00012647702533286065, + "logps/rejected": -105.4079360961914, + "loss": 0.4404, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.897956958098803e-06, + "rewards/margins": 1.5409735441207886, + "rewards/rejected": -1.5409774780273438, + "step": 10484 + }, + { + "epoch": 0.61, + "learning_rate": 3.483444249077443e-08, + "logits/chosen": -1.8967434167861938, + "logits/rejected": -1.90022873878479, + "logps/chosen": -8.500452995300293, + "logps/rejected": -101.8200912475586, + "loss": 0.5596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18753233551979065, + "rewards/margins": 0.3939661979675293, + "rewards/rejected": -0.20643387734889984, + "step": 10485 + }, + { + "epoch": 0.61, + "learning_rate": 3.4825462687133036e-08, + "logits/chosen": -2.1230971813201904, + "logits/rejected": -2.11863112449646, + "logps/chosen": -5.248561859130859, + "logps/rejected": -150.12669372558594, + "loss": 0.3844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13583146035671234, + "rewards/margins": 2.065143585205078, + "rewards/rejected": -1.9293121099472046, + "step": 10486 + }, + { + "epoch": 0.61, + "learning_rate": 3.481648342256541e-08, + "logits/chosen": -1.850663185119629, + "logits/rejected": -1.8610349893569946, + "logps/chosen": -226.49002075195312, + "logps/rejected": -317.089111328125, + "loss": 0.2132, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8437440395355225, + "rewards/margins": 0.7073090076446533, + "rewards/rejected": 2.136435031890869, + "step": 10487 + }, + { + "epoch": 0.61, + "learning_rate": 3.480750469739059e-08, + "logits/chosen": -1.8283189535140991, + "logits/rejected": -1.8395777940750122, + "logps/chosen": -243.008056640625, + "logps/rejected": -497.8370056152344, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0946441888809204, + "rewards/margins": 4.6242218017578125, + "rewards/rejected": -3.5295777320861816, + "step": 10488 + }, + { + "epoch": 0.61, + "learning_rate": 3.47985265119275e-08, + "logits/chosen": -1.6198830604553223, + "logits/rejected": -1.6568670272827148, + "logps/chosen": -179.68600463867188, + "logps/rejected": -356.8155517578125, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8517287969589233, + "rewards/margins": 3.7424941062927246, + "rewards/rejected": -1.8907654285430908, + "step": 10489 + }, + { + "epoch": 0.61, + "learning_rate": 3.478954886649511e-08, + "logits/chosen": -1.987476110458374, + "logits/rejected": -1.9872627258300781, + "logps/chosen": -22.987024307250977, + "logps/rejected": -107.52458953857422, + "loss": 0.5546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20927734673023224, + "rewards/margins": 0.11394043266773224, + "rewards/rejected": 0.0953369140625, + "step": 10490 + }, + { + "epoch": 0.61, + "learning_rate": 3.478057176141235e-08, + "logits/chosen": -1.8929247856140137, + "logits/rejected": -1.8958983421325684, + "logps/chosen": -234.96585083007812, + "logps/rejected": -367.8902587890625, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.857321262359619, + "rewards/margins": 3.9209933280944824, + "rewards/rejected": -1.0636719465255737, + "step": 10491 + }, + { + "epoch": 0.61, + "learning_rate": 3.477159519699813e-08, + "logits/chosen": -2.0377581119537354, + "logits/rejected": -2.0310330390930176, + "logps/chosen": -12.50152587890625, + "logps/rejected": -191.8419952392578, + "loss": 0.4437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03689298778772354, + "rewards/margins": 1.3605456352233887, + "rewards/rejected": -1.3236526250839233, + "step": 10492 + }, + { + "epoch": 0.61, + "learning_rate": 3.4762619173571324e-08, + "logits/chosen": -1.9091379642486572, + "logits/rejected": -1.899688959121704, + "logps/chosen": -30.04616928100586, + "logps/rejected": -165.62362670898438, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2649787962436676, + "rewards/margins": 1.9199912548065186, + "rewards/rejected": -1.6550124883651733, + "step": 10493 + }, + { + "epoch": 0.61, + "learning_rate": 3.475364369145083e-08, + "logits/chosen": -1.8102566003799438, + "logits/rejected": -1.812662959098816, + "logps/chosen": -174.71017456054688, + "logps/rejected": -340.55560302734375, + "loss": 0.0622, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4464858770370483, + "rewards/margins": 2.809919834136963, + "rewards/rejected": -1.363433837890625, + "step": 10494 + }, + { + "epoch": 0.61, + "learning_rate": 3.474466875095549e-08, + "logits/chosen": -1.7991856336593628, + "logits/rejected": -1.7978103160858154, + "logps/chosen": -173.5924530029297, + "logps/rejected": -320.32720947265625, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1811447143554688, + "rewards/margins": 0.2715774178504944, + "rewards/rejected": 0.9095672965049744, + "step": 10495 + }, + { + "epoch": 0.61, + "learning_rate": 3.473569435240414e-08, + "logits/chosen": -1.9757241010665894, + "logits/rejected": -1.961891531944275, + "logps/chosen": -0.010925160720944405, + "logps/rejected": -201.7674560546875, + "loss": 0.3706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019032827112823725, + "rewards/margins": 2.987816333770752, + "rewards/rejected": -2.985913038253784, + "step": 10496 + }, + { + "epoch": 0.61, + "learning_rate": 3.472672049611558e-08, + "logits/chosen": -1.9959571361541748, + "logits/rejected": -1.9988670349121094, + "logps/chosen": -0.5323500633239746, + "logps/rejected": -155.92803955078125, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06145694479346275, + "rewards/margins": 2.4282217025756836, + "rewards/rejected": -2.366764783859253, + "step": 10497 + }, + { + "epoch": 0.61, + "learning_rate": 3.471774718240862e-08, + "logits/chosen": -1.8739277124404907, + "logits/rejected": -1.9011379480361938, + "logps/chosen": -245.764404296875, + "logps/rejected": -342.625732421875, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.108355760574341, + "rewards/margins": 3.510821580886841, + "rewards/rejected": -1.4024658203125, + "step": 10498 + }, + { + "epoch": 0.61, + "learning_rate": 3.4708774411602046e-08, + "logits/chosen": -2.0515003204345703, + "logits/rejected": -2.1297860145568848, + "logps/chosen": -227.6446990966797, + "logps/rejected": -315.9994201660156, + "loss": 0.1517, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7486069202423096, + "rewards/margins": 1.2756638526916504, + "rewards/rejected": 0.47294312715530396, + "step": 10499 + }, + { + "epoch": 0.61, + "learning_rate": 3.469980218401459e-08, + "logits/chosen": -1.7403297424316406, + "logits/rejected": -1.8173426389694214, + "logps/chosen": -298.0846862792969, + "logps/rejected": -572.7343139648438, + "loss": 0.1612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7482361197471619, + "rewards/margins": 6.466137886047363, + "rewards/rejected": -7.21437406539917, + "step": 10500 + }, + { + "epoch": 0.61, + "learning_rate": 3.469083049996502e-08, + "logits/chosen": -1.9559893608093262, + "logits/rejected": -1.9562565088272095, + "logps/chosen": -0.002659271704033017, + "logps/rejected": -149.54147338867188, + "loss": 0.3617, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.250182534335181e-05, + "rewards/margins": 2.7620561122894287, + "rewards/rejected": -2.7621285915374756, + "step": 10501 + }, + { + "epoch": 0.61, + "learning_rate": 3.4681859359772014e-08, + "logits/chosen": -2.057774782180786, + "logits/rejected": -2.044802665710449, + "logps/chosen": -54.97882843017578, + "logps/rejected": -198.1556854248047, + "loss": 0.379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02031402662396431, + "rewards/margins": 2.504277229309082, + "rewards/rejected": -2.5245912075042725, + "step": 10502 + }, + { + "epoch": 0.61, + "learning_rate": 3.4672888763754315e-08, + "logits/chosen": -2.0437123775482178, + "logits/rejected": -2.0364909172058105, + "logps/chosen": -63.34375762939453, + "logps/rejected": -361.06689453125, + "loss": 0.1688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7919365167617798, + "rewards/margins": 6.09607458114624, + "rewards/rejected": -5.30413818359375, + "step": 10503 + }, + { + "epoch": 0.61, + "learning_rate": 3.466391871223056e-08, + "logits/chosen": -1.9267507791519165, + "logits/rejected": -1.9279733896255493, + "logps/chosen": -20.658056259155273, + "logps/rejected": -97.75482940673828, + "loss": 0.2531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7592081427574158, + "rewards/margins": 1.2370271682739258, + "rewards/rejected": -0.4778190553188324, + "step": 10504 + }, + { + "epoch": 0.61, + "learning_rate": 3.465494920551944e-08, + "logits/chosen": -1.8663439750671387, + "logits/rejected": -1.8514209985733032, + "logps/chosen": -172.85755920410156, + "logps/rejected": -328.5654296875, + "loss": 0.2542, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9225815534591675, + "rewards/margins": 0.6639572381973267, + "rewards/rejected": 1.2586243152618408, + "step": 10505 + }, + { + "epoch": 0.61, + "learning_rate": 3.464598024393958e-08, + "logits/chosen": -2.1401100158691406, + "logits/rejected": -2.129849910736084, + "logps/chosen": -17.272701263427734, + "logps/rejected": -187.12725830078125, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30360299348831177, + "rewards/margins": 1.815086841583252, + "rewards/rejected": -1.5114837884902954, + "step": 10506 + }, + { + "epoch": 0.61, + "learning_rate": 3.463701182780961e-08, + "logits/chosen": -1.9215365648269653, + "logits/rejected": -1.9165174961090088, + "logps/chosen": -53.56418228149414, + "logps/rejected": -139.02218627929688, + "loss": 0.3654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6725826263427734, + "rewards/margins": 0.9367168545722961, + "rewards/rejected": -0.2641342282295227, + "step": 10507 + }, + { + "epoch": 0.61, + "learning_rate": 3.4628043957448114e-08, + "logits/chosen": -2.074018955230713, + "logits/rejected": -2.0711746215820312, + "logps/chosen": -25.765949249267578, + "logps/rejected": -340.8116149902344, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17165279388427734, + "rewards/margins": 4.28568696975708, + "rewards/rejected": -4.457339763641357, + "step": 10508 + }, + { + "epoch": 0.61, + "learning_rate": 3.4619076633173716e-08, + "logits/chosen": -1.9861780405044556, + "logits/rejected": -1.9901601076126099, + "logps/chosen": -0.14331050217151642, + "logps/rejected": -149.04345703125, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03820221498608589, + "rewards/margins": 2.815758228302002, + "rewards/rejected": -2.7775559425354004, + "step": 10509 + }, + { + "epoch": 0.61, + "learning_rate": 3.4610109855304934e-08, + "logits/chosen": -1.9760243892669678, + "logits/rejected": -1.9773451089859009, + "logps/chosen": -103.26654052734375, + "logps/rejected": -318.5252685546875, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09658356010913849, + "rewards/margins": 3.9964799880981445, + "rewards/rejected": -3.8998963832855225, + "step": 10510 + }, + { + "epoch": 0.61, + "learning_rate": 3.460114362416034e-08, + "logits/chosen": -1.8296691179275513, + "logits/rejected": -1.826913833618164, + "logps/chosen": -21.62077522277832, + "logps/rejected": -229.20111083984375, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4872865676879883, + "rewards/margins": 2.305656671524048, + "rewards/rejected": -1.8183701038360596, + "step": 10511 + }, + { + "epoch": 0.61, + "learning_rate": 3.459217794005843e-08, + "logits/chosen": -1.9275047779083252, + "logits/rejected": -1.907394528388977, + "logps/chosen": -71.73023223876953, + "logps/rejected": -403.8597412109375, + "loss": 0.252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5247001647949219, + "rewards/margins": 2.36637806892395, + "rewards/rejected": -1.8416779041290283, + "step": 10512 + }, + { + "epoch": 0.61, + "learning_rate": 3.458321280331775e-08, + "logits/chosen": -2.0382895469665527, + "logits/rejected": -2.025897264480591, + "logps/chosen": -0.0003563966020010412, + "logps/rejected": -148.26919555664062, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.046304457006045e-05, + "rewards/margins": 2.5727791786193848, + "rewards/rejected": -2.5727386474609375, + "step": 10513 + }, + { + "epoch": 0.61, + "learning_rate": 3.4574248214256754e-08, + "logits/chosen": -1.8254412412643433, + "logits/rejected": -1.8342496156692505, + "logps/chosen": -11.620978355407715, + "logps/rejected": -106.2296371459961, + "loss": 0.4046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07509946823120117, + "rewards/margins": 1.583510160446167, + "rewards/rejected": -1.5084106922149658, + "step": 10514 + }, + { + "epoch": 0.61, + "learning_rate": 3.456528417319392e-08, + "logits/chosen": -2.0375468730926514, + "logits/rejected": -2.042144536972046, + "logps/chosen": -32.201812744140625, + "logps/rejected": -305.830810546875, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22245101630687714, + "rewards/margins": 2.50736927986145, + "rewards/rejected": -2.2849183082580566, + "step": 10515 + }, + { + "epoch": 0.61, + "learning_rate": 3.4556320680447684e-08, + "logits/chosen": -1.9207837581634521, + "logits/rejected": -1.9273772239685059, + "logps/chosen": -185.21551513671875, + "logps/rejected": -450.2264404296875, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8896210193634033, + "rewards/margins": 4.494394302368164, + "rewards/rejected": -2.6047730445861816, + "step": 10516 + }, + { + "epoch": 0.61, + "learning_rate": 3.454735773633648e-08, + "logits/chosen": -1.805640459060669, + "logits/rejected": -1.8184218406677246, + "logps/chosen": -218.51431274414062, + "logps/rejected": -374.7544860839844, + "loss": 0.1914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7194763422012329, + "rewards/margins": 1.6528840065002441, + "rewards/rejected": -0.9334076046943665, + "step": 10517 + }, + { + "epoch": 0.61, + "learning_rate": 3.4538395341178715e-08, + "logits/chosen": -2.082674264907837, + "logits/rejected": -2.0664567947387695, + "logps/chosen": -54.4632568359375, + "logps/rejected": -345.80279541015625, + "loss": 0.151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.800650417804718, + "rewards/margins": 6.744980335235596, + "rewards/rejected": -5.944329738616943, + "step": 10518 + }, + { + "epoch": 0.61, + "learning_rate": 3.452943349529277e-08, + "logits/chosen": -1.7537099123001099, + "logits/rejected": -1.7481093406677246, + "logps/chosen": -166.1649932861328, + "logps/rejected": -256.31658935546875, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.620143175125122, + "rewards/margins": 1.761885166168213, + "rewards/rejected": 0.858258068561554, + "step": 10519 + }, + { + "epoch": 0.61, + "learning_rate": 3.4520472198997056e-08, + "logits/chosen": -1.8284145593643188, + "logits/rejected": -1.8131792545318604, + "logps/chosen": -221.29763793945312, + "logps/rejected": -276.1512451171875, + "loss": 0.4325, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.408013939857483, + "rewards/margins": -0.22493290901184082, + "rewards/rejected": 1.6329468488693237, + "step": 10520 + }, + { + "epoch": 0.61, + "learning_rate": 3.4511511452609856e-08, + "logits/chosen": -1.8288308382034302, + "logits/rejected": -1.8146958351135254, + "logps/chosen": -151.14199829101562, + "logps/rejected": -253.452392578125, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0374176502227783, + "rewards/margins": 2.278524875640869, + "rewards/rejected": 0.758892834186554, + "step": 10521 + }, + { + "epoch": 0.61, + "learning_rate": 3.4502551256449555e-08, + "logits/chosen": -1.7465267181396484, + "logits/rejected": -1.7383630275726318, + "logps/chosen": -206.005615234375, + "logps/rejected": -387.8395080566406, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2473480701446533, + "rewards/margins": 3.6029176712036133, + "rewards/rejected": -2.35556960105896, + "step": 10522 + }, + { + "epoch": 0.61, + "learning_rate": 3.449359161083442e-08, + "logits/chosen": -1.8006757497787476, + "logits/rejected": -1.7956973314285278, + "logps/chosen": -6.97074556350708, + "logps/rejected": -133.916259765625, + "loss": 0.4666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17674164474010468, + "rewards/margins": 0.9584959745407104, + "rewards/rejected": -0.7817543148994446, + "step": 10523 + }, + { + "epoch": 0.61, + "learning_rate": 3.448463251608278e-08, + "logits/chosen": -1.8267658948898315, + "logits/rejected": -1.8333840370178223, + "logps/chosen": -257.27227783203125, + "logps/rejected": -454.72344970703125, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.525884985923767, + "rewards/margins": 5.2874298095703125, + "rewards/rejected": -3.761544942855835, + "step": 10524 + }, + { + "epoch": 0.61, + "learning_rate": 3.447567397251286e-08, + "logits/chosen": -1.9342690706253052, + "logits/rejected": -1.9177305698394775, + "logps/chosen": -217.36279296875, + "logps/rejected": -445.8411560058594, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.412362813949585, + "rewards/margins": 5.461822509765625, + "rewards/rejected": -3.049459934234619, + "step": 10525 + }, + { + "epoch": 0.61, + "learning_rate": 3.446671598044296e-08, + "logits/chosen": -1.98530113697052, + "logits/rejected": -1.9845587015151978, + "logps/chosen": -36.69528579711914, + "logps/rejected": -195.507080078125, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37191200256347656, + "rewards/margins": 1.5816227197647095, + "rewards/rejected": -1.209710717201233, + "step": 10526 + }, + { + "epoch": 0.61, + "learning_rate": 3.445775854019127e-08, + "logits/chosen": -1.9862720966339111, + "logits/rejected": -1.9777559041976929, + "logps/chosen": -8.186541557312012, + "logps/rejected": -238.4737548828125, + "loss": 0.323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10576114803552628, + "rewards/margins": 2.302321672439575, + "rewards/rejected": -2.1965606212615967, + "step": 10527 + }, + { + "epoch": 0.61, + "learning_rate": 3.444880165207605e-08, + "logits/chosen": -1.9863132238388062, + "logits/rejected": -1.9987460374832153, + "logps/chosen": -184.62681579589844, + "logps/rejected": -287.1054382324219, + "loss": 0.0788, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.655908226966858, + "rewards/margins": 2.223797559738159, + "rewards/rejected": -0.567889392375946, + "step": 10528 + }, + { + "epoch": 0.61, + "learning_rate": 3.443984531641544e-08, + "logits/chosen": -2.003157377243042, + "logits/rejected": -2.0333175659179688, + "logps/chosen": -185.887451171875, + "logps/rejected": -549.7386474609375, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0584380626678467, + "rewards/margins": 5.646035671234131, + "rewards/rejected": -3.587597608566284, + "step": 10529 + }, + { + "epoch": 0.61, + "learning_rate": 3.443088953352766e-08, + "logits/chosen": -1.8504555225372314, + "logits/rejected": -1.8381080627441406, + "logps/chosen": -206.60971069335938, + "logps/rejected": -421.3291931152344, + "loss": 0.4436, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.1766159534454346, + "rewards/margins": -0.3327469825744629, + "rewards/rejected": 3.5093629360198975, + "step": 10530 + }, + { + "epoch": 0.61, + "learning_rate": 3.442193430373082e-08, + "logits/chosen": -1.964969277381897, + "logits/rejected": -1.9656026363372803, + "logps/chosen": -3.504724008962512e-05, + "logps/rejected": -188.206298828125, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1458036769672617e-07, + "rewards/margins": 4.065704345703125, + "rewards/rejected": -4.065704345703125, + "step": 10531 + }, + { + "epoch": 0.61, + "learning_rate": 3.441297962734309e-08, + "logits/chosen": -1.976447343826294, + "logits/rejected": -1.974835991859436, + "logps/chosen": -204.97354125976562, + "logps/rejected": -238.59532165527344, + "loss": 0.2087, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.295017957687378, + "rewards/margins": 0.6781187057495117, + "rewards/rejected": 2.616899251937866, + "step": 10532 + }, + { + "epoch": 0.61, + "learning_rate": 3.440402550468255e-08, + "logits/chosen": -1.9293673038482666, + "logits/rejected": -1.948071837425232, + "logps/chosen": -304.01702880859375, + "logps/rejected": -422.3631286621094, + "loss": 0.0737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9062530994415283, + "rewards/margins": 2.9373598098754883, + "rewards/rejected": -1.0311065912246704, + "step": 10533 + }, + { + "epoch": 0.61, + "learning_rate": 3.439507193606733e-08, + "logits/chosen": -1.8361388444900513, + "logits/rejected": -1.8202375173568726, + "logps/chosen": -409.4593505859375, + "logps/rejected": -464.9971008300781, + "loss": 0.2316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0262024402618408, + "rewards/margins": 0.9211456775665283, + "rewards/rejected": 0.1050567626953125, + "step": 10534 + }, + { + "epoch": 0.61, + "learning_rate": 3.438611892181548e-08, + "logits/chosen": -1.943315029144287, + "logits/rejected": -1.9809410572052002, + "logps/chosen": -168.20346069335938, + "logps/rejected": -264.50811767578125, + "loss": 0.1914, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1560685634613037, + "rewards/margins": 0.8985917568206787, + "rewards/rejected": 1.257476806640625, + "step": 10535 + }, + { + "epoch": 0.61, + "learning_rate": 3.437716646224506e-08, + "logits/chosen": -2.036142587661743, + "logits/rejected": -2.0203518867492676, + "logps/chosen": -38.67942428588867, + "logps/rejected": -353.6184997558594, + "loss": 0.2131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.765429675579071, + "rewards/margins": 1.9074981212615967, + "rewards/rejected": -1.1420685052871704, + "step": 10536 + }, + { + "epoch": 0.61, + "learning_rate": 3.43682145576741e-08, + "logits/chosen": -2.150981903076172, + "logits/rejected": -2.1515917778015137, + "logps/chosen": -46.107177734375, + "logps/rejected": -282.0191955566406, + "loss": 0.0891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2925971746444702, + "rewards/margins": 3.6754889488220215, + "rewards/rejected": -2.382891893386841, + "step": 10537 + }, + { + "epoch": 0.61, + "learning_rate": 3.435926320842062e-08, + "logits/chosen": -1.7984626293182373, + "logits/rejected": -1.8344749212265015, + "logps/chosen": -211.3194580078125, + "logps/rejected": -346.64898681640625, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5094711780548096, + "rewards/margins": 2.403795003890991, + "rewards/rejected": -0.8943237662315369, + "step": 10538 + }, + { + "epoch": 0.61, + "learning_rate": 3.435031241480264e-08, + "logits/chosen": -1.8819973468780518, + "logits/rejected": -1.8842332363128662, + "logps/chosen": -242.82212829589844, + "logps/rejected": -421.6966552734375, + "loss": 0.0851, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1305253505706787, + "rewards/margins": 2.8585588932037354, + "rewards/rejected": -0.7280334830284119, + "step": 10539 + }, + { + "epoch": 0.61, + "learning_rate": 3.434136217713809e-08, + "logits/chosen": -1.9670867919921875, + "logits/rejected": -1.9746917486190796, + "logps/chosen": -0.7376503348350525, + "logps/rejected": -94.40960693359375, + "loss": 0.4618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04184284806251526, + "rewards/margins": 1.3095964193344116, + "rewards/rejected": -1.2677536010742188, + "step": 10540 + }, + { + "epoch": 0.61, + "learning_rate": 3.4332412495744976e-08, + "logits/chosen": -1.8950157165527344, + "logits/rejected": -1.8900623321533203, + "logps/chosen": -13.969881057739258, + "logps/rejected": -156.75335693359375, + "loss": 0.3771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019555378705263138, + "rewards/margins": 3.0204617977142334, + "rewards/rejected": -3.000906467437744, + "step": 10541 + }, + { + "epoch": 0.61, + "learning_rate": 3.432346337094118e-08, + "logits/chosen": -1.814690351486206, + "logits/rejected": -1.8229174613952637, + "logps/chosen": -274.1534118652344, + "logps/rejected": -368.64019775390625, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1542022228240967, + "rewards/margins": 3.9080872535705566, + "rewards/rejected": -1.7538849115371704, + "step": 10542 + }, + { + "epoch": 0.61, + "learning_rate": 3.4314514803044684e-08, + "logits/chosen": -1.8008772134780884, + "logits/rejected": -1.8044121265411377, + "logps/chosen": -157.14292907714844, + "logps/rejected": -277.1408386230469, + "loss": 0.0753, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.030316114425659, + "rewards/margins": 2.171856641769409, + "rewards/rejected": -0.14154052734375, + "step": 10543 + }, + { + "epoch": 0.61, + "learning_rate": 3.4305566792373306e-08, + "logits/chosen": -1.9776498079299927, + "logits/rejected": -1.9805700778961182, + "logps/chosen": -0.28185999393463135, + "logps/rejected": -27.820812225341797, + "loss": 0.6858, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00010702609870349988, + "rewards/margins": -0.0024846792221069336, + "rewards/rejected": 0.0025917054153978825, + "step": 10544 + }, + { + "epoch": 0.61, + "learning_rate": 3.4296619339245e-08, + "logits/chosen": -1.8675402402877808, + "logits/rejected": -1.8490134477615356, + "logps/chosen": -227.55694580078125, + "logps/rejected": -356.1206359863281, + "loss": 0.1724, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6336243152618408, + "rewards/margins": 1.0131957530975342, + "rewards/rejected": 0.6204285025596619, + "step": 10545 + }, + { + "epoch": 0.61, + "learning_rate": 3.428767244397756e-08, + "logits/chosen": -1.8054593801498413, + "logits/rejected": -1.7973823547363281, + "logps/chosen": -63.14579772949219, + "logps/rejected": -258.9919128417969, + "loss": 0.1044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1944923400878906, + "rewards/margins": 8.658097267150879, + "rewards/rejected": -7.463604927062988, + "step": 10546 + }, + { + "epoch": 0.61, + "learning_rate": 3.4278726106888865e-08, + "logits/chosen": -1.9722557067871094, + "logits/rejected": -1.9636482000350952, + "logps/chosen": -1.8136610984802246, + "logps/rejected": -356.53460693359375, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2828124165534973, + "rewards/margins": 7.045993328094482, + "rewards/rejected": -6.763180732727051, + "step": 10547 + }, + { + "epoch": 0.61, + "learning_rate": 3.426978032829672e-08, + "logits/chosen": -1.8669339418411255, + "logits/rejected": -1.860687017440796, + "logps/chosen": -12.032142639160156, + "logps/rejected": -132.48863220214844, + "loss": 0.7323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38313570618629456, + "rewards/margins": 0.22006484866142273, + "rewards/rejected": -0.6032005548477173, + "step": 10548 + }, + { + "epoch": 0.61, + "learning_rate": 3.4260835108518925e-08, + "logits/chosen": -1.8714066743850708, + "logits/rejected": -1.8473560810089111, + "logps/chosen": -306.4957580566406, + "logps/rejected": -261.7972717285156, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4154083728790283, + "rewards/margins": 3.4647889137268066, + "rewards/rejected": -2.0493805408477783, + "step": 10549 + }, + { + "epoch": 0.61, + "learning_rate": 3.4251890447873256e-08, + "logits/chosen": -1.859376311302185, + "logits/rejected": -1.8600327968597412, + "logps/chosen": -10.23495101928711, + "logps/rejected": -207.1041259765625, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09794493019580841, + "rewards/margins": 2.6320362091064453, + "rewards/rejected": -2.5340912342071533, + "step": 10550 + }, + { + "epoch": 0.61, + "learning_rate": 3.424294634667747e-08, + "logits/chosen": -1.883779764175415, + "logits/rejected": -1.8762962818145752, + "logps/chosen": -33.56324768066406, + "logps/rejected": -139.1475067138672, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7609840631484985, + "rewards/margins": 1.1536812782287598, + "rewards/rejected": -0.39269715547561646, + "step": 10551 + }, + { + "epoch": 0.61, + "learning_rate": 3.42340028052493e-08, + "logits/chosen": -1.9511016607284546, + "logits/rejected": -1.9265143871307373, + "logps/chosen": -53.64201354980469, + "logps/rejected": -426.66900634765625, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8148666620254517, + "rewards/margins": 5.400468349456787, + "rewards/rejected": -3.585601806640625, + "step": 10552 + }, + { + "epoch": 0.61, + "learning_rate": 3.4225059823906485e-08, + "logits/chosen": -1.784201741218567, + "logits/rejected": -1.8023043870925903, + "logps/chosen": -248.5466766357422, + "logps/rejected": -328.1676025390625, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0473709106445312, + "rewards/margins": 2.1394240856170654, + "rewards/rejected": -0.09205322712659836, + "step": 10553 + }, + { + "epoch": 0.61, + "learning_rate": 3.4216117402966694e-08, + "logits/chosen": -1.8710999488830566, + "logits/rejected": -1.878829836845398, + "logps/chosen": -7.712702063145116e-05, + "logps/rejected": -176.78387451171875, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2992874189876602e-06, + "rewards/margins": 3.800039291381836, + "rewards/rejected": -3.8000380992889404, + "step": 10554 + }, + { + "epoch": 0.61, + "learning_rate": 3.4207175542747627e-08, + "logits/chosen": -2.0473713874816895, + "logits/rejected": -2.05025315284729, + "logps/chosen": -8.027714729309082, + "logps/rejected": -24.273544311523438, + "loss": 0.6506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06080351024866104, + "rewards/margins": 0.08666467666625977, + "rewards/rejected": -0.025861168280243874, + "step": 10555 + }, + { + "epoch": 0.61, + "learning_rate": 3.4198234243566926e-08, + "logits/chosen": -2.029601573944092, + "logits/rejected": -2.0163393020629883, + "logps/chosen": -7.629250467289239e-05, + "logps/rejected": -169.36431884765625, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2754783256241353e-06, + "rewards/margins": 4.024317741394043, + "rewards/rejected": -4.024316310882568, + "step": 10556 + }, + { + "epoch": 0.61, + "learning_rate": 3.4189293505742257e-08, + "logits/chosen": -2.025604009628296, + "logits/rejected": -1.9885811805725098, + "logps/chosen": -181.7401580810547, + "logps/rejected": -405.4658203125, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5021194219589233, + "rewards/margins": 4.077073574066162, + "rewards/rejected": -2.5749542713165283, + "step": 10557 + }, + { + "epoch": 0.61, + "learning_rate": 3.41803533295912e-08, + "logits/chosen": -1.9319041967391968, + "logits/rejected": -1.874498963356018, + "logps/chosen": -133.13998413085938, + "logps/rejected": -345.993408203125, + "loss": 0.2005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1410675048828125, + "rewards/margins": 2.0836455821990967, + "rewards/rejected": -0.942578136920929, + "step": 10558 + }, + { + "epoch": 0.61, + "learning_rate": 3.4171413715431375e-08, + "logits/chosen": -1.665761113166809, + "logits/rejected": -1.6687523126602173, + "logps/chosen": -5.426576614379883, + "logps/rejected": -141.16864013671875, + "loss": 0.5163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.054566528648138046, + "rewards/margins": 0.9113972783088684, + "rewards/rejected": -0.9659637808799744, + "step": 10559 + }, + { + "epoch": 0.61, + "learning_rate": 3.4162474663580385e-08, + "logits/chosen": -1.8496803045272827, + "logits/rejected": -1.853161334991455, + "logps/chosen": -28.412755966186523, + "logps/rejected": -128.96902465820312, + "loss": 0.4178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36207523941993713, + "rewards/margins": 0.5686556100845337, + "rewards/rejected": -0.20658035576343536, + "step": 10560 + }, + { + "epoch": 0.61, + "learning_rate": 3.415353617435574e-08, + "logits/chosen": -1.9450637102127075, + "logits/rejected": -1.9443085193634033, + "logps/chosen": -0.3871113955974579, + "logps/rejected": -243.38082885742188, + "loss": 0.3478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020725375041365623, + "rewards/margins": 4.277694225311279, + "rewards/rejected": -4.29841947555542, + "step": 10561 + }, + { + "epoch": 0.61, + "learning_rate": 3.414459824807503e-08, + "logits/chosen": -1.8809869289398193, + "logits/rejected": -1.8842628002166748, + "logps/chosen": -122.79339599609375, + "logps/rejected": -284.93560791015625, + "loss": 0.1351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8996124267578125, + "rewards/margins": 4.872505187988281, + "rewards/rejected": -3.9728927612304688, + "step": 10562 + }, + { + "epoch": 0.61, + "learning_rate": 3.413566088505572e-08, + "logits/chosen": -1.904189109802246, + "logits/rejected": -1.9048683643341064, + "logps/chosen": -25.20859718322754, + "logps/rejected": -171.58563232421875, + "loss": 0.3581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07414760440587997, + "rewards/margins": 1.8405996561050415, + "rewards/rejected": -1.7664520740509033, + "step": 10563 + }, + { + "epoch": 0.61, + "learning_rate": 3.4126724085615354e-08, + "logits/chosen": -1.857881784439087, + "logits/rejected": -1.8841108083724976, + "logps/chosen": -242.77044677734375, + "logps/rejected": -411.64898681640625, + "loss": 0.075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.70880126953125, + "rewards/margins": 2.9645631313323975, + "rewards/rejected": -2.2557618618011475, + "step": 10564 + }, + { + "epoch": 0.61, + "learning_rate": 3.4117787850071377e-08, + "logits/chosen": -2.086681842803955, + "logits/rejected": -2.0704081058502197, + "logps/chosen": -146.5558624267578, + "logps/rejected": -235.2554931640625, + "loss": 0.1226, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.078625440597534, + "rewards/margins": 1.7584105730056763, + "rewards/rejected": 0.3202148377895355, + "step": 10565 + }, + { + "epoch": 0.61, + "learning_rate": 3.4108852178741287e-08, + "logits/chosen": -1.8990072011947632, + "logits/rejected": -1.9555426836013794, + "logps/chosen": -181.99880981445312, + "logps/rejected": -346.99688720703125, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6261993646621704, + "rewards/margins": 3.302630662918091, + "rewards/rejected": -1.6764312982559204, + "step": 10566 + }, + { + "epoch": 0.61, + "learning_rate": 3.4099917071942476e-08, + "logits/chosen": -2.0940933227539062, + "logits/rejected": -2.084613561630249, + "logps/chosen": -0.0006799941183999181, + "logps/rejected": -140.6426239013672, + "loss": 0.344, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2447260562330484e-05, + "rewards/margins": 3.038299083709717, + "rewards/rejected": -3.0382766723632812, + "step": 10567 + }, + { + "epoch": 0.61, + "learning_rate": 3.4090982529992404e-08, + "logits/chosen": -1.9958205223083496, + "logits/rejected": -1.9939496517181396, + "logps/chosen": -26.811384201049805, + "logps/rejected": -186.98281860351562, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2753269374370575, + "rewards/margins": 2.6109471321105957, + "rewards/rejected": -2.335620164871216, + "step": 10568 + }, + { + "epoch": 0.62, + "learning_rate": 3.408204855320844e-08, + "logits/chosen": -1.9242064952850342, + "logits/rejected": -1.9249829053878784, + "logps/chosen": -74.67955017089844, + "logps/rejected": -130.74703979492188, + "loss": 0.9055, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6054443717002869, + "rewards/margins": -0.5597214102745056, + "rewards/rejected": -0.04572296142578125, + "step": 10569 + }, + { + "epoch": 0.62, + "learning_rate": 3.407311514190798e-08, + "logits/chosen": -1.6678223609924316, + "logits/rejected": -1.7084248065948486, + "logps/chosen": -240.221435546875, + "logps/rejected": -427.8453369140625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7346465587615967, + "rewards/margins": 7.081368923187256, + "rewards/rejected": -3.346722364425659, + "step": 10570 + }, + { + "epoch": 0.62, + "learning_rate": 3.406418229640837e-08, + "logits/chosen": -1.94002103805542, + "logits/rejected": -1.9061906337738037, + "logps/chosen": -187.93434143066406, + "logps/rejected": -390.56646728515625, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2124481201171875, + "rewards/margins": 3.8682680130004883, + "rewards/rejected": -1.6558197736740112, + "step": 10571 + }, + { + "epoch": 0.62, + "learning_rate": 3.4055250017026973e-08, + "logits/chosen": -1.9876137971878052, + "logits/rejected": -1.9866410493850708, + "logps/chosen": -0.0476563386619091, + "logps/rejected": -182.99441528320312, + "loss": 0.4154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003267560852691531, + "rewards/margins": 1.7291513681411743, + "rewards/rejected": -1.7324188947677612, + "step": 10572 + }, + { + "epoch": 0.62, + "learning_rate": 3.404631830408107e-08, + "logits/chosen": -2.1019227504730225, + "logits/rejected": -2.0925345420837402, + "logps/chosen": -6.403965950012207, + "logps/rejected": -174.99417114257812, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07323231548070908, + "rewards/margins": 2.7404184341430664, + "rewards/rejected": -2.6671860218048096, + "step": 10573 + }, + { + "epoch": 0.62, + "learning_rate": 3.4037387157888e-08, + "logits/chosen": -1.7596404552459717, + "logits/rejected": -1.6768418550491333, + "logps/chosen": -174.2714080810547, + "logps/rejected": -341.68914794921875, + "loss": 0.0748, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4257583618164062, + "rewards/margins": 2.6480178833007812, + "rewards/rejected": -0.222259521484375, + "step": 10574 + }, + { + "epoch": 0.62, + "learning_rate": 3.4028456578765e-08, + "logits/chosen": -2.002876043319702, + "logits/rejected": -1.9937021732330322, + "logps/chosen": -21.023517608642578, + "logps/rejected": -212.3806610107422, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3402891159057617, + "rewards/margins": 5.246392726898193, + "rewards/rejected": -4.906103610992432, + "step": 10575 + }, + { + "epoch": 0.62, + "learning_rate": 3.4019526567029356e-08, + "logits/chosen": -1.9240608215332031, + "logits/rejected": -1.9192112684249878, + "logps/chosen": -1.7616844177246094, + "logps/rejected": -156.85317993164062, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23147597908973694, + "rewards/margins": 3.0259616374969482, + "rewards/rejected": -2.794485569000244, + "step": 10576 + }, + { + "epoch": 0.62, + "learning_rate": 3.401059712299829e-08, + "logits/chosen": -1.7706263065338135, + "logits/rejected": -1.7703797817230225, + "logps/chosen": -89.09748077392578, + "logps/rejected": -396.7254943847656, + "loss": 0.1532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8831467032432556, + "rewards/margins": 4.660499572753906, + "rewards/rejected": -3.777353048324585, + "step": 10577 + }, + { + "epoch": 0.62, + "learning_rate": 3.4001668246989026e-08, + "logits/chosen": -1.9382622241973877, + "logits/rejected": -1.9401108026504517, + "logps/chosen": -9.382948875427246, + "logps/rejected": -107.73316955566406, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34690696001052856, + "rewards/margins": 1.6820356845855713, + "rewards/rejected": -1.3351287841796875, + "step": 10578 + }, + { + "epoch": 0.62, + "learning_rate": 3.399273993931878e-08, + "logits/chosen": -1.8650908470153809, + "logits/rejected": -1.8529951572418213, + "logps/chosen": -28.704601287841797, + "logps/rejected": -348.4126281738281, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22818413376808167, + "rewards/margins": 6.517457485198975, + "rewards/rejected": -6.289273262023926, + "step": 10579 + }, + { + "epoch": 0.62, + "learning_rate": 3.3983812200304685e-08, + "logits/chosen": -2.0062530040740967, + "logits/rejected": -2.1025185585021973, + "logps/chosen": -238.82095336914062, + "logps/rejected": -434.6715393066406, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7494934797286987, + "rewards/margins": 4.0331268310546875, + "rewards/rejected": -2.2836334705352783, + "step": 10580 + }, + { + "epoch": 0.62, + "learning_rate": 3.3974885030263956e-08, + "logits/chosen": -1.9410641193389893, + "logits/rejected": -1.9387681484222412, + "logps/chosen": -25.259540557861328, + "logps/rejected": -98.16401672363281, + "loss": 0.3794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45881253480911255, + "rewards/margins": 1.0592008829116821, + "rewards/rejected": -0.6003883481025696, + "step": 10581 + }, + { + "epoch": 0.62, + "learning_rate": 3.3965958429513666e-08, + "logits/chosen": -1.5029021501541138, + "logits/rejected": -1.5144983530044556, + "logps/chosen": -48.040103912353516, + "logps/rejected": -437.65020751953125, + "loss": 0.2004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6580490469932556, + "rewards/margins": 5.7924089431762695, + "rewards/rejected": -5.134359836578369, + "step": 10582 + }, + { + "epoch": 0.62, + "learning_rate": 3.395703239837099e-08, + "logits/chosen": -1.7750948667526245, + "logits/rejected": -1.7538753747940063, + "logps/chosen": -140.72268676757812, + "logps/rejected": -311.6053466796875, + "loss": 0.261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9519394040107727, + "rewards/margins": 0.8728744387626648, + "rewards/rejected": 0.07906494289636612, + "step": 10583 + }, + { + "epoch": 0.62, + "learning_rate": 3.394810693715298e-08, + "logits/chosen": -1.9969613552093506, + "logits/rejected": -1.9762063026428223, + "logps/chosen": -251.7169189453125, + "logps/rejected": -323.17584228515625, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4030518531799316, + "rewards/margins": 0.25516366958618164, + "rewards/rejected": 2.14788818359375, + "step": 10584 + }, + { + "epoch": 0.62, + "learning_rate": 3.393918204617675e-08, + "logits/chosen": -1.6159316301345825, + "logits/rejected": -1.6766068935394287, + "logps/chosen": -219.20281982421875, + "logps/rejected": -399.80859375, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.999578833580017, + "rewards/margins": 2.086895704269409, + "rewards/rejected": -0.08731689304113388, + "step": 10585 + }, + { + "epoch": 0.62, + "learning_rate": 3.39302577257593e-08, + "logits/chosen": -2.1456828117370605, + "logits/rejected": -2.138897180557251, + "logps/chosen": -19.476490020751953, + "logps/rejected": -279.6133117675781, + "loss": 0.2201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5173754096031189, + "rewards/margins": 4.053855895996094, + "rewards/rejected": -3.536480665206909, + "step": 10586 + }, + { + "epoch": 0.62, + "learning_rate": 3.392133397621775e-08, + "logits/chosen": -1.6019072532653809, + "logits/rejected": -1.5930860042572021, + "logps/chosen": -186.51400756835938, + "logps/rejected": -303.635498046875, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1466751098632812, + "rewards/margins": 3.6076369285583496, + "rewards/rejected": -1.460961937904358, + "step": 10587 + }, + { + "epoch": 0.62, + "learning_rate": 3.3912410797869015e-08, + "logits/chosen": -1.9483364820480347, + "logits/rejected": -1.953249216079712, + "logps/chosen": -25.847618103027344, + "logps/rejected": -102.09313201904297, + "loss": 0.5447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44706517457962036, + "rewards/margins": 0.016336262226104736, + "rewards/rejected": 0.4307289123535156, + "step": 10588 + }, + { + "epoch": 0.62, + "learning_rate": 3.390348819103017e-08, + "logits/chosen": -1.8389970064163208, + "logits/rejected": -1.8849906921386719, + "logps/chosen": -115.49671936035156, + "logps/rejected": -342.46087646484375, + "loss": 0.108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8650360107421875, + "rewards/margins": 3.046072483062744, + "rewards/rejected": -2.1810364723205566, + "step": 10589 + }, + { + "epoch": 0.62, + "learning_rate": 3.3894566156018156e-08, + "logits/chosen": -2.031611442565918, + "logits/rejected": -2.003657579421997, + "logps/chosen": -47.546695709228516, + "logps/rejected": -336.4791259765625, + "loss": 0.1351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1190365552902222, + "rewards/margins": 7.915902614593506, + "rewards/rejected": -6.796865940093994, + "step": 10590 + }, + { + "epoch": 0.62, + "learning_rate": 3.388564469314994e-08, + "logits/chosen": -1.9472874402999878, + "logits/rejected": -1.9704816341400146, + "logps/chosen": -235.38064575195312, + "logps/rejected": -296.4197998046875, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9992005825042725, + "rewards/margins": 3.082058906555176, + "rewards/rejected": -0.08285827934741974, + "step": 10591 + }, + { + "epoch": 0.62, + "learning_rate": 3.3876723802742435e-08, + "logits/chosen": -1.9146009683609009, + "logits/rejected": -1.919740080833435, + "logps/chosen": -3.3178720474243164, + "logps/rejected": -97.34268188476562, + "loss": 0.4483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036612655967473984, + "rewards/margins": 1.3763664960861206, + "rewards/rejected": -1.4129791259765625, + "step": 10592 + }, + { + "epoch": 0.62, + "learning_rate": 3.3867803485112586e-08, + "logits/chosen": -1.957302451133728, + "logits/rejected": -1.9584168195724487, + "logps/chosen": -27.395341873168945, + "logps/rejected": -138.86212158203125, + "loss": 0.5669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0637422576546669, + "rewards/margins": 0.3469587564468384, + "rewards/rejected": -0.4107010066509247, + "step": 10593 + }, + { + "epoch": 0.62, + "learning_rate": 3.385888374057725e-08, + "logits/chosen": -1.8147586584091187, + "logits/rejected": -1.7280205488204956, + "logps/chosen": -198.50408935546875, + "logps/rejected": -549.4036865234375, + "loss": 0.0627, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7012298107147217, + "rewards/margins": 2.6282989978790283, + "rewards/rejected": 0.07293090969324112, + "step": 10594 + }, + { + "epoch": 0.62, + "learning_rate": 3.384996456945333e-08, + "logits/chosen": -1.7533783912658691, + "logits/rejected": -1.755820393562317, + "logps/chosen": -75.64585876464844, + "logps/rejected": -332.70965576171875, + "loss": 0.0785, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.757746934890747, + "rewards/margins": 2.197209119796753, + "rewards/rejected": 0.5605377554893494, + "step": 10595 + }, + { + "epoch": 0.62, + "learning_rate": 3.384104597205766e-08, + "logits/chosen": -1.8525770902633667, + "logits/rejected": -1.855103611946106, + "logps/chosen": -31.76100730895996, + "logps/rejected": -202.61904907226562, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5860899090766907, + "rewards/margins": 1.1940275430679321, + "rewards/rejected": -0.6079376339912415, + "step": 10596 + }, + { + "epoch": 0.62, + "learning_rate": 3.383212794870709e-08, + "logits/chosen": -1.7121042013168335, + "logits/rejected": -1.7048561573028564, + "logps/chosen": -221.822509765625, + "logps/rejected": -384.4373779296875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1314055919647217, + "rewards/margins": 5.111672878265381, + "rewards/rejected": -1.9802674055099487, + "step": 10597 + }, + { + "epoch": 0.62, + "learning_rate": 3.38232104997184e-08, + "logits/chosen": -2.0110318660736084, + "logits/rejected": -2.003386974334717, + "logps/chosen": -53.01226043701172, + "logps/rejected": -445.64447021484375, + "loss": 0.2328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4533935487270355, + "rewards/margins": 7.198840141296387, + "rewards/rejected": -6.745446681976318, + "step": 10598 + }, + { + "epoch": 0.62, + "learning_rate": 3.3814293625408416e-08, + "logits/chosen": -1.877068281173706, + "logits/rejected": -1.8843574523925781, + "logps/chosen": -0.00015985497157089412, + "logps/rejected": -299.02142333984375, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884763034671778e-06, + "rewards/margins": 6.006588935852051, + "rewards/rejected": -6.006591796875, + "step": 10599 + }, + { + "epoch": 0.62, + "learning_rate": 3.3805377326093896e-08, + "logits/chosen": -1.7802029848098755, + "logits/rejected": -1.7759132385253906, + "logps/chosen": -189.85812377929688, + "logps/rejected": -424.86480712890625, + "loss": 0.1549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8076385855674744, + "rewards/margins": 1.7128082513809204, + "rewards/rejected": -0.905169665813446, + "step": 10600 + }, + { + "epoch": 0.62, + "learning_rate": 3.379646160209158e-08, + "logits/chosen": -1.8748012781143188, + "logits/rejected": -1.867820143699646, + "logps/chosen": -278.1461181640625, + "logps/rejected": -626.1644897460938, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.839288353919983, + "rewards/margins": 7.52252197265625, + "rewards/rejected": -5.683233737945557, + "step": 10601 + }, + { + "epoch": 0.62, + "learning_rate": 3.378754645371823e-08, + "logits/chosen": -2.141556739807129, + "logits/rejected": -2.142012596130371, + "logps/chosen": -9.5231294631958, + "logps/rejected": -130.44891357421875, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07082100212574005, + "rewards/margins": 1.5040844678878784, + "rewards/rejected": -1.4332634210586548, + "step": 10602 + }, + { + "epoch": 0.62, + "learning_rate": 3.37786318812905e-08, + "logits/chosen": -1.8586156368255615, + "logits/rejected": -1.8552974462509155, + "logps/chosen": -269.32489013671875, + "logps/rejected": -636.259765625, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.876415967941284, + "rewards/margins": 8.393908500671387, + "rewards/rejected": -5.517492771148682, + "step": 10603 + }, + { + "epoch": 0.62, + "learning_rate": 3.376971788512515e-08, + "logits/chosen": -1.5010031461715698, + "logits/rejected": -1.4920018911361694, + "logps/chosen": -34.78950881958008, + "logps/rejected": -292.6512451171875, + "loss": 0.1981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7421157956123352, + "rewards/margins": 3.065974473953247, + "rewards/rejected": -2.3238587379455566, + "step": 10604 + }, + { + "epoch": 0.62, + "learning_rate": 3.3760804465538775e-08, + "logits/chosen": -1.8080412149429321, + "logits/rejected": -1.8271782398223877, + "logps/chosen": -155.99258422851562, + "logps/rejected": -278.78564453125, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.031689405441284, + "rewards/margins": 3.3921263217926025, + "rewards/rejected": -0.3604370057582855, + "step": 10605 + }, + { + "epoch": 0.62, + "learning_rate": 3.3751891622848094e-08, + "logits/chosen": -1.908164381980896, + "logits/rejected": -1.9122027158737183, + "logps/chosen": -0.0003257874632254243, + "logps/rejected": -209.73443603515625, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3110379768477287e-05, + "rewards/margins": 5.274921417236328, + "rewards/rejected": -5.274908542633057, + "step": 10606 + }, + { + "epoch": 0.62, + "learning_rate": 3.374297935736967e-08, + "logits/chosen": -1.8980268239974976, + "logits/rejected": -1.9021176099777222, + "logps/chosen": -168.77352905273438, + "logps/rejected": -255.00796508789062, + "loss": 0.1216, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.82623291015625, + "rewards/margins": 1.5738433599472046, + "rewards/rejected": 0.252389520406723, + "step": 10607 + }, + { + "epoch": 0.62, + "learning_rate": 3.3734067669420165e-08, + "logits/chosen": -1.8338762521743774, + "logits/rejected": -1.8480225801467896, + "logps/chosen": -178.37432861328125, + "logps/rejected": -307.2167663574219, + "loss": 0.0564, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5809326171875, + "rewards/margins": 2.5726897716522217, + "rewards/rejected": -0.9917572140693665, + "step": 10608 + }, + { + "epoch": 0.62, + "learning_rate": 3.372515655931613e-08, + "logits/chosen": -1.721541166305542, + "logits/rejected": -1.7208887338638306, + "logps/chosen": -42.24763870239258, + "logps/rejected": -239.28900146484375, + "loss": 0.1202, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0135631561279297, + "rewards/margins": 4.139347553253174, + "rewards/rejected": -3.125784397125244, + "step": 10609 + }, + { + "epoch": 0.62, + "learning_rate": 3.3716246027374154e-08, + "logits/chosen": -1.9335323572158813, + "logits/rejected": -1.8701943159103394, + "logps/chosen": -208.9598846435547, + "logps/rejected": -459.18499755859375, + "loss": 0.1153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7790955305099487, + "rewards/margins": 1.901574730873108, + "rewards/rejected": -0.12247925251722336, + "step": 10610 + }, + { + "epoch": 0.62, + "learning_rate": 3.370733607391075e-08, + "logits/chosen": -1.8663554191589355, + "logits/rejected": -1.8418893814086914, + "logps/chosen": -8.022701513255015e-05, + "logps/rejected": -353.95037841796875, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.58295933943009e-05, + "rewards/margins": 7.612461090087891, + "rewards/rejected": -7.61244535446167, + "step": 10611 + }, + { + "epoch": 0.62, + "learning_rate": 3.3698426699242485e-08, + "logits/chosen": -1.7553726434707642, + "logits/rejected": -1.747537612915039, + "logps/chosen": -14.231306076049805, + "logps/rejected": -148.0994415283203, + "loss": 0.4506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03734264522790909, + "rewards/margins": 1.1703507900238037, + "rewards/rejected": -1.2076934576034546, + "step": 10612 + }, + { + "epoch": 0.62, + "learning_rate": 3.368951790368584e-08, + "logits/chosen": -1.9417340755462646, + "logits/rejected": -1.9511921405792236, + "logps/chosen": -197.80996704101562, + "logps/rejected": -306.8905944824219, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1505463123321533, + "rewards/margins": 4.229181289672852, + "rewards/rejected": -2.078634738922119, + "step": 10613 + }, + { + "epoch": 0.62, + "learning_rate": 3.36806096875573e-08, + "logits/chosen": -1.9532686471939087, + "logits/rejected": -1.94013512134552, + "logps/chosen": -51.66522216796875, + "logps/rejected": -244.70535278320312, + "loss": 0.1377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.951882541179657, + "rewards/margins": 3.162618637084961, + "rewards/rejected": -2.210736036300659, + "step": 10614 + }, + { + "epoch": 0.62, + "learning_rate": 3.367170205117332e-08, + "logits/chosen": -2.002210855484009, + "logits/rejected": -1.9926116466522217, + "logps/chosen": -0.0053328569047153, + "logps/rejected": -116.63040161132812, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007260350394062698, + "rewards/margins": 3.4846341609954834, + "rewards/rejected": -3.483908176422119, + "step": 10615 + }, + { + "epoch": 0.62, + "learning_rate": 3.3662794994850376e-08, + "logits/chosen": -1.9104466438293457, + "logits/rejected": -1.9115498065948486, + "logps/chosen": -39.685218811035156, + "logps/rejected": -220.343994140625, + "loss": 0.3976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4028816223144531, + "rewards/margins": 2.214745283126831, + "rewards/rejected": -2.617626905441284, + "step": 10616 + }, + { + "epoch": 0.62, + "learning_rate": 3.3653888518904846e-08, + "logits/chosen": -1.913148283958435, + "logits/rejected": -1.9096144437789917, + "logps/chosen": -88.93192291259766, + "logps/rejected": -401.9761962890625, + "loss": 0.1314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8590096235275269, + "rewards/margins": 1.6830360889434814, + "rewards/rejected": 0.17597351968288422, + "step": 10617 + }, + { + "epoch": 0.62, + "learning_rate": 3.364498262365317e-08, + "logits/chosen": -1.8714773654937744, + "logits/rejected": -1.8460299968719482, + "logps/chosen": -6.240344047546387, + "logps/rejected": -172.02297973632812, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2690594792366028, + "rewards/margins": 1.9277997016906738, + "rewards/rejected": -1.6587402820587158, + "step": 10618 + }, + { + "epoch": 0.62, + "learning_rate": 3.36360773094117e-08, + "logits/chosen": -1.9618449211120605, + "logits/rejected": -1.948407530784607, + "logps/chosen": -50.247589111328125, + "logps/rejected": -384.0009765625, + "loss": 0.2816, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.253927618265152, + "rewards/margins": 6.471646308898926, + "rewards/rejected": -6.217718601226807, + "step": 10619 + }, + { + "epoch": 0.62, + "learning_rate": 3.362717257649681e-08, + "logits/chosen": -2.0766656398773193, + "logits/rejected": -2.0521576404571533, + "logps/chosen": -0.00017857117927633226, + "logps/rejected": -311.3994140625, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.789210829083459e-07, + "rewards/margins": 5.711971282958984, + "rewards/rejected": -5.711970806121826, + "step": 10620 + }, + { + "epoch": 0.62, + "learning_rate": 3.3618268425224836e-08, + "logits/chosen": -1.9645227193832397, + "logits/rejected": -1.9833014011383057, + "logps/chosen": -273.931396484375, + "logps/rejected": -454.083740234375, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.100140333175659, + "rewards/margins": 4.523327827453613, + "rewards/rejected": -2.423187255859375, + "step": 10621 + }, + { + "epoch": 0.62, + "learning_rate": 3.360936485591209e-08, + "logits/chosen": -1.9007810354232788, + "logits/rejected": -1.8918778896331787, + "logps/chosen": -40.637454986572266, + "logps/rejected": -264.8887939453125, + "loss": 0.2187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5070476531982422, + "rewards/margins": 5.235183238983154, + "rewards/rejected": -4.728135585784912, + "step": 10622 + }, + { + "epoch": 0.62, + "learning_rate": 3.360046186887491e-08, + "logits/chosen": -1.7588518857955933, + "logits/rejected": -1.77911376953125, + "logps/chosen": -352.81317138671875, + "logps/rejected": -698.2818603515625, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6230742931365967, + "rewards/margins": 13.262857437133789, + "rewards/rejected": -10.639782905578613, + "step": 10623 + }, + { + "epoch": 0.62, + "learning_rate": 3.359155946442952e-08, + "logits/chosen": -1.8942147493362427, + "logits/rejected": -1.8989661931991577, + "logps/chosen": -4.339174847700633e-05, + "logps/rejected": -122.8658447265625, + "loss": 0.4028, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0132589522982016e-06, + "rewards/margins": 1.85610830783844, + "rewards/rejected": -1.8561073541641235, + "step": 10624 + }, + { + "epoch": 0.62, + "learning_rate": 3.358265764289222e-08, + "logits/chosen": -1.8014171123504639, + "logits/rejected": -1.802411437034607, + "logps/chosen": -184.75387573242188, + "logps/rejected": -355.09771728515625, + "loss": 0.1641, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8345001935958862, + "rewards/margins": 1.1104309558868408, + "rewards/rejected": 0.7240692377090454, + "step": 10625 + }, + { + "epoch": 0.62, + "learning_rate": 3.35737564045792e-08, + "logits/chosen": -1.9483709335327148, + "logits/rejected": -1.9492157697677612, + "logps/chosen": -203.43606567382812, + "logps/rejected": -384.2727966308594, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8623138666152954, + "rewards/margins": 3.4982941150665283, + "rewards/rejected": -1.635980248451233, + "step": 10626 + }, + { + "epoch": 0.62, + "learning_rate": 3.3564855749806734e-08, + "logits/chosen": -2.0624914169311523, + "logits/rejected": -2.066148042678833, + "logps/chosen": -42.945411682128906, + "logps/rejected": -166.781005859375, + "loss": 0.4709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7954033017158508, + "rewards/margins": 0.26533734798431396, + "rewards/rejected": 0.5300659537315369, + "step": 10627 + }, + { + "epoch": 0.62, + "learning_rate": 3.3555955678890947e-08, + "logits/chosen": -1.7645846605300903, + "logits/rejected": -1.761325478553772, + "logps/chosen": -76.78189086914062, + "logps/rejected": -331.00927734375, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18738937377929688, + "rewards/margins": 3.324453115463257, + "rewards/rejected": -3.13706374168396, + "step": 10628 + }, + { + "epoch": 0.62, + "learning_rate": 3.354705619214807e-08, + "logits/chosen": -2.0585343837738037, + "logits/rejected": -2.05318021774292, + "logps/chosen": -27.080528259277344, + "logps/rejected": -159.23519897460938, + "loss": 0.0969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5197495222091675, + "rewards/margins": 2.907034397125244, + "rewards/rejected": -1.3872848749160767, + "step": 10629 + }, + { + "epoch": 0.62, + "learning_rate": 3.353815728989424e-08, + "logits/chosen": -1.9787178039550781, + "logits/rejected": -1.974604606628418, + "logps/chosen": -2.181520903832279e-05, + "logps/rejected": -174.86883544921875, + "loss": 0.4138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1801656683019246e-06, + "rewards/margins": 1.8743274211883545, + "rewards/rejected": -1.87432861328125, + "step": 10630 + }, + { + "epoch": 0.62, + "learning_rate": 3.352925897244558e-08, + "logits/chosen": -2.101062059402466, + "logits/rejected": -2.100001573562622, + "logps/chosen": -12.3912935256958, + "logps/rejected": -105.45352172851562, + "loss": 0.4766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21905823051929474, + "rewards/margins": 0.7332168817520142, + "rewards/rejected": -0.5141586661338806, + "step": 10631 + }, + { + "epoch": 0.62, + "learning_rate": 3.3520361240118214e-08, + "logits/chosen": -1.9939157962799072, + "logits/rejected": -1.9746263027191162, + "logps/chosen": -0.0007869766559451818, + "logps/rejected": -168.4794158935547, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2653503037872724e-05, + "rewards/margins": 3.8916990756988525, + "rewards/rejected": -3.891676425933838, + "step": 10632 + }, + { + "epoch": 0.62, + "learning_rate": 3.351146409322823e-08, + "logits/chosen": -1.8450857400894165, + "logits/rejected": -1.8265196084976196, + "logps/chosen": -199.02255249023438, + "logps/rejected": -371.65997314453125, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.38812255859375, + "rewards/margins": 4.099123954772949, + "rewards/rejected": 0.28899842500686646, + "step": 10633 + }, + { + "epoch": 0.62, + "learning_rate": 3.350256753209169e-08, + "logits/chosen": -1.781526803970337, + "logits/rejected": -1.7839884757995605, + "logps/chosen": -36.0617790222168, + "logps/rejected": -124.93426513671875, + "loss": 0.7697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5972742438316345, + "rewards/margins": 0.22584891319274902, + "rewards/rejected": -0.8231231570243835, + "step": 10634 + }, + { + "epoch": 0.62, + "learning_rate": 3.349367155702465e-08, + "logits/chosen": -1.7257832288742065, + "logits/rejected": -1.6885606050491333, + "logps/chosen": -250.39935302734375, + "logps/rejected": -476.5478515625, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3125946521759033, + "rewards/margins": 4.0528106689453125, + "rewards/rejected": -2.740216016769409, + "step": 10635 + }, + { + "epoch": 0.62, + "learning_rate": 3.348477616834313e-08, + "logits/chosen": -2.1406338214874268, + "logits/rejected": -2.141535997390747, + "logps/chosen": -98.91547393798828, + "logps/rejected": -368.24664306640625, + "loss": 0.1464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8756973147392273, + "rewards/margins": 3.3770217895507812, + "rewards/rejected": -2.501324415206909, + "step": 10636 + }, + { + "epoch": 0.62, + "learning_rate": 3.3475881366363164e-08, + "logits/chosen": -1.8799831867218018, + "logits/rejected": -1.882688045501709, + "logps/chosen": -7.503726005554199, + "logps/rejected": -180.41336059570312, + "loss": 0.2698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28769826889038086, + "rewards/margins": 4.8954758644104, + "rewards/rejected": -4.6077775955200195, + "step": 10637 + }, + { + "epoch": 0.62, + "learning_rate": 3.3466987151400704e-08, + "logits/chosen": -1.8471887111663818, + "logits/rejected": -1.8498438596725464, + "logps/chosen": -0.014428069815039635, + "logps/rejected": -77.31267547607422, + "loss": 0.4652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002929088659584522, + "rewards/margins": 1.3501251935958862, + "rewards/rejected": -1.3504180908203125, + "step": 10638 + }, + { + "epoch": 0.62, + "learning_rate": 3.345809352377173e-08, + "logits/chosen": -1.9558802843093872, + "logits/rejected": -1.9518097639083862, + "logps/chosen": -79.32955932617188, + "logps/rejected": -221.78970336914062, + "loss": 0.1469, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1967239379882812, + "rewards/margins": 3.5154404640197754, + "rewards/rejected": -2.318716526031494, + "step": 10639 + }, + { + "epoch": 0.62, + "learning_rate": 3.3449200483792207e-08, + "logits/chosen": -1.8678196668624878, + "logits/rejected": -1.8439685106277466, + "logps/chosen": -362.009033203125, + "logps/rejected": -730.7468872070312, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1508240699768066, + "rewards/margins": 9.373046875, + "rewards/rejected": -7.222222805023193, + "step": 10640 + }, + { + "epoch": 0.62, + "learning_rate": 3.344030803177801e-08, + "logits/chosen": -1.9441181421279907, + "logits/rejected": -1.9433674812316895, + "logps/chosen": -113.83183288574219, + "logps/rejected": -245.6223907470703, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33820268511772156, + "rewards/margins": 0.8753944635391235, + "rewards/rejected": -0.5371918082237244, + "step": 10641 + }, + { + "epoch": 0.62, + "learning_rate": 3.3431416168045113e-08, + "logits/chosen": -1.9123332500457764, + "logits/rejected": -1.9158096313476562, + "logps/chosen": -19.895540237426758, + "logps/rejected": -193.88455200195312, + "loss": 0.1737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8928758502006531, + "rewards/margins": 3.2218096256256104, + "rewards/rejected": -2.3289337158203125, + "step": 10642 + }, + { + "epoch": 0.62, + "learning_rate": 3.342252489290932e-08, + "logits/chosen": -1.9380171298980713, + "logits/rejected": -1.9351308345794678, + "logps/chosen": -10.910362243652344, + "logps/rejected": -109.28130340576172, + "loss": 0.4201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14944982528686523, + "rewards/margins": 1.396464467048645, + "rewards/rejected": -1.2470146417617798, + "step": 10643 + }, + { + "epoch": 0.62, + "learning_rate": 3.341363420668657e-08, + "logits/chosen": -1.881892442703247, + "logits/rejected": -1.8788821697235107, + "logps/chosen": -175.69915771484375, + "logps/rejected": -303.4933166503906, + "loss": 0.2582, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.271246314048767, + "rewards/margins": 0.6660003066062927, + "rewards/rejected": 0.6052460074424744, + "step": 10644 + }, + { + "epoch": 0.62, + "learning_rate": 3.340474410969264e-08, + "logits/chosen": -1.8464627265930176, + "logits/rejected": -1.8505319356918335, + "logps/chosen": -226.42318725585938, + "logps/rejected": -255.67831420898438, + "loss": 0.053, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.34464430809021, + "rewards/margins": 2.333484172821045, + "rewards/rejected": 1.0111602544784546, + "step": 10645 + }, + { + "epoch": 0.62, + "learning_rate": 3.339585460224339e-08, + "logits/chosen": -2.115713596343994, + "logits/rejected": -2.1154439449310303, + "logps/chosen": -1.8501956462860107, + "logps/rejected": -79.08536529541016, + "loss": 0.4795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09291501343250275, + "rewards/margins": 0.986680269241333, + "rewards/rejected": -0.8937652707099915, + "step": 10646 + }, + { + "epoch": 0.62, + "learning_rate": 3.3386965684654586e-08, + "logits/chosen": -1.8859994411468506, + "logits/rejected": -1.8824939727783203, + "logps/chosen": -36.31056213378906, + "logps/rejected": -145.66424560546875, + "loss": 0.2282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.031716227531433, + "rewards/margins": 1.396791934967041, + "rewards/rejected": -0.3650756776332855, + "step": 10647 + }, + { + "epoch": 0.62, + "learning_rate": 3.337807735724205e-08, + "logits/chosen": -1.894295573234558, + "logits/rejected": -1.8906135559082031, + "logps/chosen": -7.925296783447266, + "logps/rejected": -73.30915832519531, + "loss": 0.753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22142978012561798, + "rewards/margins": 0.09222303330898285, + "rewards/rejected": -0.31365281343460083, + "step": 10648 + }, + { + "epoch": 0.62, + "learning_rate": 3.336918962032149e-08, + "logits/chosen": -1.8981932401657104, + "logits/rejected": -1.9031462669372559, + "logps/chosen": -0.8983268737792969, + "logps/rejected": -180.17047119140625, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1565798968076706, + "rewards/margins": 2.23119854927063, + "rewards/rejected": -2.0746185779571533, + "step": 10649 + }, + { + "epoch": 0.62, + "learning_rate": 3.336030247420868e-08, + "logits/chosen": -1.9967777729034424, + "logits/rejected": -1.9872338771820068, + "logps/chosen": -241.00399780273438, + "logps/rejected": -480.38287353515625, + "loss": 0.0681, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.163525342941284, + "rewards/margins": 2.7384581565856934, + "rewards/rejected": -0.574932873249054, + "step": 10650 + }, + { + "epoch": 0.62, + "learning_rate": 3.335141591921931e-08, + "logits/chosen": -1.9688184261322021, + "logits/rejected": -1.9660643339157104, + "logps/chosen": -9.735040664672852, + "logps/rejected": -92.22494506835938, + "loss": 0.6304, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12581253051757812, + "rewards/margins": -0.013407140970230103, + "rewards/rejected": 0.13921967148780823, + "step": 10651 + }, + { + "epoch": 0.62, + "learning_rate": 3.3342529955669093e-08, + "logits/chosen": -1.65841805934906, + "logits/rejected": -1.6626050472259521, + "logps/chosen": -202.61753845214844, + "logps/rejected": -370.5626220703125, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.639845371246338, + "rewards/margins": 3.3662309646606445, + "rewards/rejected": -0.7263855338096619, + "step": 10652 + }, + { + "epoch": 0.62, + "learning_rate": 3.333364458387368e-08, + "logits/chosen": -2.0594208240509033, + "logits/rejected": -2.0592503547668457, + "logps/chosen": -159.31809997558594, + "logps/rejected": -441.9748229980469, + "loss": 0.0826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.259791612625122, + "rewards/margins": 6.183427810668945, + "rewards/rejected": -4.923635959625244, + "step": 10653 + }, + { + "epoch": 0.62, + "learning_rate": 3.332475980414875e-08, + "logits/chosen": -1.7666125297546387, + "logits/rejected": -1.7639747858047485, + "logps/chosen": -12.557455062866211, + "logps/rejected": -170.09619140625, + "loss": 0.2088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5602747201919556, + "rewards/margins": 2.665121078491211, + "rewards/rejected": -2.104846239089966, + "step": 10654 + }, + { + "epoch": 0.62, + "learning_rate": 3.3315875616809904e-08, + "logits/chosen": -1.767161250114441, + "logits/rejected": -1.7718505859375, + "logps/chosen": -22.837200164794922, + "logps/rejected": -256.17987060546875, + "loss": 0.1946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5217838287353516, + "rewards/margins": 2.7316226959228516, + "rewards/rejected": -2.2098388671875, + "step": 10655 + }, + { + "epoch": 0.62, + "learning_rate": 3.3306992022172774e-08, + "logits/chosen": -2.0085434913635254, + "logits/rejected": -2.0140573978424072, + "logps/chosen": -0.0625835731625557, + "logps/rejected": -176.56741333007812, + "loss": 0.3983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004380680620670319, + "rewards/margins": 2.2573182582855225, + "rewards/rejected": -2.2616989612579346, + "step": 10656 + }, + { + "epoch": 0.62, + "learning_rate": 3.3298109020552947e-08, + "logits/chosen": -1.9113215208053589, + "logits/rejected": -1.9180363416671753, + "logps/chosen": -170.784912109375, + "logps/rejected": -325.1382751464844, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8577301502227783, + "rewards/margins": 2.941082715988159, + "rewards/rejected": -0.08335266262292862, + "step": 10657 + }, + { + "epoch": 0.62, + "learning_rate": 3.328922661226598e-08, + "logits/chosen": -1.943917155265808, + "logits/rejected": -1.9138548374176025, + "logps/chosen": -208.9053497314453, + "logps/rejected": -351.0858459472656, + "loss": 0.2557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.242305040359497, + "rewards/margins": 0.90733802318573, + "rewards/rejected": 0.3349670469760895, + "step": 10658 + }, + { + "epoch": 0.62, + "learning_rate": 3.3280344797627416e-08, + "logits/chosen": -1.939980387687683, + "logits/rejected": -1.956174612045288, + "logps/chosen": -220.46322631835938, + "logps/rejected": -348.90057373046875, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.035266160964966, + "rewards/margins": 3.7767577171325684, + "rewards/rejected": -1.741491675376892, + "step": 10659 + }, + { + "epoch": 0.62, + "learning_rate": 3.32714635769528e-08, + "logits/chosen": -1.8091505765914917, + "logits/rejected": -1.7684940099716187, + "logps/chosen": -194.20709228515625, + "logps/rejected": -474.1273193359375, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8446792364120483, + "rewards/margins": 4.318360805511475, + "rewards/rejected": -2.473681688308716, + "step": 10660 + }, + { + "epoch": 0.62, + "learning_rate": 3.326258295055763e-08, + "logits/chosen": -1.9404798746109009, + "logits/rejected": -1.9294060468673706, + "logps/chosen": -154.88461303710938, + "logps/rejected": -309.0943603515625, + "loss": 0.0507, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1282577514648438, + "rewards/margins": 2.7152633666992188, + "rewards/rejected": -0.587005615234375, + "step": 10661 + }, + { + "epoch": 0.62, + "learning_rate": 3.325370291875737e-08, + "logits/chosen": -1.6623966693878174, + "logits/rejected": -1.660810947418213, + "logps/chosen": -30.914958953857422, + "logps/rejected": -199.10064697265625, + "loss": 0.5637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10288315266370773, + "rewards/margins": 0.36210861802101135, + "rewards/rejected": -0.259225457906723, + "step": 10662 + }, + { + "epoch": 0.62, + "learning_rate": 3.324482348186751e-08, + "logits/chosen": -2.0383384227752686, + "logits/rejected": -2.0283987522125244, + "logps/chosen": -0.00033040533890016377, + "logps/rejected": -253.4500732421875, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.269517027452821e-06, + "rewards/margins": 5.875118732452393, + "rewards/rejected": -5.8751115798950195, + "step": 10663 + }, + { + "epoch": 0.62, + "learning_rate": 3.323594464020346e-08, + "logits/chosen": -2.01033878326416, + "logits/rejected": -2.0058107376098633, + "logps/chosen": -5.292834248393774e-05, + "logps/rejected": -71.99967193603516, + "loss": 0.4143, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2172412172949407e-06, + "rewards/margins": 1.8814105987548828, + "rewards/rejected": -1.8814083337783813, + "step": 10664 + }, + { + "epoch": 0.62, + "learning_rate": 3.3227066394080694e-08, + "logits/chosen": -1.9921289682388306, + "logits/rejected": -1.989099383354187, + "logps/chosen": -17.375612258911133, + "logps/rejected": -177.08883666992188, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.512434184551239, + "rewards/margins": 2.0075573921203613, + "rewards/rejected": -1.495123267173767, + "step": 10665 + }, + { + "epoch": 0.62, + "learning_rate": 3.321818874381454e-08, + "logits/chosen": -1.916732907295227, + "logits/rejected": -1.8507918119430542, + "logps/chosen": -336.1656799316406, + "logps/rejected": -697.133544921875, + "loss": 0.0632, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.237924337387085, + "rewards/margins": 3.500570774078369, + "rewards/rejected": -1.2626465559005737, + "step": 10666 + }, + { + "epoch": 0.62, + "learning_rate": 3.3209311689720446e-08, + "logits/chosen": -1.8089934587478638, + "logits/rejected": -1.8569788932800293, + "logps/chosen": -197.80422973632812, + "logps/rejected": -333.4102783203125, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5575958490371704, + "rewards/margins": 4.333011150360107, + "rewards/rejected": -2.7754151821136475, + "step": 10667 + }, + { + "epoch": 0.62, + "learning_rate": 3.3200435232113685e-08, + "logits/chosen": -1.7848249673843384, + "logits/rejected": -1.7084848880767822, + "logps/chosen": -207.2023468017578, + "logps/rejected": -452.32440185546875, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.263958692550659, + "rewards/margins": 3.650555372238159, + "rewards/rejected": -1.3865966796875, + "step": 10668 + }, + { + "epoch": 0.62, + "learning_rate": 3.319155937130968e-08, + "logits/chosen": -1.7301236391067505, + "logits/rejected": -1.7326091527938843, + "logps/chosen": -5.37205696105957, + "logps/rejected": -73.72064208984375, + "loss": 0.6204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17865422368049622, + "rewards/margins": 0.4511462152004242, + "rewards/rejected": -0.6298004388809204, + "step": 10669 + }, + { + "epoch": 0.62, + "learning_rate": 3.318268410762368e-08, + "logits/chosen": -2.0984909534454346, + "logits/rejected": -2.090778112411499, + "logps/chosen": -18.89870834350586, + "logps/rejected": -160.56753540039062, + "loss": 0.3796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0038988112937659025, + "rewards/margins": 1.99862539768219, + "rewards/rejected": -1.994726538658142, + "step": 10670 + }, + { + "epoch": 0.62, + "learning_rate": 3.3173809441371027e-08, + "logits/chosen": -1.6935256719589233, + "logits/rejected": -1.6947139501571655, + "logps/chosen": -33.34239196777344, + "logps/rejected": -197.467041015625, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013857650570571423, + "rewards/margins": 3.6389002799987793, + "rewards/rejected": -3.625042676925659, + "step": 10671 + }, + { + "epoch": 0.62, + "learning_rate": 3.3164935372866956e-08, + "logits/chosen": -1.8862930536270142, + "logits/rejected": -1.89175283908844, + "logps/chosen": -17.652563095092773, + "logps/rejected": -251.4998321533203, + "loss": 0.2809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5817155838012695, + "rewards/margins": 2.025564670562744, + "rewards/rejected": -1.4438492059707642, + "step": 10672 + }, + { + "epoch": 0.62, + "learning_rate": 3.315606190242673e-08, + "logits/chosen": -1.976958990097046, + "logits/rejected": -1.9901483058929443, + "logps/chosen": -57.42431640625, + "logps/rejected": -254.89096069335938, + "loss": 0.4103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5202194452285767, + "rewards/margins": 0.76361083984375, + "rewards/rejected": -0.24339142441749573, + "step": 10673 + }, + { + "epoch": 0.62, + "learning_rate": 3.314718903036557e-08, + "logits/chosen": -1.8945372104644775, + "logits/rejected": -1.8914724588394165, + "logps/chosen": -197.54324340820312, + "logps/rejected": -306.4291687011719, + "loss": 0.1965, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.697039842605591, + "rewards/margins": 0.8187469244003296, + "rewards/rejected": 1.8782929182052612, + "step": 10674 + }, + { + "epoch": 0.62, + "learning_rate": 3.313831675699871e-08, + "logits/chosen": -1.8683035373687744, + "logits/rejected": -1.8565500974655151, + "logps/chosen": -193.48553466796875, + "logps/rejected": -274.45953369140625, + "loss": 0.2235, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2800629138946533, + "rewards/margins": 1.6638398170471191, + "rewards/rejected": -0.38377687335014343, + "step": 10675 + }, + { + "epoch": 0.62, + "learning_rate": 3.3129445082641303e-08, + "logits/chosen": -1.9542557001113892, + "logits/rejected": -1.9438374042510986, + "logps/chosen": -27.609086990356445, + "logps/rejected": -174.44476318359375, + "loss": 0.4275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07974071800708771, + "rewards/margins": 1.9860308170318604, + "rewards/rejected": -2.0657715797424316, + "step": 10676 + }, + { + "epoch": 0.62, + "learning_rate": 3.312057400760854e-08, + "logits/chosen": -1.8518528938293457, + "logits/rejected": -1.8488047122955322, + "logps/chosen": -29.106008529663086, + "logps/rejected": -79.31571960449219, + "loss": 0.4111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26967984437942505, + "rewards/margins": 1.1703565120697021, + "rewards/rejected": -0.9006767272949219, + "step": 10677 + }, + { + "epoch": 0.62, + "learning_rate": 3.311170353221554e-08, + "logits/chosen": -2.02217698097229, + "logits/rejected": -2.0541131496429443, + "logps/chosen": -117.24920654296875, + "logps/rejected": -338.3192138671875, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6568955183029175, + "rewards/margins": 4.762467861175537, + "rewards/rejected": -3.105572462081909, + "step": 10678 + }, + { + "epoch": 0.62, + "learning_rate": 3.3102833656777454e-08, + "logits/chosen": -1.75403892993927, + "logits/rejected": -1.7524620294570923, + "logps/chosen": -24.262022018432617, + "logps/rejected": -254.5194549560547, + "loss": 0.2611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4747638702392578, + "rewards/margins": 3.4773242473602295, + "rewards/rejected": -3.0025603771209717, + "step": 10679 + }, + { + "epoch": 0.62, + "learning_rate": 3.309396438160936e-08, + "logits/chosen": -1.769998550415039, + "logits/rejected": -1.7530258893966675, + "logps/chosen": -241.28204345703125, + "logps/rejected": -524.76513671875, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9039063453674316, + "rewards/margins": 6.724123954772949, + "rewards/rejected": -3.8202178478240967, + "step": 10680 + }, + { + "epoch": 0.62, + "learning_rate": 3.308509570702635e-08, + "logits/chosen": -1.8435531854629517, + "logits/rejected": -1.841411828994751, + "logps/chosen": -71.26651000976562, + "logps/rejected": -299.30340576171875, + "loss": 0.1791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6082359552383423, + "rewards/margins": 2.344863176345825, + "rewards/rejected": -1.736627221107483, + "step": 10681 + }, + { + "epoch": 0.62, + "learning_rate": 3.307622763334349e-08, + "logits/chosen": -1.9003959894180298, + "logits/rejected": -1.8922362327575684, + "logps/chosen": -12.167659759521484, + "logps/rejected": -95.15477752685547, + "loss": 0.7272, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.057684995234012604, + "rewards/margins": -0.31674814224243164, + "rewards/rejected": 0.37443313002586365, + "step": 10682 + }, + { + "epoch": 0.62, + "learning_rate": 3.306736016087578e-08, + "logits/chosen": -1.9185595512390137, + "logits/rejected": -1.9363657236099243, + "logps/chosen": -186.8023223876953, + "logps/rejected": -507.8088684082031, + "loss": 0.0413, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.047419786453247, + "rewards/margins": 10.220072746276855, + "rewards/rejected": -9.172653198242188, + "step": 10683 + }, + { + "epoch": 0.62, + "learning_rate": 3.3058493289938305e-08, + "logits/chosen": -1.9508129358291626, + "logits/rejected": -1.9520692825317383, + "logps/chosen": -0.0021822513081133366, + "logps/rejected": -105.21810913085938, + "loss": 0.4098, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.499893349129707e-05, + "rewards/margins": 1.7143901586532593, + "rewards/rejected": -1.7144851684570312, + "step": 10684 + }, + { + "epoch": 0.62, + "learning_rate": 3.3049627020845976e-08, + "logits/chosen": -1.9419502019882202, + "logits/rejected": -1.9507983922958374, + "logps/chosen": -16.640188217163086, + "logps/rejected": -158.3955841064453, + "loss": 0.2822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9600939154624939, + "rewards/margins": 1.0242433547973633, + "rewards/rejected": -0.06414947658777237, + "step": 10685 + }, + { + "epoch": 0.62, + "learning_rate": 3.304076135391385e-08, + "logits/chosen": -1.8567537069320679, + "logits/rejected": -1.8653810024261475, + "logps/chosen": -199.66043090820312, + "logps/rejected": -419.90264892578125, + "loss": 0.1195, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.394970655441284, + "rewards/margins": 1.5639128684997559, + "rewards/rejected": 0.8310577273368835, + "step": 10686 + }, + { + "epoch": 0.62, + "learning_rate": 3.3031896289456795e-08, + "logits/chosen": -2.0040061473846436, + "logits/rejected": -2.0025572776794434, + "logps/chosen": -0.11254285275936127, + "logps/rejected": -54.74443817138672, + "loss": 0.5432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005579304415732622, + "rewards/margins": 0.6451348066329956, + "rewards/rejected": -0.650714099407196, + "step": 10687 + }, + { + "epoch": 0.62, + "learning_rate": 3.3023031827789825e-08, + "logits/chosen": -2.022827625274658, + "logits/rejected": -1.954960823059082, + "logps/chosen": -179.71900939941406, + "logps/rejected": -444.6861572265625, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4961471557617188, + "rewards/margins": 2.304487705230713, + "rewards/rejected": 0.19165955483913422, + "step": 10688 + }, + { + "epoch": 0.62, + "learning_rate": 3.301416796922777e-08, + "logits/chosen": -1.7557801008224487, + "logits/rejected": -1.6865434646606445, + "logps/chosen": -202.07107543945312, + "logps/rejected": -304.6867370605469, + "loss": 0.2076, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.94964599609375, + "rewards/margins": 1.0840849876403809, + "rewards/rejected": 0.8655609488487244, + "step": 10689 + }, + { + "epoch": 0.62, + "learning_rate": 3.300530471408559e-08, + "logits/chosen": -2.138139486312866, + "logits/rejected": -2.048671245574951, + "logps/chosen": -107.59263610839844, + "logps/rejected": -281.39447021484375, + "loss": 0.0986, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.280346632003784, + "rewards/margins": 2.03741455078125, + "rewards/rejected": 0.24293212592601776, + "step": 10690 + }, + { + "epoch": 0.62, + "learning_rate": 3.2996442062678084e-08, + "logits/chosen": -2.0967745780944824, + "logits/rejected": -2.0904574394226074, + "logps/chosen": -0.0028047242667526007, + "logps/rejected": -208.72866821289062, + "loss": 0.3976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014899841335136443, + "rewards/margins": 2.9567618370056152, + "rewards/rejected": -2.9569108486175537, + "step": 10691 + }, + { + "epoch": 0.62, + "learning_rate": 3.298758001532015e-08, + "logits/chosen": -1.8593692779541016, + "logits/rejected": -1.8608176708221436, + "logps/chosen": -16.779870986938477, + "logps/rejected": -251.64694213867188, + "loss": 0.2111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8279442191123962, + "rewards/margins": 2.537338972091675, + "rewards/rejected": -1.7093948125839233, + "step": 10692 + }, + { + "epoch": 0.62, + "learning_rate": 3.297871857232658e-08, + "logits/chosen": -1.981102705001831, + "logits/rejected": -1.9823609590530396, + "logps/chosen": -217.60604858398438, + "logps/rejected": -266.68414306640625, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7226303219795227, + "rewards/margins": 0.3513473570346832, + "rewards/rejected": 0.3712829649448395, + "step": 10693 + }, + { + "epoch": 0.62, + "learning_rate": 3.2969857734012204e-08, + "logits/chosen": -1.952043890953064, + "logits/rejected": -1.9556080102920532, + "logps/chosen": -38.23137283325195, + "logps/rejected": -175.83740234375, + "loss": 0.5961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14907188713550568, + "rewards/margins": 0.3024921417236328, + "rewards/rejected": -0.4515640437602997, + "step": 10694 + }, + { + "epoch": 0.62, + "learning_rate": 3.2960997500691765e-08, + "logits/chosen": -1.9986971616744995, + "logits/rejected": -1.9365400075912476, + "logps/chosen": -168.2588653564453, + "logps/rejected": -373.5826110839844, + "loss": 0.1507, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8001006841659546, + "rewards/margins": 1.612481713294983, + "rewards/rejected": 0.18761901557445526, + "step": 10695 + }, + { + "epoch": 0.62, + "learning_rate": 3.295213787268005e-08, + "logits/chosen": -2.009570598602295, + "logits/rejected": -2.0152783393859863, + "logps/chosen": -7.086369514465332, + "logps/rejected": -309.71954345703125, + "loss": 0.2474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2935066223144531, + "rewards/margins": 6.284207820892334, + "rewards/rejected": -5.990701198577881, + "step": 10696 + }, + { + "epoch": 0.62, + "learning_rate": 3.294327885029179e-08, + "logits/chosen": -2.097080707550049, + "logits/rejected": -2.101674795150757, + "logps/chosen": -32.371910095214844, + "logps/rejected": -254.0848388671875, + "loss": 0.2, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0293877124786377, + "rewards/margins": 1.9062919616699219, + "rewards/rejected": -0.876904308795929, + "step": 10697 + }, + { + "epoch": 0.62, + "learning_rate": 3.2934420433841697e-08, + "logits/chosen": -1.631587028503418, + "logits/rejected": -1.6200956106185913, + "logps/chosen": -0.0010030755074694753, + "logps/rejected": -255.87515258789062, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.297527529066429e-05, + "rewards/margins": 7.30629825592041, + "rewards/rejected": -7.3063812255859375, + "step": 10698 + }, + { + "epoch": 0.62, + "learning_rate": 3.2925562623644464e-08, + "logits/chosen": -1.9824728965759277, + "logits/rejected": -1.9845927953720093, + "logps/chosen": -13.672242164611816, + "logps/rejected": -25.960832595825195, + "loss": 0.6714, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05964794382452965, + "rewards/margins": -0.07696533203125, + "rewards/rejected": 0.13661327958106995, + "step": 10699 + }, + { + "epoch": 0.62, + "learning_rate": 3.2916705420014765e-08, + "logits/chosen": -1.711951494216919, + "logits/rejected": -1.7223820686340332, + "logps/chosen": -73.55593872070312, + "logps/rejected": -197.69558715820312, + "loss": 0.4662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.201690673828125, + "rewards/margins": 0.4431823790073395, + "rewards/rejected": -0.24149170517921448, + "step": 10700 + }, + { + "epoch": 0.62, + "learning_rate": 3.290784882326726e-08, + "logits/chosen": -2.0537948608398438, + "logits/rejected": -2.062958240509033, + "logps/chosen": -56.7406120300293, + "logps/rejected": -139.13262939453125, + "loss": 0.3781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46896323561668396, + "rewards/margins": 1.1358668804168701, + "rewards/rejected": -0.6669036746025085, + "step": 10701 + }, + { + "epoch": 0.62, + "learning_rate": 3.2898992833716563e-08, + "logits/chosen": -1.8181049823760986, + "logits/rejected": -1.8174878358840942, + "logps/chosen": -0.0003320723189972341, + "logps/rejected": -115.71466064453125, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.630508788570296e-05, + "rewards/margins": 1.9653942584991455, + "rewards/rejected": -1.9654206037521362, + "step": 10702 + }, + { + "epoch": 0.62, + "learning_rate": 3.28901374516773e-08, + "logits/chosen": -1.8858871459960938, + "logits/rejected": -1.88733971118927, + "logps/chosen": -42.09170913696289, + "logps/rejected": -159.43606567382812, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21838036179542542, + "rewards/margins": 2.3054797649383545, + "rewards/rejected": -2.523860216140747, + "step": 10703 + }, + { + "epoch": 0.62, + "learning_rate": 3.288128267746403e-08, + "logits/chosen": -1.8619306087493896, + "logits/rejected": -1.8598271608352661, + "logps/chosen": -47.169830322265625, + "logps/rejected": -151.92489624023438, + "loss": 0.541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2209651917219162, + "rewards/margins": 0.20467643439769745, + "rewards/rejected": 0.01628875732421875, + "step": 10704 + }, + { + "epoch": 0.62, + "learning_rate": 3.2872428511391366e-08, + "logits/chosen": -1.8910393714904785, + "logits/rejected": -1.8941165208816528, + "logps/chosen": -43.100563049316406, + "logps/rejected": -164.59141540527344, + "loss": 0.2341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7705772519111633, + "rewards/margins": 1.9792945384979248, + "rewards/rejected": -1.2087173461914062, + "step": 10705 + }, + { + "epoch": 0.62, + "learning_rate": 3.286357495377379e-08, + "logits/chosen": -1.7720917463302612, + "logits/rejected": -1.7972031831741333, + "logps/chosen": -234.8363494873047, + "logps/rejected": -382.66485595703125, + "loss": 0.0613, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.580683946609497, + "rewards/margins": 2.580339193344116, + "rewards/rejected": 0.0003448486386332661, + "step": 10706 + }, + { + "epoch": 0.62, + "learning_rate": 3.285472200492588e-08, + "logits/chosen": -1.7602009773254395, + "logits/rejected": -1.7446283102035522, + "logps/chosen": -109.17225646972656, + "logps/rejected": -205.72903442382812, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6338905692100525, + "rewards/margins": 1.126471757888794, + "rewards/rejected": -0.49258118867874146, + "step": 10707 + }, + { + "epoch": 0.62, + "learning_rate": 3.284586966516209e-08, + "logits/chosen": -2.0631799697875977, + "logits/rejected": -2.0629708766937256, + "logps/chosen": -34.59321594238281, + "logps/rejected": -200.69140625, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24490013718605042, + "rewards/margins": 2.4356870651245117, + "rewards/rejected": -2.190786838531494, + "step": 10708 + }, + { + "epoch": 0.62, + "learning_rate": 3.283701793479695e-08, + "logits/chosen": -2.0032989978790283, + "logits/rejected": -1.9989029169082642, + "logps/chosen": -56.657806396484375, + "logps/rejected": -234.60336303710938, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27614519000053406, + "rewards/margins": 4.380909442901611, + "rewards/rejected": -4.657054424285889, + "step": 10709 + }, + { + "epoch": 0.62, + "learning_rate": 3.2828166814144856e-08, + "logits/chosen": -1.9008634090423584, + "logits/rejected": -1.8697172403335571, + "logps/chosen": -251.37704467773438, + "logps/rejected": -413.57611083984375, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0149199962615967, + "rewards/margins": 3.264883279800415, + "rewards/rejected": -0.24996338784694672, + "step": 10710 + }, + { + "epoch": 0.62, + "learning_rate": 3.2819316303520286e-08, + "logits/chosen": -1.906583547592163, + "logits/rejected": -1.8929939270019531, + "logps/chosen": -170.71495056152344, + "logps/rejected": -468.58843994140625, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7174224853515625, + "rewards/margins": 4.276690483093262, + "rewards/rejected": -2.5592682361602783, + "step": 10711 + }, + { + "epoch": 0.62, + "learning_rate": 3.281046640323763e-08, + "logits/chosen": -2.1249353885650635, + "logits/rejected": -2.1265645027160645, + "logps/chosen": -44.76888656616211, + "logps/rejected": -134.44500732421875, + "loss": 0.1259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5096790790557861, + "rewards/margins": 2.321512222290039, + "rewards/rejected": -0.8118332028388977, + "step": 10712 + }, + { + "epoch": 0.62, + "learning_rate": 3.2801617113611304e-08, + "logits/chosen": -2.0446410179138184, + "logits/rejected": -2.0508956909179688, + "logps/chosen": -22.395767211914062, + "logps/rejected": -180.987548828125, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2980804443359375, + "rewards/margins": 1.8309555053710938, + "rewards/rejected": -1.5328750610351562, + "step": 10713 + }, + { + "epoch": 0.62, + "learning_rate": 3.2792768434955654e-08, + "logits/chosen": -2.0617611408233643, + "logits/rejected": -2.0436387062072754, + "logps/chosen": -51.593196868896484, + "logps/rejected": -234.6790771484375, + "loss": 0.3919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17675743997097015, + "rewards/margins": 0.7581478357315063, + "rewards/rejected": -0.581390380859375, + "step": 10714 + }, + { + "epoch": 0.62, + "learning_rate": 3.278392036758505e-08, + "logits/chosen": -1.9410123825073242, + "logits/rejected": -1.9387422800064087, + "logps/chosen": -17.899898529052734, + "logps/rejected": -213.3350830078125, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1379413604736328, + "rewards/margins": 3.099815845489502, + "rewards/rejected": -2.961874485015869, + "step": 10715 + }, + { + "epoch": 0.62, + "learning_rate": 3.27750729118138e-08, + "logits/chosen": -1.9753086566925049, + "logits/rejected": -1.9693571329116821, + "logps/chosen": -8.875182151794434, + "logps/rejected": -89.9637451171875, + "loss": 0.5586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08749103546142578, + "rewards/margins": 0.43346577882766724, + "rewards/rejected": -0.34597474336624146, + "step": 10716 + }, + { + "epoch": 0.62, + "learning_rate": 3.2766226067956225e-08, + "logits/chosen": -1.8903063535690308, + "logits/rejected": -1.8815537691116333, + "logps/chosen": -203.8019561767578, + "logps/rejected": -291.3027648925781, + "loss": 0.0887, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.062880039215088, + "rewards/margins": 2.3312668800354004, + "rewards/rejected": -0.2683868408203125, + "step": 10717 + }, + { + "epoch": 0.62, + "learning_rate": 3.275737983632659e-08, + "logits/chosen": -1.892369031906128, + "logits/rejected": -1.886862874031067, + "logps/chosen": -38.22393035888672, + "logps/rejected": -197.01210021972656, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5504482388496399, + "rewards/margins": 0.9413250088691711, + "rewards/rejected": -0.39087677001953125, + "step": 10718 + }, + { + "epoch": 0.62, + "learning_rate": 3.274853421723917e-08, + "logits/chosen": -2.0056798458099365, + "logits/rejected": -2.0051987171173096, + "logps/chosen": -50.75653839111328, + "logps/rejected": -168.67214965820312, + "loss": 0.2046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.645773708820343, + "rewards/margins": 2.0352649688720703, + "rewards/rejected": -1.389491319656372, + "step": 10719 + }, + { + "epoch": 0.62, + "learning_rate": 3.273968921100819e-08, + "logits/chosen": -1.8195514678955078, + "logits/rejected": -1.89293372631073, + "logps/chosen": -231.73220825195312, + "logps/rejected": -304.1689758300781, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6889740228652954, + "rewards/margins": 3.1227447986602783, + "rewards/rejected": -1.433770775794983, + "step": 10720 + }, + { + "epoch": 0.62, + "learning_rate": 3.273084481794789e-08, + "logits/chosen": -1.8096708059310913, + "logits/rejected": -1.8672443628311157, + "logps/chosen": -327.6011047363281, + "logps/rejected": -390.35418701171875, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3195953369140625, + "rewards/margins": 5.136843681335449, + "rewards/rejected": -1.8172485828399658, + "step": 10721 + }, + { + "epoch": 0.62, + "learning_rate": 3.272200103837246e-08, + "logits/chosen": -1.8667138814926147, + "logits/rejected": -1.874302864074707, + "logps/chosen": -96.31753540039062, + "logps/rejected": -332.7122802734375, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0764617919921875, + "rewards/margins": 3.9770445823669434, + "rewards/rejected": -4.053506374359131, + "step": 10722 + }, + { + "epoch": 0.62, + "learning_rate": 3.2713157872596054e-08, + "logits/chosen": -1.8926318883895874, + "logits/rejected": -1.9344838857650757, + "logps/chosen": -213.9165802001953, + "logps/rejected": -347.71856689453125, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7202545404434204, + "rewards/margins": 5.200540065765381, + "rewards/rejected": -3.48028564453125, + "step": 10723 + }, + { + "epoch": 0.62, + "learning_rate": 3.2704315320932853e-08, + "logits/chosen": -1.9893251657485962, + "logits/rejected": -1.987588882446289, + "logps/chosen": -15.61871337890625, + "logps/rejected": -159.24380493164062, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05502891540527344, + "rewards/margins": 2.9346508979797363, + "rewards/rejected": -2.879621982574463, + "step": 10724 + }, + { + "epoch": 0.62, + "learning_rate": 3.2695473383696956e-08, + "logits/chosen": -2.0246834754943848, + "logits/rejected": -2.018087387084961, + "logps/chosen": -196.02845764160156, + "logps/rejected": -321.0885009765625, + "loss": 0.1564, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9534882307052612, + "rewards/margins": 1.152047872543335, + "rewards/rejected": 0.801440417766571, + "step": 10725 + }, + { + "epoch": 0.62, + "learning_rate": 3.268663206120251e-08, + "logits/chosen": -1.8643193244934082, + "logits/rejected": -1.8486363887786865, + "logps/chosen": -0.024339709430933, + "logps/rejected": -394.5251770019531, + "loss": 0.3389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002046432113274932, + "rewards/margins": 6.8134846687316895, + "rewards/rejected": -6.811438083648682, + "step": 10726 + }, + { + "epoch": 0.62, + "learning_rate": 3.267779135376356e-08, + "logits/chosen": -1.83799147605896, + "logits/rejected": -1.8228455781936646, + "logps/chosen": -95.14312744140625, + "logps/rejected": -231.06814575195312, + "loss": 0.7405, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.026056671515107155, + "rewards/margins": -0.34185564517974854, + "rewards/rejected": 0.31579896807670593, + "step": 10727 + }, + { + "epoch": 0.62, + "learning_rate": 3.2668951261694224e-08, + "logits/chosen": -1.9023195505142212, + "logits/rejected": -1.9013264179229736, + "logps/chosen": -0.015463540330529213, + "logps/rejected": -127.75272369384766, + "loss": 0.5004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008145286119543016, + "rewards/margins": 0.9772999882698059, + "rewards/rejected": -0.9764854311943054, + "step": 10728 + }, + { + "epoch": 0.62, + "learning_rate": 3.266011178530849e-08, + "logits/chosen": -1.8344775438308716, + "logits/rejected": -1.8295152187347412, + "logps/chosen": -24.34748649597168, + "logps/rejected": -382.4967346191406, + "loss": 0.1213, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0127508640289307, + "rewards/margins": 8.062424659729004, + "rewards/rejected": -7.049673557281494, + "step": 10729 + }, + { + "epoch": 0.62, + "learning_rate": 3.2651272924920426e-08, + "logits/chosen": -1.6723065376281738, + "logits/rejected": -1.671105146408081, + "logps/chosen": -361.5924377441406, + "logps/rejected": -501.04559326171875, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33383485674858093, + "rewards/margins": 1.1976135969161987, + "rewards/rejected": -0.8637787103652954, + "step": 10730 + }, + { + "epoch": 0.62, + "learning_rate": 3.264243468084398e-08, + "logits/chosen": -1.7487826347351074, + "logits/rejected": -1.72504723072052, + "logps/chosen": -95.19025421142578, + "logps/rejected": -278.32342529296875, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4189590215682983, + "rewards/margins": 1.008213758468628, + "rewards/rejected": 0.410745233297348, + "step": 10731 + }, + { + "epoch": 0.62, + "learning_rate": 3.263359705339318e-08, + "logits/chosen": -2.104081869125366, + "logits/rejected": -2.085031032562256, + "logps/chosen": -0.0001742764434311539, + "logps/rejected": -240.13226318359375, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003721249522641301, + "rewards/margins": 3.5752716064453125, + "rewards/rejected": -3.574899435043335, + "step": 10732 + }, + { + "epoch": 0.62, + "learning_rate": 3.262476004288195e-08, + "logits/chosen": -1.8890984058380127, + "logits/rejected": -1.7676301002502441, + "logps/chosen": -134.37527465820312, + "logps/rejected": -462.6813659667969, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9849960803985596, + "rewards/margins": 3.6168532371520996, + "rewards/rejected": -1.6318572759628296, + "step": 10733 + }, + { + "epoch": 0.62, + "learning_rate": 3.261592364962425e-08, + "logits/chosen": -1.6839799880981445, + "logits/rejected": -1.6925359964370728, + "logps/chosen": -217.63986206054688, + "logps/rejected": -504.1849060058594, + "loss": 0.048, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.405771017074585, + "rewards/margins": 4.047766208648682, + "rewards/rejected": -1.6419953107833862, + "step": 10734 + }, + { + "epoch": 0.62, + "learning_rate": 3.260708787393396e-08, + "logits/chosen": -1.90573251247406, + "logits/rejected": -1.8791431188583374, + "logps/chosen": -286.5118713378906, + "logps/rejected": -553.4625244140625, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7000153064727783, + "rewards/margins": 5.623806953430176, + "rewards/rejected": -2.9237916469573975, + "step": 10735 + }, + { + "epoch": 0.62, + "learning_rate": 3.2598252716125e-08, + "logits/chosen": -2.0881993770599365, + "logits/rejected": -2.069571018218994, + "logps/chosen": -88.56607055664062, + "logps/rejected": -202.86477661132812, + "loss": 0.2429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5346435904502869, + "rewards/margins": 2.229693651199341, + "rewards/rejected": -1.6950501203536987, + "step": 10736 + }, + { + "epoch": 0.62, + "learning_rate": 3.2589418176511206e-08, + "logits/chosen": -1.9612194299697876, + "logits/rejected": -1.9572876691818237, + "logps/chosen": -38.73884963989258, + "logps/rejected": -225.066650390625, + "loss": 0.2163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.699222207069397, + "rewards/margins": 2.4838199615478516, + "rewards/rejected": -1.7845977544784546, + "step": 10737 + }, + { + "epoch": 0.62, + "learning_rate": 3.2580584255406465e-08, + "logits/chosen": -1.9538577795028687, + "logits/rejected": -1.941033124923706, + "logps/chosen": -160.71969604492188, + "logps/rejected": -202.72882080078125, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.198986768722534, + "rewards/margins": 0.04233074188232422, + "rewards/rejected": 2.15665602684021, + "step": 10738 + }, + { + "epoch": 0.62, + "learning_rate": 3.257175095312457e-08, + "logits/chosen": -1.7876032590866089, + "logits/rejected": -1.7889554500579834, + "logps/chosen": -14.82336711883545, + "logps/rejected": -149.5665740966797, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1401568502187729, + "rewards/margins": 2.798362970352173, + "rewards/rejected": -2.6582062244415283, + "step": 10739 + }, + { + "epoch": 0.62, + "learning_rate": 3.256291826997933e-08, + "logits/chosen": -1.7966536283493042, + "logits/rejected": -1.798374056816101, + "logps/chosen": -25.95116424560547, + "logps/rejected": -83.22627258300781, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1475776433944702, + "rewards/margins": 1.8497871160507202, + "rewards/rejected": -0.70220947265625, + "step": 10740 + }, + { + "epoch": 0.63, + "learning_rate": 3.255408620628453e-08, + "logits/chosen": -1.9143129587173462, + "logits/rejected": -1.9031081199645996, + "logps/chosen": -2.5272194761782885e-05, + "logps/rejected": -153.36587524414062, + "loss": 0.3766, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3840039276024072e-08, + "rewards/margins": 2.6000213623046875, + "rewards/rejected": -2.6000213623046875, + "step": 10741 + }, + { + "epoch": 0.63, + "learning_rate": 3.254525476235391e-08, + "logits/chosen": -1.9768983125686646, + "logits/rejected": -1.98484206199646, + "logps/chosen": -157.93145751953125, + "logps/rejected": -330.15789794921875, + "loss": 0.1309, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.559736728668213, + "rewards/margins": 1.3093949556350708, + "rewards/rejected": 1.250341773033142, + "step": 10742 + }, + { + "epoch": 0.63, + "learning_rate": 3.253642393850124e-08, + "logits/chosen": -1.8733371496200562, + "logits/rejected": -1.869664192199707, + "logps/chosen": -26.441055297851562, + "logps/rejected": -127.9857406616211, + "loss": 0.3795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.380136102437973, + "rewards/margins": 1.1737464666366577, + "rewards/rejected": -0.7936103940010071, + "step": 10743 + }, + { + "epoch": 0.63, + "learning_rate": 3.2527593735040194e-08, + "logits/chosen": -1.7936240434646606, + "logits/rejected": -1.7730540037155151, + "logps/chosen": -264.0345764160156, + "logps/rejected": -558.5546875, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3363983631134033, + "rewards/margins": 3.284048557281494, + "rewards/rejected": 0.05234985426068306, + "step": 10744 + }, + { + "epoch": 0.63, + "learning_rate": 3.251876415228452e-08, + "logits/chosen": -1.7954199314117432, + "logits/rejected": -1.7793532609939575, + "logps/chosen": -228.19473266601562, + "logps/rejected": -400.16265869140625, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2757294178009033, + "rewards/margins": 1.847543478012085, + "rewards/rejected": -0.5718140006065369, + "step": 10745 + }, + { + "epoch": 0.63, + "learning_rate": 3.250993519054782e-08, + "logits/chosen": -1.8709343671798706, + "logits/rejected": -1.870313048362732, + "logps/chosen": -27.023822784423828, + "logps/rejected": -257.51190185546875, + "loss": 0.2723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31877174973487854, + "rewards/margins": 2.592745065689087, + "rewards/rejected": -2.273973226547241, + "step": 10746 + }, + { + "epoch": 0.63, + "learning_rate": 3.25011068501438e-08, + "logits/chosen": -1.6269400119781494, + "logits/rejected": -1.6411800384521484, + "logps/chosen": -144.82730102539062, + "logps/rejected": -353.63385009765625, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7821823358535767, + "rewards/margins": 2.6981124877929688, + "rewards/rejected": -0.9159302115440369, + "step": 10747 + }, + { + "epoch": 0.63, + "learning_rate": 3.249227913138604e-08, + "logits/chosen": -2.0375688076019287, + "logits/rejected": -2.0972611904144287, + "logps/chosen": -158.97921752929688, + "logps/rejected": -217.38034057617188, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.90252685546875, + "rewards/margins": 0.45673826336860657, + "rewards/rejected": 0.44578859210014343, + "step": 10748 + }, + { + "epoch": 0.63, + "learning_rate": 3.2483452034588186e-08, + "logits/chosen": -1.9418835639953613, + "logits/rejected": -1.941571593284607, + "logps/chosen": -23.267818450927734, + "logps/rejected": -240.69607543945312, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19543571770191193, + "rewards/margins": 3.3606860637664795, + "rewards/rejected": -3.556121826171875, + "step": 10749 + }, + { + "epoch": 0.63, + "learning_rate": 3.247462556006377e-08, + "logits/chosen": -1.9667162895202637, + "logits/rejected": -1.962485671043396, + "logps/chosen": -67.22929382324219, + "logps/rejected": -252.83302307128906, + "loss": 0.4273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19988098740577698, + "rewards/margins": 1.6532318592071533, + "rewards/rejected": -1.4533509016036987, + "step": 10750 + }, + { + "epoch": 0.63, + "learning_rate": 3.246579970812642e-08, + "logits/chosen": -1.9162683486938477, + "logits/rejected": -1.9032480716705322, + "logps/chosen": -148.0029296875, + "logps/rejected": -216.48828125, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.892939805984497, + "rewards/margins": 0.20884859561920166, + "rewards/rejected": 1.6840912103652954, + "step": 10751 + }, + { + "epoch": 0.63, + "learning_rate": 3.245697447908959e-08, + "logits/chosen": -1.983814001083374, + "logits/rejected": -1.9666121006011963, + "logps/chosen": -162.1372528076172, + "logps/rejected": -219.6385955810547, + "loss": 0.24, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7516098022460938, + "rewards/margins": 0.5690977573394775, + "rewards/rejected": 2.182512044906616, + "step": 10752 + }, + { + "epoch": 0.63, + "learning_rate": 3.2448149873266866e-08, + "logits/chosen": -2.044830799102783, + "logits/rejected": -2.0306763648986816, + "logps/chosen": -37.53147888183594, + "logps/rejected": -222.28924560546875, + "loss": 0.228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5152096152305603, + "rewards/margins": 3.3228542804718018, + "rewards/rejected": -2.8076446056365967, + "step": 10753 + }, + { + "epoch": 0.63, + "learning_rate": 3.24393258909717e-08, + "logits/chosen": -1.8773880004882812, + "logits/rejected": -1.8794571161270142, + "logps/chosen": -92.38556671142578, + "logps/rejected": -199.38330078125, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9594940543174744, + "rewards/margins": 0.6258453726768494, + "rewards/rejected": 0.333648681640625, + "step": 10754 + }, + { + "epoch": 0.63, + "learning_rate": 3.24305025325176e-08, + "logits/chosen": -1.694410800933838, + "logits/rejected": -1.7000665664672852, + "logps/chosen": -1.3807719945907593, + "logps/rejected": -147.22369384765625, + "loss": 0.2548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33713895082473755, + "rewards/margins": 3.4786107540130615, + "rewards/rejected": -3.1414718627929688, + "step": 10755 + }, + { + "epoch": 0.63, + "learning_rate": 3.2421679798217966e-08, + "logits/chosen": -1.854212760925293, + "logits/rejected": -1.8436548709869385, + "logps/chosen": -217.3589630126953, + "logps/rejected": -360.08685302734375, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.104014873504639, + "rewards/margins": 4.566856861114502, + "rewards/rejected": -0.46284180879592896, + "step": 10756 + }, + { + "epoch": 0.63, + "learning_rate": 3.241285768838626e-08, + "logits/chosen": -1.7311675548553467, + "logits/rejected": -1.7374945878982544, + "logps/chosen": -0.023631248623132706, + "logps/rejected": -160.001708984375, + "loss": 0.3399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018188155954703689, + "rewards/margins": 3.7488198280334473, + "rewards/rejected": -3.750638723373413, + "step": 10757 + }, + { + "epoch": 0.63, + "learning_rate": 3.240403620333588e-08, + "logits/chosen": -2.0202956199645996, + "logits/rejected": -2.0100479125976562, + "logps/chosen": -0.008256092667579651, + "logps/rejected": -140.86610412597656, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006342448177747428, + "rewards/margins": 4.905197620391846, + "rewards/rejected": -4.905831813812256, + "step": 10758 + }, + { + "epoch": 0.63, + "learning_rate": 3.239521534338021e-08, + "logits/chosen": -1.9294984340667725, + "logits/rejected": -1.9432469606399536, + "logps/chosen": -231.0980987548828, + "logps/rejected": -346.0652160644531, + "loss": 0.0486, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8710464239120483, + "rewards/margins": 2.7869675159454346, + "rewards/rejected": -0.9159210324287415, + "step": 10759 + }, + { + "epoch": 0.63, + "learning_rate": 3.238639510883259e-08, + "logits/chosen": -1.6527870893478394, + "logits/rejected": -1.6788406372070312, + "logps/chosen": -291.8503723144531, + "logps/rejected": -476.38726806640625, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4415924549102783, + "rewards/margins": 4.2647247314453125, + "rewards/rejected": -2.823132276535034, + "step": 10760 + }, + { + "epoch": 0.63, + "learning_rate": 3.2377575500006375e-08, + "logits/chosen": -1.7997146844863892, + "logits/rejected": -1.8009071350097656, + "logps/chosen": -158.02752685546875, + "logps/rejected": -292.64373779296875, + "loss": 0.1709, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.009394884109497, + "rewards/margins": 1.546809434890747, + "rewards/rejected": -0.53741455078125, + "step": 10761 + }, + { + "epoch": 0.63, + "learning_rate": 3.236875651721489e-08, + "logits/chosen": -1.6627883911132812, + "logits/rejected": -1.661599040031433, + "logps/chosen": -54.38720703125, + "logps/rejected": -281.51171875, + "loss": 0.1733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8559940457344055, + "rewards/margins": 5.40195369720459, + "rewards/rejected": -4.54595947265625, + "step": 10762 + }, + { + "epoch": 0.63, + "learning_rate": 3.2359938160771406e-08, + "logits/chosen": -1.8584245443344116, + "logits/rejected": -1.862461805343628, + "logps/chosen": -0.8193731307983398, + "logps/rejected": -40.964759826660156, + "loss": 0.6873, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0007543385145254433, + "rewards/margins": -0.09484846144914627, + "rewards/rejected": 0.09560280293226242, + "step": 10763 + }, + { + "epoch": 0.63, + "learning_rate": 3.235112043098922e-08, + "logits/chosen": -1.8049358129501343, + "logits/rejected": -1.8733716011047363, + "logps/chosen": -262.9608154296875, + "logps/rejected": -688.1196899414062, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.815100073814392, + "rewards/margins": 12.247637748718262, + "rewards/rejected": -10.432538032531738, + "step": 10764 + }, + { + "epoch": 0.63, + "learning_rate": 3.234230332818154e-08, + "logits/chosen": -1.880214810371399, + "logits/rejected": -1.8756707906723022, + "logps/chosen": -0.1929822564125061, + "logps/rejected": -160.6133270263672, + "loss": 0.6026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007396772038191557, + "rewards/margins": 0.41110149025917053, + "rewards/rejected": -0.4184982478618622, + "step": 10765 + }, + { + "epoch": 0.63, + "learning_rate": 3.233348685266165e-08, + "logits/chosen": -1.8506921529769897, + "logits/rejected": -1.8441797494888306, + "logps/chosen": -58.01372146606445, + "logps/rejected": -266.56878662109375, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061475373804569244, + "rewards/margins": 3.4162895679473877, + "rewards/rejected": -3.354814291000366, + "step": 10766 + }, + { + "epoch": 0.63, + "learning_rate": 3.23246710047427e-08, + "logits/chosen": -2.030688762664795, + "logits/rejected": -2.0240132808685303, + "logps/chosen": -53.00333023071289, + "logps/rejected": -132.32273864746094, + "loss": 0.468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1795940399169922, + "rewards/margins": 0.6475299596786499, + "rewards/rejected": -0.4679359495639801, + "step": 10767 + }, + { + "epoch": 0.63, + "learning_rate": 3.231585578473792e-08, + "logits/chosen": -2.034156560897827, + "logits/rejected": -1.9806417226791382, + "logps/chosen": -189.3638458251953, + "logps/rejected": -324.9039306640625, + "loss": 0.138, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7256683111190796, + "rewards/margins": 2.398581027984619, + "rewards/rejected": -0.67291259765625, + "step": 10768 + }, + { + "epoch": 0.63, + "learning_rate": 3.2307041192960414e-08, + "logits/chosen": -1.801669955253601, + "logits/rejected": -1.7215520143508911, + "logps/chosen": -188.2509002685547, + "logps/rejected": -333.6907653808594, + "loss": 0.13, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5928268432617188, + "rewards/margins": 1.8322341442108154, + "rewards/rejected": -0.23940734565258026, + "step": 10769 + }, + { + "epoch": 0.63, + "learning_rate": 3.229822722972339e-08, + "logits/chosen": -2.050577402114868, + "logits/rejected": -2.051387071609497, + "logps/chosen": -35.88143539428711, + "logps/rejected": -89.6971435546875, + "loss": 0.6059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021760940551757812, + "rewards/margins": 0.17503242194652557, + "rewards/rejected": -0.15327148139476776, + "step": 10770 + }, + { + "epoch": 0.63, + "learning_rate": 3.228941389533988e-08, + "logits/chosen": -1.9043091535568237, + "logits/rejected": -1.927311658859253, + "logps/chosen": -232.07073974609375, + "logps/rejected": -436.64508056640625, + "loss": 0.1539, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.553393602371216, + "rewards/margins": 1.160974144935608, + "rewards/rejected": 1.392419457435608, + "step": 10771 + }, + { + "epoch": 0.63, + "learning_rate": 3.2280601190123055e-08, + "logits/chosen": -1.7547496557235718, + "logits/rejected": -1.6824058294296265, + "logps/chosen": -154.52694702148438, + "logps/rejected": -371.330322265625, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.684112548828125, + "rewards/margins": 3.092193603515625, + "rewards/rejected": -0.4080810546875, + "step": 10772 + }, + { + "epoch": 0.63, + "learning_rate": 3.2271789114385925e-08, + "logits/chosen": -1.9418261051177979, + "logits/rejected": -1.9480172395706177, + "logps/chosen": -16.235206604003906, + "logps/rejected": -188.58203125, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30837878584861755, + "rewards/margins": 2.720507860183716, + "rewards/rejected": -2.4121291637420654, + "step": 10773 + }, + { + "epoch": 0.63, + "learning_rate": 3.226297766844158e-08, + "logits/chosen": -1.7396560907363892, + "logits/rejected": -1.7620652914047241, + "logps/chosen": -272.7491760253906, + "logps/rejected": -294.96612548828125, + "loss": 0.4537, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.4845612049102783, + "rewards/margins": -0.34728097915649414, + "rewards/rejected": 2.8318421840667725, + "step": 10774 + }, + { + "epoch": 0.63, + "learning_rate": 3.2254166852603015e-08, + "logits/chosen": -1.8530991077423096, + "logits/rejected": -1.8507622480392456, + "logps/chosen": -0.00010311383084626868, + "logps/rejected": -149.69412231445312, + "loss": 0.5629, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.921868483303115e-06, + "rewards/margins": 0.4973336458206177, + "rewards/rejected": -0.4973297119140625, + "step": 10775 + }, + { + "epoch": 0.63, + "learning_rate": 3.2245356667183265e-08, + "logits/chosen": -1.8206743001937866, + "logits/rejected": -1.8360952138900757, + "logps/chosen": -270.2724304199219, + "logps/rejected": -565.7918701171875, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7848786115646362, + "rewards/margins": 3.228323459625244, + "rewards/rejected": -1.443444848060608, + "step": 10776 + }, + { + "epoch": 0.63, + "learning_rate": 3.2236547112495283e-08, + "logits/chosen": -1.8189479112625122, + "logits/rejected": -1.822007179260254, + "logps/chosen": -141.2345428466797, + "logps/rejected": -233.22024536132812, + "loss": 0.4514, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5346145629882812, + "rewards/margins": -0.04566502571105957, + "rewards/rejected": 1.5802795886993408, + "step": 10777 + }, + { + "epoch": 0.63, + "learning_rate": 3.222773818885204e-08, + "logits/chosen": -1.772477626800537, + "logits/rejected": -1.7611074447631836, + "logps/chosen": -87.10163116455078, + "logps/rejected": -236.44232177734375, + "loss": 0.2413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4803886413574219, + "rewards/margins": 2.6072471141815186, + "rewards/rejected": -2.1268584728240967, + "step": 10778 + }, + { + "epoch": 0.63, + "learning_rate": 3.2218929896566474e-08, + "logits/chosen": -2.130776882171631, + "logits/rejected": -2.128174066543579, + "logps/chosen": -37.246238708496094, + "logps/rejected": -290.406005859375, + "loss": 0.2003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6400734186172485, + "rewards/margins": 4.740067481994629, + "rewards/rejected": -4.09999418258667, + "step": 10779 + }, + { + "epoch": 0.63, + "learning_rate": 3.221012223595149e-08, + "logits/chosen": -1.7520495653152466, + "logits/rejected": -1.753678321838379, + "logps/chosen": -6.121076583862305, + "logps/rejected": -127.46893310546875, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23494167625904083, + "rewards/margins": 3.033292293548584, + "rewards/rejected": -2.7983505725860596, + "step": 10780 + }, + { + "epoch": 0.63, + "learning_rate": 3.2201315207319985e-08, + "logits/chosen": -1.7995916604995728, + "logits/rejected": -1.8010718822479248, + "logps/chosen": -13.163406372070312, + "logps/rejected": -62.82301330566406, + "loss": 0.6905, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.16950932145118713, + "rewards/margins": -0.19399014115333557, + "rewards/rejected": 0.3634994626045227, + "step": 10781 + }, + { + "epoch": 0.63, + "learning_rate": 3.219250881098482e-08, + "logits/chosen": -1.8784269094467163, + "logits/rejected": -1.7801591157913208, + "logps/chosen": -161.5924072265625, + "logps/rejected": -382.67486572265625, + "loss": 0.0878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6462570428848267, + "rewards/margins": 4.203685283660889, + "rewards/rejected": -2.5574281215667725, + "step": 10782 + }, + { + "epoch": 0.63, + "learning_rate": 3.218370304725886e-08, + "logits/chosen": -1.92696213722229, + "logits/rejected": -1.93767249584198, + "logps/chosen": -205.65492248535156, + "logps/rejected": -418.94866943359375, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3246246576309204, + "rewards/margins": 6.822561740875244, + "rewards/rejected": -5.497937202453613, + "step": 10783 + }, + { + "epoch": 0.63, + "learning_rate": 3.2174897916454906e-08, + "logits/chosen": -1.8933937549591064, + "logits/rejected": -1.8871359825134277, + "logps/chosen": -175.88363647460938, + "logps/rejected": -342.5102233886719, + "loss": 0.0388, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5524628162384033, + "rewards/margins": 4.699114799499512, + "rewards/rejected": -3.1466522216796875, + "step": 10784 + }, + { + "epoch": 0.63, + "learning_rate": 3.216609341888578e-08, + "logits/chosen": -2.0173704624176025, + "logits/rejected": -1.9781339168548584, + "logps/chosen": -253.2734832763672, + "logps/rejected": -445.3659362792969, + "loss": 0.1288, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0344619750976562, + "rewards/margins": 1.269627332687378, + "rewards/rejected": 1.7648346424102783, + "step": 10785 + }, + { + "epoch": 0.63, + "learning_rate": 3.215728955486423e-08, + "logits/chosen": -1.8530442714691162, + "logits/rejected": -1.844804286956787, + "logps/chosen": -30.9187068939209, + "logps/rejected": -179.38238525390625, + "loss": 0.5598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5970842242240906, + "rewards/margins": 2.7471280097961426, + "rewards/rejected": -3.344212293624878, + "step": 10786 + }, + { + "epoch": 0.63, + "learning_rate": 3.214848632470307e-08, + "logits/chosen": -1.8978952169418335, + "logits/rejected": -1.900858759880066, + "logps/chosen": -0.08035049587488174, + "logps/rejected": -169.20225524902344, + "loss": 0.3537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015493467217311263, + "rewards/margins": 3.0907175540924072, + "rewards/rejected": -3.092266798019409, + "step": 10787 + }, + { + "epoch": 0.63, + "learning_rate": 3.213968372871496e-08, + "logits/chosen": -1.81192147731781, + "logits/rejected": -1.8151261806488037, + "logps/chosen": -172.90292358398438, + "logps/rejected": -333.4211730957031, + "loss": 0.054, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.005575656890869, + "rewards/margins": 2.4648377895355225, + "rewards/rejected": -0.45926210284233093, + "step": 10788 + }, + { + "epoch": 0.63, + "learning_rate": 3.213088176721268e-08, + "logits/chosen": -1.563682198524475, + "logits/rejected": -1.5745799541473389, + "logps/chosen": -78.79182434082031, + "logps/rejected": -260.52264404296875, + "loss": 0.1229, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2853211164474487, + "rewards/margins": 4.786169528961182, + "rewards/rejected": -3.5008485317230225, + "step": 10789 + }, + { + "epoch": 0.63, + "learning_rate": 3.212208044050886e-08, + "logits/chosen": -2.0564639568328857, + "logits/rejected": -2.0588176250457764, + "logps/chosen": -41.39365768432617, + "logps/rejected": -184.894775390625, + "loss": 0.5095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22405700385570526, + "rewards/margins": 0.49513089656829834, + "rewards/rejected": -0.2710739076137543, + "step": 10790 + }, + { + "epoch": 0.63, + "learning_rate": 3.2113279748916215e-08, + "logits/chosen": -1.8834787607192993, + "logits/rejected": -1.8663644790649414, + "logps/chosen": -218.67364501953125, + "logps/rejected": -480.20440673828125, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.524073839187622, + "rewards/margins": 5.509920120239258, + "rewards/rejected": -2.9858460426330566, + "step": 10791 + }, + { + "epoch": 0.63, + "learning_rate": 3.210447969274734e-08, + "logits/chosen": -1.934911847114563, + "logits/rejected": -1.9295309782028198, + "logps/chosen": -179.72291564941406, + "logps/rejected": -413.12158203125, + "loss": 0.0662, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.312953233718872, + "rewards/margins": 2.3438491821289062, + "rewards/rejected": -0.03089599683880806, + "step": 10792 + }, + { + "epoch": 0.63, + "learning_rate": 3.209568027231492e-08, + "logits/chosen": -2.097790479660034, + "logits/rejected": -2.0920968055725098, + "logps/chosen": -28.37451934814453, + "logps/rejected": -121.11656188964844, + "loss": 0.2154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4496791958808899, + "rewards/margins": 2.824467182159424, + "rewards/rejected": -2.3747880458831787, + "step": 10793 + }, + { + "epoch": 0.63, + "learning_rate": 3.2086881487931476e-08, + "logits/chosen": -1.855230450630188, + "logits/rejected": -1.8367464542388916, + "logps/chosen": -326.060546875, + "logps/rejected": -405.5579833984375, + "loss": 0.1069, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3868682384490967, + "rewards/margins": 1.6357146501541138, + "rewards/rejected": 0.7511535882949829, + "step": 10794 + }, + { + "epoch": 0.63, + "learning_rate": 3.207808333990965e-08, + "logits/chosen": -1.829480528831482, + "logits/rejected": -1.815643310546875, + "logps/chosen": -227.25265502929688, + "logps/rejected": -427.278564453125, + "loss": 0.0716, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0189576148986816, + "rewards/margins": 3.0646209716796875, + "rewards/rejected": -1.0456634759902954, + "step": 10795 + }, + { + "epoch": 0.63, + "learning_rate": 3.206928582856196e-08, + "logits/chosen": -1.8553274869918823, + "logits/rejected": -1.8550304174423218, + "logps/chosen": -36.824188232421875, + "logps/rejected": -192.79623413085938, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1993030607700348, + "rewards/margins": 4.33951997756958, + "rewards/rejected": -4.538823127746582, + "step": 10796 + }, + { + "epoch": 0.63, + "learning_rate": 3.206048895420095e-08, + "logits/chosen": -1.97538423538208, + "logits/rejected": -1.96140456199646, + "logps/chosen": -11.841320037841797, + "logps/rejected": -314.644775390625, + "loss": 0.2306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5240671038627625, + "rewards/margins": 3.3688762187957764, + "rewards/rejected": -2.844809055328369, + "step": 10797 + }, + { + "epoch": 0.63, + "learning_rate": 3.2051692717139124e-08, + "logits/chosen": -1.6987709999084473, + "logits/rejected": -1.6690312623977661, + "logps/chosen": -210.7084503173828, + "logps/rejected": -428.10302734375, + "loss": 0.087, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0190811157226562, + "rewards/margins": 1.9625442028045654, + "rewards/rejected": 0.05653686448931694, + "step": 10798 + }, + { + "epoch": 0.63, + "learning_rate": 3.204289711768897e-08, + "logits/chosen": -1.8969496488571167, + "logits/rejected": -1.8951324224472046, + "logps/chosen": -69.1385498046875, + "logps/rejected": -211.71353149414062, + "loss": 0.313, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.300190806388855, + "rewards/margins": 0.5773560404777527, + "rewards/rejected": 0.7228347659111023, + "step": 10799 + }, + { + "epoch": 0.63, + "learning_rate": 3.203410215616293e-08, + "logits/chosen": -1.9476814270019531, + "logits/rejected": -1.9430657625198364, + "logps/chosen": -202.3247528076172, + "logps/rejected": -548.00927734375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.773390293121338, + "rewards/margins": 7.929832458496094, + "rewards/rejected": -5.156442165374756, + "step": 10800 + }, + { + "epoch": 0.63, + "learning_rate": 3.202530783287349e-08, + "logits/chosen": -1.9545750617980957, + "logits/rejected": -1.9572749137878418, + "logps/chosen": -0.0024256359320133924, + "logps/rejected": -143.81912231445312, + "loss": 0.4039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002174715482397005, + "rewards/margins": 1.8388168811798096, + "rewards/rejected": -1.8390343189239502, + "step": 10801 + }, + { + "epoch": 0.63, + "learning_rate": 3.2016514148133025e-08, + "logits/chosen": -1.6470617055892944, + "logits/rejected": -1.6495509147644043, + "logps/chosen": -17.489212036132812, + "logps/rejected": -135.74903869628906, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3244836926460266, + "rewards/margins": 1.711930274963379, + "rewards/rejected": -1.387446641921997, + "step": 10802 + }, + { + "epoch": 0.63, + "learning_rate": 3.2007721102253935e-08, + "logits/chosen": -1.7348726987838745, + "logits/rejected": -1.7341195344924927, + "logps/chosen": -0.0005520915146917105, + "logps/rejected": -144.16329956054688, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.58137651043944e-05, + "rewards/margins": 3.2442851066589355, + "rewards/rejected": -3.2442193031311035, + "step": 10803 + }, + { + "epoch": 0.63, + "learning_rate": 3.199892869554861e-08, + "logits/chosen": -2.0592472553253174, + "logits/rejected": -2.068767547607422, + "logps/chosen": -0.0019422117620706558, + "logps/rejected": -171.99417114257812, + "loss": 0.3725, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0562602003337815e-05, + "rewards/margins": 2.9293923377990723, + "rewards/rejected": -2.929412841796875, + "step": 10804 + }, + { + "epoch": 0.63, + "learning_rate": 3.199013692832939e-08, + "logits/chosen": -1.7953107357025146, + "logits/rejected": -1.8339776992797852, + "logps/chosen": -220.84271240234375, + "logps/rejected": -343.98248291015625, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1974334716796875, + "rewards/margins": 5.646124362945557, + "rewards/rejected": -3.448690891265869, + "step": 10805 + }, + { + "epoch": 0.63, + "learning_rate": 3.198134580090861e-08, + "logits/chosen": -1.9062373638153076, + "logits/rejected": -1.907280445098877, + "logps/chosen": -191.6632080078125, + "logps/rejected": -353.29742431640625, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7391449213027954, + "rewards/margins": 3.430691719055176, + "rewards/rejected": -1.6915466785430908, + "step": 10806 + }, + { + "epoch": 0.63, + "learning_rate": 3.197255531359855e-08, + "logits/chosen": -1.921263337135315, + "logits/rejected": -1.9123272895812988, + "logps/chosen": -187.30812072753906, + "logps/rejected": -395.8321838378906, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2033097743988037, + "rewards/margins": 0.01417398452758789, + "rewards/rejected": 2.189135789871216, + "step": 10807 + }, + { + "epoch": 0.63, + "learning_rate": 3.196376546671153e-08, + "logits/chosen": -1.9392547607421875, + "logits/rejected": -1.9306167364120483, + "logps/chosen": -190.0636749267578, + "logps/rejected": -343.5876159667969, + "loss": 0.2259, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.167956590652466, + "rewards/margins": 0.7286895513534546, + "rewards/rejected": 1.4392670392990112, + "step": 10808 + }, + { + "epoch": 0.63, + "learning_rate": 3.195497626055976e-08, + "logits/chosen": -1.964575171470642, + "logits/rejected": -1.9596302509307861, + "logps/chosen": -46.59169387817383, + "logps/rejected": -225.44290161132812, + "loss": 0.1981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9970428347587585, + "rewards/margins": 1.8243439197540283, + "rewards/rejected": -0.827301025390625, + "step": 10809 + }, + { + "epoch": 0.63, + "learning_rate": 3.194618769545553e-08, + "logits/chosen": -1.8067381381988525, + "logits/rejected": -1.7941373586654663, + "logps/chosen": -0.0033017797395586967, + "logps/rejected": -202.29608154296875, + "loss": 0.3513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00034194119507446885, + "rewards/margins": 3.856144905090332, + "rewards/rejected": -3.8558030128479004, + "step": 10810 + }, + { + "epoch": 0.63, + "learning_rate": 3.193739977171099e-08, + "logits/chosen": -1.9297202825546265, + "logits/rejected": -1.9462969303131104, + "logps/chosen": -198.21640014648438, + "logps/rejected": -328.88177490234375, + "loss": 0.0485, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5491623878479004, + "rewards/margins": 2.571782112121582, + "rewards/rejected": -0.02261962927877903, + "step": 10811 + }, + { + "epoch": 0.63, + "learning_rate": 3.1928612489638396e-08, + "logits/chosen": -1.8048083782196045, + "logits/rejected": -1.8044756650924683, + "logps/chosen": -142.42724609375, + "logps/rejected": -313.909912109375, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9303680658340454, + "rewards/margins": 2.925485134124756, + "rewards/rejected": -0.9951171875, + "step": 10812 + }, + { + "epoch": 0.63, + "learning_rate": 3.191982584954985e-08, + "logits/chosen": -1.957364559173584, + "logits/rejected": -1.9634877443313599, + "logps/chosen": -47.75214385986328, + "logps/rejected": -312.8190002441406, + "loss": 0.1277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5386818647384644, + "rewards/margins": 2.2087655067443848, + "rewards/rejected": -0.6700836420059204, + "step": 10813 + }, + { + "epoch": 0.63, + "learning_rate": 3.1911039851757554e-08, + "logits/chosen": -2.0390055179595947, + "logits/rejected": -2.013700485229492, + "logps/chosen": -54.74083709716797, + "logps/rejected": -338.27838134765625, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08942260593175888, + "rewards/margins": 4.247448921203613, + "rewards/rejected": -4.158026218414307, + "step": 10814 + }, + { + "epoch": 0.63, + "learning_rate": 3.1902254496573586e-08, + "logits/chosen": -1.9473340511322021, + "logits/rejected": -1.935314655303955, + "logps/chosen": -74.61663818359375, + "logps/rejected": -242.36277770996094, + "loss": 0.1864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3729279041290283, + "rewards/margins": 2.1555771827697754, + "rewards/rejected": -0.7826492190361023, + "step": 10815 + }, + { + "epoch": 0.63, + "learning_rate": 3.189346978431009e-08, + "logits/chosen": -1.9237715005874634, + "logits/rejected": -1.9782562255859375, + "logps/chosen": -185.57461547851562, + "logps/rejected": -395.8793029785156, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5487916469573975, + "rewards/margins": 6.018270969390869, + "rewards/rejected": -3.4694793224334717, + "step": 10816 + }, + { + "epoch": 0.63, + "learning_rate": 3.188468571527909e-08, + "logits/chosen": -2.011991500854492, + "logits/rejected": -2.02056622505188, + "logps/chosen": -10.687170028686523, + "logps/rejected": -209.6740264892578, + "loss": 0.382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4262922406196594, + "rewards/margins": 1.0097670555114746, + "rewards/rejected": -0.5834747552871704, + "step": 10817 + }, + { + "epoch": 0.63, + "learning_rate": 3.187590228979269e-08, + "logits/chosen": -2.1619842052459717, + "logits/rejected": -2.147942304611206, + "logps/chosen": -1.844086766242981, + "logps/rejected": -84.26611328125, + "loss": 0.6714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009853052906692028, + "rewards/margins": 0.07948195934295654, + "rewards/rejected": -0.06962890923023224, + "step": 10818 + }, + { + "epoch": 0.63, + "learning_rate": 3.1867119508162876e-08, + "logits/chosen": -2.0235676765441895, + "logits/rejected": -2.019505500793457, + "logps/chosen": -64.83090209960938, + "logps/rejected": -138.528076171875, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8266525268554688, + "rewards/margins": 1.083103895187378, + "rewards/rejected": -0.25645142793655396, + "step": 10819 + }, + { + "epoch": 0.63, + "learning_rate": 3.1858337370701684e-08, + "logits/chosen": -1.8704911470413208, + "logits/rejected": -1.8685842752456665, + "logps/chosen": -212.05662536621094, + "logps/rejected": -247.97694396972656, + "loss": 0.4298, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5246810913085938, + "rewards/margins": -0.05338132381439209, + "rewards/rejected": 1.5780624151229858, + "step": 10820 + }, + { + "epoch": 0.63, + "learning_rate": 3.184955587772108e-08, + "logits/chosen": -1.7910966873168945, + "logits/rejected": -1.797767162322998, + "logps/chosen": -0.3497546315193176, + "logps/rejected": -154.33456420898438, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1250992864370346, + "rewards/margins": 3.128840684890747, + "rewards/rejected": -3.003741502761841, + "step": 10821 + }, + { + "epoch": 0.63, + "learning_rate": 3.184077502953304e-08, + "logits/chosen": -1.845226526260376, + "logits/rejected": -1.9237416982650757, + "logps/chosen": -182.25225830078125, + "logps/rejected": -361.9483642578125, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.61981201171875, + "rewards/margins": 3.357897996902466, + "rewards/rejected": -1.7380859851837158, + "step": 10822 + }, + { + "epoch": 0.63, + "learning_rate": 3.1831994826449505e-08, + "logits/chosen": -1.8898723125457764, + "logits/rejected": -1.8618485927581787, + "logps/chosen": -224.9573211669922, + "logps/rejected": -455.6904602050781, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.4319748878479, + "rewards/margins": 4.499028205871582, + "rewards/rejected": -0.06705322116613388, + "step": 10823 + }, + { + "epoch": 0.63, + "learning_rate": 3.1823215268782366e-08, + "logits/chosen": -1.7983044385910034, + "logits/rejected": -1.78755521774292, + "logps/chosen": -68.65271759033203, + "logps/rejected": -320.9268493652344, + "loss": 0.1905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4632057249546051, + "rewards/margins": 4.175189971923828, + "rewards/rejected": -3.711984395980835, + "step": 10824 + }, + { + "epoch": 0.63, + "learning_rate": 3.181443635684355e-08, + "logits/chosen": -1.906049370765686, + "logits/rejected": -1.7987293004989624, + "logps/chosen": -325.4523620605469, + "logps/rejected": -724.404296875, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.110861301422119, + "rewards/margins": 4.520395278930664, + "rewards/rejected": -1.4095337390899658, + "step": 10825 + }, + { + "epoch": 0.63, + "learning_rate": 3.18056580909449e-08, + "logits/chosen": -1.8936653137207031, + "logits/rejected": -1.9706876277923584, + "logps/chosen": -173.62844848632812, + "logps/rejected": -329.5817565917969, + "loss": 0.0577, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.673437476158142, + "rewards/margins": 3.025347948074341, + "rewards/rejected": -1.3519104719161987, + "step": 10826 + }, + { + "epoch": 0.63, + "learning_rate": 3.179688047139828e-08, + "logits/chosen": -2.028111457824707, + "logits/rejected": -2.025430917739868, + "logps/chosen": -0.0007168496958911419, + "logps/rejected": -116.45526123046875, + "loss": 0.4194, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.516350574907847e-05, + "rewards/margins": 1.7976468801498413, + "rewards/rejected": -1.7977020740509033, + "step": 10827 + }, + { + "epoch": 0.63, + "learning_rate": 3.178810349851549e-08, + "logits/chosen": -1.9837490320205688, + "logits/rejected": -1.9862715005874634, + "logps/chosen": -4.141810417175293, + "logps/rejected": -48.455806732177734, + "loss": 0.7637, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.09895887225866318, + "rewards/margins": -0.34826213121414185, + "rewards/rejected": 0.4472210109233856, + "step": 10828 + }, + { + "epoch": 0.63, + "learning_rate": 3.177932717260837e-08, + "logits/chosen": -1.8122217655181885, + "logits/rejected": -1.8477355241775513, + "logps/chosen": -186.66790771484375, + "logps/rejected": -196.66754150390625, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8088349103927612, + "rewards/margins": 0.816108763217926, + "rewards/rejected": 0.9927261471748352, + "step": 10829 + }, + { + "epoch": 0.63, + "learning_rate": 3.177055149398865e-08, + "logits/chosen": -1.799431324005127, + "logits/rejected": -1.7461947202682495, + "logps/chosen": -244.47915649414062, + "logps/rejected": -359.8363952636719, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8949737548828125, + "rewards/margins": 2.1201539039611816, + "rewards/rejected": 0.7748199701309204, + "step": 10830 + }, + { + "epoch": 0.63, + "learning_rate": 3.176177646296815e-08, + "logits/chosen": -2.0432546138763428, + "logits/rejected": -2.022036075592041, + "logps/chosen": -24.678203582763672, + "logps/rejected": -255.9127655029297, + "loss": 0.3982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022236062213778496, + "rewards/margins": 1.706012487411499, + "rewards/rejected": -1.7282485961914062, + "step": 10831 + }, + { + "epoch": 0.63, + "learning_rate": 3.175300207985852e-08, + "logits/chosen": -1.8697304725646973, + "logits/rejected": -1.9086010456085205, + "logps/chosen": -254.88311767578125, + "logps/rejected": -299.8821716308594, + "loss": 0.0558, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1596925258636475, + "rewards/margins": 2.2133331298828125, + "rewards/rejected": 0.9463592767715454, + "step": 10832 + }, + { + "epoch": 0.63, + "learning_rate": 3.174422834497155e-08, + "logits/chosen": -1.950556755065918, + "logits/rejected": -1.9523667097091675, + "logps/chosen": -50.04143142700195, + "logps/rejected": -142.06130981445312, + "loss": 0.202, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1898349523544312, + "rewards/margins": 1.4648349285125732, + "rewards/rejected": -0.2750000059604645, + "step": 10833 + }, + { + "epoch": 0.63, + "learning_rate": 3.173545525861885e-08, + "logits/chosen": -1.8700072765350342, + "logits/rejected": -1.8663402795791626, + "logps/chosen": -64.91670989990234, + "logps/rejected": -242.99557495117188, + "loss": 0.4431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34647294878959656, + "rewards/margins": 2.6546974182128906, + "rewards/rejected": -3.0011703968048096, + "step": 10834 + }, + { + "epoch": 0.63, + "learning_rate": 3.172668282111215e-08, + "logits/chosen": -2.0219645500183105, + "logits/rejected": -2.022709369659424, + "logps/chosen": -53.56635284423828, + "logps/rejected": -155.42898559570312, + "loss": 0.4092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0041790008544921875, + "rewards/margins": 1.5221691131591797, + "rewards/rejected": -1.5179901123046875, + "step": 10835 + }, + { + "epoch": 0.63, + "learning_rate": 3.1717911032763044e-08, + "logits/chosen": -1.837385892868042, + "logits/rejected": -1.8364040851593018, + "logps/chosen": -208.50929260253906, + "logps/rejected": -261.137451171875, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.024684190750122, + "rewards/margins": 0.677632212638855, + "rewards/rejected": 0.3470520079135895, + "step": 10836 + }, + { + "epoch": 0.63, + "learning_rate": 3.170913989388318e-08, + "logits/chosen": -2.010782241821289, + "logits/rejected": -2.01373028755188, + "logps/chosen": -13.512517929077148, + "logps/rejected": -81.01765441894531, + "loss": 0.2542, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3683992624282837, + "rewards/margins": 1.0324617624282837, + "rewards/rejected": 0.3359375, + "step": 10837 + }, + { + "epoch": 0.63, + "learning_rate": 3.170036940478412e-08, + "logits/chosen": -1.5986260175704956, + "logits/rejected": -1.5932358503341675, + "logps/chosen": -284.38006591796875, + "logps/rejected": -370.850830078125, + "loss": 0.0876, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.079815626144409, + "rewards/margins": 2.3588316440582275, + "rewards/rejected": -0.2790161073207855, + "step": 10838 + }, + { + "epoch": 0.63, + "learning_rate": 3.169159956577747e-08, + "logits/chosen": -1.655577540397644, + "logits/rejected": -1.62388014793396, + "logps/chosen": -271.08563232421875, + "logps/rejected": -347.865966796875, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.08453369140625, + "rewards/margins": 2.8688080310821533, + "rewards/rejected": 0.21572570502758026, + "step": 10839 + }, + { + "epoch": 0.63, + "learning_rate": 3.1682830377174744e-08, + "logits/chosen": -1.837569236755371, + "logits/rejected": -1.8420920372009277, + "logps/chosen": -13.89975643157959, + "logps/rejected": -285.72784423828125, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17289943993091583, + "rewards/margins": 4.023129940032959, + "rewards/rejected": -3.8502304553985596, + "step": 10840 + }, + { + "epoch": 0.63, + "learning_rate": 3.167406183928749e-08, + "logits/chosen": -2.00229811668396, + "logits/rejected": -1.953223466873169, + "logps/chosen": -203.32003784179688, + "logps/rejected": -494.8855895996094, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3208329677581787, + "rewards/margins": 11.566997528076172, + "rewards/rejected": -9.246164321899414, + "step": 10841 + }, + { + "epoch": 0.63, + "learning_rate": 3.1665293952427196e-08, + "logits/chosen": -1.7821913957595825, + "logits/rejected": -1.7412278652191162, + "logps/chosen": -156.97946166992188, + "logps/rejected": -349.85595703125, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.188507080078125, + "rewards/margins": 2.810650587081909, + "rewards/rejected": 0.37785646319389343, + "step": 10842 + }, + { + "epoch": 0.63, + "learning_rate": 3.165652671690534e-08, + "logits/chosen": -2.0344276428222656, + "logits/rejected": -2.0287508964538574, + "logps/chosen": -16.99990463256836, + "logps/rejected": -198.94210815429688, + "loss": 0.4014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021656228229403496, + "rewards/margins": 2.1698436737060547, + "rewards/rejected": -2.1481873989105225, + "step": 10843 + }, + { + "epoch": 0.63, + "learning_rate": 3.16477601330334e-08, + "logits/chosen": -2.0442276000976562, + "logits/rejected": -2.0196635723114014, + "logps/chosen": -66.40190887451172, + "logps/rejected": -337.6502990722656, + "loss": 0.2445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2982475459575653, + "rewards/margins": 3.156627655029297, + "rewards/rejected": -2.858380079269409, + "step": 10844 + }, + { + "epoch": 0.63, + "learning_rate": 3.163899420112278e-08, + "logits/chosen": -1.9080548286437988, + "logits/rejected": -1.8959654569625854, + "logps/chosen": -59.53778839111328, + "logps/rejected": -210.44039916992188, + "loss": 0.2516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14491234719753265, + "rewards/margins": 2.324744701385498, + "rewards/rejected": -2.1798324584960938, + "step": 10845 + }, + { + "epoch": 0.63, + "learning_rate": 3.1630228921484906e-08, + "logits/chosen": -1.738206148147583, + "logits/rejected": -1.7373844385147095, + "logps/chosen": -8.973320007324219, + "logps/rejected": -31.218406677246094, + "loss": 0.6912, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.09375935047864914, + "rewards/margins": -0.09018077701330185, + "rewards/rejected": 0.183940127491951, + "step": 10846 + }, + { + "epoch": 0.63, + "learning_rate": 3.162146429443114e-08, + "logits/chosen": -1.822274923324585, + "logits/rejected": -1.8203074932098389, + "logps/chosen": -15.484952926635742, + "logps/rejected": -102.86637878417969, + "loss": 0.711, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.13606758415699005, + "rewards/margins": -0.3573223352432251, + "rewards/rejected": 0.49338990449905396, + "step": 10847 + }, + { + "epoch": 0.63, + "learning_rate": 3.161270032027289e-08, + "logits/chosen": -2.0094051361083984, + "logits/rejected": -2.0055251121520996, + "logps/chosen": -0.8412595987319946, + "logps/rejected": -150.32200622558594, + "loss": 0.4802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004109722562134266, + "rewards/margins": 1.1846975088119507, + "rewards/rejected": -1.1805877685546875, + "step": 10848 + }, + { + "epoch": 0.63, + "learning_rate": 3.1603936999321433e-08, + "logits/chosen": -1.7628353834152222, + "logits/rejected": -1.7599468231201172, + "logps/chosen": -218.74618530273438, + "logps/rejected": -427.38092041015625, + "loss": 0.1569, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3501815795898438, + "rewards/margins": 1.0780013799667358, + "rewards/rejected": 1.272180199623108, + "step": 10849 + }, + { + "epoch": 0.63, + "learning_rate": 3.159517433188815e-08, + "logits/chosen": -2.0928168296813965, + "logits/rejected": -2.081265449523926, + "logps/chosen": -63.13435363769531, + "logps/rejected": -217.04827880859375, + "loss": 0.303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30051422119140625, + "rewards/margins": 2.3417038917541504, + "rewards/rejected": -2.041189670562744, + "step": 10850 + }, + { + "epoch": 0.63, + "learning_rate": 3.158641231828428e-08, + "logits/chosen": -1.8351091146469116, + "logits/rejected": -1.853678822517395, + "logps/chosen": -137.34786987304688, + "logps/rejected": -341.2286376953125, + "loss": 0.0843, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5597031116485596, + "rewards/margins": 2.686100959777832, + "rewards/rejected": -1.126397728919983, + "step": 10851 + }, + { + "epoch": 0.63, + "learning_rate": 3.157765095882114e-08, + "logits/chosen": -1.9837329387664795, + "logits/rejected": -1.9710880517959595, + "logps/chosen": -201.99258422851562, + "logps/rejected": -406.80230712890625, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8397369384765625, + "rewards/margins": 4.067645072937012, + "rewards/rejected": -2.2279083728790283, + "step": 10852 + }, + { + "epoch": 0.63, + "learning_rate": 3.156889025380992e-08, + "logits/chosen": -2.069164276123047, + "logits/rejected": -2.0730202198028564, + "logps/chosen": -0.0003263753023929894, + "logps/rejected": -102.68440246582031, + "loss": 0.5795, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7326152121531777e-05, + "rewards/margins": 0.5046842694282532, + "rewards/rejected": -0.504656970500946, + "step": 10853 + }, + { + "epoch": 0.63, + "learning_rate": 3.156013020356191e-08, + "logits/chosen": -1.9380571842193604, + "logits/rejected": -1.930643916130066, + "logps/chosen": -36.57991027832031, + "logps/rejected": -184.14271545410156, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5242706537246704, + "rewards/margins": 1.6719284057617188, + "rewards/rejected": -1.1476577520370483, + "step": 10854 + }, + { + "epoch": 0.63, + "learning_rate": 3.1551370808388235e-08, + "logits/chosen": -1.9827500581741333, + "logits/rejected": -1.9708056449890137, + "logps/chosen": -61.11129379272461, + "logps/rejected": -153.68051147460938, + "loss": 0.4882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10930366814136505, + "rewards/margins": 0.8333232998847961, + "rewards/rejected": -0.942626953125, + "step": 10855 + }, + { + "epoch": 0.63, + "learning_rate": 3.154261206860014e-08, + "logits/chosen": -2.0702171325683594, + "logits/rejected": -2.068333625793457, + "logps/chosen": -18.76944351196289, + "logps/rejected": -104.65042114257812, + "loss": 0.503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28762587904930115, + "rewards/margins": 0.5984336733818054, + "rewards/rejected": -0.3108077943325043, + "step": 10856 + }, + { + "epoch": 0.63, + "learning_rate": 3.153385398450873e-08, + "logits/chosen": -1.9672987461090088, + "logits/rejected": -1.9493699073791504, + "logps/chosen": -36.203426361083984, + "logps/rejected": -177.3660125732422, + "loss": 0.2387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3112224340438843, + "rewards/margins": 1.2255290746688843, + "rewards/rejected": 0.085693359375, + "step": 10857 + }, + { + "epoch": 0.63, + "learning_rate": 3.152509655642517e-08, + "logits/chosen": -1.827682375907898, + "logits/rejected": -1.8164361715316772, + "logps/chosen": -23.296289443969727, + "logps/rejected": -171.39599609375, + "loss": 0.5018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2483997344970703, + "rewards/margins": 0.4053303003311157, + "rewards/rejected": -0.15693055093288422, + "step": 10858 + }, + { + "epoch": 0.63, + "learning_rate": 3.151633978466054e-08, + "logits/chosen": -1.8948858976364136, + "logits/rejected": -1.8989543914794922, + "logps/chosen": -196.25924682617188, + "logps/rejected": -363.8639831542969, + "loss": 0.0415, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.904638648033142, + "rewards/margins": 3.172576904296875, + "rewards/rejected": -1.267938256263733, + "step": 10859 + }, + { + "epoch": 0.63, + "learning_rate": 3.150758366952594e-08, + "logits/chosen": -1.825769305229187, + "logits/rejected": -1.8192758560180664, + "logps/chosen": -33.37495803833008, + "logps/rejected": -177.23208618164062, + "loss": 0.2149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9377064108848572, + "rewards/margins": 2.2795262336730957, + "rewards/rejected": -1.3418197631835938, + "step": 10860 + }, + { + "epoch": 0.63, + "learning_rate": 3.149882821133242e-08, + "logits/chosen": -1.7548530101776123, + "logits/rejected": -1.7545479536056519, + "logps/chosen": -284.3212585449219, + "logps/rejected": -403.7698059082031, + "loss": 0.0533, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0921692848205566, + "rewards/margins": 2.96051025390625, + "rewards/rejected": -0.8683410882949829, + "step": 10861 + }, + { + "epoch": 0.63, + "learning_rate": 3.149007341039103e-08, + "logits/chosen": -2.0588231086730957, + "logits/rejected": -2.0605385303497314, + "logps/chosen": -76.28309631347656, + "logps/rejected": -314.78814697265625, + "loss": 0.1644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7951225638389587, + "rewards/margins": 6.6424431800842285, + "rewards/rejected": -5.847320556640625, + "step": 10862 + }, + { + "epoch": 0.63, + "learning_rate": 3.1481319267012765e-08, + "logits/chosen": -1.9258190393447876, + "logits/rejected": -1.9179084300994873, + "logps/chosen": -15.95202922821045, + "logps/rejected": -128.95310974121094, + "loss": 0.5468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4488423466682434, + "rewards/margins": 2.04164457321167, + "rewards/rejected": -2.4904868602752686, + "step": 10863 + }, + { + "epoch": 0.63, + "learning_rate": 3.147256578150862e-08, + "logits/chosen": -1.8647246360778809, + "logits/rejected": -1.8706047534942627, + "logps/chosen": -99.7994384765625, + "logps/rejected": -351.96942138671875, + "loss": 0.1419, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.308083415031433, + "rewards/margins": 2.513387441635132, + "rewards/rejected": -1.2053040266036987, + "step": 10864 + }, + { + "epoch": 0.63, + "learning_rate": 3.146381295418958e-08, + "logits/chosen": -2.071638822555542, + "logits/rejected": -2.0642571449279785, + "logps/chosen": -77.58857727050781, + "logps/rejected": -135.6340789794922, + "loss": 0.3239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8645080924034119, + "rewards/margins": 1.4536758661270142, + "rewards/rejected": -0.5891677737236023, + "step": 10865 + }, + { + "epoch": 0.63, + "learning_rate": 3.145506078536656e-08, + "logits/chosen": -1.8670650720596313, + "logits/rejected": -1.8641605377197266, + "logps/chosen": -7.807363986968994, + "logps/rejected": -31.261737823486328, + "loss": 0.607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09035982936620712, + "rewards/margins": 0.33115115761756897, + "rewards/rejected": -0.24079132080078125, + "step": 10866 + }, + { + "epoch": 0.63, + "learning_rate": 3.144630927535051e-08, + "logits/chosen": -1.9259852170944214, + "logits/rejected": -1.961204171180725, + "logps/chosen": -189.33529663085938, + "logps/rejected": -344.5821533203125, + "loss": 0.1592, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1407792568206787, + "rewards/margins": 1.316999912261963, + "rewards/rejected": 0.823779284954071, + "step": 10867 + }, + { + "epoch": 0.63, + "learning_rate": 3.1437558424452284e-08, + "logits/chosen": -2.0095012187957764, + "logits/rejected": -2.0100913047790527, + "logps/chosen": -1.3228976726531982, + "logps/rejected": -72.90929412841797, + "loss": 0.4853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013473749160766602, + "rewards/margins": 1.0899267196655273, + "rewards/rejected": -1.103400468826294, + "step": 10868 + }, + { + "epoch": 0.63, + "learning_rate": 3.142880823298282e-08, + "logits/chosen": -2.073841094970703, + "logits/rejected": -2.064591407775879, + "logps/chosen": -56.967735290527344, + "logps/rejected": -384.9856872558594, + "loss": 0.1055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3457260131835938, + "rewards/margins": 3.2736892700195312, + "rewards/rejected": -1.9279632568359375, + "step": 10869 + }, + { + "epoch": 0.63, + "learning_rate": 3.142005870125289e-08, + "logits/chosen": -1.8778469562530518, + "logits/rejected": -1.8459839820861816, + "logps/chosen": -124.82154083251953, + "logps/rejected": -263.9743957519531, + "loss": 0.0926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9656089544296265, + "rewards/margins": 2.1978631019592285, + "rewards/rejected": -0.2322540283203125, + "step": 10870 + }, + { + "epoch": 0.63, + "learning_rate": 3.141130982957339e-08, + "logits/chosen": -1.8432196378707886, + "logits/rejected": -1.818819522857666, + "logps/chosen": -228.77273559570312, + "logps/rejected": -376.30877685546875, + "loss": 0.1596, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6801726818084717, + "rewards/margins": 1.0326201915740967, + "rewards/rejected": 1.647552490234375, + "step": 10871 + }, + { + "epoch": 0.63, + "learning_rate": 3.140256161825507e-08, + "logits/chosen": -1.91264808177948, + "logits/rejected": -1.9236005544662476, + "logps/chosen": -47.84238052368164, + "logps/rejected": -284.93170166015625, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23902931809425354, + "rewards/margins": 2.415964365005493, + "rewards/rejected": -2.1769349575042725, + "step": 10872 + }, + { + "epoch": 0.63, + "learning_rate": 3.139381406760876e-08, + "logits/chosen": -2.109035015106201, + "logits/rejected": -2.106170654296875, + "logps/chosen": -17.313182830810547, + "logps/rejected": -286.2397766113281, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11873417347669601, + "rewards/margins": 5.932743549346924, + "rewards/rejected": -5.814009189605713, + "step": 10873 + }, + { + "epoch": 0.63, + "learning_rate": 3.138506717794515e-08, + "logits/chosen": -1.901395559310913, + "logits/rejected": -1.9014521837234497, + "logps/chosen": -25.642534255981445, + "logps/rejected": -210.44039916992188, + "loss": 0.3641, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020224571228027344, + "rewards/margins": 5.08834171295166, + "rewards/rejected": -5.1085662841796875, + "step": 10874 + }, + { + "epoch": 0.63, + "learning_rate": 3.137632094957504e-08, + "logits/chosen": -1.8934980630874634, + "logits/rejected": -1.885959506034851, + "logps/chosen": -42.3453369140625, + "logps/rejected": -301.2874755859375, + "loss": 0.1351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9283363223075867, + "rewards/margins": 4.699216365814209, + "rewards/rejected": -3.7708802223205566, + "step": 10875 + }, + { + "epoch": 0.63, + "learning_rate": 3.136757538280908e-08, + "logits/chosen": -1.904788851737976, + "logits/rejected": -1.8975961208343506, + "logps/chosen": -145.76470947265625, + "logps/rejected": -331.59478759765625, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8088104724884033, + "rewards/margins": 4.712005615234375, + "rewards/rejected": -2.9031951427459717, + "step": 10876 + }, + { + "epoch": 0.63, + "learning_rate": 3.1358830477958e-08, + "logits/chosen": -1.942287802696228, + "logits/rejected": -1.92827308177948, + "logps/chosen": -233.1514892578125, + "logps/rejected": -343.1308288574219, + "loss": 0.2208, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6584930419921875, + "rewards/margins": 0.6438171863555908, + "rewards/rejected": 2.0146758556365967, + "step": 10877 + }, + { + "epoch": 0.63, + "learning_rate": 3.135008623533244e-08, + "logits/chosen": -1.9007070064544678, + "logits/rejected": -1.8903545141220093, + "logps/chosen": -145.29339599609375, + "logps/rejected": -416.3760070800781, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.065403699874878, + "rewards/margins": 4.437269687652588, + "rewards/rejected": -2.37186598777771, + "step": 10878 + }, + { + "epoch": 0.63, + "learning_rate": 3.134134265524306e-08, + "logits/chosen": -1.8949058055877686, + "logits/rejected": -1.92487633228302, + "logps/chosen": -150.14544677734375, + "logps/rejected": -451.1340026855469, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4576278924942017, + "rewards/margins": 4.974891662597656, + "rewards/rejected": -3.517263889312744, + "step": 10879 + }, + { + "epoch": 0.63, + "learning_rate": 3.133259973800044e-08, + "logits/chosen": -1.968869686126709, + "logits/rejected": -1.964562177658081, + "logps/chosen": -0.5311185717582703, + "logps/rejected": -185.24542236328125, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023860234767198563, + "rewards/margins": 4.297862529754639, + "rewards/rejected": -4.2740020751953125, + "step": 10880 + }, + { + "epoch": 0.63, + "learning_rate": 3.13238574839152e-08, + "logits/chosen": -1.7993355989456177, + "logits/rejected": -1.800329566001892, + "logps/chosen": -14.704375267028809, + "logps/rejected": -76.4940185546875, + "loss": 0.6661, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1049795150756836, + "rewards/margins": 0.27679920196533203, + "rewards/rejected": -0.3817787170410156, + "step": 10881 + }, + { + "epoch": 0.63, + "learning_rate": 3.131511589329788e-08, + "logits/chosen": -2.1050212383270264, + "logits/rejected": -2.108060121536255, + "logps/chosen": -31.448631286621094, + "logps/rejected": -120.11862182617188, + "loss": 0.3259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.147835612297058, + "rewards/margins": 0.6955795884132385, + "rewards/rejected": 0.4522560238838196, + "step": 10882 + }, + { + "epoch": 0.63, + "learning_rate": 3.1306374966459057e-08, + "logits/chosen": -1.8726694583892822, + "logits/rejected": -1.8944453001022339, + "logps/chosen": -206.3813018798828, + "logps/rejected": -397.55029296875, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.264665365219116, + "rewards/margins": 5.8586015701293945, + "rewards/rejected": -3.5939362049102783, + "step": 10883 + }, + { + "epoch": 0.63, + "learning_rate": 3.129763470370924e-08, + "logits/chosen": -1.8626344203948975, + "logits/rejected": -1.8627327680587769, + "logps/chosen": -9.002108573913574, + "logps/rejected": -0.2770681381225586, + "loss": 0.7302, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0722414031624794, + "rewards/margins": -0.08098592609167099, + "rewards/rejected": 0.008744525723159313, + "step": 10884 + }, + { + "epoch": 0.63, + "learning_rate": 3.12888951053589e-08, + "logits/chosen": -1.8487622737884521, + "logits/rejected": -1.8494173288345337, + "logps/chosen": -74.32962036132812, + "logps/rejected": -137.15394592285156, + "loss": 0.5363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10664749145507812, + "rewards/margins": 0.19654160737991333, + "rewards/rejected": -0.08989410847425461, + "step": 10885 + }, + { + "epoch": 0.63, + "learning_rate": 3.1280156171718556e-08, + "logits/chosen": -1.9501841068267822, + "logits/rejected": -1.9586793184280396, + "logps/chosen": -9.410476684570312, + "logps/rejected": -184.6829833984375, + "loss": 0.3662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05600881576538086, + "rewards/margins": 3.6614503860473633, + "rewards/rejected": -3.717459201812744, + "step": 10886 + }, + { + "epoch": 0.63, + "learning_rate": 3.127141790309862e-08, + "logits/chosen": -1.824695348739624, + "logits/rejected": -1.7386448383331299, + "logps/chosen": -398.86273193359375, + "logps/rejected": -641.7996826171875, + "loss": 0.2418, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.868560791015625, + "rewards/margins": 0.5535888671875, + "rewards/rejected": 2.314971923828125, + "step": 10887 + }, + { + "epoch": 0.63, + "learning_rate": 3.126268029980954e-08, + "logits/chosen": -1.8424675464630127, + "logits/rejected": -1.844078779220581, + "logps/chosen": -5.86504174862057e-05, + "logps/rejected": -20.780763626098633, + "loss": 0.6951, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.9430797237873776e-06, + "rewards/margins": -0.0033882497809827328, + "rewards/rejected": 0.0033863068092614412, + "step": 10888 + }, + { + "epoch": 0.63, + "learning_rate": 3.125394336216168e-08, + "logits/chosen": -1.9099688529968262, + "logits/rejected": -1.9089206457138062, + "logps/chosen": -0.058572955429553986, + "logps/rejected": -246.7611541748047, + "loss": 0.3269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02942478470504284, + "rewards/margins": 5.7830424308776855, + "rewards/rejected": -5.753617763519287, + "step": 10889 + }, + { + "epoch": 0.63, + "learning_rate": 3.12452070904655e-08, + "logits/chosen": -1.9895316362380981, + "logits/rejected": -1.9781197309494019, + "logps/chosen": -0.00011133821681141853, + "logps/rejected": -125.02596282958984, + "loss": 0.7126, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.668722507019993e-05, + "rewards/margins": -0.10775690525770187, + "rewards/rejected": 0.10777359455823898, + "step": 10890 + }, + { + "epoch": 0.63, + "learning_rate": 3.123647148503126e-08, + "logits/chosen": -2.0179026126861572, + "logits/rejected": -2.010143756866455, + "logps/chosen": -316.46636962890625, + "logps/rejected": -470.60479736328125, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9123779535293579, + "rewards/margins": 5.387646675109863, + "rewards/rejected": -4.475268840789795, + "step": 10891 + }, + { + "epoch": 0.63, + "learning_rate": 3.122773654616936e-08, + "logits/chosen": -1.8274872303009033, + "logits/rejected": -1.8309712409973145, + "logps/chosen": -7.7841081619262695, + "logps/rejected": -45.01438903808594, + "loss": 0.6058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21525907516479492, + "rewards/margins": 0.14931440353393555, + "rewards/rejected": 0.06594467163085938, + "step": 10892 + }, + { + "epoch": 0.63, + "learning_rate": 3.121900227419007e-08, + "logits/chosen": -1.9731512069702148, + "logits/rejected": -1.9692414999008179, + "logps/chosen": -9.378235816955566, + "logps/rejected": -68.0132827758789, + "loss": 0.4177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22421912848949432, + "rewards/margins": 1.0494502782821655, + "rewards/rejected": -0.8252311944961548, + "step": 10893 + }, + { + "epoch": 0.63, + "learning_rate": 3.12102686694037e-08, + "logits/chosen": -1.9600194692611694, + "logits/rejected": -1.965958595275879, + "logps/chosen": -1.6927675460465252e-05, + "logps/rejected": -107.36787414550781, + "loss": 0.4509, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.006759238312952e-07, + "rewards/margins": 1.5483561754226685, + "rewards/rejected": -1.5483566522598267, + "step": 10894 + }, + { + "epoch": 0.63, + "learning_rate": 3.120153573212048e-08, + "logits/chosen": -1.9041212797164917, + "logits/rejected": -1.903992772102356, + "logps/chosen": -20.345720291137695, + "logps/rejected": -99.78688049316406, + "loss": 0.4234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2710794508457184, + "rewards/margins": 0.865795373916626, + "rewards/rejected": -0.5947158932685852, + "step": 10895 + }, + { + "epoch": 0.63, + "learning_rate": 3.1192803462650686e-08, + "logits/chosen": -1.8778045177459717, + "logits/rejected": -1.8823344707489014, + "logps/chosen": -10.09518051147461, + "logps/rejected": -125.40834045410156, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009555530734360218, + "rewards/margins": 1.8620107173919678, + "rewards/rejected": -1.8524551391601562, + "step": 10896 + }, + { + "epoch": 0.63, + "learning_rate": 3.1184071861304476e-08, + "logits/chosen": -1.592070460319519, + "logits/rejected": -1.5904781818389893, + "logps/chosen": -227.96005249023438, + "logps/rejected": -303.1371765136719, + "loss": 0.2103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3672332763671875, + "rewards/margins": 1.32562255859375, + "rewards/rejected": -0.9583892822265625, + "step": 10897 + }, + { + "epoch": 0.63, + "learning_rate": 3.1175340928392105e-08, + "logits/chosen": -1.8333892822265625, + "logits/rejected": -1.8330830335617065, + "logps/chosen": -1.7120598554611206, + "logps/rejected": -150.11392211914062, + "loss": 0.6322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020375562831759453, + "rewards/margins": 0.32941389083862305, + "rewards/rejected": -0.34978944063186646, + "step": 10898 + }, + { + "epoch": 0.63, + "learning_rate": 3.116661066422368e-08, + "logits/chosen": -1.9274054765701294, + "logits/rejected": -1.92918062210083, + "logps/chosen": -9.880815505981445, + "logps/rejected": -112.12551879882812, + "loss": 0.4475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1050347313284874, + "rewards/margins": 1.4084235429763794, + "rewards/rejected": -1.513458251953125, + "step": 10899 + }, + { + "epoch": 0.63, + "learning_rate": 3.115788106910939e-08, + "logits/chosen": -2.1472110748291016, + "logits/rejected": -2.140652656555176, + "logps/chosen": -157.52767944335938, + "logps/rejected": -311.6527099609375, + "loss": 0.0661, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7639297246932983, + "rewards/margins": 5.495912075042725, + "rewards/rejected": -3.731982469558716, + "step": 10900 + }, + { + "epoch": 0.63, + "learning_rate": 3.114915214335931e-08, + "logits/chosen": -1.9514942169189453, + "logits/rejected": -1.948575735092163, + "logps/chosen": -81.80586242675781, + "logps/rejected": -168.28111267089844, + "loss": 0.8053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9215790033340454, + "rewards/margins": 1.0244582891464233, + "rewards/rejected": -1.9460372924804688, + "step": 10901 + }, + { + "epoch": 0.63, + "learning_rate": 3.114042388728358e-08, + "logits/chosen": -1.8799359798431396, + "logits/rejected": -1.8784489631652832, + "logps/chosen": -230.6990966796875, + "logps/rejected": -306.12603759765625, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1131227016448975, + "rewards/margins": 2.756845235824585, + "rewards/rejected": -0.6437225341796875, + "step": 10902 + }, + { + "epoch": 0.63, + "learning_rate": 3.113169630119222e-08, + "logits/chosen": -1.9664857387542725, + "logits/rejected": -1.960178017616272, + "logps/chosen": -41.54018020629883, + "logps/rejected": -138.98753356933594, + "loss": 0.3739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7614124417304993, + "rewards/margins": 0.7113651633262634, + "rewards/rejected": 0.05004730448126793, + "step": 10903 + }, + { + "epoch": 0.63, + "learning_rate": 3.112296938539531e-08, + "logits/chosen": -1.9202966690063477, + "logits/rejected": -1.876425862312317, + "logps/chosen": -233.89942932128906, + "logps/rejected": -295.6192626953125, + "loss": 0.0793, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7909958362579346, + "rewards/margins": 3.2261123657226562, + "rewards/rejected": -1.4351166486740112, + "step": 10904 + }, + { + "epoch": 0.63, + "learning_rate": 3.111424314020288e-08, + "logits/chosen": -1.7456703186035156, + "logits/rejected": -1.74141526222229, + "logps/chosen": -246.67083740234375, + "logps/rejected": -338.0712890625, + "loss": 0.1846, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6871094703674316, + "rewards/margins": 0.8615113496780396, + "rewards/rejected": 1.825598120689392, + "step": 10905 + }, + { + "epoch": 0.63, + "learning_rate": 3.1105517565924896e-08, + "logits/chosen": -1.7183982133865356, + "logits/rejected": -1.7250778675079346, + "logps/chosen": -63.26605224609375, + "logps/rejected": -141.00144958496094, + "loss": 0.6077, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04084663465619087, + "rewards/margins": -0.11262017488479614, + "rewards/rejected": 0.15346680581569672, + "step": 10906 + }, + { + "epoch": 0.63, + "learning_rate": 3.109679266287137e-08, + "logits/chosen": -1.9157053232192993, + "logits/rejected": -1.9075391292572021, + "logps/chosen": -0.06338316202163696, + "logps/rejected": -286.93853759765625, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006305725779384375, + "rewards/margins": 4.626987457275391, + "rewards/rejected": -4.633293151855469, + "step": 10907 + }, + { + "epoch": 0.63, + "learning_rate": 3.108806843135222e-08, + "logits/chosen": -1.7983779907226562, + "logits/rejected": -1.8066039085388184, + "logps/chosen": -246.4029541015625, + "logps/rejected": -297.2447509765625, + "loss": 0.0652, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0242340564727783, + "rewards/margins": 2.118112087249756, + "rewards/rejected": 0.9061218500137329, + "step": 10908 + }, + { + "epoch": 0.63, + "learning_rate": 3.107934487167739e-08, + "logits/chosen": -1.879217505455017, + "logits/rejected": -1.9306021928787231, + "logps/chosen": -310.7597351074219, + "logps/rejected": -506.3792724609375, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4745758771896362, + "rewards/margins": 4.1276702880859375, + "rewards/rejected": -2.653094530105591, + "step": 10909 + }, + { + "epoch": 0.63, + "learning_rate": 3.107062198415678e-08, + "logits/chosen": -1.9341377019882202, + "logits/rejected": -1.960672378540039, + "logps/chosen": -208.92254638671875, + "logps/rejected": -344.1578369140625, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.631413221359253, + "rewards/margins": 5.24069356918335, + "rewards/rejected": -2.6092803478240967, + "step": 10910 + }, + { + "epoch": 0.63, + "learning_rate": 3.1061899769100286e-08, + "logits/chosen": -1.9797316789627075, + "logits/rejected": -1.958103895187378, + "logps/chosen": -237.8175811767578, + "logps/rejected": -438.422607421875, + "loss": 0.1294, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.121241807937622, + "rewards/margins": 1.71330726146698, + "rewards/rejected": 0.4079345762729645, + "step": 10911 + }, + { + "epoch": 0.64, + "learning_rate": 3.105317822681772e-08, + "logits/chosen": -1.8015525341033936, + "logits/rejected": -1.8032903671264648, + "logps/chosen": -0.1512182652950287, + "logps/rejected": -112.90155029296875, + "loss": 0.4527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043154359445907176, + "rewards/margins": 1.3853137493133545, + "rewards/rejected": -1.3857452869415283, + "step": 10912 + }, + { + "epoch": 0.64, + "learning_rate": 3.104445735761897e-08, + "logits/chosen": -1.91909658908844, + "logits/rejected": -1.8990596532821655, + "logps/chosen": -317.9858703613281, + "logps/rejected": -396.902587890625, + "loss": 0.2302, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8569916486740112, + "rewards/margins": 0.6484955549240112, + "rewards/rejected": 1.20849609375, + "step": 10913 + }, + { + "epoch": 0.64, + "learning_rate": 3.103573716181379e-08, + "logits/chosen": -1.8788187503814697, + "logits/rejected": -1.8777350187301636, + "logps/chosen": -2.0298686027526855, + "logps/rejected": -111.53034973144531, + "loss": 0.4942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023490453138947487, + "rewards/margins": 1.0513951778411865, + "rewards/rejected": -1.074885606765747, + "step": 10914 + }, + { + "epoch": 0.64, + "learning_rate": 3.102701763971202e-08, + "logits/chosen": -1.8712868690490723, + "logits/rejected": -1.8723615407943726, + "logps/chosen": -28.336288452148438, + "logps/rejected": -203.29322814941406, + "loss": 0.2319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32433435320854187, + "rewards/margins": 2.9342479705810547, + "rewards/rejected": -2.6099135875701904, + "step": 10915 + }, + { + "epoch": 0.64, + "learning_rate": 3.1018298791623357e-08, + "logits/chosen": -2.0285627841949463, + "logits/rejected": -2.0205447673797607, + "logps/chosen": -49.396568298339844, + "logps/rejected": -277.5970458984375, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39151841402053833, + "rewards/margins": 3.2076926231384277, + "rewards/rejected": -2.816174268722534, + "step": 10916 + }, + { + "epoch": 0.64, + "learning_rate": 3.10095806178576e-08, + "logits/chosen": -1.9536347389221191, + "logits/rejected": -1.9115021228790283, + "logps/chosen": -120.03024291992188, + "logps/rejected": -212.8800811767578, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.504974365234375, + "rewards/margins": 1.1659348011016846, + "rewards/rejected": 0.3390396237373352, + "step": 10917 + }, + { + "epoch": 0.64, + "learning_rate": 3.1000863118724406e-08, + "logits/chosen": -2.0315823554992676, + "logits/rejected": -2.0360472202301025, + "logps/chosen": -32.15062713623047, + "logps/rejected": -205.68502807617188, + "loss": 0.4205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1095159575343132, + "rewards/margins": 1.4850214719772339, + "rewards/rejected": -1.594537377357483, + "step": 10918 + }, + { + "epoch": 0.64, + "learning_rate": 3.099214629453351e-08, + "logits/chosen": -1.6421724557876587, + "logits/rejected": -1.6441371440887451, + "logps/chosen": -56.47913360595703, + "logps/rejected": -150.532470703125, + "loss": 1.239, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8515503406524658, + "rewards/margins": 0.7407569885253906, + "rewards/rejected": -2.5923073291778564, + "step": 10919 + }, + { + "epoch": 0.64, + "learning_rate": 3.098343014559454e-08, + "logits/chosen": -1.9479968547821045, + "logits/rejected": -1.9377167224884033, + "logps/chosen": -0.6273462176322937, + "logps/rejected": -123.59657287597656, + "loss": 0.472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12270152568817139, + "rewards/margins": 1.0093042850494385, + "rewards/rejected": -0.8866028189659119, + "step": 10920 + }, + { + "epoch": 0.64, + "learning_rate": 3.0974714672217165e-08, + "logits/chosen": -1.8031425476074219, + "logits/rejected": -1.8183209896087646, + "logps/chosen": -141.71875, + "logps/rejected": -310.162841796875, + "loss": 0.1454, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.17057204246521, + "rewards/margins": 1.29459547996521, + "rewards/rejected": 0.8759765625, + "step": 10921 + }, + { + "epoch": 0.64, + "learning_rate": 3.096599987471098e-08, + "logits/chosen": -1.9130322933197021, + "logits/rejected": -1.9098939895629883, + "logps/chosen": -12.617159843444824, + "logps/rejected": -232.75885009765625, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0961613655090332, + "rewards/margins": 2.966580629348755, + "rewards/rejected": -2.8704192638397217, + "step": 10922 + }, + { + "epoch": 0.64, + "learning_rate": 3.0957285753385586e-08, + "logits/chosen": -2.0216879844665527, + "logits/rejected": -2.026310443878174, + "logps/chosen": -34.401588439941406, + "logps/rejected": -255.33827209472656, + "loss": 0.2588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.143218994140625, + "rewards/margins": 2.628488302230835, + "rewards/rejected": -2.48526930809021, + "step": 10923 + }, + { + "epoch": 0.64, + "learning_rate": 3.094857230855055e-08, + "logits/chosen": -2.095597743988037, + "logits/rejected": -2.0858144760131836, + "logps/chosen": -4.079667568206787, + "logps/rejected": -127.12332153320312, + "loss": 0.4067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11575040966272354, + "rewards/margins": 1.6023486852645874, + "rewards/rejected": -1.486598253250122, + "step": 10924 + }, + { + "epoch": 0.64, + "learning_rate": 3.093985954051541e-08, + "logits/chosen": -1.8880928754806519, + "logits/rejected": -1.8434669971466064, + "logps/chosen": -202.1949005126953, + "logps/rejected": -392.5723876953125, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5892913341522217, + "rewards/margins": 3.768810987472534, + "rewards/rejected": -1.1795196533203125, + "step": 10925 + }, + { + "epoch": 0.64, + "learning_rate": 3.09311474495897e-08, + "logits/chosen": -1.7023921012878418, + "logits/rejected": -1.729231834411621, + "logps/chosen": -247.72451782226562, + "logps/rejected": -317.94097900390625, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0498292446136475, + "rewards/margins": 2.619677782058716, + "rewards/rejected": 0.4301513731479645, + "step": 10926 + }, + { + "epoch": 0.64, + "learning_rate": 3.092243603608291e-08, + "logits/chosen": -1.7191157341003418, + "logits/rejected": -1.658629059791565, + "logps/chosen": -302.7221984863281, + "logps/rejected": -614.1925659179688, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7204254865646362, + "rewards/margins": 4.402230739593506, + "rewards/rejected": -2.681805372238159, + "step": 10927 + }, + { + "epoch": 0.64, + "learning_rate": 3.091372530030451e-08, + "logits/chosen": -2.0477523803710938, + "logits/rejected": -2.045466661453247, + "logps/chosen": -81.75856018066406, + "logps/rejected": -125.67842864990234, + "loss": 0.5807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10482330620288849, + "rewards/margins": 0.5057754516601562, + "rewards/rejected": -0.6105987429618835, + "step": 10928 + }, + { + "epoch": 0.64, + "learning_rate": 3.0905015242563944e-08, + "logits/chosen": -1.9410340785980225, + "logits/rejected": -1.9245398044586182, + "logps/chosen": -26.692211151123047, + "logps/rejected": -284.5468444824219, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13412781059741974, + "rewards/margins": 3.6295838356018066, + "rewards/rejected": -3.76371169090271, + "step": 10929 + }, + { + "epoch": 0.64, + "learning_rate": 3.089630586317065e-08, + "logits/chosen": -2.0700201988220215, + "logits/rejected": -2.070988655090332, + "logps/chosen": -34.39910888671875, + "logps/rejected": -52.4542121887207, + "loss": 0.7287, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1489276885986328, + "rewards/margins": -0.22924193739891052, + "rewards/rejected": 0.0803142562508583, + "step": 10930 + }, + { + "epoch": 0.64, + "learning_rate": 3.0887597162433985e-08, + "logits/chosen": -1.5594959259033203, + "logits/rejected": -1.581230878829956, + "logps/chosen": -360.44476318359375, + "logps/rejected": -530.4903564453125, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.253430128097534, + "rewards/margins": 8.487548828125, + "rewards/rejected": -5.234118938446045, + "step": 10931 + }, + { + "epoch": 0.64, + "learning_rate": 3.08788891406634e-08, + "logits/chosen": -1.9524112939834595, + "logits/rejected": -1.9551159143447876, + "logps/chosen": -2.950369119644165, + "logps/rejected": -41.61251449584961, + "loss": 0.56, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2612334191799164, + "rewards/margins": 0.19936054944992065, + "rewards/rejected": 0.06187286600470543, + "step": 10932 + }, + { + "epoch": 0.64, + "learning_rate": 3.087018179816818e-08, + "logits/chosen": -2.0701255798339844, + "logits/rejected": -2.070681571960449, + "logps/chosen": -0.0008605331531725824, + "logps/rejected": -22.313230514526367, + "loss": 0.6881, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.563362431828864e-05, + "rewards/margins": 0.01874363049864769, + "rewards/rejected": -0.018769264221191406, + "step": 10933 + }, + { + "epoch": 0.64, + "learning_rate": 3.08614751352577e-08, + "logits/chosen": -1.904354214668274, + "logits/rejected": -1.8948463201522827, + "logps/chosen": -236.07386779785156, + "logps/rejected": -572.0285034179688, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.816217064857483, + "rewards/margins": 6.930627346038818, + "rewards/rejected": -5.114410400390625, + "step": 10934 + }, + { + "epoch": 0.64, + "learning_rate": 3.08527691522412e-08, + "logits/chosen": -2.2014694213867188, + "logits/rejected": -2.190985679626465, + "logps/chosen": -1.806138038635254, + "logps/rejected": -286.16973876953125, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13626888394355774, + "rewards/margins": 4.757254600524902, + "rewards/rejected": -4.620985507965088, + "step": 10935 + }, + { + "epoch": 0.64, + "learning_rate": 3.0844063849428044e-08, + "logits/chosen": -1.8873242139816284, + "logits/rejected": -1.8778256177902222, + "logps/chosen": -0.003226103726774454, + "logps/rejected": -258.6017150878906, + "loss": 0.3483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002616435522213578, + "rewards/margins": 5.479249000549316, + "rewards/rejected": -5.47951078414917, + "step": 10936 + }, + { + "epoch": 0.64, + "learning_rate": 3.08353592271274e-08, + "logits/chosen": -1.8955247402191162, + "logits/rejected": -1.8881953954696655, + "logps/chosen": -20.008129119873047, + "logps/rejected": -230.8079071044922, + "loss": 0.2574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3952249586582184, + "rewards/margins": 2.229395627975464, + "rewards/rejected": -1.8341705799102783, + "step": 10937 + }, + { + "epoch": 0.64, + "learning_rate": 3.0826655285648575e-08, + "logits/chosen": -2.12329363822937, + "logits/rejected": -2.1057465076446533, + "logps/chosen": -47.28266143798828, + "logps/rejected": -301.8863525390625, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16171418130397797, + "rewards/margins": 4.510249137878418, + "rewards/rejected": -4.348535060882568, + "step": 10938 + }, + { + "epoch": 0.64, + "learning_rate": 3.081795202530072e-08, + "logits/chosen": -1.977475643157959, + "logits/rejected": -1.9613794088363647, + "logps/chosen": -179.48402404785156, + "logps/rejected": -354.55908203125, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3797378540039062, + "rewards/margins": 4.168159484863281, + "rewards/rejected": -2.788421630859375, + "step": 10939 + }, + { + "epoch": 0.64, + "learning_rate": 3.080924944639305e-08, + "logits/chosen": -1.9270883798599243, + "logits/rejected": -1.9446394443511963, + "logps/chosen": -296.2590637207031, + "logps/rejected": -547.8195190429688, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2450408935546875, + "rewards/margins": 7.281838893890381, + "rewards/rejected": -6.036798000335693, + "step": 10940 + }, + { + "epoch": 0.64, + "learning_rate": 3.08005475492347e-08, + "logits/chosen": -2.0085699558258057, + "logits/rejected": -2.007765531539917, + "logps/chosen": -0.0027825168799608946, + "logps/rejected": -147.20388793945312, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9847130867419764e-05, + "rewards/margins": 3.7063450813293457, + "rewards/rejected": -3.706394910812378, + "step": 10941 + }, + { + "epoch": 0.64, + "learning_rate": 3.079184633413483e-08, + "logits/chosen": -1.906865119934082, + "logits/rejected": -1.8954150676727295, + "logps/chosen": -116.23570251464844, + "logps/rejected": -339.0864562988281, + "loss": 0.3206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10127716511487961, + "rewards/margins": 4.779497146606445, + "rewards/rejected": -4.678219795227051, + "step": 10942 + }, + { + "epoch": 0.64, + "learning_rate": 3.078314580140252e-08, + "logits/chosen": -1.9461194276809692, + "logits/rejected": -1.8772368431091309, + "logps/chosen": -242.41134643554688, + "logps/rejected": -639.1201171875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3290557861328125, + "rewards/margins": 6.518460273742676, + "rewards/rejected": -3.189404249191284, + "step": 10943 + }, + { + "epoch": 0.64, + "learning_rate": 3.0774445951346876e-08, + "logits/chosen": -2.102792978286743, + "logits/rejected": -2.1035075187683105, + "logps/chosen": -37.1970329284668, + "logps/rejected": -299.5966796875, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2007301300764084, + "rewards/margins": 4.086483001708984, + "rewards/rejected": -4.287213325500488, + "step": 10944 + }, + { + "epoch": 0.64, + "learning_rate": 3.076574678427697e-08, + "logits/chosen": -1.905603289604187, + "logits/rejected": -1.9024676084518433, + "logps/chosen": -16.320783615112305, + "logps/rejected": -219.81484985351562, + "loss": 0.3356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039400864392519, + "rewards/margins": 3.1649410724639893, + "rewards/rejected": -3.125540256500244, + "step": 10945 + }, + { + "epoch": 0.64, + "learning_rate": 3.07570483005018e-08, + "logits/chosen": -1.94584321975708, + "logits/rejected": -1.9412744045257568, + "logps/chosen": -1.38267982006073, + "logps/rejected": -110.58885192871094, + "loss": 0.4244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11412335932254791, + "rewards/margins": 1.4372144937515259, + "rewards/rejected": -1.3230911493301392, + "step": 10946 + }, + { + "epoch": 0.64, + "learning_rate": 3.074835050033042e-08, + "logits/chosen": -1.7977663278579712, + "logits/rejected": -1.7773816585540771, + "logps/chosen": -200.12753295898438, + "logps/rejected": -278.76629638671875, + "loss": 0.2302, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.11582350730896, + "rewards/margins": 0.600961446762085, + "rewards/rejected": 1.514862060546875, + "step": 10947 + }, + { + "epoch": 0.64, + "learning_rate": 3.0739653384071785e-08, + "logits/chosen": -1.8757246732711792, + "logits/rejected": -1.876544713973999, + "logps/chosen": -220.83895874023438, + "logps/rejected": -341.19171142578125, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0093841552734375, + "rewards/margins": 0.3448333740234375, + "rewards/rejected": 2.66455078125, + "step": 10948 + }, + { + "epoch": 0.64, + "learning_rate": 3.0730956952034895e-08, + "logits/chosen": -1.9984376430511475, + "logits/rejected": -1.993149757385254, + "logps/chosen": -82.22333526611328, + "logps/rejected": -372.13018798828125, + "loss": 0.2854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2383575439453125, + "rewards/margins": 4.7233123779296875, + "rewards/rejected": -4.484954833984375, + "step": 10949 + }, + { + "epoch": 0.64, + "learning_rate": 3.0722261204528634e-08, + "logits/chosen": -1.9336330890655518, + "logits/rejected": -1.922560691833496, + "logps/chosen": -32.65981674194336, + "logps/rejected": -254.17645263671875, + "loss": 0.1754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0294827222824097, + "rewards/margins": 2.376525640487671, + "rewards/rejected": -1.3470429182052612, + "step": 10950 + }, + { + "epoch": 0.64, + "learning_rate": 3.071356614186199e-08, + "logits/chosen": -1.8694803714752197, + "logits/rejected": -1.85171639919281, + "logps/chosen": -4.7242207527160645, + "logps/rejected": -152.2991943359375, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12665167450904846, + "rewards/margins": 2.7227189540863037, + "rewards/rejected": -2.596067190170288, + "step": 10951 + }, + { + "epoch": 0.64, + "learning_rate": 3.070487176434379e-08, + "logits/chosen": -1.9165409803390503, + "logits/rejected": -1.9160491228103638, + "logps/chosen": -29.500370025634766, + "logps/rejected": -92.40658569335938, + "loss": 0.3569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7600826621055603, + "rewards/margins": 0.8043819665908813, + "rewards/rejected": -0.04429931566119194, + "step": 10952 + }, + { + "epoch": 0.64, + "learning_rate": 3.069617807228295e-08, + "logits/chosen": -1.8522666692733765, + "logits/rejected": -1.8496160507202148, + "logps/chosen": -24.01000213623047, + "logps/rejected": -168.088623046875, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7431632876396179, + "rewards/margins": 1.4450371265411377, + "rewards/rejected": -0.701873779296875, + "step": 10953 + }, + { + "epoch": 0.64, + "learning_rate": 3.0687485065988275e-08, + "logits/chosen": -1.607638955116272, + "logits/rejected": -1.6151187419891357, + "logps/chosen": -13.38459300994873, + "logps/rejected": -249.1329803466797, + "loss": 0.2703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26966410875320435, + "rewards/margins": 4.617316246032715, + "rewards/rejected": -4.347651958465576, + "step": 10954 + }, + { + "epoch": 0.64, + "learning_rate": 3.067879274576861e-08, + "logits/chosen": -2.025226593017578, + "logits/rejected": -2.0187997817993164, + "logps/chosen": -23.035951614379883, + "logps/rejected": -164.79733276367188, + "loss": 0.5284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06982727348804474, + "rewards/margins": 0.6044647097587585, + "rewards/rejected": -0.534637451171875, + "step": 10955 + }, + { + "epoch": 0.64, + "learning_rate": 3.0670101111932716e-08, + "logits/chosen": -1.9427025318145752, + "logits/rejected": -1.9379868507385254, + "logps/chosen": -9.536339348414913e-05, + "logps/rejected": -72.03294372558594, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7522223743071663e-06, + "rewards/margins": 0.655116081237793, + "rewards/rejected": -0.6551178097724915, + "step": 10956 + }, + { + "epoch": 0.64, + "learning_rate": 3.066141016478942e-08, + "logits/chosen": -2.075143575668335, + "logits/rejected": -2.132350444793701, + "logps/chosen": -291.42303466796875, + "logps/rejected": -349.92669677734375, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8984527587890625, + "rewards/margins": 2.2221922874450684, + "rewards/rejected": 0.6762604117393494, + "step": 10957 + }, + { + "epoch": 0.64, + "learning_rate": 3.0652719904647384e-08, + "logits/chosen": -1.5324989557266235, + "logits/rejected": -1.5458240509033203, + "logps/chosen": -199.77101135253906, + "logps/rejected": -271.03564453125, + "loss": 0.2334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.894213855266571, + "rewards/margins": 1.853753685951233, + "rewards/rejected": -0.9595398306846619, + "step": 10958 + }, + { + "epoch": 0.64, + "learning_rate": 3.0644030331815406e-08, + "logits/chosen": -1.8687942028045654, + "logits/rejected": -1.8587074279785156, + "logps/chosen": -192.763916015625, + "logps/rejected": -257.96759033203125, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5209197998046875, + "rewards/margins": 4.9051361083984375, + "rewards/rejected": -1.38421630859375, + "step": 10959 + }, + { + "epoch": 0.64, + "learning_rate": 3.0635341446602134e-08, + "logits/chosen": -1.994262933731079, + "logits/rejected": -2.0241758823394775, + "logps/chosen": -162.20449829101562, + "logps/rejected": -230.0772705078125, + "loss": 0.2144, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4498581886291504, + "rewards/margins": 0.8190079927444458, + "rewards/rejected": 1.6308501958847046, + "step": 10960 + }, + { + "epoch": 0.64, + "learning_rate": 3.062665324931628e-08, + "logits/chosen": -1.9532173871994019, + "logits/rejected": -1.9397159814834595, + "logps/chosen": -37.319801330566406, + "logps/rejected": -162.59823608398438, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8336326479911804, + "rewards/margins": 0.609685480594635, + "rewards/rejected": 0.22394715249538422, + "step": 10961 + }, + { + "epoch": 0.64, + "learning_rate": 3.0617965740266445e-08, + "logits/chosen": -1.9107786417007446, + "logits/rejected": -1.8747940063476562, + "logps/chosen": -186.66690063476562, + "logps/rejected": -441.8238525390625, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9858245849609375, + "rewards/margins": 2.700430393218994, + "rewards/rejected": -0.7146057486534119, + "step": 10962 + }, + { + "epoch": 0.64, + "learning_rate": 3.060927891976128e-08, + "logits/chosen": -1.9114971160888672, + "logits/rejected": -1.9180773496627808, + "logps/chosen": -14.959315299987793, + "logps/rejected": -164.127197265625, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006400871556252241, + "rewards/margins": 1.9492079019546509, + "rewards/rejected": -1.9428070783615112, + "step": 10963 + }, + { + "epoch": 0.64, + "learning_rate": 3.060059278810938e-08, + "logits/chosen": -1.9463804960250854, + "logits/rejected": -1.9416757822036743, + "logps/chosen": -16.387638092041016, + "logps/rejected": -124.062744140625, + "loss": 0.3852, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3849363327026367, + "rewards/margins": 1.1496702432632446, + "rewards/rejected": -0.7647339105606079, + "step": 10964 + }, + { + "epoch": 0.64, + "learning_rate": 3.0591907345619314e-08, + "logits/chosen": -1.7467141151428223, + "logits/rejected": -1.7361372709274292, + "logps/chosen": -16.20608139038086, + "logps/rejected": -180.9189453125, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3994888365268707, + "rewards/margins": 2.900644063949585, + "rewards/rejected": -2.501155138015747, + "step": 10965 + }, + { + "epoch": 0.64, + "learning_rate": 3.0583222592599643e-08, + "logits/chosen": -1.8023263216018677, + "logits/rejected": -1.795896291732788, + "logps/chosen": -14.873153686523438, + "logps/rejected": -189.51123046875, + "loss": 0.3647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02218770980834961, + "rewards/margins": 3.106898546218872, + "rewards/rejected": -3.0847108364105225, + "step": 10966 + }, + { + "epoch": 0.64, + "learning_rate": 3.057453852935887e-08, + "logits/chosen": -1.9464237689971924, + "logits/rejected": -1.9528334140777588, + "logps/chosen": -381.4881591796875, + "logps/rejected": -480.9267578125, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8139313459396362, + "rewards/margins": 3.738879442214966, + "rewards/rejected": -1.9249480962753296, + "step": 10967 + }, + { + "epoch": 0.64, + "learning_rate": 3.056585515620553e-08, + "logits/chosen": -1.7153364419937134, + "logits/rejected": -1.6686575412750244, + "logps/chosen": -342.2618103027344, + "logps/rejected": -630.359375, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9782683849334717, + "rewards/margins": 5.011532783508301, + "rewards/rejected": -2.03326416015625, + "step": 10968 + }, + { + "epoch": 0.64, + "learning_rate": 3.0557172473448056e-08, + "logits/chosen": -1.7144747972488403, + "logits/rejected": -1.7004331350326538, + "logps/chosen": -286.9900817871094, + "logps/rejected": -330.2118225097656, + "loss": 0.2169, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.31003737449646, + "rewards/margins": 0.8092286586761475, + "rewards/rejected": 1.5008087158203125, + "step": 10969 + }, + { + "epoch": 0.64, + "learning_rate": 3.054849048139494e-08, + "logits/chosen": -1.8240748643875122, + "logits/rejected": -1.8169353008270264, + "logps/chosen": -55.7276496887207, + "logps/rejected": -285.2869567871094, + "loss": 0.2128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6480445861816406, + "rewards/margins": 3.2127206325531006, + "rewards/rejected": -2.56467604637146, + "step": 10970 + }, + { + "epoch": 0.64, + "learning_rate": 3.053980918035456e-08, + "logits/chosen": -1.9440512657165527, + "logits/rejected": -1.939818263053894, + "logps/chosen": -7.643549919128418, + "logps/rejected": -102.82450103759766, + "loss": 0.4205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10127449035644531, + "rewards/margins": 1.5336865186691284, + "rewards/rejected": -1.432412028312683, + "step": 10971 + }, + { + "epoch": 0.64, + "learning_rate": 3.053112857063538e-08, + "logits/chosen": -2.1282577514648438, + "logits/rejected": -2.118551015853882, + "logps/chosen": -58.68281936645508, + "logps/rejected": -360.81744384765625, + "loss": 0.1656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6808841824531555, + "rewards/margins": 4.145572185516357, + "rewards/rejected": -3.4646880626678467, + "step": 10972 + }, + { + "epoch": 0.64, + "learning_rate": 3.0522448652545716e-08, + "logits/chosen": -2.05303692817688, + "logits/rejected": -2.0447468757629395, + "logps/chosen": -41.690956115722656, + "logps/rejected": -145.02992248535156, + "loss": 0.4474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5865592956542969, + "rewards/margins": 0.4061118960380554, + "rewards/rejected": 0.18044738471508026, + "step": 10973 + }, + { + "epoch": 0.64, + "learning_rate": 3.0513769426393984e-08, + "logits/chosen": -1.7828670740127563, + "logits/rejected": -1.8412389755249023, + "logps/chosen": -268.8238525390625, + "logps/rejected": -245.2730255126953, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.473803758621216, + "rewards/margins": 4.290168762207031, + "rewards/rejected": -0.8163650631904602, + "step": 10974 + }, + { + "epoch": 0.64, + "learning_rate": 3.050509089248845e-08, + "logits/chosen": -1.749922275543213, + "logits/rejected": -1.7457756996154785, + "logps/chosen": -179.31967163085938, + "logps/rejected": -352.9368896484375, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1205735206604004, + "rewards/margins": 3.7305283546447754, + "rewards/rejected": -0.609954833984375, + "step": 10975 + }, + { + "epoch": 0.64, + "learning_rate": 3.049641305113748e-08, + "logits/chosen": -1.896488070487976, + "logits/rejected": -1.8708841800689697, + "logps/chosen": -150.00527954101562, + "logps/rejected": -363.65277099609375, + "loss": 0.1398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8604599237442017, + "rewards/margins": 1.4445693492889404, + "rewards/rejected": 0.41589051485061646, + "step": 10976 + }, + { + "epoch": 0.64, + "learning_rate": 3.048773590264929e-08, + "logits/chosen": -1.9876818656921387, + "logits/rejected": -1.9759985208511353, + "logps/chosen": -66.56594848632812, + "logps/rejected": -337.8058776855469, + "loss": 0.1188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8520561456680298, + "rewards/margins": 5.398244380950928, + "rewards/rejected": -4.5461883544921875, + "step": 10977 + }, + { + "epoch": 0.64, + "learning_rate": 3.047905944733219e-08, + "logits/chosen": -2.0206568241119385, + "logits/rejected": -2.0205655097961426, + "logps/chosen": -57.02449417114258, + "logps/rejected": -126.78677368164062, + "loss": 0.9031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8918983340263367, + "rewards/margins": 0.2837989926338196, + "rewards/rejected": -1.1756973266601562, + "step": 10978 + }, + { + "epoch": 0.64, + "learning_rate": 3.0470383685494355e-08, + "logits/chosen": -1.9362338781356812, + "logits/rejected": -1.9380379915237427, + "logps/chosen": -107.37686157226562, + "logps/rejected": -276.1014099121094, + "loss": 0.1755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9246078729629517, + "rewards/margins": 2.2608351707458496, + "rewards/rejected": -1.3362274169921875, + "step": 10979 + }, + { + "epoch": 0.64, + "learning_rate": 3.046170861744405e-08, + "logits/chosen": -1.932646632194519, + "logits/rejected": -1.9249833822250366, + "logps/chosen": -42.440711975097656, + "logps/rejected": -120.62010192871094, + "loss": 0.1858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9143131375312805, + "rewards/margins": 2.1597485542297363, + "rewards/rejected": -1.245435357093811, + "step": 10980 + }, + { + "epoch": 0.64, + "learning_rate": 3.045303424348941e-08, + "logits/chosen": -1.9220150709152222, + "logits/rejected": -1.936353325843811, + "logps/chosen": -196.49740600585938, + "logps/rejected": -283.94586181640625, + "loss": 0.1332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6159607172012329, + "rewards/margins": 2.254879951477051, + "rewards/rejected": -1.6389191150665283, + "step": 10981 + }, + { + "epoch": 0.64, + "learning_rate": 3.044436056393862e-08, + "logits/chosen": -1.8596243858337402, + "logits/rejected": -1.8675644397735596, + "logps/chosen": -252.89247131347656, + "logps/rejected": -377.2276611328125, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.113203525543213, + "rewards/margins": 4.614784240722656, + "rewards/rejected": -2.5015809535980225, + "step": 10982 + }, + { + "epoch": 0.64, + "learning_rate": 3.043568757909979e-08, + "logits/chosen": -1.882412314414978, + "logits/rejected": -1.8981821537017822, + "logps/chosen": -172.96347045898438, + "logps/rejected": -354.80108642578125, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0440125465393066, + "rewards/margins": 4.385644912719727, + "rewards/rejected": -2.341632127761841, + "step": 10983 + }, + { + "epoch": 0.64, + "learning_rate": 3.042701528928104e-08, + "logits/chosen": -2.122779130935669, + "logits/rejected": -2.1289381980895996, + "logps/chosen": -13.825142860412598, + "logps/rejected": -230.46905517578125, + "loss": 0.2606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3327345848083496, + "rewards/margins": 4.016995429992676, + "rewards/rejected": -3.684260606765747, + "step": 10984 + }, + { + "epoch": 0.64, + "learning_rate": 3.041834369479045e-08, + "logits/chosen": -1.9946174621582031, + "logits/rejected": -1.987958312034607, + "logps/chosen": -158.70782470703125, + "logps/rejected": -475.3264465332031, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9161239862442017, + "rewards/margins": 4.522398471832275, + "rewards/rejected": -2.606274366378784, + "step": 10985 + }, + { + "epoch": 0.64, + "learning_rate": 3.040967279593607e-08, + "logits/chosen": -1.8768428564071655, + "logits/rejected": -1.852781057357788, + "logps/chosen": -220.21820068359375, + "logps/rejected": -391.8109436035156, + "loss": 0.1352, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1245148181915283, + "rewards/margins": 1.5709900856018066, + "rewards/rejected": 0.5535247921943665, + "step": 10986 + }, + { + "epoch": 0.64, + "learning_rate": 3.040100259302595e-08, + "logits/chosen": -1.858288288116455, + "logits/rejected": -1.8574209213256836, + "logps/chosen": -0.04148026555776596, + "logps/rejected": -145.09239196777344, + "loss": 0.5066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0049436516128480434, + "rewards/margins": 0.7406935095787048, + "rewards/rejected": -0.7357498407363892, + "step": 10987 + }, + { + "epoch": 0.64, + "learning_rate": 3.0392333086368074e-08, + "logits/chosen": -1.697605848312378, + "logits/rejected": -1.6930110454559326, + "logps/chosen": -167.06582641601562, + "logps/rejected": -397.101806640625, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9564805030822754, + "rewards/margins": 2.6299819946289062, + "rewards/rejected": 0.326498419046402, + "step": 10988 + }, + { + "epoch": 0.64, + "learning_rate": 3.038366427627045e-08, + "logits/chosen": -1.9964826107025146, + "logits/rejected": -2.0033576488494873, + "logps/chosen": -93.17947387695312, + "logps/rejected": -234.77183532714844, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4832901060581207, + "rewards/margins": 1.9451385736465454, + "rewards/rejected": -1.461848497390747, + "step": 10989 + }, + { + "epoch": 0.64, + "learning_rate": 3.037499616304101e-08, + "logits/chosen": -1.9940155744552612, + "logits/rejected": -1.975309133529663, + "logps/chosen": -0.0013758232817053795, + "logps/rejected": -222.81419372558594, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0941107575490605e-05, + "rewards/margins": 3.9365904331207275, + "rewards/rejected": -3.936601400375366, + "step": 10990 + }, + { + "epoch": 0.64, + "learning_rate": 3.036632874698772e-08, + "logits/chosen": -1.9459335803985596, + "logits/rejected": -1.9355939626693726, + "logps/chosen": -44.9123649597168, + "logps/rejected": -285.0522766113281, + "loss": 0.2623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18742942810058594, + "rewards/margins": 4.891601085662842, + "rewards/rejected": -4.704171657562256, + "step": 10991 + }, + { + "epoch": 0.64, + "learning_rate": 3.035766202841845e-08, + "logits/chosen": -1.880954384803772, + "logits/rejected": -1.894913673400879, + "logps/chosen": -242.30523681640625, + "logps/rejected": -496.05889892578125, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.596502661705017, + "rewards/margins": 4.153622627258301, + "rewards/rejected": -2.557119846343994, + "step": 10992 + }, + { + "epoch": 0.64, + "learning_rate": 3.034899600764114e-08, + "logits/chosen": -1.9817191362380981, + "logits/rejected": -1.9551841020584106, + "logps/chosen": -302.8687744140625, + "logps/rejected": -612.1443481445312, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.898529052734375, + "rewards/margins": 9.152539253234863, + "rewards/rejected": -5.254010200500488, + "step": 10993 + }, + { + "epoch": 0.64, + "learning_rate": 3.034033068496358e-08, + "logits/chosen": -2.0344316959381104, + "logits/rejected": -2.036560535430908, + "logps/chosen": -19.143821716308594, + "logps/rejected": -195.43914794921875, + "loss": 0.2617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17559757828712463, + "rewards/margins": 3.9109127521514893, + "rewards/rejected": -3.7353150844573975, + "step": 10994 + }, + { + "epoch": 0.64, + "learning_rate": 3.033166606069367e-08, + "logits/chosen": -1.8550704717636108, + "logits/rejected": -1.9214465618133545, + "logps/chosen": -179.52992248535156, + "logps/rejected": -282.8338928222656, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.576512098312378, + "rewards/margins": 3.1814467906951904, + "rewards/rejected": -0.6049346923828125, + "step": 10995 + }, + { + "epoch": 0.64, + "learning_rate": 3.032300213513916e-08, + "logits/chosen": -1.9768176078796387, + "logits/rejected": -1.9761213064193726, + "logps/chosen": -7.20015013939701e-05, + "logps/rejected": -145.6033935546875, + "loss": 0.4089, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0532322032049706e-07, + "rewards/margins": 1.7076343297958374, + "rewards/rejected": -1.7076339721679688, + "step": 10996 + }, + { + "epoch": 0.64, + "learning_rate": 3.031433890860789e-08, + "logits/chosen": -1.901633858680725, + "logits/rejected": -1.9181774854660034, + "logps/chosen": -181.39453125, + "logps/rejected": -356.13763427734375, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.225787401199341, + "rewards/margins": 5.491876602172852, + "rewards/rejected": -3.2660889625549316, + "step": 10997 + }, + { + "epoch": 0.64, + "learning_rate": 3.030567638140758e-08, + "logits/chosen": -1.9736429452896118, + "logits/rejected": -2.001721143722534, + "logps/chosen": -177.62069702148438, + "logps/rejected": -247.01849365234375, + "loss": 0.1283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8806259632110596, + "rewards/margins": 1.55406653881073, + "rewards/rejected": 0.326559454202652, + "step": 10998 + }, + { + "epoch": 0.64, + "learning_rate": 3.029701455384599e-08, + "logits/chosen": -1.8323876857757568, + "logits/rejected": -1.8408817052841187, + "logps/chosen": -194.8043975830078, + "logps/rejected": -213.89730834960938, + "loss": 0.5812, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5267471671104431, + "rewards/margins": -0.35240477323532104, + "rewards/rejected": 0.8791519403457642, + "step": 10999 + }, + { + "epoch": 0.64, + "learning_rate": 3.028835342623081e-08, + "logits/chosen": -1.9815551042556763, + "logits/rejected": -1.9840734004974365, + "logps/chosen": -234.38865661621094, + "logps/rejected": -303.3233947753906, + "loss": 0.0976, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.636091709136963, + "rewards/margins": 1.8187363147735596, + "rewards/rejected": 0.8173553347587585, + "step": 11000 + }, + { + "epoch": 0.64, + "learning_rate": 3.027969299886974e-08, + "logits/chosen": -1.9113516807556152, + "logits/rejected": -1.8931121826171875, + "logps/chosen": -87.09050750732422, + "logps/rejected": -372.36492919921875, + "loss": 0.2565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006061553955078125, + "rewards/margins": 5.151264190673828, + "rewards/rejected": -5.14520263671875, + "step": 11001 + }, + { + "epoch": 0.64, + "learning_rate": 3.027103327207043e-08, + "logits/chosen": -1.9402422904968262, + "logits/rejected": -1.9408360719680786, + "logps/chosen": -5.6457438468933105, + "logps/rejected": -0.005535611882805824, + "loss": 0.6723, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.008258151821792126, + "rewards/margins": -0.008918927982449532, + "rewards/rejected": 0.0006607764516957104, + "step": 11002 + }, + { + "epoch": 0.64, + "learning_rate": 3.0262374246140545e-08, + "logits/chosen": -1.9136459827423096, + "logits/rejected": -1.900428056716919, + "logps/chosen": -0.2592090964317322, + "logps/rejected": -337.9564208984375, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08055328577756882, + "rewards/margins": 3.0323448181152344, + "rewards/rejected": -2.951791524887085, + "step": 11003 + }, + { + "epoch": 0.64, + "learning_rate": 3.0253715921387656e-08, + "logits/chosen": -1.885353446006775, + "logits/rejected": -1.8443388938903809, + "logps/chosen": -153.20382690429688, + "logps/rejected": -294.0020751953125, + "loss": 0.1303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8951644897460938, + "rewards/margins": 1.460575819015503, + "rewards/rejected": 0.43458864092826843, + "step": 11004 + }, + { + "epoch": 0.64, + "learning_rate": 3.024505829811937e-08, + "logits/chosen": -1.889742136001587, + "logits/rejected": -1.88228178024292, + "logps/chosen": -76.16220092773438, + "logps/rejected": -352.2195739746094, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9914383292198181, + "rewards/margins": 4.839433193206787, + "rewards/rejected": -3.8479950428009033, + "step": 11005 + }, + { + "epoch": 0.64, + "learning_rate": 3.023640137664325e-08, + "logits/chosen": -2.0045278072357178, + "logits/rejected": -1.987926959991455, + "logps/chosen": -10.194174766540527, + "logps/rejected": -96.15519714355469, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6516419649124146, + "rewards/margins": 1.6005250215530396, + "rewards/rejected": -0.948883056640625, + "step": 11006 + }, + { + "epoch": 0.64, + "learning_rate": 3.022774515726682e-08, + "logits/chosen": -1.8114843368530273, + "logits/rejected": -1.7984663248062134, + "logps/chosen": -87.9561767578125, + "logps/rejected": -333.089111328125, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09680328518152237, + "rewards/margins": 4.508628845214844, + "rewards/rejected": -4.605432033538818, + "step": 11007 + }, + { + "epoch": 0.64, + "learning_rate": 3.0219089640297615e-08, + "logits/chosen": -1.7309536933898926, + "logits/rejected": -1.7304085493087769, + "logps/chosen": -124.26119995117188, + "logps/rejected": -206.95111083984375, + "loss": 0.1573, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6162292957305908, + "rewards/margins": 1.7599854469299316, + "rewards/rejected": -0.14375610649585724, + "step": 11008 + }, + { + "epoch": 0.64, + "learning_rate": 3.0210434826043084e-08, + "logits/chosen": -1.941200852394104, + "logits/rejected": -1.9782668352127075, + "logps/chosen": -157.5687255859375, + "logps/rejected": -449.1232604980469, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5281280279159546, + "rewards/margins": 5.252764701843262, + "rewards/rejected": -3.7246367931365967, + "step": 11009 + }, + { + "epoch": 0.64, + "learning_rate": 3.020178071481072e-08, + "logits/chosen": -1.705384612083435, + "logits/rejected": -1.6727701425552368, + "logps/chosen": -245.65615844726562, + "logps/rejected": -300.8714599609375, + "loss": 0.2072, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1312286853790283, + "rewards/margins": 0.8581268787384033, + "rewards/rejected": 1.273101806640625, + "step": 11010 + }, + { + "epoch": 0.64, + "learning_rate": 3.0193127306907944e-08, + "logits/chosen": -1.9950265884399414, + "logits/rejected": -1.997628927230835, + "logps/chosen": -0.2200019508600235, + "logps/rejected": -100.3790054321289, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0062335701659321785, + "rewards/margins": 3.3311681747436523, + "rewards/rejected": -3.3374016284942627, + "step": 11011 + }, + { + "epoch": 0.64, + "learning_rate": 3.0184474602642184e-08, + "logits/chosen": -1.8784984350204468, + "logits/rejected": -1.8467859029769897, + "logps/chosen": -0.22556860744953156, + "logps/rejected": -311.4950866699219, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02033083885908127, + "rewards/margins": 6.068289279937744, + "rewards/rejected": -6.0479583740234375, + "step": 11012 + }, + { + "epoch": 0.64, + "learning_rate": 3.017582260232079e-08, + "logits/chosen": -1.8690906763076782, + "logits/rejected": -1.8687316179275513, + "logps/chosen": -10.904047012329102, + "logps/rejected": -208.90817260742188, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11211395263671875, + "rewards/margins": 3.8692307472229004, + "rewards/rejected": -3.7571167945861816, + "step": 11013 + }, + { + "epoch": 0.64, + "learning_rate": 3.0167171306251174e-08, + "logits/chosen": -1.9665098190307617, + "logits/rejected": -1.9733490943908691, + "logps/chosen": -61.374820709228516, + "logps/rejected": -124.81105041503906, + "loss": 0.7877, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.06259651482105255, + "rewards/margins": -0.47992515563964844, + "rewards/rejected": 0.5425216555595398, + "step": 11014 + }, + { + "epoch": 0.64, + "learning_rate": 3.015852071474062e-08, + "logits/chosen": -1.909672498703003, + "logits/rejected": -1.9019047021865845, + "logps/chosen": -2.7600643634796143, + "logps/rejected": -186.72103881835938, + "loss": 0.4887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12578777968883514, + "rewards/margins": 1.3372646570205688, + "rewards/rejected": -1.4630523920059204, + "step": 11015 + }, + { + "epoch": 0.64, + "learning_rate": 3.0149870828096494e-08, + "logits/chosen": -1.9328688383102417, + "logits/rejected": -1.9760104417800903, + "logps/chosen": -225.0738525390625, + "logps/rejected": -358.496826171875, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.907604932785034, + "rewards/margins": 2.2480344772338867, + "rewards/rejected": 0.6595703363418579, + "step": 11016 + }, + { + "epoch": 0.64, + "learning_rate": 3.014122164662602e-08, + "logits/chosen": -1.9916071891784668, + "logits/rejected": -1.9861263036727905, + "logps/chosen": -55.01934051513672, + "logps/rejected": -235.70204162597656, + "loss": 0.4982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7895515561103821, + "rewards/margins": 2.047534942626953, + "rewards/rejected": -2.8370864391326904, + "step": 11017 + }, + { + "epoch": 0.64, + "learning_rate": 3.013257317063653e-08, + "logits/chosen": -1.8908166885375977, + "logits/rejected": -1.8954540491104126, + "logps/chosen": -0.16181212663650513, + "logps/rejected": -178.7369842529297, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010075270984089002, + "rewards/margins": 3.459690809249878, + "rewards/rejected": -3.459791660308838, + "step": 11018 + }, + { + "epoch": 0.64, + "learning_rate": 3.01239254004352e-08, + "logits/chosen": -2.061419725418091, + "logits/rejected": -2.0437018871307373, + "logps/chosen": -214.5696258544922, + "logps/rejected": -418.28021240234375, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7722549438476562, + "rewards/margins": 5.140510559082031, + "rewards/rejected": -3.368255615234375, + "step": 11019 + }, + { + "epoch": 0.64, + "learning_rate": 3.0115278336329286e-08, + "logits/chosen": -1.986548900604248, + "logits/rejected": -1.980581283569336, + "logps/chosen": -39.26559066772461, + "logps/rejected": -124.811279296875, + "loss": 0.3129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17811165750026703, + "rewards/margins": 1.7440693378448486, + "rewards/rejected": -1.5659576654434204, + "step": 11020 + }, + { + "epoch": 0.64, + "learning_rate": 3.010663197862592e-08, + "logits/chosen": -1.8529537916183472, + "logits/rejected": -1.8867137432098389, + "logps/chosen": -252.48947143554688, + "logps/rejected": -432.70953369140625, + "loss": 0.0449, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5578094720840454, + "rewards/margins": 8.598672866821289, + "rewards/rejected": -7.040863037109375, + "step": 11021 + }, + { + "epoch": 0.64, + "learning_rate": 3.0097986327632316e-08, + "logits/chosen": -1.8215571641921997, + "logits/rejected": -1.8663523197174072, + "logps/chosen": -401.2267150878906, + "logps/rejected": -500.3043212890625, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2783355712890625, + "rewards/margins": 4.861670017242432, + "rewards/rejected": -2.583334445953369, + "step": 11022 + }, + { + "epoch": 0.64, + "learning_rate": 3.008934138365558e-08, + "logits/chosen": -2.0965259075164795, + "logits/rejected": -2.07706356048584, + "logps/chosen": -113.07772827148438, + "logps/rejected": -237.43368530273438, + "loss": 0.076, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.299429416656494, + "rewards/margins": 2.452328681945801, + "rewards/rejected": -0.15289917588233948, + "step": 11023 + }, + { + "epoch": 0.64, + "learning_rate": 3.008069714700284e-08, + "logits/chosen": -1.8479657173156738, + "logits/rejected": -1.8494333028793335, + "logps/chosen": -131.5331268310547, + "logps/rejected": -209.17889404296875, + "loss": 0.1779, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7042694091796875, + "rewards/margins": 1.0519988536834717, + "rewards/rejected": 1.6522705554962158, + "step": 11024 + }, + { + "epoch": 0.64, + "learning_rate": 3.007205361798116e-08, + "logits/chosen": -2.086118459701538, + "logits/rejected": -2.085939407348633, + "logps/chosen": -29.578210830688477, + "logps/rejected": -116.84221649169922, + "loss": 0.6221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.222514346241951, + "rewards/margins": 0.6290039420127869, + "rewards/rejected": -0.8515182733535767, + "step": 11025 + }, + { + "epoch": 0.64, + "learning_rate": 3.0063410796897624e-08, + "logits/chosen": -2.0085184574127197, + "logits/rejected": -1.9869084358215332, + "logps/chosen": -143.21231079101562, + "logps/rejected": -405.6064453125, + "loss": 0.12, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1888656616210938, + "rewards/margins": 2.8710250854492188, + "rewards/rejected": -1.682159423828125, + "step": 11026 + }, + { + "epoch": 0.64, + "learning_rate": 3.005476868405926e-08, + "logits/chosen": -2.030904531478882, + "logits/rejected": -2.011829137802124, + "logps/chosen": -31.661685943603516, + "logps/rejected": -193.4068603515625, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35390129685401917, + "rewards/margins": 2.4998533725738525, + "rewards/rejected": -2.145951986312866, + "step": 11027 + }, + { + "epoch": 0.64, + "learning_rate": 3.0046127279773064e-08, + "logits/chosen": -1.9237595796585083, + "logits/rejected": -1.9256443977355957, + "logps/chosen": -14.19046688079834, + "logps/rejected": -62.86494064331055, + "loss": 0.792, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.17342472076416016, + "rewards/margins": -0.23030948638916016, + "rewards/rejected": 0.056884765625, + "step": 11028 + }, + { + "epoch": 0.64, + "learning_rate": 3.003748658434604e-08, + "logits/chosen": -1.774075984954834, + "logits/rejected": -1.75835382938385, + "logps/chosen": -6.5044026374816895, + "logps/rejected": -119.46166229248047, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22511878609657288, + "rewards/margins": 2.461472272872925, + "rewards/rejected": -2.2363533973693848, + "step": 11029 + }, + { + "epoch": 0.64, + "learning_rate": 3.002884659808514e-08, + "logits/chosen": -1.8107892274856567, + "logits/rejected": -1.7925390005111694, + "logps/chosen": -166.92982482910156, + "logps/rejected": -325.27044677734375, + "loss": 0.4357, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0783814191818237, + "rewards/margins": 0.06320810317993164, + "rewards/rejected": 1.015173316001892, + "step": 11030 + }, + { + "epoch": 0.64, + "learning_rate": 3.0020207321297295e-08, + "logits/chosen": -1.9998736381530762, + "logits/rejected": -1.992510437965393, + "logps/chosen": -0.03120066225528717, + "logps/rejected": -401.38751220703125, + "loss": 0.3513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018247537082061172, + "rewards/margins": 8.870423316955566, + "rewards/rejected": -8.872247695922852, + "step": 11031 + }, + { + "epoch": 0.64, + "learning_rate": 3.0011568754289416e-08, + "logits/chosen": -2.050518751144409, + "logits/rejected": -2.0295605659484863, + "logps/chosen": -137.49066162109375, + "logps/rejected": -223.93299865722656, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.16319739818573, + "rewards/margins": 0.6556610465049744, + "rewards/rejected": 0.5075363516807556, + "step": 11032 + }, + { + "epoch": 0.64, + "learning_rate": 3.00029308973684e-08, + "logits/chosen": -1.8652688264846802, + "logits/rejected": -1.8553813695907593, + "logps/chosen": -158.461669921875, + "logps/rejected": -334.73150634765625, + "loss": 0.1354, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3372589349746704, + "rewards/margins": 1.5599091053009033, + "rewards/rejected": -0.22265015542507172, + "step": 11033 + }, + { + "epoch": 0.64, + "learning_rate": 2.999429375084107e-08, + "logits/chosen": -1.7769993543624878, + "logits/rejected": -1.772679328918457, + "logps/chosen": -174.52410888671875, + "logps/rejected": -413.0726623535156, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3728530406951904, + "rewards/margins": 5.737095832824707, + "rewards/rejected": -3.3642425537109375, + "step": 11034 + }, + { + "epoch": 0.64, + "learning_rate": 2.998565731501432e-08, + "logits/chosen": -2.0201339721679688, + "logits/rejected": -2.012753486633301, + "logps/chosen": -58.180023193359375, + "logps/rejected": -252.11122131347656, + "loss": 0.1381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.93905109167099, + "rewards/margins": 3.60146427154541, + "rewards/rejected": -2.6624131202697754, + "step": 11035 + }, + { + "epoch": 0.64, + "learning_rate": 2.997702159019489e-08, + "logits/chosen": -1.8865821361541748, + "logits/rejected": -1.8759961128234863, + "logps/chosen": -194.528076171875, + "logps/rejected": -310.99603271484375, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.28570556640625, + "rewards/margins": 1.826776146888733, + "rewards/rejected": -0.5410705804824829, + "step": 11036 + }, + { + "epoch": 0.64, + "learning_rate": 2.996838657668962e-08, + "logits/chosen": -1.8076512813568115, + "logits/rejected": -1.8076282739639282, + "logps/chosen": -1.2434265613555908, + "logps/rejected": -142.64825439453125, + "loss": 0.4286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03769148513674736, + "rewards/margins": 1.542548418045044, + "rewards/rejected": -1.5048569440841675, + "step": 11037 + }, + { + "epoch": 0.64, + "learning_rate": 2.995975227480523e-08, + "logits/chosen": -1.6695805788040161, + "logits/rejected": -1.674075961112976, + "logps/chosen": -234.01995849609375, + "logps/rejected": -374.6563720703125, + "loss": 0.2731, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9125946760177612, + "rewards/margins": 0.6087006330490112, + "rewards/rejected": 1.30389404296875, + "step": 11038 + }, + { + "epoch": 0.64, + "learning_rate": 2.9951118684848484e-08, + "logits/chosen": -1.7831389904022217, + "logits/rejected": -1.786426067352295, + "logps/chosen": -174.22802734375, + "logps/rejected": -278.44232177734375, + "loss": 0.3952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3823226988315582, + "rewards/margins": 0.4375869929790497, + "rewards/rejected": -0.05526428297162056, + "step": 11039 + }, + { + "epoch": 0.64, + "learning_rate": 2.994248580712604e-08, + "logits/chosen": -1.8813964128494263, + "logits/rejected": -1.9551746845245361, + "logps/chosen": -168.435791015625, + "logps/rejected": -225.561767578125, + "loss": 0.2053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2278717756271362, + "rewards/margins": 1.0101654529571533, + "rewards/rejected": 0.21770630776882172, + "step": 11040 + }, + { + "epoch": 0.64, + "learning_rate": 2.993385364194464e-08, + "logits/chosen": -1.985938549041748, + "logits/rejected": -1.9718356132507324, + "logps/chosen": -180.7608642578125, + "logps/rejected": -376.2452087402344, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.204455614089966, + "rewards/margins": 2.8707306385040283, + "rewards/rejected": -0.6662750244140625, + "step": 11041 + }, + { + "epoch": 0.64, + "learning_rate": 2.99252221896109e-08, + "logits/chosen": -1.7367640733718872, + "logits/rejected": -1.72731614112854, + "logps/chosen": -106.31022644042969, + "logps/rejected": -292.1186828613281, + "loss": 0.1373, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0269287824630737, + "rewards/margins": 5.864374160766602, + "rewards/rejected": -4.837445259094238, + "step": 11042 + }, + { + "epoch": 0.64, + "learning_rate": 2.991659145043147e-08, + "logits/chosen": -1.8079603910446167, + "logits/rejected": -1.8037551641464233, + "logps/chosen": -209.281005859375, + "logps/rejected": -340.268798828125, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9930665493011475, + "rewards/margins": 3.6504945755004883, + "rewards/rejected": -0.657427966594696, + "step": 11043 + }, + { + "epoch": 0.64, + "learning_rate": 2.9907961424712944e-08, + "logits/chosen": -1.8351061344146729, + "logits/rejected": -1.8274480104446411, + "logps/chosen": -15.18411922454834, + "logps/rejected": -220.3325653076172, + "loss": 0.1072, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.71502685546875, + "rewards/margins": 2.806532382965088, + "rewards/rejected": -1.0915054082870483, + "step": 11044 + }, + { + "epoch": 0.64, + "learning_rate": 2.989933211276192e-08, + "logits/chosen": -1.8757808208465576, + "logits/rejected": -1.8646907806396484, + "logps/chosen": -71.87169647216797, + "logps/rejected": -131.08224487304688, + "loss": 0.4196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3621177673339844, + "rewards/margins": 0.8330490589141846, + "rewards/rejected": -0.4709312617778778, + "step": 11045 + }, + { + "epoch": 0.64, + "learning_rate": 2.989070351488493e-08, + "logits/chosen": -1.8625723123550415, + "logits/rejected": -1.8597968816757202, + "logps/chosen": -223.62527465820312, + "logps/rejected": -414.9622802734375, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4690979719161987, + "rewards/margins": 6.163376331329346, + "rewards/rejected": -4.694278240203857, + "step": 11046 + }, + { + "epoch": 0.64, + "learning_rate": 2.988207563138851e-08, + "logits/chosen": -2.0308690071105957, + "logits/rejected": -2.0285189151763916, + "logps/chosen": -45.260536193847656, + "logps/rejected": -210.6176300048828, + "loss": 0.3827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10080184787511826, + "rewards/margins": 1.748268961906433, + "rewards/rejected": -1.8490707874298096, + "step": 11047 + }, + { + "epoch": 0.64, + "learning_rate": 2.98734484625792e-08, + "logits/chosen": -2.135423183441162, + "logits/rejected": -2.13277530670166, + "logps/chosen": -118.7069320678711, + "logps/rejected": -290.36297607421875, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21027527749538422, + "rewards/margins": 1.817895531654358, + "rewards/rejected": -2.0281708240509033, + "step": 11048 + }, + { + "epoch": 0.64, + "learning_rate": 2.986482200876342e-08, + "logits/chosen": -1.8137062788009644, + "logits/rejected": -1.811497688293457, + "logps/chosen": -28.203121185302734, + "logps/rejected": -170.44358825683594, + "loss": 0.1797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8155536651611328, + "rewards/margins": 3.048863649368286, + "rewards/rejected": -2.2333099842071533, + "step": 11049 + }, + { + "epoch": 0.64, + "learning_rate": 2.985619627024768e-08, + "logits/chosen": -1.9141768217086792, + "logits/rejected": -1.9525445699691772, + "logps/chosen": -292.95654296875, + "logps/rejected": -527.4654541015625, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.743865966796875, + "rewards/margins": 6.247629165649414, + "rewards/rejected": -3.50376296043396, + "step": 11050 + }, + { + "epoch": 0.64, + "learning_rate": 2.984757124733836e-08, + "logits/chosen": -1.8837292194366455, + "logits/rejected": -1.9075590372085571, + "logps/chosen": -210.25131225585938, + "logps/rejected": -314.5108642578125, + "loss": 0.0811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.661065697669983, + "rewards/margins": 1.9853333234786987, + "rewards/rejected": -0.32426759600639343, + "step": 11051 + }, + { + "epoch": 0.64, + "learning_rate": 2.98389469403419e-08, + "logits/chosen": -1.866940975189209, + "logits/rejected": -1.8778152465820312, + "logps/chosen": -278.2198486328125, + "logps/rejected": -394.7878112792969, + "loss": 0.0778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3444702625274658, + "rewards/margins": 3.0669891834259033, + "rewards/rejected": -1.7225189208984375, + "step": 11052 + }, + { + "epoch": 0.64, + "learning_rate": 2.983032334956466e-08, + "logits/chosen": -2.005746841430664, + "logits/rejected": -2.0011818408966064, + "logps/chosen": -19.273666381835938, + "logps/rejected": -170.98779296875, + "loss": 0.455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1600521057844162, + "rewards/margins": 2.166492223739624, + "rewards/rejected": -2.3265442848205566, + "step": 11053 + }, + { + "epoch": 0.64, + "learning_rate": 2.982170047531299e-08, + "logits/chosen": -1.900022029876709, + "logits/rejected": -1.904080867767334, + "logps/chosen": -55.268070220947266, + "logps/rejected": -275.84564208984375, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015211868099868298, + "rewards/margins": 2.47037935256958, + "rewards/rejected": -2.485591173171997, + "step": 11054 + }, + { + "epoch": 0.64, + "learning_rate": 2.9813078317893214e-08, + "logits/chosen": -1.863169550895691, + "logits/rejected": -1.861020803451538, + "logps/chosen": -13.500693321228027, + "logps/rejected": -204.96095275878906, + "loss": 0.3072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18744631111621857, + "rewards/margins": 4.107415676116943, + "rewards/rejected": -3.919969320297241, + "step": 11055 + }, + { + "epoch": 0.64, + "learning_rate": 2.9804456877611664e-08, + "logits/chosen": -1.8167985677719116, + "logits/rejected": -1.8560131788253784, + "logps/chosen": -212.10157775878906, + "logps/rejected": -288.9725036621094, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0778000354766846, + "rewards/margins": 1.847370982170105, + "rewards/rejected": 1.2304290533065796, + "step": 11056 + }, + { + "epoch": 0.64, + "learning_rate": 2.9795836154774567e-08, + "logits/chosen": -1.892953872680664, + "logits/rejected": -1.8770934343338013, + "logps/chosen": -180.8003387451172, + "logps/rejected": -533.3556518554688, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9241104125976562, + "rewards/margins": 7.582167148590088, + "rewards/rejected": -4.658056735992432, + "step": 11057 + }, + { + "epoch": 0.64, + "learning_rate": 2.978721614968823e-08, + "logits/chosen": -1.9308111667633057, + "logits/rejected": -1.9233813285827637, + "logps/chosen": -6.470114707946777, + "logps/rejected": -148.23992919921875, + "loss": 0.3553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014757538214325905, + "rewards/margins": 2.672715187072754, + "rewards/rejected": -2.6579575538635254, + "step": 11058 + }, + { + "epoch": 0.64, + "learning_rate": 2.977859686265881e-08, + "logits/chosen": -2.0377309322357178, + "logits/rejected": -2.0307042598724365, + "logps/chosen": -0.0019134155008941889, + "logps/rejected": -261.2410888671875, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00012802907440345734, + "rewards/margins": 7.958014965057373, + "rewards/rejected": -7.957887172698975, + "step": 11059 + }, + { + "epoch": 0.64, + "learning_rate": 2.976997829399258e-08, + "logits/chosen": -1.6835952997207642, + "logits/rejected": -1.6877073049545288, + "logps/chosen": -266.93878173828125, + "logps/rejected": -350.813720703125, + "loss": 0.4573, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.490164279937744, + "rewards/margins": -0.385162353515625, + "rewards/rejected": 2.875326633453369, + "step": 11060 + }, + { + "epoch": 0.64, + "learning_rate": 2.976136044399565e-08, + "logits/chosen": -1.8143916130065918, + "logits/rejected": -1.8732683658599854, + "logps/chosen": -211.19869995117188, + "logps/rejected": -381.853515625, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.305194139480591, + "rewards/margins": 4.666571140289307, + "rewards/rejected": -2.361377000808716, + "step": 11061 + }, + { + "epoch": 0.64, + "learning_rate": 2.9752743312974203e-08, + "logits/chosen": -1.9805514812469482, + "logits/rejected": -1.9842272996902466, + "logps/chosen": -0.005035111214965582, + "logps/rejected": -197.52529907226562, + "loss": 0.3803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001352424151264131, + "rewards/margins": 2.2953250408172607, + "rewards/rejected": -2.2939727306365967, + "step": 11062 + }, + { + "epoch": 0.64, + "learning_rate": 2.9744126901234356e-08, + "logits/chosen": -1.8798292875289917, + "logits/rejected": -1.8606116771697998, + "logps/chosen": -362.005615234375, + "logps/rejected": -471.683349609375, + "loss": 0.1829, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2755249738693237, + "rewards/margins": 1.2577210664749146, + "rewards/rejected": 0.01780395582318306, + "step": 11063 + }, + { + "epoch": 0.64, + "learning_rate": 2.973551120908221e-08, + "logits/chosen": -2.0210459232330322, + "logits/rejected": -2.024519920349121, + "logps/chosen": -0.04239083454012871, + "logps/rejected": -172.20263671875, + "loss": 0.4346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034161682706326246, + "rewards/margins": 1.6445056200027466, + "rewards/rejected": -1.6479218006134033, + "step": 11064 + }, + { + "epoch": 0.64, + "learning_rate": 2.9726896236823816e-08, + "logits/chosen": -2.036602735519409, + "logits/rejected": -2.0163068771362305, + "logps/chosen": -59.17209243774414, + "logps/rejected": -164.7118377685547, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5742374658584595, + "rewards/margins": 1.3828494548797607, + "rewards/rejected": -0.808612048625946, + "step": 11065 + }, + { + "epoch": 0.64, + "learning_rate": 2.9718281984765238e-08, + "logits/chosen": -1.930667519569397, + "logits/rejected": -1.9107252359390259, + "logps/chosen": -99.39427185058594, + "logps/rejected": -625.03759765625, + "loss": 0.4499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5483741760253906, + "rewards/margins": 11.372615814208984, + "rewards/rejected": -11.920989990234375, + "step": 11066 + }, + { + "epoch": 0.64, + "learning_rate": 2.97096684532125e-08, + "logits/chosen": -1.986014723777771, + "logits/rejected": -1.978466272354126, + "logps/chosen": -63.87996292114258, + "logps/rejected": -228.29782104492188, + "loss": 0.221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7116832733154297, + "rewards/margins": 2.563894271850586, + "rewards/rejected": -1.8522109985351562, + "step": 11067 + }, + { + "epoch": 0.64, + "learning_rate": 2.970105564247158e-08, + "logits/chosen": -1.7598094940185547, + "logits/rejected": -1.7580078840255737, + "logps/chosen": -2.9062719345092773, + "logps/rejected": -172.072509765625, + "loss": 0.5813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12308641523122787, + "rewards/margins": 0.5286775231361389, + "rewards/rejected": -0.651763916015625, + "step": 11068 + }, + { + "epoch": 0.64, + "learning_rate": 2.9692443552848466e-08, + "logits/chosen": -1.9533717632293701, + "logits/rejected": -1.951313853263855, + "logps/chosen": -49.79786682128906, + "logps/rejected": -109.78282928466797, + "loss": 0.4984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19805069267749786, + "rewards/margins": 0.41530531644821167, + "rewards/rejected": -0.217254638671875, + "step": 11069 + }, + { + "epoch": 0.64, + "learning_rate": 2.9683832184649082e-08, + "logits/chosen": -1.8014110326766968, + "logits/rejected": -1.8919765949249268, + "logps/chosen": -230.64865112304688, + "logps/rejected": -390.94293212890625, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.716162085533142, + "rewards/margins": 4.625405788421631, + "rewards/rejected": -2.9092438220977783, + "step": 11070 + }, + { + "epoch": 0.64, + "learning_rate": 2.9675221538179364e-08, + "logits/chosen": -1.807844877243042, + "logits/rejected": -1.774956464767456, + "logps/chosen": -202.41949462890625, + "logps/rejected": -515.1788940429688, + "loss": 0.0915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4681488275527954, + "rewards/margins": 3.1517181396484375, + "rewards/rejected": -1.683569312095642, + "step": 11071 + }, + { + "epoch": 0.64, + "learning_rate": 2.966661161374519e-08, + "logits/chosen": -1.975663423538208, + "logits/rejected": -1.9651166200637817, + "logps/chosen": -57.12584686279297, + "logps/rejected": -231.03829956054688, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17486344277858734, + "rewards/margins": 2.1044838428497314, + "rewards/rejected": -1.929620385169983, + "step": 11072 + }, + { + "epoch": 0.64, + "learning_rate": 2.9658002411652438e-08, + "logits/chosen": -1.8962160348892212, + "logits/rejected": -1.8917834758758545, + "logps/chosen": -21.339406967163086, + "logps/rejected": -211.91583251953125, + "loss": 0.412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5202903747558594, + "rewards/margins": 0.8830177783966064, + "rewards/rejected": -0.3627273738384247, + "step": 11073 + }, + { + "epoch": 0.64, + "learning_rate": 2.9649393932206933e-08, + "logits/chosen": -1.8231747150421143, + "logits/rejected": -1.812691569328308, + "logps/chosen": -81.1280517578125, + "logps/rejected": -292.3931884765625, + "loss": 0.1306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8560134768486023, + "rewards/margins": 2.333421230316162, + "rewards/rejected": -1.4774078130722046, + "step": 11074 + }, + { + "epoch": 0.64, + "learning_rate": 2.9640786175714528e-08, + "logits/chosen": -1.8372610807418823, + "logits/rejected": -1.8218134641647339, + "logps/chosen": -147.43621826171875, + "logps/rejected": -252.61416625976562, + "loss": 0.2415, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.620477318763733, + "rewards/margins": 1.0295014381408691, + "rewards/rejected": 0.5909759402275085, + "step": 11075 + }, + { + "epoch": 0.64, + "learning_rate": 2.9632179142480952e-08, + "logits/chosen": -1.8567886352539062, + "logits/rejected": -1.829595923423767, + "logps/chosen": -144.96316528320312, + "logps/rejected": -232.60777282714844, + "loss": 0.1416, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0161635875701904, + "rewards/margins": 1.7104979753494263, + "rewards/rejected": 0.3056655824184418, + "step": 11076 + }, + { + "epoch": 0.64, + "learning_rate": 2.962357283281203e-08, + "logits/chosen": -1.8924566507339478, + "logits/rejected": -1.876402735710144, + "logps/chosen": -9.207763671875, + "logps/rejected": -235.76370239257812, + "loss": 0.2166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5333163142204285, + "rewards/margins": 5.368648052215576, + "rewards/rejected": -4.835331916809082, + "step": 11077 + }, + { + "epoch": 0.64, + "learning_rate": 2.9614967247013446e-08, + "logits/chosen": -1.8227684497833252, + "logits/rejected": -1.8718467950820923, + "logps/chosen": -235.80540466308594, + "logps/rejected": -378.8104248046875, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8683853149414062, + "rewards/margins": 2.987901210784912, + "rewards/rejected": -0.11951599270105362, + "step": 11078 + }, + { + "epoch": 0.64, + "learning_rate": 2.960636238539097e-08, + "logits/chosen": -1.8825747966766357, + "logits/rejected": -1.9202253818511963, + "logps/chosen": -211.22311401367188, + "logps/rejected": -242.03091430664062, + "loss": 0.4311, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.624140977859497, + "rewards/margins": -0.19039762020111084, + "rewards/rejected": 1.814538598060608, + "step": 11079 + }, + { + "epoch": 0.64, + "learning_rate": 2.9597758248250226e-08, + "logits/chosen": -1.9429467916488647, + "logits/rejected": -1.924066185951233, + "logps/chosen": -220.7179412841797, + "logps/rejected": -395.766845703125, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.137831211090088, + "rewards/margins": 5.645750522613525, + "rewards/rejected": -3.5079193115234375, + "step": 11080 + }, + { + "epoch": 0.64, + "learning_rate": 2.9589154835896946e-08, + "logits/chosen": -1.7610764503479004, + "logits/rejected": -1.8285317420959473, + "logps/chosen": -204.36795043945312, + "logps/rejected": -369.1237487792969, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3340651988983154, + "rewards/margins": 3.974931240081787, + "rewards/rejected": -1.6408661603927612, + "step": 11081 + }, + { + "epoch": 0.64, + "learning_rate": 2.9580552148636685e-08, + "logits/chosen": -1.801391363143921, + "logits/rejected": -1.8296895027160645, + "logps/chosen": -256.8193664550781, + "logps/rejected": -439.5483093261719, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8482513427734375, + "rewards/margins": 6.108376979827881, + "rewards/rejected": -4.260125637054443, + "step": 11082 + }, + { + "epoch": 0.64, + "learning_rate": 2.9571950186775118e-08, + "logits/chosen": -1.8942897319793701, + "logits/rejected": -1.8844062089920044, + "logps/chosen": -27.72748565673828, + "logps/rejected": -202.80718994140625, + "loss": 0.3127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4380180537700653, + "rewards/margins": 2.975911855697632, + "rewards/rejected": -2.537893772125244, + "step": 11083 + }, + { + "epoch": 0.65, + "learning_rate": 2.9563348950617794e-08, + "logits/chosen": -1.8555704355239868, + "logits/rejected": -1.8522958755493164, + "logps/chosen": -246.23355102539062, + "logps/rejected": -448.23919677734375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5518739223480225, + "rewards/margins": 4.498199462890625, + "rewards/rejected": -0.9463257193565369, + "step": 11084 + }, + { + "epoch": 0.65, + "learning_rate": 2.9554748440470296e-08, + "logits/chosen": -1.8975131511688232, + "logits/rejected": -1.8767213821411133, + "logps/chosen": -42.86079406738281, + "logps/rejected": -317.0605163574219, + "loss": 0.1763, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4808017909526825, + "rewards/margins": 5.253326892852783, + "rewards/rejected": -4.772525310516357, + "step": 11085 + }, + { + "epoch": 0.65, + "learning_rate": 2.9546148656638125e-08, + "logits/chosen": -1.8384995460510254, + "logits/rejected": -1.8666397333145142, + "logps/chosen": -286.8082275390625, + "logps/rejected": -286.8550109863281, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0342957973480225, + "rewards/margins": 2.1593170166015625, + "rewards/rejected": 0.8749786615371704, + "step": 11086 + }, + { + "epoch": 0.65, + "learning_rate": 2.9537549599426815e-08, + "logits/chosen": -1.900451898574829, + "logits/rejected": -1.9726738929748535, + "logps/chosen": -215.56765747070312, + "logps/rejected": -464.83782958984375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.619549512863159, + "rewards/margins": 5.425097465515137, + "rewards/rejected": -2.8055481910705566, + "step": 11087 + }, + { + "epoch": 0.65, + "learning_rate": 2.9528951269141837e-08, + "logits/chosen": -1.7628364562988281, + "logits/rejected": -1.7558938264846802, + "logps/chosen": -1.0316624641418457, + "logps/rejected": -128.86410522460938, + "loss": 0.4794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037077952176332474, + "rewards/margins": 1.0238093137741089, + "rewards/rejected": -1.0608872175216675, + "step": 11088 + }, + { + "epoch": 0.65, + "learning_rate": 2.9520353666088636e-08, + "logits/chosen": -1.776951551437378, + "logits/rejected": -1.8010149002075195, + "logps/chosen": -274.2679443359375, + "logps/rejected": -403.46337890625, + "loss": 0.4384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9243835806846619, + "rewards/margins": 0.10668337345123291, + "rewards/rejected": 0.817700207233429, + "step": 11089 + }, + { + "epoch": 0.65, + "learning_rate": 2.951175679057266e-08, + "logits/chosen": -1.9881194829940796, + "logits/rejected": -1.9658743143081665, + "logps/chosen": -131.9819793701172, + "logps/rejected": -202.66722106933594, + "loss": 0.4016, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.8328964710235596, + "rewards/margins": -0.12595975399017334, + "rewards/rejected": 1.958856225013733, + "step": 11090 + }, + { + "epoch": 0.65, + "learning_rate": 2.9503160642899295e-08, + "logits/chosen": -1.8835117816925049, + "logits/rejected": -1.8692415952682495, + "logps/chosen": -0.0009106253855861723, + "logps/rejected": -265.1665344238281, + "loss": 0.3631, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.505355461034924e-05, + "rewards/margins": 3.1597530841827393, + "rewards/rejected": -3.1598281860351562, + "step": 11091 + }, + { + "epoch": 0.65, + "learning_rate": 2.9494565223373935e-08, + "logits/chosen": -1.8083287477493286, + "logits/rejected": -1.8031134605407715, + "logps/chosen": -137.15232849121094, + "logps/rejected": -297.8016052246094, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.441441297531128, + "rewards/margins": 4.107487678527832, + "rewards/rejected": -1.666046142578125, + "step": 11092 + }, + { + "epoch": 0.65, + "learning_rate": 2.9485970532301908e-08, + "logits/chosen": -1.981175184249878, + "logits/rejected": -1.9514517784118652, + "logps/chosen": -246.08486938476562, + "logps/rejected": -426.7271423339844, + "loss": 0.0323, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6373016834259033, + "rewards/margins": 3.7657103538513184, + "rewards/rejected": -1.1284087896347046, + "step": 11093 + }, + { + "epoch": 0.65, + "learning_rate": 2.947737656998856e-08, + "logits/chosen": -1.8912866115570068, + "logits/rejected": -1.8849636316299438, + "logps/chosen": -158.70828247070312, + "logps/rejected": -427.882568359375, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.136529564857483, + "rewards/margins": 3.003323554992676, + "rewards/rejected": -1.8667938709259033, + "step": 11094 + }, + { + "epoch": 0.65, + "learning_rate": 2.9468783336739167e-08, + "logits/chosen": -1.8580225706100464, + "logits/rejected": -1.9123053550720215, + "logps/chosen": -241.451171875, + "logps/rejected": -329.5757141113281, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5501770973205566, + "rewards/margins": 3.5335235595703125, + "rewards/rejected": -0.9833465814590454, + "step": 11095 + }, + { + "epoch": 0.65, + "learning_rate": 2.9460190832859043e-08, + "logits/chosen": -1.7157526016235352, + "logits/rejected": -1.7004318237304688, + "logps/chosen": -231.9323272705078, + "logps/rejected": -353.07574462890625, + "loss": 0.2438, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5925369262695312, + "rewards/margins": 0.5115585327148438, + "rewards/rejected": 2.0809783935546875, + "step": 11096 + }, + { + "epoch": 0.65, + "learning_rate": 2.9451599058653388e-08, + "logits/chosen": -2.1257340908050537, + "logits/rejected": -2.1259586811065674, + "logps/chosen": -1.5609310865402222, + "logps/rejected": -136.6534423828125, + "loss": 0.3034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3029184341430664, + "rewards/margins": 2.872860908508301, + "rewards/rejected": -2.5699424743652344, + "step": 11097 + }, + { + "epoch": 0.65, + "learning_rate": 2.944300801442748e-08, + "logits/chosen": -1.711172342300415, + "logits/rejected": -1.6967418193817139, + "logps/chosen": -184.8715057373047, + "logps/rejected": -265.4958190917969, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2903823852539062, + "rewards/margins": 3.6823105812072754, + "rewards/rejected": -1.3919280767440796, + "step": 11098 + }, + { + "epoch": 0.65, + "learning_rate": 2.9434417700486448e-08, + "logits/chosen": -1.8687944412231445, + "logits/rejected": -1.8299660682678223, + "logps/chosen": -270.51019287109375, + "logps/rejected": -375.4283447265625, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9174256324768066, + "rewards/margins": 5.368252754211426, + "rewards/rejected": -2.450827121734619, + "step": 11099 + }, + { + "epoch": 0.65, + "learning_rate": 2.9425828117135528e-08, + "logits/chosen": -1.8863774538040161, + "logits/rejected": -1.8985832929611206, + "logps/chosen": -338.9031677246094, + "logps/rejected": -526.2154541015625, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.977001965045929, + "rewards/margins": 4.600274562835693, + "rewards/rejected": -3.623272657394409, + "step": 11100 + }, + { + "epoch": 0.65, + "learning_rate": 2.9417239264679798e-08, + "logits/chosen": -1.7707427740097046, + "logits/rejected": -1.7542250156402588, + "logps/chosen": -26.91093635559082, + "logps/rejected": -145.6447296142578, + "loss": 0.2554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7196363806724548, + "rewards/margins": 1.9705443382263184, + "rewards/rejected": -1.2509078979492188, + "step": 11101 + }, + { + "epoch": 0.65, + "learning_rate": 2.9408651143424446e-08, + "logits/chosen": -1.7979865074157715, + "logits/rejected": -1.8326560258865356, + "logps/chosen": -229.9173126220703, + "logps/rejected": -398.82049560546875, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9192185401916504, + "rewards/margins": 4.73961067199707, + "rewards/rejected": -1.8203918933868408, + "step": 11102 + }, + { + "epoch": 0.65, + "learning_rate": 2.9400063753674497e-08, + "logits/chosen": -1.768287181854248, + "logits/rejected": -1.7704384326934814, + "logps/chosen": -73.78826904296875, + "logps/rejected": -211.8052215576172, + "loss": 0.3935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0020843506790697575, + "rewards/margins": 1.2614792585372925, + "rewards/rejected": -1.259394884109497, + "step": 11103 + }, + { + "epoch": 0.65, + "learning_rate": 2.9391477095735067e-08, + "logits/chosen": -2.0099401473999023, + "logits/rejected": -2.003300666809082, + "logps/chosen": -3.1828527426114306e-05, + "logps/rejected": -133.87045288085938, + "loss": 0.4563, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.768953587586111e-08, + "rewards/margins": 1.3852509260177612, + "rewards/rejected": -1.3852509260177612, + "step": 11104 + }, + { + "epoch": 0.65, + "learning_rate": 2.9382891169911174e-08, + "logits/chosen": -1.919188141822815, + "logits/rejected": -1.9172511100769043, + "logps/chosen": -8.878195762634277, + "logps/rejected": -270.3709411621094, + "loss": 0.3443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07486181706190109, + "rewards/margins": 3.30319881439209, + "rewards/rejected": -3.228337049484253, + "step": 11105 + }, + { + "epoch": 0.65, + "learning_rate": 2.937430597650784e-08, + "logits/chosen": -1.9207862615585327, + "logits/rejected": -1.923545002937317, + "logps/chosen": -25.597454071044922, + "logps/rejected": -265.6810302734375, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15007667243480682, + "rewards/margins": 4.286255359649658, + "rewards/rejected": -4.136178493499756, + "step": 11106 + }, + { + "epoch": 0.65, + "learning_rate": 2.9365721515830032e-08, + "logits/chosen": -2.039989471435547, + "logits/rejected": -2.039797067642212, + "logps/chosen": -10.358161926269531, + "logps/rejected": -66.45222473144531, + "loss": 0.4705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49662142992019653, + "rewards/margins": 0.42270633578300476, + "rewards/rejected": 0.07391510158777237, + "step": 11107 + }, + { + "epoch": 0.65, + "learning_rate": 2.935713778818274e-08, + "logits/chosen": -1.9246269464492798, + "logits/rejected": -1.9187500476837158, + "logps/chosen": -18.77724838256836, + "logps/rejected": -347.75921630859375, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19144058227539062, + "rewards/margins": 5.972684383392334, + "rewards/rejected": -5.781243801116943, + "step": 11108 + }, + { + "epoch": 0.65, + "learning_rate": 2.9348554793870893e-08, + "logits/chosen": -1.9482139348983765, + "logits/rejected": -1.9501328468322754, + "logps/chosen": -52.56800842285156, + "logps/rejected": -182.98052978515625, + "loss": 0.1639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8870506286621094, + "rewards/margins": 2.2874412536621094, + "rewards/rejected": -1.400390625, + "step": 11109 + }, + { + "epoch": 0.65, + "learning_rate": 2.9339972533199393e-08, + "logits/chosen": -1.8149033784866333, + "logits/rejected": -1.75186026096344, + "logps/chosen": -247.98684692382812, + "logps/rejected": -425.1236877441406, + "loss": 0.0965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9457610845565796, + "rewards/margins": 2.4215025901794434, + "rewards/rejected": -0.47574159502983093, + "step": 11110 + }, + { + "epoch": 0.65, + "learning_rate": 2.9331391006473134e-08, + "logits/chosen": -1.7881865501403809, + "logits/rejected": -1.7085899114608765, + "logps/chosen": -310.1090087890625, + "logps/rejected": -524.1788940429688, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.498303174972534, + "rewards/margins": 3.672882080078125, + "rewards/rejected": -1.1745789051055908, + "step": 11111 + }, + { + "epoch": 0.65, + "learning_rate": 2.9322810213996958e-08, + "logits/chosen": -1.9694569110870361, + "logits/rejected": -1.965086817741394, + "logps/chosen": -0.0008526098681613803, + "logps/rejected": -283.0502624511719, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.278022530954331e-05, + "rewards/margins": 5.348837852478027, + "rewards/rejected": -5.348910808563232, + "step": 11112 + }, + { + "epoch": 0.65, + "learning_rate": 2.9314230156075714e-08, + "logits/chosen": -1.99399733543396, + "logits/rejected": -1.997531533241272, + "logps/chosen": -63.17961883544922, + "logps/rejected": -270.1183776855469, + "loss": 0.4916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007915496826171875, + "rewards/margins": 0.3991508483886719, + "rewards/rejected": -0.3912353515625, + "step": 11113 + }, + { + "epoch": 0.65, + "learning_rate": 2.9305650833014193e-08, + "logits/chosen": -1.892874002456665, + "logits/rejected": -1.89042329788208, + "logps/chosen": -46.56574630737305, + "logps/rejected": -115.14179992675781, + "loss": 0.3414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2621799409389496, + "rewards/margins": 1.5126957893371582, + "rewards/rejected": -1.2505158185958862, + "step": 11114 + }, + { + "epoch": 0.65, + "learning_rate": 2.9297072245117194e-08, + "logits/chosen": -1.9078632593154907, + "logits/rejected": -1.9387027025222778, + "logps/chosen": -160.14266967773438, + "logps/rejected": -343.1896057128906, + "loss": 0.2141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6417617797851562, + "rewards/margins": 0.8493026494979858, + "rewards/rejected": -0.20754089951515198, + "step": 11115 + }, + { + "epoch": 0.65, + "learning_rate": 2.9288494392689433e-08, + "logits/chosen": -1.971434235572815, + "logits/rejected": -1.9532405138015747, + "logps/chosen": -199.36050415039062, + "logps/rejected": -376.21746826171875, + "loss": 0.07, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.679046630859375, + "rewards/margins": 3.0031676292419434, + "rewards/rejected": -1.324121117591858, + "step": 11116 + }, + { + "epoch": 0.65, + "learning_rate": 2.9279917276035694e-08, + "logits/chosen": -2.0223448276519775, + "logits/rejected": -2.015204906463623, + "logps/chosen": -0.00014721977640874684, + "logps/rejected": -174.7406005859375, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2538219127454795e-05, + "rewards/margins": 4.731710433959961, + "rewards/rejected": -4.731688022613525, + "step": 11117 + }, + { + "epoch": 0.65, + "learning_rate": 2.927134089546061e-08, + "logits/chosen": -1.981368899345398, + "logits/rejected": -1.9931937456130981, + "logps/chosen": -168.4899444580078, + "logps/rejected": -294.9267883300781, + "loss": 0.0775, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.294847249984741, + "rewards/margins": 2.273402452468872, + "rewards/rejected": 0.02144470252096653, + "step": 11118 + }, + { + "epoch": 0.65, + "learning_rate": 2.926276525126893e-08, + "logits/chosen": -1.9015108346939087, + "logits/rejected": -1.8964649438858032, + "logps/chosen": -9.455456733703613, + "logps/rejected": -221.3170928955078, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03726387023925781, + "rewards/margins": 4.811288833618164, + "rewards/rejected": -4.774024963378906, + "step": 11119 + }, + { + "epoch": 0.65, + "learning_rate": 2.9254190343765227e-08, + "logits/chosen": -2.1185221672058105, + "logits/rejected": -2.117845296859741, + "logps/chosen": -10.711910247802734, + "logps/rejected": -146.928955078125, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7253581881523132, + "rewards/margins": 0.9966136813163757, + "rewards/rejected": -0.2712554931640625, + "step": 11120 + }, + { + "epoch": 0.65, + "learning_rate": 2.9245616173254206e-08, + "logits/chosen": -2.0939841270446777, + "logits/rejected": -2.103904962539673, + "logps/chosen": -84.10324096679688, + "logps/rejected": -281.8599548339844, + "loss": 0.175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6634140014648438, + "rewards/margins": 2.640272617340088, + "rewards/rejected": -1.9768584966659546, + "step": 11121 + }, + { + "epoch": 0.65, + "learning_rate": 2.9237042740040373e-08, + "logits/chosen": -2.01611065864563, + "logits/rejected": -2.0053153038024902, + "logps/chosen": -29.698211669921875, + "logps/rejected": -196.8768310546875, + "loss": 0.143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9195038080215454, + "rewards/margins": 3.113137722015381, + "rewards/rejected": -2.193634033203125, + "step": 11122 + }, + { + "epoch": 0.65, + "learning_rate": 2.922847004442839e-08, + "logits/chosen": -1.8354125022888184, + "logits/rejected": -1.80937922000885, + "logps/chosen": -130.55767822265625, + "logps/rejected": -240.18865966796875, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9251968264579773, + "rewards/margins": 0.8615310788154602, + "rewards/rejected": 0.06366576999425888, + "step": 11123 + }, + { + "epoch": 0.65, + "learning_rate": 2.9219898086722717e-08, + "logits/chosen": -1.7961808443069458, + "logits/rejected": -1.857726812362671, + "logps/chosen": -303.2594909667969, + "logps/rejected": -393.487060546875, + "loss": 0.163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5632110834121704, + "rewards/margins": 1.131750464439392, + "rewards/rejected": 0.43146058917045593, + "step": 11124 + }, + { + "epoch": 0.65, + "learning_rate": 2.9211326867227924e-08, + "logits/chosen": -1.7750133275985718, + "logits/rejected": -1.7889214754104614, + "logps/chosen": -263.24072265625, + "logps/rejected": -561.5535888671875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2733185291290283, + "rewards/margins": 7.445675849914551, + "rewards/rejected": -5.172357082366943, + "step": 11125 + }, + { + "epoch": 0.65, + "learning_rate": 2.9202756386248488e-08, + "logits/chosen": -1.842896819114685, + "logits/rejected": -1.8502507209777832, + "logps/chosen": -328.1822814941406, + "logps/rejected": -362.29803466796875, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9261382818222046, + "rewards/margins": 3.809612989425659, + "rewards/rejected": -1.8834747076034546, + "step": 11126 + }, + { + "epoch": 0.65, + "learning_rate": 2.9194186644088875e-08, + "logits/chosen": -2.0178492069244385, + "logits/rejected": -2.016059398651123, + "logps/chosen": -11.174121856689453, + "logps/rejected": -320.11993408203125, + "loss": 0.2381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3961125314235687, + "rewards/margins": 5.483817100524902, + "rewards/rejected": -5.087704658508301, + "step": 11127 + }, + { + "epoch": 0.65, + "learning_rate": 2.9185617641053534e-08, + "logits/chosen": -1.9587477445602417, + "logits/rejected": -1.9581996202468872, + "logps/chosen": -59.976654052734375, + "logps/rejected": -221.189453125, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2282203733921051, + "rewards/margins": 2.0997445583343506, + "rewards/rejected": -1.8715240955352783, + "step": 11128 + }, + { + "epoch": 0.65, + "learning_rate": 2.917704937744686e-08, + "logits/chosen": -1.7438488006591797, + "logits/rejected": -1.745915174484253, + "logps/chosen": -190.16822814941406, + "logps/rejected": -415.57476806640625, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9198105335235596, + "rewards/margins": 4.0941572189331055, + "rewards/rejected": -2.174346923828125, + "step": 11129 + }, + { + "epoch": 0.65, + "learning_rate": 2.9168481853573247e-08, + "logits/chosen": -1.9521442651748657, + "logits/rejected": -1.9905885457992554, + "logps/chosen": -263.4317626953125, + "logps/rejected": -347.3118591308594, + "loss": 0.2409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.420571893453598, + "rewards/margins": 0.791485607624054, + "rewards/rejected": -0.37091371417045593, + "step": 11130 + }, + { + "epoch": 0.65, + "learning_rate": 2.915991506973705e-08, + "logits/chosen": -1.8977227210998535, + "logits/rejected": -1.8900209665298462, + "logps/chosen": -26.91574478149414, + "logps/rejected": -196.9640350341797, + "loss": 0.365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16577492654323578, + "rewards/margins": 1.4505252838134766, + "rewards/rejected": -1.2847503423690796, + "step": 11131 + }, + { + "epoch": 0.65, + "learning_rate": 2.9151349026242632e-08, + "logits/chosen": -2.10823917388916, + "logits/rejected": -2.1020805835723877, + "logps/chosen": -0.10556040704250336, + "logps/rejected": -179.99681091308594, + "loss": 0.486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003835945622995496, + "rewards/margins": 1.2069505453109741, + "rewards/rejected": -1.2107864618301392, + "step": 11132 + }, + { + "epoch": 0.65, + "learning_rate": 2.9142783723394266e-08, + "logits/chosen": -1.9815678596496582, + "logits/rejected": -1.9496947526931763, + "logps/chosen": -176.4925079345703, + "logps/rejected": -404.0148010253906, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0327072143554688, + "rewards/margins": 5.357222080230713, + "rewards/rejected": -2.324514865875244, + "step": 11133 + }, + { + "epoch": 0.65, + "learning_rate": 2.9134219161496243e-08, + "logits/chosen": -1.973617672920227, + "logits/rejected": -1.9490857124328613, + "logps/chosen": -156.27783203125, + "logps/rejected": -400.45440673828125, + "loss": 0.7234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.216761827468872, + "rewards/margins": 6.268424987792969, + "rewards/rejected": -7.48518705368042, + "step": 11134 + }, + { + "epoch": 0.65, + "learning_rate": 2.9125655340852816e-08, + "logits/chosen": -2.0278706550598145, + "logits/rejected": -2.036060333251953, + "logps/chosen": -194.48304748535156, + "logps/rejected": -341.48236083984375, + "loss": 0.151, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1583603620529175, + "rewards/margins": 1.7758255004882812, + "rewards/rejected": -0.6174651980400085, + "step": 11135 + }, + { + "epoch": 0.65, + "learning_rate": 2.9117092261768245e-08, + "logits/chosen": -1.9446619749069214, + "logits/rejected": -1.9465875625610352, + "logps/chosen": -41.221832275390625, + "logps/rejected": -234.0088348388672, + "loss": 0.1622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8259197473526001, + "rewards/margins": 5.406942367553711, + "rewards/rejected": -4.5810227394104, + "step": 11136 + }, + { + "epoch": 0.65, + "learning_rate": 2.910852992454668e-08, + "logits/chosen": -1.9021177291870117, + "logits/rejected": -1.8594146966934204, + "logps/chosen": -244.475341796875, + "logps/rejected": -354.14788818359375, + "loss": 0.1076, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4026947021484375, + "rewards/margins": 1.7772216796875, + "rewards/rejected": 0.6254730224609375, + "step": 11137 + }, + { + "epoch": 0.65, + "learning_rate": 2.909996832949233e-08, + "logits/chosen": -2.061877489089966, + "logits/rejected": -2.051349639892578, + "logps/chosen": -20.483238220214844, + "logps/rejected": -277.7000732421875, + "loss": 0.3774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08143406361341476, + "rewards/margins": 1.9259634017944336, + "rewards/rejected": -2.007397413253784, + "step": 11138 + }, + { + "epoch": 0.65, + "learning_rate": 2.9091407476909334e-08, + "logits/chosen": -1.68794584274292, + "logits/rejected": -1.7284860610961914, + "logps/chosen": -257.03814697265625, + "logps/rejected": -323.39886474609375, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.669030785560608, + "rewards/margins": 0.6025817394256592, + "rewards/rejected": 1.0664490461349487, + "step": 11139 + }, + { + "epoch": 0.65, + "learning_rate": 2.9082847367101837e-08, + "logits/chosen": -1.700840711593628, + "logits/rejected": -1.6686735153198242, + "logps/chosen": -365.26849365234375, + "logps/rejected": -460.30572509765625, + "loss": 0.2471, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.095080614089966, + "rewards/margins": 0.5388001203536987, + "rewards/rejected": 1.556280493736267, + "step": 11140 + }, + { + "epoch": 0.65, + "learning_rate": 2.9074288000373904e-08, + "logits/chosen": -1.9619879722595215, + "logits/rejected": -2.013683795928955, + "logps/chosen": -193.1782989501953, + "logps/rejected": -198.0211181640625, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7345718145370483, + "rewards/margins": 0.7731048464775085, + "rewards/rejected": 0.9614669680595398, + "step": 11141 + }, + { + "epoch": 0.65, + "learning_rate": 2.906572937702961e-08, + "logits/chosen": -1.884448766708374, + "logits/rejected": -1.911733627319336, + "logps/chosen": -265.83966064453125, + "logps/rejected": -410.8002624511719, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.948638916015625, + "rewards/margins": 3.9958527088165283, + "rewards/rejected": -2.0472137928009033, + "step": 11142 + }, + { + "epoch": 0.65, + "learning_rate": 2.9057171497373023e-08, + "logits/chosen": -2.192281484603882, + "logits/rejected": -2.1769731044769287, + "logps/chosen": -51.12413024902344, + "logps/rejected": -278.3826599121094, + "loss": 0.1392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9848350882530212, + "rewards/margins": 3.926894426345825, + "rewards/rejected": -2.942059278488159, + "step": 11143 + }, + { + "epoch": 0.65, + "learning_rate": 2.9048614361708156e-08, + "logits/chosen": -1.9659639596939087, + "logits/rejected": -1.959189772605896, + "logps/chosen": -11.931988716125488, + "logps/rejected": -113.02251434326172, + "loss": 0.4356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2907090187072754, + "rewards/margins": 0.5141243934631348, + "rewards/rejected": -0.22341537475585938, + "step": 11144 + }, + { + "epoch": 0.65, + "learning_rate": 2.9040057970338983e-08, + "logits/chosen": -1.7685199975967407, + "logits/rejected": -1.7554329633712769, + "logps/chosen": -230.56747436523438, + "logps/rejected": -303.83978271484375, + "loss": 0.156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.027691602706909, + "rewards/margins": 1.0626952648162842, + "rewards/rejected": 0.964996337890625, + "step": 11145 + }, + { + "epoch": 0.65, + "learning_rate": 2.903150232356947e-08, + "logits/chosen": -1.7606935501098633, + "logits/rejected": -1.728481411933899, + "logps/chosen": -285.50042724609375, + "logps/rejected": -349.9294738769531, + "loss": 0.0623, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.526159644126892, + "rewards/margins": 2.5223846435546875, + "rewards/rejected": -0.9962249994277954, + "step": 11146 + }, + { + "epoch": 0.65, + "learning_rate": 2.902294742170356e-08, + "logits/chosen": -1.7888728380203247, + "logits/rejected": -1.8155174255371094, + "logps/chosen": -275.33740234375, + "logps/rejected": -353.204345703125, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2274110317230225, + "rewards/margins": 5.827117919921875, + "rewards/rejected": -3.5997071266174316, + "step": 11147 + }, + { + "epoch": 0.65, + "learning_rate": 2.9014393265045166e-08, + "logits/chosen": -1.884606957435608, + "logits/rejected": -1.8868852853775024, + "logps/chosen": -8.431266784667969, + "logps/rejected": -175.49713134765625, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029873276129364967, + "rewards/margins": 3.263455629348755, + "rewards/rejected": -3.2933290004730225, + "step": 11148 + }, + { + "epoch": 0.65, + "learning_rate": 2.9005839853898202e-08, + "logits/chosen": -2.0301785469055176, + "logits/rejected": -2.0255496501922607, + "logps/chosen": -39.712440490722656, + "logps/rejected": -275.0850524902344, + "loss": 0.1621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9081177115440369, + "rewards/margins": 4.5474653244018555, + "rewards/rejected": -3.639347791671753, + "step": 11149 + }, + { + "epoch": 0.65, + "learning_rate": 2.8997287188566455e-08, + "logits/chosen": -1.9433361291885376, + "logits/rejected": -1.9533385038375854, + "logps/chosen": -76.40388488769531, + "logps/rejected": -201.5714569091797, + "loss": 0.5015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3514717221260071, + "rewards/margins": 0.3656044006347656, + "rewards/rejected": -0.014132690615952015, + "step": 11150 + }, + { + "epoch": 0.65, + "learning_rate": 2.898873526935384e-08, + "logits/chosen": -2.1879913806915283, + "logits/rejected": -2.170375108718872, + "logps/chosen": -41.23821258544922, + "logps/rejected": -460.80548095703125, + "loss": 0.1151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7448230981826782, + "rewards/margins": 6.4529008865356445, + "rewards/rejected": -5.708077907562256, + "step": 11151 + }, + { + "epoch": 0.65, + "learning_rate": 2.898018409656411e-08, + "logits/chosen": -2.0648481845855713, + "logits/rejected": -2.0491819381713867, + "logps/chosen": -24.529117584228516, + "logps/rejected": -160.35415649414062, + "loss": 0.1798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5165451169013977, + "rewards/margins": 3.932880401611328, + "rewards/rejected": -3.416335344314575, + "step": 11152 + }, + { + "epoch": 0.65, + "learning_rate": 2.8971633670501062e-08, + "logits/chosen": -1.8492118120193481, + "logits/rejected": -1.8555744886398315, + "logps/chosen": -101.14790344238281, + "logps/rejected": -352.6685485839844, + "loss": 0.1484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5070754885673523, + "rewards/margins": 2.7318711280822754, + "rewards/rejected": -2.2247955799102783, + "step": 11153 + }, + { + "epoch": 0.65, + "learning_rate": 2.896308399146844e-08, + "logits/chosen": -1.9389941692352295, + "logits/rejected": -1.9413843154907227, + "logps/chosen": -158.41604614257812, + "logps/rejected": -323.41375732421875, + "loss": 0.1025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.927203416824341, + "rewards/margins": 1.613616943359375, + "rewards/rejected": 1.3135864734649658, + "step": 11154 + }, + { + "epoch": 0.65, + "learning_rate": 2.895453505977e-08, + "logits/chosen": -1.8306411504745483, + "logits/rejected": -1.8357137441635132, + "logps/chosen": -0.023720940575003624, + "logps/rejected": -133.61361694335938, + "loss": 0.3664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00658576563000679, + "rewards/margins": 2.8178727626800537, + "rewards/rejected": -2.8112869262695312, + "step": 11155 + }, + { + "epoch": 0.65, + "learning_rate": 2.8945986875709405e-08, + "logits/chosen": -1.8080381155014038, + "logits/rejected": -1.8176254034042358, + "logps/chosen": -64.20825958251953, + "logps/rejected": -209.04461669921875, + "loss": 1.182, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4089241027832031, + "rewards/margins": -1.8932030200958252, + "rewards/rejected": 2.3021271228790283, + "step": 11156 + }, + { + "epoch": 0.65, + "learning_rate": 2.8937439439590337e-08, + "logits/chosen": -1.9374364614486694, + "logits/rejected": -1.9333841800689697, + "logps/chosen": -18.591896057128906, + "logps/rejected": -152.28684997558594, + "loss": 0.2767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25422993302345276, + "rewards/margins": 3.351928949356079, + "rewards/rejected": -3.097698926925659, + "step": 11157 + }, + { + "epoch": 0.65, + "learning_rate": 2.8928892751716448e-08, + "logits/chosen": -1.7938486337661743, + "logits/rejected": -1.7679835557937622, + "logps/chosen": -158.84432983398438, + "logps/rejected": -248.5330352783203, + "loss": 0.1045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7724578380584717, + "rewards/margins": 1.7127089500427246, + "rewards/rejected": 1.059748888015747, + "step": 11158 + }, + { + "epoch": 0.65, + "learning_rate": 2.8920346812391382e-08, + "logits/chosen": -1.9919308423995972, + "logits/rejected": -1.9967442750930786, + "logps/chosen": -154.50311279296875, + "logps/rejected": -252.80899047851562, + "loss": 0.1238, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5113937854766846, + "rewards/margins": 1.725422739982605, + "rewards/rejected": -0.21402893960475922, + "step": 11159 + }, + { + "epoch": 0.65, + "learning_rate": 2.8911801621918686e-08, + "logits/chosen": -2.115386962890625, + "logits/rejected": -2.1392087936401367, + "logps/chosen": -184.6500701904297, + "logps/rejected": -163.7837677001953, + "loss": 0.4901, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.297235131263733, + "rewards/margins": -0.34421849250793457, + "rewards/rejected": 1.6414536237716675, + "step": 11160 + }, + { + "epoch": 0.65, + "learning_rate": 2.8903257180601946e-08, + "logits/chosen": -1.8918691873550415, + "logits/rejected": -1.8967499732971191, + "logps/chosen": -0.00790327601134777, + "logps/rejected": -299.6871643066406, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002087519533233717, + "rewards/margins": 5.838808059692383, + "rewards/rejected": -5.839016914367676, + "step": 11161 + }, + { + "epoch": 0.65, + "learning_rate": 2.8894713488744717e-08, + "logits/chosen": -1.9872971773147583, + "logits/rejected": -1.9690757989883423, + "logps/chosen": -65.51298522949219, + "logps/rejected": -313.6690673828125, + "loss": 0.1717, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.166754961013794, + "rewards/margins": 2.1415016651153564, + "rewards/rejected": -0.9747467041015625, + "step": 11162 + }, + { + "epoch": 0.65, + "learning_rate": 2.8886170546650513e-08, + "logits/chosen": -2.1324808597564697, + "logits/rejected": -2.120708465576172, + "logps/chosen": -13.340461730957031, + "logps/rejected": -188.29515075683594, + "loss": 0.4517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23764429986476898, + "rewards/margins": 1.8340827226638794, + "rewards/rejected": -2.0717270374298096, + "step": 11163 + }, + { + "epoch": 0.65, + "learning_rate": 2.887762835462279e-08, + "logits/chosen": -1.9109300374984741, + "logits/rejected": -1.9017592668533325, + "logps/chosen": -21.151887893676758, + "logps/rejected": -262.1882629394531, + "loss": 0.2122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5260378122329712, + "rewards/margins": 3.402916431427002, + "rewards/rejected": -2.876878499984741, + "step": 11164 + }, + { + "epoch": 0.65, + "learning_rate": 2.8869086912965035e-08, + "logits/chosen": -1.9760518074035645, + "logits/rejected": -1.9674196243286133, + "logps/chosen": -37.64339828491211, + "logps/rejected": -166.7532958984375, + "loss": 0.9047, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.16244927048683167, + "rewards/margins": -0.7954663038253784, + "rewards/rejected": 0.6330170035362244, + "step": 11165 + }, + { + "epoch": 0.65, + "learning_rate": 2.886054622198067e-08, + "logits/chosen": -1.7647093534469604, + "logits/rejected": -1.8053700923919678, + "logps/chosen": -267.3628845214844, + "logps/rejected": -359.9768981933594, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9167754650115967, + "rewards/margins": 3.422442674636841, + "rewards/rejected": -0.5056671500205994, + "step": 11166 + }, + { + "epoch": 0.65, + "learning_rate": 2.885200628197313e-08, + "logits/chosen": -1.7780041694641113, + "logits/rejected": -1.7749855518341064, + "logps/chosen": -34.86798858642578, + "logps/rejected": -203.69314575195312, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.206199288368225, + "rewards/margins": 0.9200847744941711, + "rewards/rejected": 0.28611451387405396, + "step": 11167 + }, + { + "epoch": 0.65, + "learning_rate": 2.884346709324575e-08, + "logits/chosen": -2.071549415588379, + "logits/rejected": -2.0795090198516846, + "logps/chosen": -45.232032775878906, + "logps/rejected": -54.87949752807617, + "loss": 1.4781, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0949829816818237, + "rewards/margins": -1.3155910968780518, + "rewards/rejected": 0.22060814499855042, + "step": 11168 + }, + { + "epoch": 0.65, + "learning_rate": 2.8834928656101887e-08, + "logits/chosen": -1.8334823846817017, + "logits/rejected": -1.8259209394454956, + "logps/chosen": -26.88625717163086, + "logps/rejected": -208.9332275390625, + "loss": 0.1076, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5464527606964111, + "rewards/margins": 3.2685046195983887, + "rewards/rejected": -1.722051978111267, + "step": 11169 + }, + { + "epoch": 0.65, + "learning_rate": 2.8826390970844926e-08, + "logits/chosen": -2.0394701957702637, + "logits/rejected": -2.003831386566162, + "logps/chosen": -341.4258728027344, + "logps/rejected": -645.1826171875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.310537815093994, + "rewards/margins": 5.551767349243164, + "rewards/rejected": -2.241229295730591, + "step": 11170 + }, + { + "epoch": 0.65, + "learning_rate": 2.8817854037778116e-08, + "logits/chosen": -1.8125550746917725, + "logits/rejected": -1.8009835481643677, + "logps/chosen": -86.49113464355469, + "logps/rejected": -350.1796875, + "loss": 0.1825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.77608722448349, + "rewards/margins": 2.3041725158691406, + "rewards/rejected": -1.5280853509902954, + "step": 11171 + }, + { + "epoch": 0.65, + "learning_rate": 2.880931785720474e-08, + "logits/chosen": -2.046459913253784, + "logits/rejected": -2.078612804412842, + "logps/chosen": -273.348388671875, + "logps/rejected": -379.4532775878906, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5558350086212158, + "rewards/margins": 5.123449802398682, + "rewards/rejected": -3.567614793777466, + "step": 11172 + }, + { + "epoch": 0.65, + "learning_rate": 2.880078242942805e-08, + "logits/chosen": -1.974794864654541, + "logits/rejected": -1.985605239868164, + "logps/chosen": -58.27581787109375, + "logps/rejected": -135.0547332763672, + "loss": 0.4949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2800705134868622, + "rewards/margins": 0.4349762201309204, + "rewards/rejected": -0.15490570664405823, + "step": 11173 + }, + { + "epoch": 0.65, + "learning_rate": 2.8792247754751282e-08, + "logits/chosen": -1.9163665771484375, + "logits/rejected": -1.9000253677368164, + "logps/chosen": -269.01385498046875, + "logps/rejected": -487.14813232421875, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.64750075340271, + "rewards/margins": 4.121203899383545, + "rewards/rejected": -1.4737030267715454, + "step": 11174 + }, + { + "epoch": 0.65, + "learning_rate": 2.8783713833477597e-08, + "logits/chosen": -1.863635540008545, + "logits/rejected": -1.8093478679656982, + "logps/chosen": -245.925537109375, + "logps/rejected": -484.4698181152344, + "loss": 0.1028, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.371572971343994, + "rewards/margins": 2.0503053665161133, + "rewards/rejected": 0.321267694234848, + "step": 11175 + }, + { + "epoch": 0.65, + "learning_rate": 2.877518066591017e-08, + "logits/chosen": -1.8338497877120972, + "logits/rejected": -1.8460206985473633, + "logps/chosen": -141.75172424316406, + "logps/rejected": -316.7242126464844, + "loss": 0.0722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4654220342636108, + "rewards/margins": 2.4431350231170654, + "rewards/rejected": -0.9777130484580994, + "step": 11176 + }, + { + "epoch": 0.65, + "learning_rate": 2.8766648252352148e-08, + "logits/chosen": -1.7313334941864014, + "logits/rejected": -1.7326338291168213, + "logps/chosen": -5.965658187866211, + "logps/rejected": -100.23973083496094, + "loss": 0.507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02803020551800728, + "rewards/margins": 0.8488740921020508, + "rewards/rejected": -0.876904308795929, + "step": 11177 + }, + { + "epoch": 0.65, + "learning_rate": 2.875811659310667e-08, + "logits/chosen": -1.9483826160430908, + "logits/rejected": -1.950282096862793, + "logps/chosen": -8.733125686645508, + "logps/rejected": -93.63325500488281, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23790302872657776, + "rewards/margins": 0.7768579721450806, + "rewards/rejected": -0.5389549136161804, + "step": 11178 + }, + { + "epoch": 0.65, + "learning_rate": 2.874958568847677e-08, + "logits/chosen": -1.987339973449707, + "logits/rejected": -1.9761942625045776, + "logps/chosen": -0.11404532939195633, + "logps/rejected": -350.4268798828125, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004003678914159536, + "rewards/margins": 5.110503673553467, + "rewards/rejected": -5.106500148773193, + "step": 11179 + }, + { + "epoch": 0.65, + "learning_rate": 2.8741055538765537e-08, + "logits/chosen": -1.982951045036316, + "logits/rejected": -1.9687812328338623, + "logps/chosen": -0.0032979948446154594, + "logps/rejected": -260.92138671875, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001041249415720813, + "rewards/margins": 3.6709697246551514, + "rewards/rejected": -3.6710739135742188, + "step": 11180 + }, + { + "epoch": 0.65, + "learning_rate": 2.8732526144275993e-08, + "logits/chosen": -1.9945272207260132, + "logits/rejected": -1.9969784021377563, + "logps/chosen": -17.305557250976562, + "logps/rejected": -158.14385986328125, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20348358154296875, + "rewards/margins": 2.320117235183716, + "rewards/rejected": -2.116633653640747, + "step": 11181 + }, + { + "epoch": 0.65, + "learning_rate": 2.872399750531118e-08, + "logits/chosen": -1.933323860168457, + "logits/rejected": -1.9758601188659668, + "logps/chosen": -167.42990112304688, + "logps/rejected": -373.26324462890625, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.994268774986267, + "rewards/margins": 3.0304627418518066, + "rewards/rejected": -1.03619384765625, + "step": 11182 + }, + { + "epoch": 0.65, + "learning_rate": 2.8715469622174026e-08, + "logits/chosen": -2.0138511657714844, + "logits/rejected": -2.016847848892212, + "logps/chosen": -2.776477098464966, + "logps/rejected": -126.7641830444336, + "loss": 0.4428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014819598756730556, + "rewards/margins": 1.092084288597107, + "rewards/rejected": -1.1069039106369019, + "step": 11183 + }, + { + "epoch": 0.65, + "learning_rate": 2.8706942495167495e-08, + "logits/chosen": -1.874786138534546, + "logits/rejected": -1.8699216842651367, + "logps/chosen": -20.37118148803711, + "logps/rejected": -144.31492614746094, + "loss": 0.4025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4684768617153168, + "rewards/margins": 0.9964233636856079, + "rewards/rejected": -0.5279464721679688, + "step": 11184 + }, + { + "epoch": 0.65, + "learning_rate": 2.8698416124594537e-08, + "logits/chosen": -1.9687764644622803, + "logits/rejected": -1.9706734418869019, + "logps/chosen": -141.58871459960938, + "logps/rejected": -282.31304931640625, + "loss": 0.5568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4643264710903168, + "rewards/margins": 1.0314620733261108, + "rewards/rejected": -1.49578857421875, + "step": 11185 + }, + { + "epoch": 0.65, + "learning_rate": 2.8689890510758047e-08, + "logits/chosen": -2.002793073654175, + "logits/rejected": -1.9972461462020874, + "logps/chosen": -27.774988174438477, + "logps/rejected": -58.86100387573242, + "loss": 0.4201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6573762893676758, + "rewards/margins": 0.44581127166748047, + "rewards/rejected": 0.2115650177001953, + "step": 11186 + }, + { + "epoch": 0.65, + "learning_rate": 2.8681365653960864e-08, + "logits/chosen": -2.0129263401031494, + "logits/rejected": -1.9899213314056396, + "logps/chosen": -38.45771026611328, + "logps/rejected": -467.6667785644531, + "loss": 0.2057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6683349609375, + "rewards/margins": 9.48923397064209, + "rewards/rejected": -8.82089900970459, + "step": 11187 + }, + { + "epoch": 0.65, + "learning_rate": 2.8672841554505838e-08, + "logits/chosen": -1.8151649236679077, + "logits/rejected": -1.799859881401062, + "logps/chosen": -87.76905822753906, + "logps/rejected": -239.308349609375, + "loss": 0.1352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7880752682685852, + "rewards/margins": 3.873945713043213, + "rewards/rejected": -3.0858705043792725, + "step": 11188 + }, + { + "epoch": 0.65, + "learning_rate": 2.8664318212695836e-08, + "logits/chosen": -1.9639075994491577, + "logits/rejected": -1.9563357830047607, + "logps/chosen": -73.3453140258789, + "logps/rejected": -124.19306182861328, + "loss": 0.2529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.970868706703186, + "rewards/margins": 1.2001746892929077, + "rewards/rejected": -0.22930602729320526, + "step": 11189 + }, + { + "epoch": 0.65, + "learning_rate": 2.86557956288336e-08, + "logits/chosen": -1.9260292053222656, + "logits/rejected": -1.9238638877868652, + "logps/chosen": -23.837751388549805, + "logps/rejected": -286.8775634765625, + "loss": 0.1833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6076276898384094, + "rewards/margins": 4.0918869972229, + "rewards/rejected": -3.4842591285705566, + "step": 11190 + }, + { + "epoch": 0.65, + "learning_rate": 2.8647273803221934e-08, + "logits/chosen": -1.8985544443130493, + "logits/rejected": -1.8807337284088135, + "logps/chosen": -188.1525115966797, + "logps/rejected": -337.82586669921875, + "loss": 0.227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.998133897781372, + "rewards/margins": 0.96275794506073, + "rewards/rejected": 1.035375952720642, + "step": 11191 + }, + { + "epoch": 0.65, + "learning_rate": 2.8638752736163507e-08, + "logits/chosen": -1.960055947303772, + "logits/rejected": -1.9589662551879883, + "logps/chosen": -178.91941833496094, + "logps/rejected": -206.78128051757812, + "loss": 0.2488, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.028517246246338, + "rewards/margins": 0.6665207147598267, + "rewards/rejected": 1.3619965314865112, + "step": 11192 + }, + { + "epoch": 0.65, + "learning_rate": 2.863023242796111e-08, + "logits/chosen": -1.8648428916931152, + "logits/rejected": -1.8574304580688477, + "logps/chosen": -0.007884383201599121, + "logps/rejected": -357.2061767578125, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009022084064781666, + "rewards/margins": 5.858341693878174, + "rewards/rejected": -5.8493194580078125, + "step": 11193 + }, + { + "epoch": 0.65, + "learning_rate": 2.8621712878917366e-08, + "logits/chosen": -1.940081000328064, + "logits/rejected": -1.9383280277252197, + "logps/chosen": -9.581849098205566, + "logps/rejected": -188.49005126953125, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23619966208934784, + "rewards/margins": 4.610684394836426, + "rewards/rejected": -4.374484539031982, + "step": 11194 + }, + { + "epoch": 0.65, + "learning_rate": 2.861319408933496e-08, + "logits/chosen": -1.8183956146240234, + "logits/rejected": -1.8140217065811157, + "logps/chosen": -10.324273109436035, + "logps/rejected": -132.32626342773438, + "loss": 0.4413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7050139307975769, + "rewards/margins": 0.2672148644924164, + "rewards/rejected": 0.4377990663051605, + "step": 11195 + }, + { + "epoch": 0.65, + "learning_rate": 2.8604676059516502e-08, + "logits/chosen": -2.0774471759796143, + "logits/rejected": -2.070920944213867, + "logps/chosen": -60.24470520019531, + "logps/rejected": -320.6645202636719, + "loss": 0.239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039524078369140625, + "rewards/margins": 3.210285186767578, + "rewards/rejected": -3.1707611083984375, + "step": 11196 + }, + { + "epoch": 0.65, + "learning_rate": 2.8596158789764634e-08, + "logits/chosen": -1.883236050605774, + "logits/rejected": -1.9198379516601562, + "logps/chosen": -231.80601501464844, + "logps/rejected": -327.3568115234375, + "loss": 0.0746, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.108097791671753, + "rewards/margins": 2.0481154918670654, + "rewards/rejected": 1.0599822998046875, + "step": 11197 + }, + { + "epoch": 0.65, + "learning_rate": 2.8587642280381884e-08, + "logits/chosen": -1.8515102863311768, + "logits/rejected": -1.8847633600234985, + "logps/chosen": -179.5762481689453, + "logps/rejected": -323.270751953125, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.240635633468628, + "rewards/margins": 2.3330185413360596, + "rewards/rejected": -0.09238281100988388, + "step": 11198 + }, + { + "epoch": 0.65, + "learning_rate": 2.8579126531670816e-08, + "logits/chosen": -1.8368092775344849, + "logits/rejected": -1.8819555044174194, + "logps/chosen": -208.20394897460938, + "logps/rejected": -406.28228759765625, + "loss": 0.0613, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.219081163406372, + "rewards/margins": 2.7749741077423096, + "rewards/rejected": -1.5558929443359375, + "step": 11199 + }, + { + "epoch": 0.65, + "learning_rate": 2.857061154393396e-08, + "logits/chosen": -1.8927193880081177, + "logits/rejected": -1.8858757019042969, + "logps/chosen": -257.4571228027344, + "logps/rejected": -471.67132568359375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.338400363922119, + "rewards/margins": 4.929678440093994, + "rewards/rejected": -1.591278076171875, + "step": 11200 + }, + { + "epoch": 0.65, + "learning_rate": 2.856209731747382e-08, + "logits/chosen": -1.9228776693344116, + "logits/rejected": -1.9762288331985474, + "logps/chosen": -185.9462890625, + "logps/rejected": -450.29248046875, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.151933431625366, + "rewards/margins": 8.642573356628418, + "rewards/rejected": -6.490640163421631, + "step": 11201 + }, + { + "epoch": 0.65, + "learning_rate": 2.8553583852592827e-08, + "logits/chosen": -1.9311017990112305, + "logits/rejected": -1.9314072132110596, + "logps/chosen": -87.18424224853516, + "logps/rejected": -187.04464721679688, + "loss": 0.3905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5739585757255554, + "rewards/margins": 0.582158625125885, + "rewards/rejected": -0.00820007361471653, + "step": 11202 + }, + { + "epoch": 0.65, + "learning_rate": 2.854507114959344e-08, + "logits/chosen": -1.9512933492660522, + "logits/rejected": -1.9466980695724487, + "logps/chosen": -11.176365852355957, + "logps/rejected": -128.5538787841797, + "loss": 0.491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13153038918972015, + "rewards/margins": 0.811826765537262, + "rewards/rejected": -0.6802963614463806, + "step": 11203 + }, + { + "epoch": 0.65, + "learning_rate": 2.8536559208778078e-08, + "logits/chosen": -2.0805740356445312, + "logits/rejected": -2.0717129707336426, + "logps/chosen": -0.0003313844499643892, + "logps/rejected": -153.1206817626953, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.232521935889963e-05, + "rewards/margins": 4.3725666999816895, + "rewards/rejected": -4.372579097747803, + "step": 11204 + }, + { + "epoch": 0.65, + "learning_rate": 2.8528048030449138e-08, + "logits/chosen": -1.7775384187698364, + "logits/rejected": -1.7870166301727295, + "logps/chosen": -230.7244873046875, + "logps/rejected": -432.4524230957031, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0276641845703125, + "rewards/margins": 5.615936279296875, + "rewards/rejected": -2.5882720947265625, + "step": 11205 + }, + { + "epoch": 0.65, + "learning_rate": 2.8519537614908946e-08, + "logits/chosen": -1.8486227989196777, + "logits/rejected": -1.8430891036987305, + "logps/chosen": -68.1390380859375, + "logps/rejected": -173.63162231445312, + "loss": 0.2372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7303856015205383, + "rewards/margins": 1.8442542552947998, + "rewards/rejected": -1.1138687133789062, + "step": 11206 + }, + { + "epoch": 0.65, + "learning_rate": 2.851102796245985e-08, + "logits/chosen": -1.968216896057129, + "logits/rejected": -1.9513553380966187, + "logps/chosen": -234.17617797851562, + "logps/rejected": -309.301513671875, + "loss": 0.2073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.807519555091858, + "rewards/margins": 0.7882232666015625, + "rewards/rejected": 1.0192962884902954, + "step": 11207 + }, + { + "epoch": 0.65, + "learning_rate": 2.8502519073404153e-08, + "logits/chosen": -2.028109073638916, + "logits/rejected": -2.031059741973877, + "logps/chosen": -6.293832302093506, + "logps/rejected": -171.7506866455078, + "loss": 0.3896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0077475071884691715, + "rewards/margins": 1.917356014251709, + "rewards/rejected": -1.9096084833145142, + "step": 11208 + }, + { + "epoch": 0.65, + "learning_rate": 2.8494010948044134e-08, + "logits/chosen": -1.635722041130066, + "logits/rejected": -1.657487392425537, + "logps/chosen": -240.91592407226562, + "logps/rejected": -280.1866760253906, + "loss": 0.1932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8941620588302612, + "rewards/margins": 1.3610780239105225, + "rewards/rejected": 0.5330840945243835, + "step": 11209 + }, + { + "epoch": 0.65, + "learning_rate": 2.848550358668206e-08, + "logits/chosen": -1.6584923267364502, + "logits/rejected": -1.664991021156311, + "logps/chosen": -171.25222778320312, + "logps/rejected": -318.3962097167969, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.669175863265991, + "rewards/margins": 3.6088883876800537, + "rewards/rejected": -0.9397125244140625, + "step": 11210 + }, + { + "epoch": 0.65, + "learning_rate": 2.8476996989620104e-08, + "logits/chosen": -1.9194810390472412, + "logits/rejected": -1.919177770614624, + "logps/chosen": -0.053508806973695755, + "logps/rejected": -235.35939025878906, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.496659185970202e-05, + "rewards/margins": 4.695542812347412, + "rewards/rejected": -4.6956377029418945, + "step": 11211 + }, + { + "epoch": 0.65, + "learning_rate": 2.8468491157160536e-08, + "logits/chosen": -1.7708975076675415, + "logits/rejected": -1.7227929830551147, + "logps/chosen": -241.14443969726562, + "logps/rejected": -283.1337890625, + "loss": 0.5351, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0459579229354858, + "rewards/margins": -0.06132364273071289, + "rewards/rejected": 1.1072815656661987, + "step": 11212 + }, + { + "epoch": 0.65, + "learning_rate": 2.845998608960546e-08, + "logits/chosen": -1.6679307222366333, + "logits/rejected": -1.6736705303192139, + "logps/chosen": -245.08180236816406, + "logps/rejected": -462.13995361328125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6575729846954346, + "rewards/margins": 5.898777961730957, + "rewards/rejected": -2.2412049770355225, + "step": 11213 + }, + { + "epoch": 0.65, + "learning_rate": 2.8451481787257037e-08, + "logits/chosen": -1.9641937017440796, + "logits/rejected": -1.9484301805496216, + "logps/chosen": -58.486968994140625, + "logps/rejected": -255.040283203125, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3512313961982727, + "rewards/margins": 2.3162522315979004, + "rewards/rejected": -1.965020775794983, + "step": 11214 + }, + { + "epoch": 0.65, + "learning_rate": 2.8442978250417395e-08, + "logits/chosen": -1.9626818895339966, + "logits/rejected": -1.9720473289489746, + "logps/chosen": -240.92398071289062, + "logps/rejected": -366.348388671875, + "loss": 0.1194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8639618158340454, + "rewards/margins": 1.9335510730743408, + "rewards/rejected": -0.06958923488855362, + "step": 11215 + }, + { + "epoch": 0.65, + "learning_rate": 2.8434475479388624e-08, + "logits/chosen": -1.6690374612808228, + "logits/rejected": -1.647408366203308, + "logps/chosen": -285.18206787109375, + "logps/rejected": -333.06524658203125, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.455975294113159, + "rewards/margins": 1.9477965831756592, + "rewards/rejected": 0.5081787109375, + "step": 11216 + }, + { + "epoch": 0.65, + "learning_rate": 2.842597347447276e-08, + "logits/chosen": -2.009986400604248, + "logits/rejected": -1.988012671470642, + "logps/chosen": -11.771273612976074, + "logps/rejected": -280.82098388671875, + "loss": 0.3939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0766880065202713, + "rewards/margins": 2.0599703788757324, + "rewards/rejected": -1.983282446861267, + "step": 11217 + }, + { + "epoch": 0.65, + "learning_rate": 2.841747223597184e-08, + "logits/chosen": -1.8166284561157227, + "logits/rejected": -1.8675532341003418, + "logps/chosen": -225.4310302734375, + "logps/rejected": -325.8370666503906, + "loss": 0.1202, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1881349086761475, + "rewards/margins": 1.43424391746521, + "rewards/rejected": 0.7538909912109375, + "step": 11218 + }, + { + "epoch": 0.65, + "learning_rate": 2.8408971764187884e-08, + "logits/chosen": -1.9788448810577393, + "logits/rejected": -1.9816527366638184, + "logps/chosen": -4.994696617126465, + "logps/rejected": -106.91252136230469, + "loss": 0.4666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18144471943378448, + "rewards/margins": 1.307843565940857, + "rewards/rejected": -1.489288330078125, + "step": 11219 + }, + { + "epoch": 0.65, + "learning_rate": 2.8400472059422886e-08, + "logits/chosen": -1.8174043893814087, + "logits/rejected": -1.8294124603271484, + "logps/chosen": -9.918006981024519e-05, + "logps/rejected": -187.1936492919922, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3840402718633413e-07, + "rewards/margins": 3.731881618499756, + "rewards/rejected": -3.731881856918335, + "step": 11220 + }, + { + "epoch": 0.65, + "learning_rate": 2.839197312197875e-08, + "logits/chosen": -2.07413911819458, + "logits/rejected": -2.067004919052124, + "logps/chosen": -12.955671310424805, + "logps/rejected": -100.374755859375, + "loss": 0.4174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18776722252368927, + "rewards/margins": 2.2086713314056396, + "rewards/rejected": -2.3964385986328125, + "step": 11221 + }, + { + "epoch": 0.65, + "learning_rate": 2.8383474952157428e-08, + "logits/chosen": -1.8773202896118164, + "logits/rejected": -1.8812568187713623, + "logps/chosen": -45.48878860473633, + "logps/rejected": -160.61456298828125, + "loss": 0.5148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1703346222639084, + "rewards/margins": 0.3075904846191406, + "rewards/rejected": -0.13725586235523224, + "step": 11222 + }, + { + "epoch": 0.65, + "learning_rate": 2.8374977550260816e-08, + "logits/chosen": -1.8088364601135254, + "logits/rejected": -1.8393226861953735, + "logps/chosen": -227.70797729492188, + "logps/rejected": -479.01611328125, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2653290033340454, + "rewards/margins": 5.365063667297363, + "rewards/rejected": -4.099734783172607, + "step": 11223 + }, + { + "epoch": 0.65, + "learning_rate": 2.8366480916590795e-08, + "logits/chosen": -2.035734176635742, + "logits/rejected": -2.018489360809326, + "logps/chosen": -147.3568115234375, + "logps/rejected": -235.33160400390625, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0339325666427612, + "rewards/margins": 0.9075363874435425, + "rewards/rejected": 0.12639617919921875, + "step": 11224 + }, + { + "epoch": 0.65, + "learning_rate": 2.8357985051449175e-08, + "logits/chosen": -1.8717864751815796, + "logits/rejected": -1.8719152212142944, + "logps/chosen": -0.20088423788547516, + "logps/rejected": -137.72740173339844, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015434570610523224, + "rewards/margins": 2.347487449645996, + "rewards/rejected": -2.362921953201294, + "step": 11225 + }, + { + "epoch": 0.65, + "learning_rate": 2.8349489955137785e-08, + "logits/chosen": -2.0762274265289307, + "logits/rejected": -2.0726001262664795, + "logps/chosen": -1.493488073348999, + "logps/rejected": -188.94989013671875, + "loss": 0.387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037550092674791813, + "rewards/margins": 2.4593491554260254, + "rewards/rejected": -2.463104248046875, + "step": 11226 + }, + { + "epoch": 0.65, + "learning_rate": 2.834099562795842e-08, + "logits/chosen": -1.6308437585830688, + "logits/rejected": -1.622504472732544, + "logps/chosen": -0.2645168900489807, + "logps/rejected": -154.0035858154297, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007937312475405633, + "rewards/margins": 2.804939031600952, + "rewards/rejected": -2.8057327270507812, + "step": 11227 + }, + { + "epoch": 0.65, + "learning_rate": 2.8332502070212854e-08, + "logits/chosen": -1.943395733833313, + "logits/rejected": -1.9295955896377563, + "logps/chosen": -103.83484649658203, + "logps/rejected": -205.1726531982422, + "loss": 0.4794, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.8072509765625, + "rewards/margins": -0.3372666835784912, + "rewards/rejected": 2.144517660140991, + "step": 11228 + }, + { + "epoch": 0.65, + "learning_rate": 2.8324009282202786e-08, + "logits/chosen": -1.9851264953613281, + "logits/rejected": -1.9749505519866943, + "logps/chosen": -32.78033447265625, + "logps/rejected": -144.5806427001953, + "loss": 0.4558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08363189548254013, + "rewards/margins": 1.2436127662658691, + "rewards/rejected": -1.3272446393966675, + "step": 11229 + }, + { + "epoch": 0.65, + "learning_rate": 2.8315517264229915e-08, + "logits/chosen": -2.0507421493530273, + "logits/rejected": -2.0444869995117188, + "logps/chosen": -3.291377067565918, + "logps/rejected": -285.5799255371094, + "loss": 0.3476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.183850958943367, + "rewards/margins": 1.9581795930862427, + "rewards/rejected": -1.774328589439392, + "step": 11230 + }, + { + "epoch": 0.65, + "learning_rate": 2.830702601659598e-08, + "logits/chosen": -1.9662259817123413, + "logits/rejected": -1.9366416931152344, + "logps/chosen": -122.35610961914062, + "logps/rejected": -533.652587890625, + "loss": 0.2634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.315530389547348, + "rewards/margins": 8.929465293884277, + "rewards/rejected": -8.613934516906738, + "step": 11231 + }, + { + "epoch": 0.65, + "learning_rate": 2.8298535539602575e-08, + "logits/chosen": -1.9667044878005981, + "logits/rejected": -1.9676599502563477, + "logps/chosen": -7.193872928619385, + "logps/rejected": -38.447425842285156, + "loss": 0.6831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1442553997039795, + "rewards/margins": 0.12770581245422363, + "rewards/rejected": -0.2719612121582031, + "step": 11232 + }, + { + "epoch": 0.65, + "learning_rate": 2.8290045833551334e-08, + "logits/chosen": -1.8486604690551758, + "logits/rejected": -1.8296732902526855, + "logps/chosen": -222.9658203125, + "logps/rejected": -324.6585693359375, + "loss": 0.1721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5601485967636108, + "rewards/margins": 1.229295253753662, + "rewards/rejected": 0.33085328340530396, + "step": 11233 + }, + { + "epoch": 0.65, + "learning_rate": 2.8281556898743864e-08, + "logits/chosen": -1.9030922651290894, + "logits/rejected": -1.9051767587661743, + "logps/chosen": -170.96669006347656, + "logps/rejected": -213.62246704101562, + "loss": 0.1967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5290329456329346, + "rewards/margins": 1.503498911857605, + "rewards/rejected": 0.02553405798971653, + "step": 11234 + }, + { + "epoch": 0.65, + "learning_rate": 2.8273068735481752e-08, + "logits/chosen": -1.7650377750396729, + "logits/rejected": -1.774054765701294, + "logps/chosen": -36.04869842529297, + "logps/rejected": -222.64468383789062, + "loss": 0.4959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.505584180355072, + "rewards/margins": 3.23068904876709, + "rewards/rejected": -3.7362732887268066, + "step": 11235 + }, + { + "epoch": 0.65, + "learning_rate": 2.8264581344066495e-08, + "logits/chosen": -1.7942144870758057, + "logits/rejected": -1.788145899772644, + "logps/chosen": -350.67535400390625, + "logps/rejected": -394.0729064941406, + "loss": 0.1606, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1976380348205566, + "rewards/margins": 1.213205099105835, + "rewards/rejected": 0.9844329953193665, + "step": 11236 + }, + { + "epoch": 0.65, + "learning_rate": 2.825609472479963e-08, + "logits/chosen": -1.901454210281372, + "logits/rejected": -1.9095196723937988, + "logps/chosen": -8.722689628601074, + "logps/rejected": -134.48275756835938, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4886713922023773, + "rewards/margins": 1.4117106199264526, + "rewards/rejected": -0.9230392575263977, + "step": 11237 + }, + { + "epoch": 0.65, + "learning_rate": 2.824760887798263e-08, + "logits/chosen": -1.9787328243255615, + "logits/rejected": -1.9862143993377686, + "logps/chosen": -151.21383666992188, + "logps/rejected": -296.0863952636719, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5465760231018066, + "rewards/margins": 2.5422608852386475, + "rewards/rejected": 0.004315185826271772, + "step": 11238 + }, + { + "epoch": 0.65, + "learning_rate": 2.8239123803916997e-08, + "logits/chosen": -2.023228406906128, + "logits/rejected": -2.001988410949707, + "logps/chosen": -37.833518981933594, + "logps/rejected": -233.7560272216797, + "loss": 0.2464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2458091825246811, + "rewards/margins": 3.12910532951355, + "rewards/rejected": -2.883296251296997, + "step": 11239 + }, + { + "epoch": 0.65, + "learning_rate": 2.8230639502904108e-08, + "logits/chosen": -1.7309308052062988, + "logits/rejected": -1.8002759218215942, + "logps/chosen": -301.02728271484375, + "logps/rejected": -341.1460876464844, + "loss": 0.0898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.428948998451233, + "rewards/margins": 1.8254609107971191, + "rewards/rejected": -0.39651185274124146, + "step": 11240 + }, + { + "epoch": 0.65, + "learning_rate": 2.822215597524539e-08, + "logits/chosen": -1.7833987474441528, + "logits/rejected": -1.766614317893982, + "logps/chosen": -142.10107421875, + "logps/rejected": -304.28350830078125, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4414551258087158, + "rewards/margins": 3.6025819778442383, + "rewards/rejected": -2.1611268520355225, + "step": 11241 + }, + { + "epoch": 0.65, + "learning_rate": 2.821367322124222e-08, + "logits/chosen": -1.9867569208145142, + "logits/rejected": -1.988287329673767, + "logps/chosen": -258.34515380859375, + "logps/rejected": -365.6771545410156, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.746103048324585, + "rewards/margins": 3.108996629714966, + "rewards/rejected": -0.362893670797348, + "step": 11242 + }, + { + "epoch": 0.65, + "learning_rate": 2.820519124119596e-08, + "logits/chosen": -1.9290757179260254, + "logits/rejected": -1.9316949844360352, + "logps/chosen": -0.47016245126724243, + "logps/rejected": -271.0989990234375, + "loss": 0.353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06468648463487625, + "rewards/margins": 3.1823318004608154, + "rewards/rejected": -3.117645263671875, + "step": 11243 + }, + { + "epoch": 0.65, + "learning_rate": 2.81967100354079e-08, + "logits/chosen": -1.931220293045044, + "logits/rejected": -1.8822929859161377, + "logps/chosen": -151.98858642578125, + "logps/rejected": -540.854736328125, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0713746547698975, + "rewards/margins": 8.621670722961426, + "rewards/rejected": -6.550296306610107, + "step": 11244 + }, + { + "epoch": 0.65, + "learning_rate": 2.8188229604179354e-08, + "logits/chosen": -1.9906622171401978, + "logits/rejected": -1.9891326427459717, + "logps/chosen": -11.34929084777832, + "logps/rejected": -67.7531509399414, + "loss": 0.4948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3939889967441559, + "rewards/margins": 0.5282472372055054, + "rewards/rejected": -0.13425827026367188, + "step": 11245 + }, + { + "epoch": 0.65, + "learning_rate": 2.817974994781158e-08, + "logits/chosen": -2.0029430389404297, + "logits/rejected": -2.0119495391845703, + "logps/chosen": -179.33401489257812, + "logps/rejected": -330.2816467285156, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2864320278167725, + "rewards/margins": 2.2902956008911133, + "rewards/rejected": 0.996136486530304, + "step": 11246 + }, + { + "epoch": 0.65, + "learning_rate": 2.8171271066605845e-08, + "logits/chosen": -2.116098642349243, + "logits/rejected": -2.114983320236206, + "logps/chosen": -52.63947677612305, + "logps/rejected": -112.35205078125, + "loss": 0.7511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9815120697021484, + "rewards/margins": 0.9672161340713501, + "rewards/rejected": -1.9487282037734985, + "step": 11247 + }, + { + "epoch": 0.65, + "learning_rate": 2.816279296086331e-08, + "logits/chosen": -1.9071348905563354, + "logits/rejected": -1.896093726158142, + "logps/chosen": -166.47288513183594, + "logps/rejected": -394.4141845703125, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5845627784729004, + "rewards/margins": 3.1776351928710938, + "rewards/rejected": -0.5930725336074829, + "step": 11248 + }, + { + "epoch": 0.65, + "learning_rate": 2.8154315630885184e-08, + "logits/chosen": -1.746404767036438, + "logits/rejected": -1.7416642904281616, + "logps/chosen": -21.50796890258789, + "logps/rejected": -168.79129028320312, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022562026977539062, + "rewards/margins": 1.0716472864151, + "rewards/rejected": -1.0942093133926392, + "step": 11249 + }, + { + "epoch": 0.65, + "learning_rate": 2.8145839076972654e-08, + "logits/chosen": -2.0481271743774414, + "logits/rejected": -2.0353407859802246, + "logps/chosen": -68.963623046875, + "logps/rejected": -180.04574584960938, + "loss": 0.3741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7132217288017273, + "rewards/margins": 0.6340743899345398, + "rewards/rejected": 0.0791473388671875, + "step": 11250 + }, + { + "epoch": 0.65, + "learning_rate": 2.8137363299426798e-08, + "logits/chosen": -1.7035884857177734, + "logits/rejected": -1.6926480531692505, + "logps/chosen": -429.83624267578125, + "logps/rejected": -520.1755981445312, + "loss": 0.1569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5846741199493408, + "rewards/margins": 1.2101197242736816, + "rewards/rejected": 0.37455445528030396, + "step": 11251 + }, + { + "epoch": 0.65, + "learning_rate": 2.8128888298548764e-08, + "logits/chosen": -1.642831563949585, + "logits/rejected": -1.6397373676300049, + "logps/chosen": -24.194072723388672, + "logps/rejected": -307.4005126953125, + "loss": 0.1274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1122432947158813, + "rewards/margins": 6.5291056632995605, + "rewards/rejected": -5.416862487792969, + "step": 11252 + }, + { + "epoch": 0.65, + "learning_rate": 2.812041407463956e-08, + "logits/chosen": -1.8803949356079102, + "logits/rejected": -1.9007714986801147, + "logps/chosen": -260.64111328125, + "logps/rejected": -537.9183959960938, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3963990211486816, + "rewards/margins": 11.254156112670898, + "rewards/rejected": -8.857757568359375, + "step": 11253 + }, + { + "epoch": 0.65, + "learning_rate": 2.8111940628000307e-08, + "logits/chosen": -1.7917343378067017, + "logits/rejected": -1.791526198387146, + "logps/chosen": -0.05338728055357933, + "logps/rejected": -90.66560363769531, + "loss": 0.6124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004447986837476492, + "rewards/margins": 0.24406582117080688, + "rewards/rejected": -0.24851380288600922, + "step": 11254 + }, + { + "epoch": 0.65, + "learning_rate": 2.8103467958931982e-08, + "logits/chosen": -1.8823707103729248, + "logits/rejected": -1.8809356689453125, + "logps/chosen": -169.3966064453125, + "logps/rejected": -290.430908203125, + "loss": 0.2355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3225173950195312, + "rewards/margins": 0.7594009041786194, + "rewards/rejected": 0.5631164908409119, + "step": 11255 + }, + { + "epoch": 0.66, + "learning_rate": 2.809499606773557e-08, + "logits/chosen": -2.025794506072998, + "logits/rejected": -2.011892557144165, + "logps/chosen": -82.22744750976562, + "logps/rejected": -112.82439422607422, + "loss": 0.6669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5656967163085938, + "rewards/margins": 0.861803412437439, + "rewards/rejected": -1.4275001287460327, + "step": 11256 + }, + { + "epoch": 0.66, + "learning_rate": 2.8086524954712053e-08, + "logits/chosen": -1.969512939453125, + "logits/rejected": -1.9678661823272705, + "logps/chosen": -216.22494506835938, + "logps/rejected": -454.4482421875, + "loss": 0.0914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.842761218547821, + "rewards/margins": 2.566540479660034, + "rewards/rejected": -1.723779320716858, + "step": 11257 + }, + { + "epoch": 0.66, + "learning_rate": 2.8078054620162378e-08, + "logits/chosen": -1.8909809589385986, + "logits/rejected": -1.9457290172576904, + "logps/chosen": -167.27964782714844, + "logps/rejected": -245.14727783203125, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8589675426483154, + "rewards/margins": 2.60884690284729, + "rewards/rejected": 0.2501205503940582, + "step": 11258 + }, + { + "epoch": 0.66, + "learning_rate": 2.8069585064387414e-08, + "logits/chosen": -2.011408805847168, + "logits/rejected": -1.9928686618804932, + "logps/chosen": -11.026315689086914, + "logps/rejected": -214.71368408203125, + "loss": 0.331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0721045508980751, + "rewards/margins": 4.4845404624938965, + "rewards/rejected": -4.412436008453369, + "step": 11259 + }, + { + "epoch": 0.66, + "learning_rate": 2.8061116287688063e-08, + "logits/chosen": -2.165205717086792, + "logits/rejected": -2.1466033458709717, + "logps/chosen": -13.802922248840332, + "logps/rejected": -143.86993408203125, + "loss": 0.3833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07568273693323135, + "rewards/margins": 2.0345053672790527, + "rewards/rejected": -1.9588226079940796, + "step": 11260 + }, + { + "epoch": 0.66, + "learning_rate": 2.8052648290365177e-08, + "logits/chosen": -1.8256696462631226, + "logits/rejected": -1.8335739374160767, + "logps/chosen": -0.06569767743349075, + "logps/rejected": -123.59405517578125, + "loss": 0.5297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00443396857008338, + "rewards/margins": 0.9345769882202148, + "rewards/rejected": -0.9301429986953735, + "step": 11261 + }, + { + "epoch": 0.66, + "learning_rate": 2.8044181072719596e-08, + "logits/chosen": -1.8061895370483398, + "logits/rejected": -1.8032605648040771, + "logps/chosen": -9.455385208129883, + "logps/rejected": -301.0050048828125, + "loss": 0.2181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4235067367553711, + "rewards/margins": 5.382339954376221, + "rewards/rejected": -4.95883321762085, + "step": 11262 + }, + { + "epoch": 0.66, + "learning_rate": 2.8035714635052088e-08, + "logits/chosen": -1.999852180480957, + "logits/rejected": -2.0068702697753906, + "logps/chosen": -12.721367835998535, + "logps/rejected": -127.73356628417969, + "loss": 0.4483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6284083724021912, + "rewards/margins": 0.4714534282684326, + "rewards/rejected": 0.15695495903491974, + "step": 11263 + }, + { + "epoch": 0.66, + "learning_rate": 2.802724897766343e-08, + "logits/chosen": -2.103017807006836, + "logits/rejected": -2.1024656295776367, + "logps/chosen": -26.340042114257812, + "logps/rejected": -164.09506225585938, + "loss": 0.3943, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0897441878914833, + "rewards/margins": 1.297935128211975, + "rewards/rejected": -1.20819091796875, + "step": 11264 + }, + { + "epoch": 0.66, + "learning_rate": 2.801878410085437e-08, + "logits/chosen": -1.9967834949493408, + "logits/rejected": -1.9897507429122925, + "logps/chosen": -57.20069122314453, + "logps/rejected": -184.12664794921875, + "loss": 0.1725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8548752069473267, + "rewards/margins": 3.4772963523864746, + "rewards/rejected": -2.6224212646484375, + "step": 11265 + }, + { + "epoch": 0.66, + "learning_rate": 2.8010320004925636e-08, + "logits/chosen": -1.9632726907730103, + "logits/rejected": -1.9588459730148315, + "logps/chosen": -0.00022338092094287276, + "logps/rejected": -99.20589447021484, + "loss": 0.5502, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.671035640058108e-05, + "rewards/margins": 0.6874803900718689, + "rewards/rejected": -0.687433660030365, + "step": 11266 + }, + { + "epoch": 0.66, + "learning_rate": 2.8001856690177883e-08, + "logits/chosen": -1.756983995437622, + "logits/rejected": -1.7400087118148804, + "logps/chosen": -285.52099609375, + "logps/rejected": -526.8428344726562, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.655719041824341, + "rewards/margins": 4.072534084320068, + "rewards/rejected": -1.416815161705017, + "step": 11267 + }, + { + "epoch": 0.66, + "learning_rate": 2.7993394156911786e-08, + "logits/chosen": -2.099782943725586, + "logits/rejected": -2.0881688594818115, + "logps/chosen": -0.8778712153434753, + "logps/rejected": -243.9698944091797, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19167248904705048, + "rewards/margins": 4.206261157989502, + "rewards/rejected": -4.014588832855225, + "step": 11268 + }, + { + "epoch": 0.66, + "learning_rate": 2.7984932405427976e-08, + "logits/chosen": -2.167917490005493, + "logits/rejected": -2.166444778442383, + "logps/chosen": -40.68037796020508, + "logps/rejected": -298.65692138671875, + "loss": 0.1384, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0388835668563843, + "rewards/margins": 6.1813764572143555, + "rewards/rejected": -5.142492771148682, + "step": 11269 + }, + { + "epoch": 0.66, + "learning_rate": 2.797647143602705e-08, + "logits/chosen": -1.9034875631332397, + "logits/rejected": -1.9022542238235474, + "logps/chosen": -15.376165390014648, + "logps/rejected": -100.33090209960938, + "loss": 0.4413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18745861947536469, + "rewards/margins": 1.1345411539077759, + "rewards/rejected": -0.94708251953125, + "step": 11270 + }, + { + "epoch": 0.66, + "learning_rate": 2.7968011249009604e-08, + "logits/chosen": -1.8637951612472534, + "logits/rejected": -1.8495043516159058, + "logps/chosen": -234.7142333984375, + "logps/rejected": -648.2219848632812, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7859954833984375, + "rewards/margins": 7.817203044891357, + "rewards/rejected": -5.03120756149292, + "step": 11271 + }, + { + "epoch": 0.66, + "learning_rate": 2.7959551844676134e-08, + "logits/chosen": -2.006244659423828, + "logits/rejected": -1.9928516149520874, + "logps/chosen": -123.91722869873047, + "logps/rejected": -234.74952697753906, + "loss": 0.4287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15652771294116974, + "rewards/margins": 2.6625747680664062, + "rewards/rejected": -2.8191025257110596, + "step": 11272 + }, + { + "epoch": 0.66, + "learning_rate": 2.7951093223327238e-08, + "logits/chosen": -2.0202653408050537, + "logits/rejected": -2.0205323696136475, + "logps/chosen": -10.423501014709473, + "logps/rejected": -152.15914916992188, + "loss": 0.3622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07801685482263565, + "rewards/margins": 2.1307802200317383, + "rewards/rejected": -2.0527634620666504, + "step": 11273 + }, + { + "epoch": 0.66, + "learning_rate": 2.7942635385263334e-08, + "logits/chosen": -1.9968069791793823, + "logits/rejected": -1.9950597286224365, + "logps/chosen": -114.28022766113281, + "logps/rejected": -217.675537109375, + "loss": 0.4068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3031959533691406, + "rewards/margins": 0.7090553045272827, + "rewards/rejected": -0.4058593809604645, + "step": 11274 + }, + { + "epoch": 0.66, + "learning_rate": 2.793417833078492e-08, + "logits/chosen": -2.131746292114258, + "logits/rejected": -2.1303162574768066, + "logps/chosen": -0.0005971290520392358, + "logps/rejected": -234.0274658203125, + "loss": 0.336, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0583357834548224e-05, + "rewards/margins": 4.059683322906494, + "rewards/rejected": -4.059672832489014, + "step": 11275 + }, + { + "epoch": 0.66, + "learning_rate": 2.792572206019243e-08, + "logits/chosen": -2.0263638496398926, + "logits/rejected": -1.9217052459716797, + "logps/chosen": -261.5820617675781, + "logps/rejected": -522.8828125, + "loss": 0.1982, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5036346912384033, + "rewards/margins": 1.3154327869415283, + "rewards/rejected": 0.188201904296875, + "step": 11276 + }, + { + "epoch": 0.66, + "learning_rate": 2.7917266573786286e-08, + "logits/chosen": -1.9760568141937256, + "logits/rejected": -1.9718000888824463, + "logps/chosen": -47.65653991699219, + "logps/rejected": -269.1109619140625, + "loss": 0.6477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9340759515762329, + "rewards/margins": 3.318094253540039, + "rewards/rejected": -4.252170085906982, + "step": 11277 + }, + { + "epoch": 0.66, + "learning_rate": 2.7908811871866833e-08, + "logits/chosen": -1.6703401803970337, + "logits/rejected": -1.672288417816162, + "logps/chosen": -0.004627651534974575, + "logps/rejected": -137.2804718017578, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026993246865458786, + "rewards/margins": 2.7340610027313232, + "rewards/rejected": -2.734330892562866, + "step": 11278 + }, + { + "epoch": 0.66, + "learning_rate": 2.7900357954734443e-08, + "logits/chosen": -1.7908140420913696, + "logits/rejected": -1.7825454473495483, + "logps/chosen": -51.2191047668457, + "logps/rejected": -186.46932983398438, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17948876321315765, + "rewards/margins": 0.9945858120918274, + "rewards/rejected": -0.8150970339775085, + "step": 11279 + }, + { + "epoch": 0.66, + "learning_rate": 2.7891904822689437e-08, + "logits/chosen": -2.120060920715332, + "logits/rejected": -2.1195385456085205, + "logps/chosen": -38.46428680419922, + "logps/rejected": -366.0467224121094, + "loss": 0.1956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3327011168003082, + "rewards/margins": 4.714021682739258, + "rewards/rejected": -4.381320476531982, + "step": 11280 + }, + { + "epoch": 0.66, + "learning_rate": 2.7883452476032133e-08, + "logits/chosen": -2.0642173290252686, + "logits/rejected": -2.0674076080322266, + "logps/chosen": -0.000143403114634566, + "logps/rejected": -184.20205688476562, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.802483888648567e-06, + "rewards/margins": 3.3464438915252686, + "rewards/rejected": -3.346447706222534, + "step": 11281 + }, + { + "epoch": 0.66, + "learning_rate": 2.7875000915062762e-08, + "logits/chosen": -2.0032849311828613, + "logits/rejected": -1.9979192018508911, + "logps/chosen": -44.69023132324219, + "logps/rejected": -206.07135009765625, + "loss": 0.2628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3126815855503082, + "rewards/margins": 1.8970855474472046, + "rewards/rejected": -1.5844039916992188, + "step": 11282 + }, + { + "epoch": 0.66, + "learning_rate": 2.7866550140081584e-08, + "logits/chosen": -1.9197546243667603, + "logits/rejected": -1.8946926593780518, + "logps/chosen": -251.74822998046875, + "logps/rejected": -350.41802978515625, + "loss": 0.0825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42362672090530396, + "rewards/margins": 4.493124485015869, + "rewards/rejected": -4.069497585296631, + "step": 11283 + }, + { + "epoch": 0.66, + "learning_rate": 2.7858100151388808e-08, + "logits/chosen": -1.8377939462661743, + "logits/rejected": -1.8052723407745361, + "logps/chosen": -266.06915283203125, + "logps/rejected": -536.3572998046875, + "loss": 0.1281, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8521485328674316, + "rewards/margins": 1.4035706520080566, + "rewards/rejected": 1.448577880859375, + "step": 11284 + }, + { + "epoch": 0.66, + "learning_rate": 2.7849650949284653e-08, + "logits/chosen": -1.8333001136779785, + "logits/rejected": -1.830964207649231, + "logps/chosen": -41.5205078125, + "logps/rejected": -197.88311767578125, + "loss": 0.1911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9168796539306641, + "rewards/margins": 2.3243839740753174, + "rewards/rejected": -1.4075043201446533, + "step": 11285 + }, + { + "epoch": 0.66, + "learning_rate": 2.7841202534069218e-08, + "logits/chosen": -1.9737049341201782, + "logits/rejected": -1.981985092163086, + "logps/chosen": -263.0768127441406, + "logps/rejected": -387.6802978515625, + "loss": 0.0497, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0789947509765625, + "rewards/margins": 2.5925567150115967, + "rewards/rejected": -0.513562023639679, + "step": 11286 + }, + { + "epoch": 0.66, + "learning_rate": 2.7832754906042655e-08, + "logits/chosen": -1.8298643827438354, + "logits/rejected": -1.817271113395691, + "logps/chosen": -0.02108614705502987, + "logps/rejected": -154.4003448486328, + "loss": 0.4052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009382505086250603, + "rewards/margins": 1.683729648590088, + "rewards/rejected": -1.684667944908142, + "step": 11287 + }, + { + "epoch": 0.66, + "learning_rate": 2.7824308065505075e-08, + "logits/chosen": -1.9961239099502563, + "logits/rejected": -1.987399935722351, + "logps/chosen": -78.09927368164062, + "logps/rejected": -360.50958251953125, + "loss": 0.239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5144829154014587, + "rewards/margins": 5.013219833374023, + "rewards/rejected": -4.49873685836792, + "step": 11288 + }, + { + "epoch": 0.66, + "learning_rate": 2.7815862012756562e-08, + "logits/chosen": -1.9238619804382324, + "logits/rejected": -1.9145652055740356, + "logps/chosen": -11.229933738708496, + "logps/rejected": -176.14468383789062, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23388758301734924, + "rewards/margins": 2.1882212162017822, + "rewards/rejected": -1.9543335437774658, + "step": 11289 + }, + { + "epoch": 0.66, + "learning_rate": 2.7807416748097136e-08, + "logits/chosen": -1.8040187358856201, + "logits/rejected": -1.7664804458618164, + "logps/chosen": -289.32781982421875, + "logps/rejected": -466.0093994140625, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6506134271621704, + "rewards/margins": 5.438070774078369, + "rewards/rejected": -3.787457227706909, + "step": 11290 + }, + { + "epoch": 0.66, + "learning_rate": 2.7798972271826793e-08, + "logits/chosen": -2.0238640308380127, + "logits/rejected": -2.0179669857025146, + "logps/chosen": -13.89657211303711, + "logps/rejected": -121.60355377197266, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38759633898735046, + "rewards/margins": 0.40035727620124817, + "rewards/rejected": -0.012760925106704235, + "step": 11291 + }, + { + "epoch": 0.66, + "learning_rate": 2.7790528584245587e-08, + "logits/chosen": -2.1439595222473145, + "logits/rejected": -2.1415300369262695, + "logps/chosen": -4.617761135101318, + "logps/rejected": -111.44329833984375, + "loss": 0.3266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29666969180107117, + "rewards/margins": 2.0470314025878906, + "rewards/rejected": -1.750361680984497, + "step": 11292 + }, + { + "epoch": 0.66, + "learning_rate": 2.7782085685653435e-08, + "logits/chosen": -1.9162626266479492, + "logits/rejected": -1.91610848903656, + "logps/chosen": -17.120080947875977, + "logps/rejected": -106.0039291381836, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.592289388179779, + "rewards/margins": 1.6638798713684082, + "rewards/rejected": -1.0715904235839844, + "step": 11293 + }, + { + "epoch": 0.66, + "learning_rate": 2.777364357635029e-08, + "logits/chosen": -1.7125760316848755, + "logits/rejected": -1.7336465120315552, + "logps/chosen": -361.3857421875, + "logps/rejected": -569.633544921875, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2592713832855225, + "rewards/margins": 2.1610779762268066, + "rewards/rejected": 0.09819336235523224, + "step": 11294 + }, + { + "epoch": 0.66, + "learning_rate": 2.776520225663601e-08, + "logits/chosen": -1.8655352592468262, + "logits/rejected": -1.865993857383728, + "logps/chosen": -274.3133544921875, + "logps/rejected": -408.2845458984375, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5663726329803467, + "rewards/margins": 3.118255615234375, + "rewards/rejected": -0.5518829226493835, + "step": 11295 + }, + { + "epoch": 0.66, + "learning_rate": 2.7756761726810552e-08, + "logits/chosen": -1.81322181224823, + "logits/rejected": -1.7774220705032349, + "logps/chosen": -309.46832275390625, + "logps/rejected": -516.6741333007812, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.660247802734375, + "rewards/margins": 4.681454658508301, + "rewards/rejected": -1.0212067365646362, + "step": 11296 + }, + { + "epoch": 0.66, + "learning_rate": 2.7748321987173696e-08, + "logits/chosen": -1.8179925680160522, + "logits/rejected": -1.8237019777297974, + "logps/chosen": -10.05295181274414, + "logps/rejected": -104.75189208984375, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32655802369117737, + "rewards/margins": 2.3751139640808105, + "rewards/rejected": -2.048555850982666, + "step": 11297 + }, + { + "epoch": 0.66, + "learning_rate": 2.7739883038025286e-08, + "logits/chosen": -1.7909438610076904, + "logits/rejected": -1.7774680852890015, + "logps/chosen": -0.0023286223877221346, + "logps/rejected": -111.35507202148438, + "loss": 0.5407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00024716087500564754, + "rewards/margins": 0.7335136532783508, + "rewards/rejected": -0.7332664728164673, + "step": 11298 + }, + { + "epoch": 0.66, + "learning_rate": 2.773144487966512e-08, + "logits/chosen": -1.9347174167633057, + "logits/rejected": -1.9313524961471558, + "logps/chosen": -285.1260681152344, + "logps/rejected": -438.5551452636719, + "loss": 0.1534, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.457086205482483, + "rewards/margins": 1.4567718505859375, + "rewards/rejected": 0.0003143310605082661, + "step": 11299 + }, + { + "epoch": 0.66, + "learning_rate": 2.7723007512392976e-08, + "logits/chosen": -1.949672818183899, + "logits/rejected": -1.8982479572296143, + "logps/chosen": -223.0341796875, + "logps/rejected": -309.5990905761719, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.582379341125488, + "rewards/margins": 3.66355299949646, + "rewards/rejected": 0.9188262820243835, + "step": 11300 + }, + { + "epoch": 0.66, + "learning_rate": 2.7714570936508557e-08, + "logits/chosen": -2.1073625087738037, + "logits/rejected": -2.1090617179870605, + "logps/chosen": -3.3099448680877686, + "logps/rejected": -133.7184600830078, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17057721316814423, + "rewards/margins": 2.532501220703125, + "rewards/rejected": -2.361923933029175, + "step": 11301 + }, + { + "epoch": 0.66, + "learning_rate": 2.770613515231159e-08, + "logits/chosen": -1.8836416006088257, + "logits/rejected": -1.922959327697754, + "logps/chosen": -177.9676971435547, + "logps/rejected": -297.70623779296875, + "loss": 0.2368, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.514949083328247, + "rewards/margins": 0.7642502188682556, + "rewards/rejected": 0.7506988644599915, + "step": 11302 + }, + { + "epoch": 0.66, + "learning_rate": 2.7697700160101745e-08, + "logits/chosen": -1.7805463075637817, + "logits/rejected": -1.7757954597473145, + "logps/chosen": -65.65786743164062, + "logps/rejected": -252.95187377929688, + "loss": 0.4029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2851966917514801, + "rewards/margins": 1.189588189125061, + "rewards/rejected": -0.9043914675712585, + "step": 11303 + }, + { + "epoch": 0.66, + "learning_rate": 2.768926596017871e-08, + "logits/chosen": -1.9031753540039062, + "logits/rejected": -1.8991785049438477, + "logps/chosen": -47.57709884643555, + "logps/rejected": -106.41877746582031, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8961887359619141, + "rewards/margins": 0.8781192898750305, + "rewards/rejected": 0.01806945912539959, + "step": 11304 + }, + { + "epoch": 0.66, + "learning_rate": 2.768083255284206e-08, + "logits/chosen": -1.9416435956954956, + "logits/rejected": -1.9313793182373047, + "logps/chosen": -143.30593872070312, + "logps/rejected": -278.5053405761719, + "loss": 0.2874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07831268757581711, + "rewards/margins": 0.9270248413085938, + "rewards/rejected": -0.848712146282196, + "step": 11305 + }, + { + "epoch": 0.66, + "learning_rate": 2.7672399938391412e-08, + "logits/chosen": -1.7246369123458862, + "logits/rejected": -1.7228593826293945, + "logps/chosen": -135.66238403320312, + "logps/rejected": -275.2781982421875, + "loss": 0.1513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9710479974746704, + "rewards/margins": 1.5623137950897217, + "rewards/rejected": 0.40873414278030396, + "step": 11306 + }, + { + "epoch": 0.66, + "learning_rate": 2.7663968117126335e-08, + "logits/chosen": -1.784479022026062, + "logits/rejected": -1.6637533903121948, + "logps/chosen": -176.35467529296875, + "logps/rejected": -582.3120727539062, + "loss": 0.0629, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0857772827148438, + "rewards/margins": 3.3918991088867188, + "rewards/rejected": -1.306121826171875, + "step": 11307 + }, + { + "epoch": 0.66, + "learning_rate": 2.7655537089346393e-08, + "logits/chosen": -1.6586230993270874, + "logits/rejected": -1.670043706893921, + "logps/chosen": -0.0754278227686882, + "logps/rejected": -231.87838745117188, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0047502899542450905, + "rewards/margins": 7.042286396026611, + "rewards/rejected": -7.047036647796631, + "step": 11308 + }, + { + "epoch": 0.66, + "learning_rate": 2.7647106855351042e-08, + "logits/chosen": -2.047015905380249, + "logits/rejected": -2.1032462120056152, + "logps/chosen": -241.70245361328125, + "logps/rejected": -263.8597106933594, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4819977283477783, + "rewards/margins": 2.8642821311950684, + "rewards/rejected": 0.6177154779434204, + "step": 11309 + }, + { + "epoch": 0.66, + "learning_rate": 2.7638677415439783e-08, + "logits/chosen": -1.806218147277832, + "logits/rejected": -1.8326448202133179, + "logps/chosen": -256.97467041015625, + "logps/rejected": -503.43768310546875, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.23907470703125, + "rewards/margins": 3.061474561691284, + "rewards/rejected": -0.822399914264679, + "step": 11310 + }, + { + "epoch": 0.66, + "learning_rate": 2.763024876991212e-08, + "logits/chosen": -1.946040391921997, + "logits/rejected": -1.9394097328186035, + "logps/chosen": -250.48883056640625, + "logps/rejected": -442.2870178222656, + "loss": 0.2334, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0589356422424316, + "rewards/margins": 0.5418457984924316, + "rewards/rejected": 2.51708984375, + "step": 11311 + }, + { + "epoch": 0.66, + "learning_rate": 2.7621820919067418e-08, + "logits/chosen": -1.8533363342285156, + "logits/rejected": -1.8267414569854736, + "logps/chosen": -217.2135009765625, + "logps/rejected": -422.67431640625, + "loss": 0.0399, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6169464588165283, + "rewards/margins": 3.245004415512085, + "rewards/rejected": -0.6280578970909119, + "step": 11312 + }, + { + "epoch": 0.66, + "learning_rate": 2.7613393863205126e-08, + "logits/chosen": -1.8681766986846924, + "logits/rejected": -1.8589231967926025, + "logps/chosen": -2.038163900375366, + "logps/rejected": -172.53134155273438, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17624910175800323, + "rewards/margins": 2.557241201400757, + "rewards/rejected": -2.3809921741485596, + "step": 11313 + }, + { + "epoch": 0.66, + "learning_rate": 2.7604967602624552e-08, + "logits/chosen": -2.036328077316284, + "logits/rejected": -2.017214775085449, + "logps/chosen": -1.6689227777533233e-05, + "logps/rejected": -138.3555908203125, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.172252374701202e-07, + "rewards/margins": 3.3250234127044678, + "rewards/rejected": -3.3250229358673096, + "step": 11314 + }, + { + "epoch": 0.66, + "learning_rate": 2.759654213762511e-08, + "logits/chosen": -1.876438021659851, + "logits/rejected": -1.8759318590164185, + "logps/chosen": -10.754852294921875, + "logps/rejected": -103.06126403808594, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6184701919555664, + "rewards/margins": 1.138048768043518, + "rewards/rejected": -0.5195785760879517, + "step": 11315 + }, + { + "epoch": 0.66, + "learning_rate": 2.7588117468506062e-08, + "logits/chosen": -1.6985403299331665, + "logits/rejected": -1.7107385396957397, + "logps/chosen": -185.96038818359375, + "logps/rejected": -307.15692138671875, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9203567504882812, + "rewards/margins": 3.527268886566162, + "rewards/rejected": -1.6069122552871704, + "step": 11316 + }, + { + "epoch": 0.66, + "learning_rate": 2.7579693595566707e-08, + "logits/chosen": -1.8778141736984253, + "logits/rejected": -1.874449610710144, + "logps/chosen": -288.67071533203125, + "logps/rejected": -348.11761474609375, + "loss": 0.1534, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.907318115234375, + "rewards/margins": 1.1482481956481934, + "rewards/rejected": 0.7590698599815369, + "step": 11317 + }, + { + "epoch": 0.66, + "learning_rate": 2.7571270519106305e-08, + "logits/chosen": -1.8629095554351807, + "logits/rejected": -1.8522244691848755, + "logps/chosen": -62.969032287597656, + "logps/rejected": -277.3883361816406, + "loss": 0.1571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9611870050430298, + "rewards/margins": 3.102715492248535, + "rewards/rejected": -2.141528367996216, + "step": 11318 + }, + { + "epoch": 0.66, + "learning_rate": 2.7562848239424108e-08, + "logits/chosen": -1.9586269855499268, + "logits/rejected": -1.9480445384979248, + "logps/chosen": -182.30699157714844, + "logps/rejected": -256.1129150390625, + "loss": 0.151, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.439030408859253, + "rewards/margins": 1.0957564115524292, + "rewards/rejected": 1.3432739973068237, + "step": 11319 + }, + { + "epoch": 0.66, + "learning_rate": 2.755442675681927e-08, + "logits/chosen": -1.7117558717727661, + "logits/rejected": -1.7658778429031372, + "logps/chosen": -245.4990997314453, + "logps/rejected": -234.8127899169922, + "loss": 0.0492, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.724339246749878, + "rewards/margins": 2.413983106613159, + "rewards/rejected": 0.31035614013671875, + "step": 11320 + }, + { + "epoch": 0.66, + "learning_rate": 2.754600607159099e-08, + "logits/chosen": -1.996880292892456, + "logits/rejected": -1.9932020902633667, + "logps/chosen": -24.19458770751953, + "logps/rejected": -133.42539978027344, + "loss": 0.4152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38446560502052307, + "rewards/margins": 0.8799718618392944, + "rewards/rejected": -0.49550628662109375, + "step": 11321 + }, + { + "epoch": 0.66, + "learning_rate": 2.7537586184038404e-08, + "logits/chosen": -1.794472098350525, + "logits/rejected": -1.7802873849868774, + "logps/chosen": -221.51107788085938, + "logps/rejected": -330.58807373046875, + "loss": 0.0864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2436904907226562, + "rewards/margins": 2.6922898292541504, + "rewards/rejected": -1.4485992193222046, + "step": 11322 + }, + { + "epoch": 0.66, + "learning_rate": 2.7529167094460653e-08, + "logits/chosen": -2.056636333465576, + "logits/rejected": -2.0567586421966553, + "logps/chosen": -7.723145961761475, + "logps/rejected": -92.4924545288086, + "loss": 0.3752, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1560133546590805, + "rewards/margins": 1.6518027782440186, + "rewards/rejected": -1.4957894086837769, + "step": 11323 + }, + { + "epoch": 0.66, + "learning_rate": 2.7520748803156784e-08, + "logits/chosen": -1.8895035982131958, + "logits/rejected": -1.8807945251464844, + "logps/chosen": -46.06819152832031, + "logps/rejected": -118.60212707519531, + "loss": 0.572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0851673111319542, + "rewards/margins": 0.32515373826026917, + "rewards/rejected": -0.23998641967773438, + "step": 11324 + }, + { + "epoch": 0.66, + "learning_rate": 2.7512331310425874e-08, + "logits/chosen": -2.002504587173462, + "logits/rejected": -1.990129828453064, + "logps/chosen": -5.0780253410339355, + "logps/rejected": -165.18377685546875, + "loss": 0.3741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07315649837255478, + "rewards/margins": 1.6128095388412476, + "rewards/rejected": -1.5396530628204346, + "step": 11325 + }, + { + "epoch": 0.66, + "learning_rate": 2.750391461656696e-08, + "logits/chosen": -2.043938636779785, + "logits/rejected": -2.034005880355835, + "logps/chosen": -53.535247802734375, + "logps/rejected": -178.67080688476562, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4544326961040497, + "rewards/margins": 2.3192214965820312, + "rewards/rejected": -1.8647888898849487, + "step": 11326 + }, + { + "epoch": 0.66, + "learning_rate": 2.7495498721879057e-08, + "logits/chosen": -1.9299979209899902, + "logits/rejected": -1.9326715469360352, + "logps/chosen": -4.827062606811523, + "logps/rejected": -235.48922729492188, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06375866383314133, + "rewards/margins": 2.8577349185943604, + "rewards/rejected": -2.9214935302734375, + "step": 11327 + }, + { + "epoch": 0.66, + "learning_rate": 2.74870836266611e-08, + "logits/chosen": -1.735010027885437, + "logits/rejected": -1.7318676710128784, + "logps/chosen": -32.28056335449219, + "logps/rejected": -185.17019653320312, + "loss": 0.2028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9155387878417969, + "rewards/margins": 2.965956926345825, + "rewards/rejected": -2.0504181385040283, + "step": 11328 + }, + { + "epoch": 0.66, + "learning_rate": 2.7478669331212055e-08, + "logits/chosen": -1.9384996891021729, + "logits/rejected": -1.9400144815444946, + "logps/chosen": -169.579833984375, + "logps/rejected": -320.0820007324219, + "loss": 0.0452, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9129791259765625, + "rewards/margins": 3.57000732421875, + "rewards/rejected": -0.6570281982421875, + "step": 11329 + }, + { + "epoch": 0.66, + "learning_rate": 2.747025583583084e-08, + "logits/chosen": -1.8981380462646484, + "logits/rejected": -1.890577793121338, + "logps/chosen": -28.27353858947754, + "logps/rejected": -140.53469848632812, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07292308658361435, + "rewards/margins": 0.7092882394790649, + "rewards/rejected": -0.7822113037109375, + "step": 11330 + }, + { + "epoch": 0.66, + "learning_rate": 2.7461843140816343e-08, + "logits/chosen": -1.9197649955749512, + "logits/rejected": -1.9208205938339233, + "logps/chosen": -4.38712739944458, + "logps/rejected": -152.39515686035156, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.327942430973053, + "rewards/margins": 2.7379446029663086, + "rewards/rejected": -2.4100022315979004, + "step": 11331 + }, + { + "epoch": 0.66, + "learning_rate": 2.745343124646744e-08, + "logits/chosen": -2.0759851932525635, + "logits/rejected": -2.060166597366333, + "logps/chosen": -234.80320739746094, + "logps/rejected": -359.27410888671875, + "loss": 0.0996, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0426530838012695, + "rewards/margins": 1.535048007965088, + "rewards/rejected": 2.5076050758361816, + "step": 11332 + }, + { + "epoch": 0.66, + "learning_rate": 2.744502015308291e-08, + "logits/chosen": -1.86322820186615, + "logits/rejected": -1.8621736764907837, + "logps/chosen": -11.229307174682617, + "logps/rejected": -93.62704467773438, + "loss": 0.566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.347520649433136, + "rewards/margins": 0.20578785240650177, + "rewards/rejected": 0.14173279702663422, + "step": 11333 + }, + { + "epoch": 0.66, + "learning_rate": 2.7436609860961634e-08, + "logits/chosen": -1.9321497678756714, + "logits/rejected": -1.9118798971176147, + "logps/chosen": -169.57608032226562, + "logps/rejected": -476.348876953125, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9659759998321533, + "rewards/margins": 7.073935508728027, + "rewards/rejected": -5.107959270477295, + "step": 11334 + }, + { + "epoch": 0.66, + "learning_rate": 2.742820037040232e-08, + "logits/chosen": -1.7301527261734009, + "logits/rejected": -1.7384912967681885, + "logps/chosen": -214.39051818847656, + "logps/rejected": -402.2525939941406, + "loss": 0.196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9778839349746704, + "rewards/margins": 1.515252709388733, + "rewards/rejected": -0.5373687744140625, + "step": 11335 + }, + { + "epoch": 0.66, + "learning_rate": 2.7419791681703746e-08, + "logits/chosen": -1.7372891902923584, + "logits/rejected": -1.7516515254974365, + "logps/chosen": -285.27154541015625, + "logps/rejected": -458.3428649902344, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.99627685546875, + "rewards/margins": 3.5690245628356934, + "rewards/rejected": -0.5727478265762329, + "step": 11336 + }, + { + "epoch": 0.66, + "learning_rate": 2.7411383795164623e-08, + "logits/chosen": -2.016066312789917, + "logits/rejected": -2.0204010009765625, + "logps/chosen": -159.53646850585938, + "logps/rejected": -286.5960693359375, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4214187562465668, + "rewards/margins": 2.6632585525512695, + "rewards/rejected": -3.084677219390869, + "step": 11337 + }, + { + "epoch": 0.66, + "learning_rate": 2.7402976711083653e-08, + "logits/chosen": -2.058079242706299, + "logits/rejected": -2.0439839363098145, + "logps/chosen": -63.58699035644531, + "logps/rejected": -154.18804931640625, + "loss": 0.6559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3553116023540497, + "rewards/margins": 0.5413665771484375, + "rewards/rejected": -0.8966781497001648, + "step": 11338 + }, + { + "epoch": 0.66, + "learning_rate": 2.7394570429759467e-08, + "logits/chosen": -1.9854100942611694, + "logits/rejected": -1.986594557762146, + "logps/chosen": -26.251379013061523, + "logps/rejected": -99.86212158203125, + "loss": 0.5963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01775493659079075, + "rewards/margins": 0.40498030185699463, + "rewards/rejected": -0.38722535967826843, + "step": 11339 + }, + { + "epoch": 0.66, + "learning_rate": 2.738616495149072e-08, + "logits/chosen": -1.8539050817489624, + "logits/rejected": -1.8674436807632446, + "logps/chosen": -218.83209228515625, + "logps/rejected": -375.85711669921875, + "loss": 0.0882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5985122919082642, + "rewards/margins": 3.6236467361450195, + "rewards/rejected": -3.025134325027466, + "step": 11340 + }, + { + "epoch": 0.66, + "learning_rate": 2.7377760276576002e-08, + "logits/chosen": -1.938238501548767, + "logits/rejected": -1.9361321926116943, + "logps/chosen": -15.942688941955566, + "logps/rejected": -71.46543884277344, + "loss": 0.495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39777669310569763, + "rewards/margins": 0.45738285779953003, + "rewards/rejected": -0.059606172144412994, + "step": 11341 + }, + { + "epoch": 0.66, + "learning_rate": 2.7369356405313927e-08, + "logits/chosen": -2.0576744079589844, + "logits/rejected": -2.0549073219299316, + "logps/chosen": -12.822779655456543, + "logps/rejected": -139.32424926757812, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1904105246067047, + "rewards/margins": 1.772098422050476, + "rewards/rejected": -1.5816879272460938, + "step": 11342 + }, + { + "epoch": 0.66, + "learning_rate": 2.736095333800298e-08, + "logits/chosen": -1.7859920263290405, + "logits/rejected": -1.7760508060455322, + "logps/chosen": -3.7416839599609375, + "logps/rejected": -121.53972625732422, + "loss": 0.5765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09242486953735352, + "rewards/margins": 0.57978355884552, + "rewards/rejected": -0.6722084283828735, + "step": 11343 + }, + { + "epoch": 0.66, + "learning_rate": 2.735255107494171e-08, + "logits/chosen": -1.904109239578247, + "logits/rejected": -1.9064342975616455, + "logps/chosen": -0.12826988101005554, + "logps/rejected": -212.1953582763672, + "loss": 0.3874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0046996669843792915, + "rewards/margins": 3.2203094959259033, + "rewards/rejected": -3.2250092029571533, + "step": 11344 + }, + { + "epoch": 0.66, + "learning_rate": 2.7344149616428612e-08, + "logits/chosen": -2.1267263889312744, + "logits/rejected": -2.1200826168060303, + "logps/chosen": -13.536202430725098, + "logps/rejected": -94.19380187988281, + "loss": 0.5567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29131537675857544, + "rewards/margins": 0.37889471650123596, + "rewards/rejected": -0.08757934719324112, + "step": 11345 + }, + { + "epoch": 0.66, + "learning_rate": 2.7335748962762162e-08, + "logits/chosen": -1.6978111267089844, + "logits/rejected": -1.697270154953003, + "logps/chosen": -160.46112060546875, + "logps/rejected": -248.38626098632812, + "loss": 0.5013, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.4501495361328125, + "rewards/margins": -0.4414856433868408, + "rewards/rejected": 1.8916351795196533, + "step": 11346 + }, + { + "epoch": 0.66, + "learning_rate": 2.732734911424075e-08, + "logits/chosen": -2.0230274200439453, + "logits/rejected": -2.0082101821899414, + "logps/chosen": -206.49485778808594, + "logps/rejected": -440.19439697265625, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6028289794921875, + "rewards/margins": 5.248056411743164, + "rewards/rejected": -3.6452271938323975, + "step": 11347 + }, + { + "epoch": 0.66, + "learning_rate": 2.7318950071162807e-08, + "logits/chosen": -1.86665678024292, + "logits/rejected": -1.8743878602981567, + "logps/chosen": -23.04854965209961, + "logps/rejected": -161.62855529785156, + "loss": 0.6573, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6107506155967712, + "rewards/margins": -0.396428644657135, + "rewards/rejected": 1.0071792602539062, + "step": 11348 + }, + { + "epoch": 0.66, + "learning_rate": 2.731055183382669e-08, + "logits/chosen": -1.9367475509643555, + "logits/rejected": -1.9276014566421509, + "logps/chosen": -139.23983764648438, + "logps/rejected": -401.6052551269531, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0569961071014404, + "rewards/margins": 4.2164201736450195, + "rewards/rejected": -2.159423828125, + "step": 11349 + }, + { + "epoch": 0.66, + "learning_rate": 2.7302154402530787e-08, + "logits/chosen": -1.8847256898880005, + "logits/rejected": -1.938245177268982, + "logps/chosen": -212.81285095214844, + "logps/rejected": -206.5756072998047, + "loss": 0.1029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1680328845977783, + "rewards/margins": 1.7946014404296875, + "rewards/rejected": 0.37343141436576843, + "step": 11350 + }, + { + "epoch": 0.66, + "learning_rate": 2.7293757777573362e-08, + "logits/chosen": -1.9156103134155273, + "logits/rejected": -1.912839412689209, + "logps/chosen": -200.72535705566406, + "logps/rejected": -239.89620971679688, + "loss": 0.5949, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6808929443359375, + "rewards/margins": -0.4264572858810425, + "rewards/rejected": 1.10735023021698, + "step": 11351 + }, + { + "epoch": 0.66, + "learning_rate": 2.728536195925271e-08, + "logits/chosen": -1.7103387117385864, + "logits/rejected": -1.6943336725234985, + "logps/chosen": -152.47496032714844, + "logps/rejected": -365.71295166015625, + "loss": 0.1319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7407974004745483, + "rewards/margins": 1.464421033859253, + "rewards/rejected": 0.276376336812973, + "step": 11352 + }, + { + "epoch": 0.66, + "learning_rate": 2.7276966947867153e-08, + "logits/chosen": -1.7565878629684448, + "logits/rejected": -1.7563852071762085, + "logps/chosen": -12.805362701416016, + "logps/rejected": -12.173120498657227, + "loss": 0.6864, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.09045916050672531, + "rewards/margins": -0.07575397938489914, + "rewards/rejected": 0.16621313989162445, + "step": 11353 + }, + { + "epoch": 0.66, + "learning_rate": 2.7268572743714856e-08, + "logits/chosen": -1.7663400173187256, + "logits/rejected": -1.757049560546875, + "logps/chosen": -219.45135498046875, + "logps/rejected": -355.5498046875, + "loss": 0.127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6360459327697754, + "rewards/margins": 1.3059494495391846, + "rewards/rejected": 1.3300964832305908, + "step": 11354 + }, + { + "epoch": 0.66, + "learning_rate": 2.726017934709407e-08, + "logits/chosen": -1.8137556314468384, + "logits/rejected": -1.812606692314148, + "logps/chosen": -18.00643539428711, + "logps/rejected": -86.91610717773438, + "loss": 0.6332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2779953181743622, + "rewards/margins": 0.08602830767631531, + "rewards/rejected": 0.19196701049804688, + "step": 11355 + }, + { + "epoch": 0.66, + "learning_rate": 2.7251786758302907e-08, + "logits/chosen": -1.833000659942627, + "logits/rejected": -1.840559482574463, + "logps/chosen": -223.50303649902344, + "logps/rejected": -365.3042907714844, + "loss": 0.1955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8228103518486023, + "rewards/margins": 1.3787460327148438, + "rewards/rejected": -0.5559356808662415, + "step": 11356 + }, + { + "epoch": 0.66, + "learning_rate": 2.724339497763959e-08, + "logits/chosen": -1.8969087600708008, + "logits/rejected": -1.9561926126480103, + "logps/chosen": -215.40187072753906, + "logps/rejected": -517.0076904296875, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.181965708732605, + "rewards/margins": 6.3643388748168945, + "rewards/rejected": -5.182373046875, + "step": 11357 + }, + { + "epoch": 0.66, + "learning_rate": 2.7235004005402184e-08, + "logits/chosen": -1.8295955657958984, + "logits/rejected": -1.8143751621246338, + "logps/chosen": -39.17948913574219, + "logps/rejected": -332.1384582519531, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0803565979003906, + "rewards/margins": 8.396411895751953, + "rewards/rejected": -7.3160552978515625, + "step": 11358 + }, + { + "epoch": 0.66, + "learning_rate": 2.7226613841888786e-08, + "logits/chosen": -1.8840527534484863, + "logits/rejected": -1.944926381111145, + "logps/chosen": -171.4529571533203, + "logps/rejected": -394.9369812011719, + "loss": 0.0254, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7093826532363892, + "rewards/margins": 5.78091287612915, + "rewards/rejected": -4.071530342102051, + "step": 11359 + }, + { + "epoch": 0.66, + "learning_rate": 2.7218224487397456e-08, + "logits/chosen": -1.9600634574890137, + "logits/rejected": -1.9272570610046387, + "logps/chosen": -258.45330810546875, + "logps/rejected": -355.30145263671875, + "loss": 0.0947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3655426502227783, + "rewards/margins": 2.2393524646759033, + "rewards/rejected": -0.873809814453125, + "step": 11360 + }, + { + "epoch": 0.66, + "learning_rate": 2.7209835942226256e-08, + "logits/chosen": -1.7374231815338135, + "logits/rejected": -1.786865472793579, + "logps/chosen": -329.73272705078125, + "logps/rejected": -496.9385986328125, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.343414306640625, + "rewards/margins": 3.658029079437256, + "rewards/rejected": -0.314614862203598, + "step": 11361 + }, + { + "epoch": 0.66, + "learning_rate": 2.7201448206673138e-08, + "logits/chosen": -1.758971095085144, + "logits/rejected": -1.7644402980804443, + "logps/chosen": -39.987335205078125, + "logps/rejected": -330.5517578125, + "loss": 0.1272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0872948169708252, + "rewards/margins": 5.048201560974121, + "rewards/rejected": -3.960906982421875, + "step": 11362 + }, + { + "epoch": 0.66, + "learning_rate": 2.7193061281036106e-08, + "logits/chosen": -1.925041913986206, + "logits/rejected": -1.9249072074890137, + "logps/chosen": -43.57267761230469, + "logps/rejected": -261.1263427734375, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015981674194335938, + "rewards/margins": 2.9659924507141113, + "rewards/rejected": -2.9500107765197754, + "step": 11363 + }, + { + "epoch": 0.66, + "learning_rate": 2.7184675165613097e-08, + "logits/chosen": -1.8146692514419556, + "logits/rejected": -1.825762391090393, + "logps/chosen": -241.3623046875, + "logps/rejected": -345.92578125, + "loss": 0.2586, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3034896850585938, + "rewards/margins": 1.1501144170761108, + "rewards/rejected": 0.15337525308132172, + "step": 11364 + }, + { + "epoch": 0.66, + "learning_rate": 2.7176289860702045e-08, + "logits/chosen": -2.126844644546509, + "logits/rejected": -2.129849910736084, + "logps/chosen": -0.00018559694581199437, + "logps/rejected": -124.2690200805664, + "loss": 0.353, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.817957910243422e-06, + "rewards/margins": 3.4868149757385254, + "rewards/rejected": -3.4868218898773193, + "step": 11365 + }, + { + "epoch": 0.66, + "learning_rate": 2.7167905366600798e-08, + "logits/chosen": -1.9228414297103882, + "logits/rejected": -1.926727294921875, + "logps/chosen": -54.875274658203125, + "logps/rejected": -163.9293670654297, + "loss": 0.6137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24744033813476562, + "rewards/margins": 0.6346840262413025, + "rewards/rejected": -0.8821243643760681, + "step": 11366 + }, + { + "epoch": 0.66, + "learning_rate": 2.715952168360724e-08, + "logits/chosen": -1.60307776927948, + "logits/rejected": -1.62189781665802, + "logps/chosen": -203.33001708984375, + "logps/rejected": -334.5648193359375, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4767426252365112, + "rewards/margins": 2.8995392322540283, + "rewards/rejected": -1.422796607017517, + "step": 11367 + }, + { + "epoch": 0.66, + "learning_rate": 2.7151138812019193e-08, + "logits/chosen": -2.1019134521484375, + "logits/rejected": -2.1015186309814453, + "logps/chosen": -37.70393371582031, + "logps/rejected": -118.58110046386719, + "loss": 0.4009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16230888664722443, + "rewards/margins": 1.475896954536438, + "rewards/rejected": -1.31358802318573, + "step": 11368 + }, + { + "epoch": 0.66, + "learning_rate": 2.7142756752134488e-08, + "logits/chosen": -1.773946762084961, + "logits/rejected": -1.7489959001541138, + "logps/chosen": -232.980712890625, + "logps/rejected": -389.85205078125, + "loss": 0.0371, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.993975877761841, + "rewards/margins": 4.238903999328613, + "rewards/rejected": -1.244928002357483, + "step": 11369 + }, + { + "epoch": 0.66, + "learning_rate": 2.7134375504250844e-08, + "logits/chosen": -1.952846884727478, + "logits/rejected": -1.950441598892212, + "logps/chosen": -39.07188415527344, + "logps/rejected": -316.4366455078125, + "loss": 0.2094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3552631437778473, + "rewards/margins": 4.7089619636535645, + "rewards/rejected": -4.35369873046875, + "step": 11370 + }, + { + "epoch": 0.66, + "learning_rate": 2.712599506866601e-08, + "logits/chosen": -1.9302738904953003, + "logits/rejected": -1.923921823501587, + "logps/chosen": -53.38477325439453, + "logps/rejected": -267.228759765625, + "loss": 0.1923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5617973208427429, + "rewards/margins": 2.7918710708618164, + "rewards/rejected": -2.2300736904144287, + "step": 11371 + }, + { + "epoch": 0.66, + "learning_rate": 2.7117615445677765e-08, + "logits/chosen": -2.030184745788574, + "logits/rejected": -2.0324277877807617, + "logps/chosen": -9.808629989624023, + "logps/rejected": -221.68568420410156, + "loss": 0.2149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3555435240268707, + "rewards/margins": 4.219892978668213, + "rewards/rejected": -3.864349365234375, + "step": 11372 + }, + { + "epoch": 0.66, + "learning_rate": 2.7109236635583722e-08, + "logits/chosen": -1.757125735282898, + "logits/rejected": -1.7522797584533691, + "logps/chosen": -43.127403259277344, + "logps/rejected": -170.45506286621094, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7253494262695312, + "rewards/margins": 0.95440673828125, + "rewards/rejected": -0.22905731201171875, + "step": 11373 + }, + { + "epoch": 0.66, + "learning_rate": 2.710085863868159e-08, + "logits/chosen": -1.9633344411849976, + "logits/rejected": -1.9736144542694092, + "logps/chosen": -298.9068298339844, + "logps/rejected": -585.2189331054688, + "loss": 0.0738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5484558343887329, + "rewards/margins": 6.525146484375, + "rewards/rejected": -5.976690769195557, + "step": 11374 + }, + { + "epoch": 0.66, + "learning_rate": 2.709248145526893e-08, + "logits/chosen": -1.8821039199829102, + "logits/rejected": -1.8751094341278076, + "logps/chosen": -60.5325927734375, + "logps/rejected": -150.2394561767578, + "loss": 0.1743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9205177426338196, + "rewards/margins": 3.5673880577087402, + "rewards/rejected": -2.6468703746795654, + "step": 11375 + }, + { + "epoch": 0.66, + "learning_rate": 2.7084105085643422e-08, + "logits/chosen": -1.6898640394210815, + "logits/rejected": -1.6942534446716309, + "logps/chosen": -17.790451049804688, + "logps/rejected": -229.60919189453125, + "loss": 0.1594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8614994287490845, + "rewards/margins": 3.766197681427002, + "rewards/rejected": -2.904698133468628, + "step": 11376 + }, + { + "epoch": 0.66, + "learning_rate": 2.7075729530102577e-08, + "logits/chosen": -2.056527614593506, + "logits/rejected": -2.047800064086914, + "logps/chosen": -18.18532371520996, + "logps/rejected": -141.6704559326172, + "loss": 0.5434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1938314437866211, + "rewards/margins": 0.40345823764801025, + "rewards/rejected": -0.20962677896022797, + "step": 11377 + }, + { + "epoch": 0.66, + "learning_rate": 2.7067354788943948e-08, + "logits/chosen": -2.058830499649048, + "logits/rejected": -2.035426378250122, + "logps/chosen": -53.58158874511719, + "logps/rejected": -272.1915588378906, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18870773911476135, + "rewards/margins": 2.2047111988067627, + "rewards/rejected": -2.016003370285034, + "step": 11378 + }, + { + "epoch": 0.66, + "learning_rate": 2.705898086246505e-08, + "logits/chosen": -1.7212917804718018, + "logits/rejected": -1.716181755065918, + "logps/chosen": -22.544715881347656, + "logps/rejected": -261.7237548828125, + "loss": 0.164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7464979290962219, + "rewards/margins": 4.493160724639893, + "rewards/rejected": -3.7466628551483154, + "step": 11379 + }, + { + "epoch": 0.66, + "learning_rate": 2.7050607750963396e-08, + "logits/chosen": -2.0535271167755127, + "logits/rejected": -2.043246269226074, + "logps/chosen": -127.20099639892578, + "logps/rejected": -401.4732360839844, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07238846272230148, + "rewards/margins": 4.096811771392822, + "rewards/rejected": -4.024423122406006, + "step": 11380 + }, + { + "epoch": 0.66, + "learning_rate": 2.7042235454736394e-08, + "logits/chosen": -1.8336933851242065, + "logits/rejected": -1.8358420133590698, + "logps/chosen": -0.09957506507635117, + "logps/rejected": -31.30121421813965, + "loss": 0.651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021469586063176394, + "rewards/margins": 0.04883837699890137, + "rewards/rejected": -0.05098533630371094, + "step": 11381 + }, + { + "epoch": 0.66, + "learning_rate": 2.7033863974081482e-08, + "logits/chosen": -2.011927366256714, + "logits/rejected": -2.010960817337036, + "logps/chosen": -0.5442089438438416, + "logps/rejected": -58.24433517456055, + "loss": 0.607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027348686009645462, + "rewards/margins": 0.1836150884628296, + "rewards/rejected": -0.15626640617847443, + "step": 11382 + }, + { + "epoch": 0.66, + "learning_rate": 2.702549330929606e-08, + "logits/chosen": -1.7621632814407349, + "logits/rejected": -1.7457590103149414, + "logps/chosen": -212.68197631835938, + "logps/rejected": -572.3076782226562, + "loss": 0.1401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2914870977401733, + "rewards/margins": 3.825819492340088, + "rewards/rejected": -2.534332275390625, + "step": 11383 + }, + { + "epoch": 0.66, + "learning_rate": 2.7017123460677515e-08, + "logits/chosen": -1.8540881872177124, + "logits/rejected": -1.8828022480010986, + "logps/chosen": -193.24708557128906, + "logps/rejected": -343.9998779296875, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7604827880859375, + "rewards/margins": 3.5881927013397217, + "rewards/rejected": -0.827709972858429, + "step": 11384 + }, + { + "epoch": 0.66, + "learning_rate": 2.700875442852315e-08, + "logits/chosen": -1.9765841960906982, + "logits/rejected": -1.984391212463379, + "logps/chosen": -0.16329413652420044, + "logps/rejected": -117.29952239990234, + "loss": 0.5853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008410240523517132, + "rewards/margins": 0.4458797574043274, + "rewards/rejected": -0.4542900025844574, + "step": 11385 + }, + { + "epoch": 0.66, + "learning_rate": 2.7000386213130295e-08, + "logits/chosen": -1.9621707201004028, + "logits/rejected": -1.9349874258041382, + "logps/chosen": -231.99053955078125, + "logps/rejected": -400.5059814453125, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5164246559143066, + "rewards/margins": 3.0052309036254883, + "rewards/rejected": 0.5111938714981079, + "step": 11386 + }, + { + "epoch": 0.66, + "learning_rate": 2.699201881479622e-08, + "logits/chosen": -1.7857133150100708, + "logits/rejected": -1.7772520780563354, + "logps/chosen": -143.85931396484375, + "logps/rejected": -436.56689453125, + "loss": 0.0626, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.18016517162323, + "rewards/margins": 4.7586565017700195, + "rewards/rejected": -3.5784912109375, + "step": 11387 + }, + { + "epoch": 0.66, + "learning_rate": 2.69836522338182e-08, + "logits/chosen": -1.716919183731079, + "logits/rejected": -1.7159373760223389, + "logps/chosen": -122.05540466308594, + "logps/rejected": -245.1248779296875, + "loss": 0.4326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00437164306640625, + "rewards/margins": 0.7437454462051392, + "rewards/rejected": -0.7393738031387329, + "step": 11388 + }, + { + "epoch": 0.66, + "learning_rate": 2.697528647049342e-08, + "logits/chosen": -1.9604763984680176, + "logits/rejected": -1.9422134160995483, + "logps/chosen": -107.85733032226562, + "logps/rejected": -215.7495574951172, + "loss": 0.1387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8775131702423096, + "rewards/margins": 1.6688873767852783, + "rewards/rejected": 0.20862579345703125, + "step": 11389 + }, + { + "epoch": 0.66, + "learning_rate": 2.6966921525119085e-08, + "logits/chosen": -1.8535019159317017, + "logits/rejected": -1.8542770147323608, + "logps/chosen": -0.0008140116115100682, + "logps/rejected": -104.80999755859375, + "loss": 0.3768, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7398637282894924e-05, + "rewards/margins": 2.4331374168395996, + "rewards/rejected": -2.433120012283325, + "step": 11390 + }, + { + "epoch": 0.66, + "learning_rate": 2.695855739799237e-08, + "logits/chosen": -1.7442907094955444, + "logits/rejected": -1.7387728691101074, + "logps/chosen": -25.671316146850586, + "logps/rejected": -178.53099060058594, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47138291597366333, + "rewards/margins": 1.4633979797363281, + "rewards/rejected": -0.9920150637626648, + "step": 11391 + }, + { + "epoch": 0.66, + "learning_rate": 2.6950194089410404e-08, + "logits/chosen": -1.9505456686019897, + "logits/rejected": -1.9832954406738281, + "logps/chosen": -200.29905700683594, + "logps/rejected": -437.9920959472656, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.123600721359253, + "rewards/margins": 5.833357334136963, + "rewards/rejected": -3.70975661277771, + "step": 11392 + }, + { + "epoch": 0.66, + "learning_rate": 2.6941831599670306e-08, + "logits/chosen": -1.8323161602020264, + "logits/rejected": -1.796019434928894, + "logps/chosen": -298.67193603515625, + "logps/rejected": -557.6102905273438, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9384949207305908, + "rewards/margins": 9.217493057250977, + "rewards/rejected": -7.278997898101807, + "step": 11393 + }, + { + "epoch": 0.66, + "learning_rate": 2.6933469929069108e-08, + "logits/chosen": -1.8775995969772339, + "logits/rejected": -1.8951271772384644, + "logps/chosen": -158.72137451171875, + "logps/rejected": -446.4119873046875, + "loss": 0.0254, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7977173328399658, + "rewards/margins": 6.307913780212402, + "rewards/rejected": -4.510196208953857, + "step": 11394 + }, + { + "epoch": 0.66, + "learning_rate": 2.6925109077903923e-08, + "logits/chosen": -1.9366817474365234, + "logits/rejected": -1.9361581802368164, + "logps/chosen": -66.06500244140625, + "logps/rejected": -301.9323425292969, + "loss": 0.0547, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1356399059295654, + "rewards/margins": 3.2580275535583496, + "rewards/rejected": -1.1223877668380737, + "step": 11395 + }, + { + "epoch": 0.66, + "learning_rate": 2.691674904647172e-08, + "logits/chosen": -1.7444416284561157, + "logits/rejected": -1.741010308265686, + "logps/chosen": -174.7003173828125, + "logps/rejected": -329.14599609375, + "loss": 0.059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9673614501953125, + "rewards/margins": 2.6189117431640625, + "rewards/rejected": -0.65155029296875, + "step": 11396 + }, + { + "epoch": 0.66, + "learning_rate": 2.6908389835069523e-08, + "logits/chosen": -1.7941855192184448, + "logits/rejected": -1.7981207370758057, + "logps/chosen": -0.043390315026044846, + "logps/rejected": -125.16419219970703, + "loss": 0.4162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007305686362087727, + "rewards/margins": 1.8505293130874634, + "rewards/rejected": -1.8432235717773438, + "step": 11397 + }, + { + "epoch": 0.66, + "learning_rate": 2.6900031443994226e-08, + "logits/chosen": -2.196779489517212, + "logits/rejected": -2.1941275596618652, + "logps/chosen": -80.06690979003906, + "logps/rejected": -177.36697387695312, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6420341730117798, + "rewards/margins": 1.2143867015838623, + "rewards/rejected": -0.5723525881767273, + "step": 11398 + }, + { + "epoch": 0.66, + "learning_rate": 2.6891673873542854e-08, + "logits/chosen": -1.9740705490112305, + "logits/rejected": -1.997531771659851, + "logps/chosen": -124.39932250976562, + "logps/rejected": -472.81683349609375, + "loss": 0.0347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.231597900390625, + "rewards/margins": 5.85060453414917, + "rewards/rejected": -4.619006633758545, + "step": 11399 + }, + { + "epoch": 0.66, + "learning_rate": 2.6883317124012233e-08, + "logits/chosen": -2.0404465198516846, + "logits/rejected": -2.042880058288574, + "logps/chosen": -4.63335657119751, + "logps/rejected": -84.25548553466797, + "loss": 0.6587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056346893310546875, + "rewards/margins": 0.12249527871608734, + "rewards/rejected": -0.17884217202663422, + "step": 11400 + }, + { + "epoch": 0.66, + "learning_rate": 2.6874961195699268e-08, + "logits/chosen": -1.787719488143921, + "logits/rejected": -1.7965278625488281, + "logps/chosen": -239.36880493164062, + "logps/rejected": -277.96990966796875, + "loss": 0.0924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7570708990097046, + "rewards/margins": 1.8521575927734375, + "rewards/rejected": -0.09508667141199112, + "step": 11401 + }, + { + "epoch": 0.66, + "learning_rate": 2.68666060889008e-08, + "logits/chosen": -1.9021722078323364, + "logits/rejected": -1.9043956995010376, + "logps/chosen": -102.75572967529297, + "logps/rejected": -239.78575134277344, + "loss": 0.4472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3680381774902344, + "rewards/margins": 4.013234615325928, + "rewards/rejected": -4.381272792816162, + "step": 11402 + }, + { + "epoch": 0.66, + "learning_rate": 2.6858251803913655e-08, + "logits/chosen": -1.8339166641235352, + "logits/rejected": -1.7850620746612549, + "logps/chosen": -308.6164855957031, + "logps/rejected": -656.158203125, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6045684814453125, + "rewards/margins": 6.852456569671631, + "rewards/rejected": -5.247888088226318, + "step": 11403 + }, + { + "epoch": 0.66, + "learning_rate": 2.6849898341034595e-08, + "logits/chosen": -2.0336670875549316, + "logits/rejected": -2.0303568840026855, + "logps/chosen": -8.255758285522461, + "logps/rejected": -122.65795135498047, + "loss": 0.6166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08710165321826935, + "rewards/margins": 0.23039236664772034, + "rewards/rejected": -0.143290713429451, + "step": 11404 + }, + { + "epoch": 0.66, + "learning_rate": 2.6841545700560374e-08, + "logits/chosen": -1.8534603118896484, + "logits/rejected": -1.858620524406433, + "logps/chosen": -92.2510986328125, + "logps/rejected": -331.4556579589844, + "loss": 0.1448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9616600275039673, + "rewards/margins": 5.705122947692871, + "rewards/rejected": -4.743463039398193, + "step": 11405 + }, + { + "epoch": 0.66, + "learning_rate": 2.6833193882787735e-08, + "logits/chosen": -1.9759063720703125, + "logits/rejected": -1.9622148275375366, + "logps/chosen": -70.81659698486328, + "logps/rejected": -313.1861877441406, + "loss": 0.0817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5746796131134033, + "rewards/margins": 4.613970756530762, + "rewards/rejected": -3.0392913818359375, + "step": 11406 + }, + { + "epoch": 0.66, + "learning_rate": 2.6824842888013387e-08, + "logits/chosen": -1.665895938873291, + "logits/rejected": -1.6746737957000732, + "logps/chosen": -294.0082092285156, + "logps/rejected": -426.2681884765625, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2705048322677612, + "rewards/margins": 3.8087005615234375, + "rewards/rejected": -2.538195848464966, + "step": 11407 + }, + { + "epoch": 0.66, + "learning_rate": 2.6816492716533967e-08, + "logits/chosen": -1.787125587463379, + "logits/rejected": -1.781359314918518, + "logps/chosen": -148.20135498046875, + "logps/rejected": -224.591796875, + "loss": 1.4758, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.857445478439331, + "rewards/margins": 1.6503093242645264, + "rewards/rejected": -4.507754802703857, + "step": 11408 + }, + { + "epoch": 0.66, + "learning_rate": 2.680814336864613e-08, + "logits/chosen": -1.7311137914657593, + "logits/rejected": -1.7418060302734375, + "logps/chosen": -4.224701881408691, + "logps/rejected": -98.87251281738281, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24107566475868225, + "rewards/margins": 0.8709036111831665, + "rewards/rejected": -0.6298279166221619, + "step": 11409 + }, + { + "epoch": 0.66, + "learning_rate": 2.6799794844646477e-08, + "logits/chosen": -1.9009515047073364, + "logits/rejected": -1.910438895225525, + "logps/chosen": -185.2834930419922, + "logps/rejected": -278.6306457519531, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3172805309295654, + "rewards/margins": 2.3051986694335938, + "rewards/rejected": 0.012081908993422985, + "step": 11410 + }, + { + "epoch": 0.66, + "learning_rate": 2.6791447144831624e-08, + "logits/chosen": -1.9176347255706787, + "logits/rejected": -1.9171239137649536, + "logps/chosen": -149.44381713867188, + "logps/rejected": -366.6493835449219, + "loss": 0.0979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4915374517440796, + "rewards/margins": 4.361065864562988, + "rewards/rejected": -2.869528293609619, + "step": 11411 + }, + { + "epoch": 0.66, + "learning_rate": 2.6783100269498065e-08, + "logits/chosen": -1.9527509212493896, + "logits/rejected": -2.0038247108459473, + "logps/chosen": -246.67587280273438, + "logps/rejected": -502.36065673828125, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8041168451309204, + "rewards/margins": 6.149832248687744, + "rewards/rejected": -4.345715522766113, + "step": 11412 + }, + { + "epoch": 0.66, + "learning_rate": 2.6774754218942334e-08, + "logits/chosen": -1.7199100255966187, + "logits/rejected": -1.7300585508346558, + "logps/chosen": -0.8743261694908142, + "logps/rejected": -81.97798156738281, + "loss": 0.6529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008169591426849365, + "rewards/margins": 0.04906085878610611, + "rewards/rejected": -0.040891267359256744, + "step": 11413 + }, + { + "epoch": 0.66, + "learning_rate": 2.6766408993460983e-08, + "logits/chosen": -1.8031907081604004, + "logits/rejected": -1.821014165878296, + "logps/chosen": -171.5203857421875, + "logps/rejected": -300.9678039550781, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4060486555099487, + "rewards/margins": 0.4451386332511902, + "rewards/rejected": 0.9609100222587585, + "step": 11414 + }, + { + "epoch": 0.66, + "learning_rate": 2.675806459335041e-08, + "logits/chosen": -1.8792318105697632, + "logits/rejected": -1.8607455492019653, + "logps/chosen": -166.3155517578125, + "logps/rejected": -421.55694580078125, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6549255847930908, + "rewards/margins": 4.122912406921387, + "rewards/rejected": -2.467987060546875, + "step": 11415 + }, + { + "epoch": 0.66, + "learning_rate": 2.674972101890709e-08, + "logits/chosen": -1.9181735515594482, + "logits/rejected": -1.9064724445343018, + "logps/chosen": -31.800827026367188, + "logps/rejected": -362.95263671875, + "loss": 0.4006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23697738349437714, + "rewards/margins": 3.173490285873413, + "rewards/rejected": -3.4104676246643066, + "step": 11416 + }, + { + "epoch": 0.66, + "learning_rate": 2.6741378270427374e-08, + "logits/chosen": -1.9619635343551636, + "logits/rejected": -1.9659370183944702, + "logps/chosen": -13.683252334594727, + "logps/rejected": -149.5157470703125, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42884159088134766, + "rewards/margins": 2.3592844009399414, + "rewards/rejected": -1.9304428100585938, + "step": 11417 + }, + { + "epoch": 0.66, + "learning_rate": 2.6733036348207704e-08, + "logits/chosen": -1.904510736465454, + "logits/rejected": -1.8907538652420044, + "logps/chosen": -0.08693400770425797, + "logps/rejected": -260.19146728515625, + "loss": 0.3507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007714306470006704, + "rewards/margins": 5.184925079345703, + "rewards/rejected": -5.192639350891113, + "step": 11418 + }, + { + "epoch": 0.66, + "learning_rate": 2.6724695252544383e-08, + "logits/chosen": -1.6666030883789062, + "logits/rejected": -1.6617212295532227, + "logps/chosen": -55.91865539550781, + "logps/rejected": -202.33575439453125, + "loss": 0.2646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6851574182510376, + "rewards/margins": 2.2075772285461426, + "rewards/rejected": -1.522419810295105, + "step": 11419 + }, + { + "epoch": 0.66, + "learning_rate": 2.671635498373373e-08, + "logits/chosen": -1.9041041135787964, + "logits/rejected": -1.855796217918396, + "logps/chosen": -206.87136840820312, + "logps/rejected": -309.8896484375, + "loss": 0.2163, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5768508911132812, + "rewards/margins": 0.7868484258651733, + "rewards/rejected": 1.790002465248108, + "step": 11420 + }, + { + "epoch": 0.66, + "learning_rate": 2.6708015542072044e-08, + "logits/chosen": -1.7925693988800049, + "logits/rejected": -1.7889156341552734, + "logps/chosen": -198.4861602783203, + "logps/rejected": -417.7855224609375, + "loss": 0.3052, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.092337131500244, + "rewards/margins": 0.21191418170928955, + "rewards/rejected": 1.8804229497909546, + "step": 11421 + }, + { + "epoch": 0.66, + "learning_rate": 2.6699676927855597e-08, + "logits/chosen": -1.7698661088943481, + "logits/rejected": -1.774804949760437, + "logps/chosen": -3.3644838333129883, + "logps/rejected": -115.33319091796875, + "loss": 0.3034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19251075387001038, + "rewards/margins": 1.8046544790267944, + "rewards/rejected": -1.6121437549591064, + "step": 11422 + }, + { + "epoch": 0.66, + "learning_rate": 2.6691339141380588e-08, + "logits/chosen": -1.995057225227356, + "logits/rejected": -2.028794527053833, + "logps/chosen": -248.86448669433594, + "logps/rejected": -314.36676025390625, + "loss": 0.0706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.950312852859497, + "rewards/margins": 2.2278764247894287, + "rewards/rejected": -0.2775634825229645, + "step": 11423 + }, + { + "epoch": 0.66, + "learning_rate": 2.6683002182943214e-08, + "logits/chosen": -1.9635659456253052, + "logits/rejected": -1.9754737615585327, + "logps/chosen": -129.10043334960938, + "logps/rejected": -201.22401428222656, + "loss": 0.5535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013752746395766735, + "rewards/margins": 0.44172823429107666, + "rewards/rejected": -0.42797547578811646, + "step": 11424 + }, + { + "epoch": 0.66, + "learning_rate": 2.6674666052839667e-08, + "logits/chosen": -1.7670599222183228, + "logits/rejected": -1.7582085132598877, + "logps/chosen": -2.6961824893951416, + "logps/rejected": -230.21255493164062, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05940990522503853, + "rewards/margins": 4.155757427215576, + "rewards/rejected": -4.096347332000732, + "step": 11425 + }, + { + "epoch": 0.66, + "learning_rate": 2.66663307513661e-08, + "logits/chosen": -1.7700873613357544, + "logits/rejected": -1.7777701616287231, + "logps/chosen": -202.25381469726562, + "logps/rejected": -321.3684387207031, + "loss": 0.1192, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.204241991043091, + "rewards/margins": 1.851654052734375, + "rewards/rejected": 0.35258790850639343, + "step": 11426 + }, + { + "epoch": 0.66, + "learning_rate": 2.6657996278818578e-08, + "logits/chosen": -1.8811757564544678, + "logits/rejected": -1.8820710182189941, + "logps/chosen": -153.9656219482422, + "logps/rejected": -408.7962646484375, + "loss": 0.3743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5408768057823181, + "rewards/margins": 3.0567734241485596, + "rewards/rejected": -3.5976502895355225, + "step": 11427 + }, + { + "epoch": 0.67, + "learning_rate": 2.6649662635493207e-08, + "logits/chosen": -2.12371826171875, + "logits/rejected": -2.1450159549713135, + "logps/chosen": -219.00460815429688, + "logps/rejected": -226.98046875, + "loss": 0.1979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1593307256698608, + "rewards/margins": 1.8122482299804688, + "rewards/rejected": -0.6529175043106079, + "step": 11428 + }, + { + "epoch": 0.67, + "learning_rate": 2.664132982168604e-08, + "logits/chosen": -1.9781197309494019, + "logits/rejected": -1.9638288021087646, + "logps/chosen": -57.17985534667969, + "logps/rejected": -112.69514465332031, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6550933718681335, + "rewards/margins": 0.7869278192520142, + "rewards/rejected": -0.13183441758155823, + "step": 11429 + }, + { + "epoch": 0.67, + "learning_rate": 2.6632997837693115e-08, + "logits/chosen": -2.157334566116333, + "logits/rejected": -2.153972864151001, + "logps/chosen": -47.02886962890625, + "logps/rejected": -296.5107727050781, + "loss": 0.1919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6743454337120056, + "rewards/margins": 3.9936418533325195, + "rewards/rejected": -3.319296360015869, + "step": 11430 + }, + { + "epoch": 0.67, + "learning_rate": 2.6624666683810394e-08, + "logits/chosen": -1.8407542705535889, + "logits/rejected": -1.8424935340881348, + "logps/chosen": -24.800344467163086, + "logps/rejected": -229.10279846191406, + "loss": 0.4378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27555084228515625, + "rewards/margins": 1.080021619796753, + "rewards/rejected": -0.8044708371162415, + "step": 11431 + }, + { + "epoch": 0.67, + "learning_rate": 2.6616336360333824e-08, + "logits/chosen": -1.7754889726638794, + "logits/rejected": -1.7380400896072388, + "logps/chosen": -244.11965942382812, + "logps/rejected": -531.686279296875, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0928313732147217, + "rewards/margins": 3.9700775146484375, + "rewards/rejected": -1.8772461414337158, + "step": 11432 + }, + { + "epoch": 0.67, + "learning_rate": 2.6608006867559412e-08, + "logits/chosen": -1.5171453952789307, + "logits/rejected": -1.553261399269104, + "logps/chosen": -169.93331909179688, + "logps/rejected": -393.5489196777344, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.044787645339966, + "rewards/margins": 6.471316337585449, + "rewards/rejected": -4.4265289306640625, + "step": 11433 + }, + { + "epoch": 0.67, + "learning_rate": 2.6599678205782998e-08, + "logits/chosen": -1.835955262184143, + "logits/rejected": -1.8358162641525269, + "logps/chosen": -319.5080871582031, + "logps/rejected": -377.98223876953125, + "loss": 0.2727, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5400421619415283, + "rewards/margins": 0.35593557357788086, + "rewards/rejected": 3.1841065883636475, + "step": 11434 + }, + { + "epoch": 0.67, + "learning_rate": 2.6591350375300498e-08, + "logits/chosen": -2.051429510116577, + "logits/rejected": -2.0547635555267334, + "logps/chosen": -187.258056640625, + "logps/rejected": -303.500732421875, + "loss": 0.1049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2320663928985596, + "rewards/margins": 2.817836046218872, + "rewards/rejected": -1.5857696533203125, + "step": 11435 + }, + { + "epoch": 0.67, + "learning_rate": 2.65830233764077e-08, + "logits/chosen": -1.7661762237548828, + "logits/rejected": -1.8006881475448608, + "logps/chosen": -321.3606872558594, + "logps/rejected": -407.5865478515625, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8220123052597046, + "rewards/margins": 4.384350776672363, + "rewards/rejected": -2.562338352203369, + "step": 11436 + }, + { + "epoch": 0.67, + "learning_rate": 2.6574697209400488e-08, + "logits/chosen": -1.9400895833969116, + "logits/rejected": -1.929484248161316, + "logps/chosen": -0.9617798924446106, + "logps/rejected": -173.6894073486328, + "loss": 0.401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026443809270858765, + "rewards/margins": 2.0451159477233887, + "rewards/rejected": -2.018672227859497, + "step": 11437 + }, + { + "epoch": 0.67, + "learning_rate": 2.6566371874574604e-08, + "logits/chosen": -2.0370750427246094, + "logits/rejected": -2.029090642929077, + "logps/chosen": -203.01718139648438, + "logps/rejected": -403.32244873046875, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.18794584274292, + "rewards/margins": 2.937893867492676, + "rewards/rejected": 1.2500518560409546, + "step": 11438 + }, + { + "epoch": 0.67, + "learning_rate": 2.6558047372225817e-08, + "logits/chosen": -1.7130087614059448, + "logits/rejected": -1.7172685861587524, + "logps/chosen": -0.06195354834198952, + "logps/rejected": -102.77525329589844, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006925456109456718, + "rewards/margins": 0.07516957819461823, + "rewards/rejected": -0.07586212456226349, + "step": 11439 + }, + { + "epoch": 0.67, + "learning_rate": 2.6549723702649852e-08, + "logits/chosen": -1.8752864599227905, + "logits/rejected": -1.8636118173599243, + "logps/chosen": -19.115158081054688, + "logps/rejected": -224.27903747558594, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5600166320800781, + "rewards/margins": 1.1172691583633423, + "rewards/rejected": -0.5572525262832642, + "step": 11440 + }, + { + "epoch": 0.67, + "learning_rate": 2.654140086614243e-08, + "logits/chosen": -2.0104355812072754, + "logits/rejected": -2.0144546031951904, + "logps/chosen": -10.259113311767578, + "logps/rejected": -166.6732177734375, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035523321479558945, + "rewards/margins": 3.6729822158813477, + "rewards/rejected": -3.6374588012695312, + "step": 11441 + }, + { + "epoch": 0.67, + "learning_rate": 2.6533078862999176e-08, + "logits/chosen": -1.9231265783309937, + "logits/rejected": -1.897118091583252, + "logps/chosen": -287.7275695800781, + "logps/rejected": -542.093505859375, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.916595458984375, + "rewards/margins": 7.046298503875732, + "rewards/rejected": -4.129703044891357, + "step": 11442 + }, + { + "epoch": 0.67, + "learning_rate": 2.6524757693515752e-08, + "logits/chosen": -1.8349957466125488, + "logits/rejected": -1.8194659948349, + "logps/chosen": -277.2947998046875, + "logps/rejected": -463.46563720703125, + "loss": 0.066, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.762594699859619, + "rewards/margins": 2.5158815383911133, + "rewards/rejected": 0.24671326577663422, + "step": 11443 + }, + { + "epoch": 0.67, + "learning_rate": 2.6516437357987766e-08, + "logits/chosen": -1.7799077033996582, + "logits/rejected": -1.7790241241455078, + "logps/chosen": -192.63180541992188, + "logps/rejected": -342.1902160644531, + "loss": 0.0769, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.028886556625366, + "rewards/margins": 2.1062028408050537, + "rewards/rejected": -0.0773162841796875, + "step": 11444 + }, + { + "epoch": 0.67, + "learning_rate": 2.650811785671081e-08, + "logits/chosen": -1.7233837842941284, + "logits/rejected": -1.718912124633789, + "logps/chosen": -24.401159286499023, + "logps/rejected": -139.9893341064453, + "loss": 0.2548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6874979138374329, + "rewards/margins": 2.176041603088379, + "rewards/rejected": -1.4885437488555908, + "step": 11445 + }, + { + "epoch": 0.67, + "learning_rate": 2.64997991899804e-08, + "logits/chosen": -1.7652504444122314, + "logits/rejected": -1.7372666597366333, + "logps/chosen": -238.11221313476562, + "logps/rejected": -503.8538513183594, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0435242652893066, + "rewards/margins": 8.027877807617188, + "rewards/rejected": -4.984353542327881, + "step": 11446 + }, + { + "epoch": 0.67, + "learning_rate": 2.6491481358092082e-08, + "logits/chosen": -2.07130765914917, + "logits/rejected": -2.0775601863861084, + "logps/chosen": -12.321849822998047, + "logps/rejected": -70.60748291015625, + "loss": 0.4983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23389282822608948, + "rewards/margins": 0.97844398021698, + "rewards/rejected": -1.212336778640747, + "step": 11447 + }, + { + "epoch": 0.67, + "learning_rate": 2.6483164361341333e-08, + "logits/chosen": -2.082183361053467, + "logits/rejected": -2.0630154609680176, + "logps/chosen": -15.253750801086426, + "logps/rejected": -171.90480041503906, + "loss": 0.1911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8962557911872864, + "rewards/margins": 2.646406888961792, + "rewards/rejected": -1.7501510381698608, + "step": 11448 + }, + { + "epoch": 0.67, + "learning_rate": 2.6474848200023636e-08, + "logits/chosen": -1.769912600517273, + "logits/rejected": -1.7160016298294067, + "logps/chosen": -251.98333740234375, + "logps/rejected": -500.5824279785156, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.171673536300659, + "rewards/margins": 7.873342514038086, + "rewards/rejected": -5.701669216156006, + "step": 11449 + }, + { + "epoch": 0.67, + "learning_rate": 2.6466532874434388e-08, + "logits/chosen": -1.9933775663375854, + "logits/rejected": -1.988695740699768, + "logps/chosen": -206.0242919921875, + "logps/rejected": -270.3343200683594, + "loss": 0.2474, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.565466284751892, + "rewards/margins": 0.5769866704940796, + "rewards/rejected": 0.9884796142578125, + "step": 11450 + }, + { + "epoch": 0.67, + "learning_rate": 2.6458218384869008e-08, + "logits/chosen": -1.8325722217559814, + "logits/rejected": -1.8461107015609741, + "logps/chosen": -217.1494598388672, + "logps/rejected": -299.8609313964844, + "loss": 0.0939, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1405410766601562, + "rewards/margins": 1.8427292108535767, + "rewards/rejected": 0.297811895608902, + "step": 11451 + }, + { + "epoch": 0.67, + "learning_rate": 2.6449904731622863e-08, + "logits/chosen": -2.0085413455963135, + "logits/rejected": -2.0030667781829834, + "logps/chosen": -2.2822532653808594, + "logps/rejected": -177.16250610351562, + "loss": 0.3942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.058145906776189804, + "rewards/margins": 1.9190635681152344, + "rewards/rejected": -1.977209448814392, + "step": 11452 + }, + { + "epoch": 0.67, + "learning_rate": 2.6441591914991302e-08, + "logits/chosen": -1.6104315519332886, + "logits/rejected": -1.6498026847839355, + "logps/chosen": -243.2327423095703, + "logps/rejected": -385.54730224609375, + "loss": 0.0476, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.073042392730713, + "rewards/margins": 2.567042589187622, + "rewards/rejected": -0.49400025606155396, + "step": 11453 + }, + { + "epoch": 0.67, + "learning_rate": 2.6433279935269652e-08, + "logits/chosen": -1.7438031435012817, + "logits/rejected": -1.7784664630889893, + "logps/chosen": -281.570556640625, + "logps/rejected": -498.3099365234375, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.928631603717804, + "rewards/margins": 6.236780166625977, + "rewards/rejected": -5.308148384094238, + "step": 11454 + }, + { + "epoch": 0.67, + "learning_rate": 2.6424968792753137e-08, + "logits/chosen": -1.8003108501434326, + "logits/rejected": -1.797057867050171, + "logps/chosen": -155.77792358398438, + "logps/rejected": -338.01214599609375, + "loss": 0.1009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7817566394805908, + "rewards/margins": 2.8429017066955566, + "rewards/rejected": -1.0611450672149658, + "step": 11455 + }, + { + "epoch": 0.67, + "learning_rate": 2.6416658487737087e-08, + "logits/chosen": -1.964261770248413, + "logits/rejected": -1.9374656677246094, + "logps/chosen": -177.40103149414062, + "logps/rejected": -388.1209716796875, + "loss": 0.0727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2989227771759033, + "rewards/margins": 2.553353786468506, + "rewards/rejected": -1.254431128501892, + "step": 11456 + }, + { + "epoch": 0.67, + "learning_rate": 2.6408349020516663e-08, + "logits/chosen": -1.9358372688293457, + "logits/rejected": -1.9403069019317627, + "logps/chosen": -70.71925354003906, + "logps/rejected": -231.1186981201172, + "loss": 0.2444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6193069815635681, + "rewards/margins": 1.8866729736328125, + "rewards/rejected": -1.2673660516738892, + "step": 11457 + }, + { + "epoch": 0.67, + "learning_rate": 2.640004039138711e-08, + "logits/chosen": -1.8202998638153076, + "logits/rejected": -1.8508491516113281, + "logps/chosen": -236.6770477294922, + "logps/rejected": -514.0687255859375, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2393875122070312, + "rewards/margins": 5.2437639236450195, + "rewards/rejected": -3.004376173019409, + "step": 11458 + }, + { + "epoch": 0.67, + "learning_rate": 2.6391732600643513e-08, + "logits/chosen": -1.9723650217056274, + "logits/rejected": -1.9533889293670654, + "logps/chosen": -87.95918273925781, + "logps/rejected": -246.86209106445312, + "loss": 0.1814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.758587658405304, + "rewards/margins": 4.429757595062256, + "rewards/rejected": -3.6711699962615967, + "step": 11459 + }, + { + "epoch": 0.67, + "learning_rate": 2.6383425648581097e-08, + "logits/chosen": -2.0476090908050537, + "logits/rejected": -2.048840045928955, + "logps/chosen": -9.433420181274414, + "logps/rejected": -45.74864196777344, + "loss": 0.6419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00815582275390625, + "rewards/margins": 0.20800895988941193, + "rewards/rejected": -0.21616478264331818, + "step": 11460 + }, + { + "epoch": 0.67, + "learning_rate": 2.6375119535494905e-08, + "logits/chosen": -1.763533353805542, + "logits/rejected": -1.7642139196395874, + "logps/chosen": -0.6821771860122681, + "logps/rejected": -318.96649169921875, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007459103944711387, + "rewards/margins": 6.332747936248779, + "rewards/rejected": -6.33200216293335, + "step": 11461 + }, + { + "epoch": 0.67, + "learning_rate": 2.6366814261680027e-08, + "logits/chosen": -1.8825342655181885, + "logits/rejected": -1.9283480644226074, + "logps/chosen": -224.70372009277344, + "logps/rejected": -521.9088745117188, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9882431030273438, + "rewards/margins": 8.731000900268555, + "rewards/rejected": -6.742758274078369, + "step": 11462 + }, + { + "epoch": 0.67, + "learning_rate": 2.6358509827431507e-08, + "logits/chosen": -1.870463252067566, + "logits/rejected": -1.8146167993545532, + "logps/chosen": -195.18331909179688, + "logps/rejected": -428.4423828125, + "loss": 0.1498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9720428586006165, + "rewards/margins": 3.7073090076446533, + "rewards/rejected": -2.7352662086486816, + "step": 11463 + }, + { + "epoch": 0.67, + "learning_rate": 2.6350206233044387e-08, + "logits/chosen": -1.8429718017578125, + "logits/rejected": -1.8240588903427124, + "logps/chosen": -255.8555450439453, + "logps/rejected": -423.4636535644531, + "loss": 0.1133, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.017106771469116, + "rewards/margins": 1.6881394386291504, + "rewards/rejected": 0.32896730303764343, + "step": 11464 + }, + { + "epoch": 0.67, + "learning_rate": 2.6341903478813598e-08, + "logits/chosen": -2.050940752029419, + "logits/rejected": -2.043935775756836, + "logps/chosen": -31.763134002685547, + "logps/rejected": -131.2296142578125, + "loss": 0.523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.458047479391098, + "rewards/margins": 1.039668321609497, + "rewards/rejected": -1.4977158308029175, + "step": 11465 + }, + { + "epoch": 0.67, + "learning_rate": 2.633360156503413e-08, + "logits/chosen": -1.9234538078308105, + "logits/rejected": -2.003101348876953, + "logps/chosen": -270.33087158203125, + "logps/rejected": -326.5196228027344, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9604828357696533, + "rewards/margins": 2.425311326980591, + "rewards/rejected": 0.5351715087890625, + "step": 11466 + }, + { + "epoch": 0.67, + "learning_rate": 2.632530049200089e-08, + "logits/chosen": -1.87771737575531, + "logits/rejected": -1.8812569379806519, + "logps/chosen": -0.01850024238228798, + "logps/rejected": -182.85333251953125, + "loss": 0.3345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00178597925696522, + "rewards/margins": 3.843682289123535, + "rewards/rejected": -3.845468282699585, + "step": 11467 + }, + { + "epoch": 0.67, + "learning_rate": 2.631700026000881e-08, + "logits/chosen": -1.871907114982605, + "logits/rejected": -1.8453541994094849, + "logps/chosen": -209.35438537597656, + "logps/rejected": -418.5207824707031, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.136552572250366, + "rewards/margins": 3.5136735439300537, + "rewards/rejected": -1.3771209716796875, + "step": 11468 + }, + { + "epoch": 0.67, + "learning_rate": 2.6308700869352707e-08, + "logits/chosen": -1.8744986057281494, + "logits/rejected": -1.850291132926941, + "logps/chosen": -159.54820251464844, + "logps/rejected": -287.6472473144531, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.543718099594116, + "rewards/margins": 2.136531114578247, + "rewards/rejected": 0.407186895608902, + "step": 11469 + }, + { + "epoch": 0.67, + "learning_rate": 2.630040232032743e-08, + "logits/chosen": -1.9786674976348877, + "logits/rejected": -1.9542464017868042, + "logps/chosen": -124.33685302734375, + "logps/rejected": -411.6548767089844, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.890489339828491, + "rewards/margins": 6.027748107910156, + "rewards/rejected": -3.137259006500244, + "step": 11470 + }, + { + "epoch": 0.67, + "learning_rate": 2.6292104613227794e-08, + "logits/chosen": -1.9539817571640015, + "logits/rejected": -1.9558098316192627, + "logps/chosen": -21.880531311035156, + "logps/rejected": -88.6004409790039, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04523048549890518, + "rewards/margins": 1.4742953777313232, + "rewards/rejected": -1.5195258855819702, + "step": 11471 + }, + { + "epoch": 0.67, + "learning_rate": 2.6283807748348587e-08, + "logits/chosen": -2.0219221115112305, + "logits/rejected": -2.027282238006592, + "logps/chosen": -8.380454063415527, + "logps/rejected": -185.0364227294922, + "loss": 0.2866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4124262034893036, + "rewards/margins": 2.1819589138031006, + "rewards/rejected": -1.7695327997207642, + "step": 11472 + }, + { + "epoch": 0.67, + "learning_rate": 2.6275511725984517e-08, + "logits/chosen": -1.8865529298782349, + "logits/rejected": -1.8209238052368164, + "logps/chosen": -216.70428466796875, + "logps/rejected": -318.22491455078125, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.596948266029358, + "rewards/margins": 2.430288791656494, + "rewards/rejected": -0.8333404660224915, + "step": 11473 + }, + { + "epoch": 0.67, + "learning_rate": 2.6267216546430304e-08, + "logits/chosen": -1.9566359519958496, + "logits/rejected": -1.961349368095398, + "logps/chosen": -235.98492431640625, + "logps/rejected": -409.6474914550781, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.728540062904358, + "rewards/margins": 4.5539398193359375, + "rewards/rejected": -2.825399875640869, + "step": 11474 + }, + { + "epoch": 0.67, + "learning_rate": 2.6258922209980684e-08, + "logits/chosen": -1.994204044342041, + "logits/rejected": -1.9662307500839233, + "logps/chosen": -198.205810546875, + "logps/rejected": -550.1987915039062, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.603703498840332, + "rewards/margins": 4.389244079589844, + "rewards/rejected": 0.21445922553539276, + "step": 11475 + }, + { + "epoch": 0.67, + "learning_rate": 2.6250628716930257e-08, + "logits/chosen": -2.1079509258270264, + "logits/rejected": -2.0909242630004883, + "logps/chosen": -7.736531551927328e-05, + "logps/rejected": -169.91180419921875, + "loss": 0.3544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5853773902563262e-06, + "rewards/margins": 3.571272373199463, + "rewards/rejected": -3.571270704269409, + "step": 11476 + }, + { + "epoch": 0.67, + "learning_rate": 2.624233606757369e-08, + "logits/chosen": -1.77915358543396, + "logits/rejected": -1.772605299949646, + "logps/chosen": -177.3242645263672, + "logps/rejected": -165.11439514160156, + "loss": 0.3082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5309616327285767, + "rewards/margins": 0.608837902545929, + "rewards/rejected": 0.9221237301826477, + "step": 11477 + }, + { + "epoch": 0.67, + "learning_rate": 2.6234044262205523e-08, + "logits/chosen": -1.8836534023284912, + "logits/rejected": -1.8571503162384033, + "logps/chosen": -247.6602783203125, + "logps/rejected": -557.152587890625, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5199646949768066, + "rewards/margins": 5.705975532531738, + "rewards/rejected": -3.1860108375549316, + "step": 11478 + }, + { + "epoch": 0.67, + "learning_rate": 2.622575330112039e-08, + "logits/chosen": -1.8844763040542603, + "logits/rejected": -1.8799951076507568, + "logps/chosen": -0.0005421858513727784, + "logps/rejected": -234.7386932373047, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7422581954160705e-05, + "rewards/margins": 2.827298641204834, + "rewards/rejected": -2.827336072921753, + "step": 11479 + }, + { + "epoch": 0.67, + "learning_rate": 2.621746318461277e-08, + "logits/chosen": -1.9646815061569214, + "logits/rejected": -1.9583020210266113, + "logps/chosen": -13.603302001953125, + "logps/rejected": -145.27252197265625, + "loss": 0.2073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.620213508605957, + "rewards/margins": 1.9542025327682495, + "rewards/rejected": -1.3339890241622925, + "step": 11480 + }, + { + "epoch": 0.67, + "learning_rate": 2.6209173912977194e-08, + "logits/chosen": -1.8450652360916138, + "logits/rejected": -1.799196720123291, + "logps/chosen": -180.84786987304688, + "logps/rejected": -351.676025390625, + "loss": 0.0725, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5873048305511475, + "rewards/margins": 2.2450106143951416, + "rewards/rejected": 0.342294305562973, + "step": 11481 + }, + { + "epoch": 0.67, + "learning_rate": 2.6200885486508134e-08, + "logits/chosen": -1.9713029861450195, + "logits/rejected": -1.9700592756271362, + "logps/chosen": -38.09038162231445, + "logps/rejected": -273.79852294921875, + "loss": 0.2301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.527199923992157, + "rewards/margins": 3.4808361530303955, + "rewards/rejected": -2.9536361694335938, + "step": 11482 + }, + { + "epoch": 0.67, + "learning_rate": 2.6192597905500057e-08, + "logits/chosen": -2.0944228172302246, + "logits/rejected": -2.095259189605713, + "logps/chosen": -9.686118125915527, + "logps/rejected": -114.05681610107422, + "loss": 0.4919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2651699185371399, + "rewards/margins": 0.6620273590087891, + "rewards/rejected": -0.39685747027397156, + "step": 11483 + }, + { + "epoch": 0.67, + "learning_rate": 2.618431117024733e-08, + "logits/chosen": -1.8615268468856812, + "logits/rejected": -1.8559937477111816, + "logps/chosen": -8.25428581237793, + "logps/rejected": -265.7441101074219, + "loss": 0.4992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004072666168212891, + "rewards/margins": 0.8994736671447754, + "rewards/rejected": -0.8954010009765625, + "step": 11484 + }, + { + "epoch": 0.67, + "learning_rate": 2.6176025281044374e-08, + "logits/chosen": -1.7905133962631226, + "logits/rejected": -1.7771506309509277, + "logps/chosen": -201.16427612304688, + "logps/rejected": -345.980224609375, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8575652837753296, + "rewards/margins": 3.6663970947265625, + "rewards/rejected": -1.808831810951233, + "step": 11485 + }, + { + "epoch": 0.67, + "learning_rate": 2.6167740238185532e-08, + "logits/chosen": -1.7918850183486938, + "logits/rejected": -1.7566511631011963, + "logps/chosen": -92.99089050292969, + "logps/rejected": -421.9984130859375, + "loss": 0.3787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19276733696460724, + "rewards/margins": 8.459171295166016, + "rewards/rejected": -8.651938438415527, + "step": 11486 + }, + { + "epoch": 0.67, + "learning_rate": 2.6159456041965155e-08, + "logits/chosen": -2.0054101943969727, + "logits/rejected": -1.9978901147842407, + "logps/chosen": -27.22966957092285, + "logps/rejected": -260.0827331542969, + "loss": 0.2538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3959888517856598, + "rewards/margins": 4.7831854820251465, + "rewards/rejected": -4.3871965408325195, + "step": 11487 + }, + { + "epoch": 0.67, + "learning_rate": 2.6151172692677494e-08, + "logits/chosen": -1.8551150560379028, + "logits/rejected": -1.8540374040603638, + "logps/chosen": -55.925758361816406, + "logps/rejected": -298.06964111328125, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9068870544433594, + "rewards/margins": 2.78432559967041, + "rewards/rejected": -1.8774384260177612, + "step": 11488 + }, + { + "epoch": 0.67, + "learning_rate": 2.6142890190616833e-08, + "logits/chosen": -1.6893954277038574, + "logits/rejected": -1.68400239944458, + "logps/chosen": -52.183876037597656, + "logps/rejected": -214.733154296875, + "loss": 0.4419, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6377609968185425, + "rewards/margins": -0.09401702880859375, + "rewards/rejected": 1.7317780256271362, + "step": 11489 + }, + { + "epoch": 0.67, + "learning_rate": 2.6134608536077414e-08, + "logits/chosen": -1.7379014492034912, + "logits/rejected": -1.731157660484314, + "logps/chosen": -143.81207275390625, + "logps/rejected": -291.40576171875, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.181799292564392, + "rewards/margins": 4.180383205413818, + "rewards/rejected": -2.998584032058716, + "step": 11490 + }, + { + "epoch": 0.67, + "learning_rate": 2.612632772935346e-08, + "logits/chosen": -1.8278051614761353, + "logits/rejected": -1.8529146909713745, + "logps/chosen": -230.37332153320312, + "logps/rejected": -476.7864990234375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9655730724334717, + "rewards/margins": 4.653314113616943, + "rewards/rejected": -1.6877411603927612, + "step": 11491 + }, + { + "epoch": 0.67, + "learning_rate": 2.61180477707391e-08, + "logits/chosen": -1.9070773124694824, + "logits/rejected": -1.9053139686584473, + "logps/chosen": -37.646854400634766, + "logps/rejected": -101.17756652832031, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6403889060020447, + "rewards/margins": 0.8225910663604736, + "rewards/rejected": -0.18220214545726776, + "step": 11492 + }, + { + "epoch": 0.67, + "learning_rate": 2.6109768660528487e-08, + "logits/chosen": -2.0638129711151123, + "logits/rejected": -2.0479846000671387, + "logps/chosen": -63.37621307373047, + "logps/rejected": -226.30934143066406, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.396283745765686, + "rewards/margins": 0.1085350513458252, + "rewards/rejected": 1.2877486944198608, + "step": 11493 + }, + { + "epoch": 0.67, + "learning_rate": 2.6101490399015778e-08, + "logits/chosen": -1.7574340105056763, + "logits/rejected": -1.7492204904556274, + "logps/chosen": -321.440185546875, + "logps/rejected": -469.84759521484375, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7847992181777954, + "rewards/margins": 7.363241672515869, + "rewards/rejected": -5.578442573547363, + "step": 11494 + }, + { + "epoch": 0.67, + "learning_rate": 2.6093212986495028e-08, + "logits/chosen": -1.8788362741470337, + "logits/rejected": -1.8829113245010376, + "logps/chosen": -84.4119644165039, + "logps/rejected": -152.92372131347656, + "loss": 0.2373, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0320366621017456, + "rewards/margins": 1.124813199043274, + "rewards/rejected": -0.09277649223804474, + "step": 11495 + }, + { + "epoch": 0.67, + "learning_rate": 2.60849364232603e-08, + "logits/chosen": -2.0072755813598633, + "logits/rejected": -2.0041561126708984, + "logps/chosen": -30.360374450683594, + "logps/rejected": -201.59222412109375, + "loss": 0.2137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.538639485836029, + "rewards/margins": 3.927398443222046, + "rewards/rejected": -3.388758897781372, + "step": 11496 + }, + { + "epoch": 0.67, + "learning_rate": 2.607666070960558e-08, + "logits/chosen": -1.8335931301116943, + "logits/rejected": -1.843559741973877, + "logps/chosen": -30.23609733581543, + "logps/rejected": -330.3008117675781, + "loss": 0.17, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7986505627632141, + "rewards/margins": 4.389370441436768, + "rewards/rejected": -3.590719699859619, + "step": 11497 + }, + { + "epoch": 0.67, + "learning_rate": 2.6068385845824932e-08, + "logits/chosen": -1.8034085035324097, + "logits/rejected": -1.7844419479370117, + "logps/chosen": -152.5452880859375, + "logps/rejected": -277.90179443359375, + "loss": 0.1271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.705867052078247, + "rewards/margins": 2.0536484718322754, + "rewards/rejected": -0.34778138995170593, + "step": 11498 + }, + { + "epoch": 0.67, + "learning_rate": 2.6060111832212255e-08, + "logits/chosen": -1.9267300367355347, + "logits/rejected": -1.9155642986297607, + "logps/chosen": -18.746858596801758, + "logps/rejected": -171.59722900390625, + "loss": 0.269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23983649909496307, + "rewards/margins": 4.973222255706787, + "rewards/rejected": -4.7333855628967285, + "step": 11499 + }, + { + "epoch": 0.67, + "learning_rate": 2.6051838669061533e-08, + "logits/chosen": -2.1311533451080322, + "logits/rejected": -2.1298532485961914, + "logps/chosen": -51.04671859741211, + "logps/rejected": -273.16607666015625, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5369182825088501, + "rewards/margins": 1.4613628387451172, + "rewards/rejected": -0.9244446158409119, + "step": 11500 + }, + { + "epoch": 0.67, + "learning_rate": 2.60435663566666e-08, + "logits/chosen": -2.0705153942108154, + "logits/rejected": -2.0703964233398438, + "logps/chosen": -43.18609619140625, + "logps/rejected": -188.44598388671875, + "loss": 0.3249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10046615451574326, + "rewards/margins": 2.261188507080078, + "rewards/rejected": -2.361654758453369, + "step": 11501 + }, + { + "epoch": 0.67, + "learning_rate": 2.6035294895321424e-08, + "logits/chosen": -1.9371042251586914, + "logits/rejected": -1.9296379089355469, + "logps/chosen": -313.7857360839844, + "logps/rejected": -410.5845947265625, + "loss": 0.0811, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.742257833480835, + "rewards/margins": 2.1119446754455566, + "rewards/rejected": 0.6303130984306335, + "step": 11502 + }, + { + "epoch": 0.67, + "learning_rate": 2.6027024285319772e-08, + "logits/chosen": -1.7346961498260498, + "logits/rejected": -1.7213870286941528, + "logps/chosen": -156.16909790039062, + "logps/rejected": -281.3195495605469, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.747273325920105, + "rewards/margins": 3.895451545715332, + "rewards/rejected": -2.1481781005859375, + "step": 11503 + }, + { + "epoch": 0.67, + "learning_rate": 2.6018754526955476e-08, + "logits/chosen": -1.9271477460861206, + "logits/rejected": -1.9166419506072998, + "logps/chosen": -2.0987086296081543, + "logps/rejected": -140.9959716796875, + "loss": 0.6427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01195993460714817, + "rewards/margins": 0.12275076657533646, + "rewards/rejected": -0.13471069931983948, + "step": 11504 + }, + { + "epoch": 0.67, + "learning_rate": 2.6010485620522333e-08, + "logits/chosen": -1.8711854219436646, + "logits/rejected": -1.8679522275924683, + "logps/chosen": -13.046040534973145, + "logps/rejected": -151.5368194580078, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05599689483642578, + "rewards/margins": 2.660085916519165, + "rewards/rejected": -2.716082811355591, + "step": 11505 + }, + { + "epoch": 0.67, + "learning_rate": 2.60022175663141e-08, + "logits/chosen": -1.9856376647949219, + "logits/rejected": -1.9836217164993286, + "logps/chosen": -33.858612060546875, + "logps/rejected": -151.99867248535156, + "loss": 0.483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2733692228794098, + "rewards/margins": 0.6500949859619141, + "rewards/rejected": -0.3767257630825043, + "step": 11506 + }, + { + "epoch": 0.67, + "learning_rate": 2.5993950364624468e-08, + "logits/chosen": -1.6903443336486816, + "logits/rejected": -1.6757146120071411, + "logps/chosen": -90.5408935546875, + "logps/rejected": -325.1561279296875, + "loss": 0.1162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.119820475578308, + "rewards/margins": 4.075502872467041, + "rewards/rejected": -2.9556825160980225, + "step": 11507 + }, + { + "epoch": 0.67, + "learning_rate": 2.5985684015747144e-08, + "logits/chosen": -1.841361165046692, + "logits/rejected": -1.8409284353256226, + "logps/chosen": -28.81890106201172, + "logps/rejected": -147.32862854003906, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42378807067871094, + "rewards/margins": 1.2155742645263672, + "rewards/rejected": -0.7917861938476562, + "step": 11508 + }, + { + "epoch": 0.67, + "learning_rate": 2.597741851997579e-08, + "logits/chosen": -1.795274019241333, + "logits/rejected": -1.711314082145691, + "logps/chosen": -240.0498504638672, + "logps/rejected": -541.2984008789062, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6933579444885254, + "rewards/margins": 3.873948812484741, + "rewards/rejected": -1.1805908679962158, + "step": 11509 + }, + { + "epoch": 0.67, + "learning_rate": 2.5969153877604062e-08, + "logits/chosen": -1.7180604934692383, + "logits/rejected": -1.7944483757019043, + "logps/chosen": -140.9263458251953, + "logps/rejected": -298.93157958984375, + "loss": 0.1233, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.053981065750122, + "rewards/margins": 1.8630447387695312, + "rewards/rejected": -0.809063732624054, + "step": 11510 + }, + { + "epoch": 0.67, + "learning_rate": 2.5960890088925514e-08, + "logits/chosen": -1.9267208576202393, + "logits/rejected": -1.9307115077972412, + "logps/chosen": -20.829853057861328, + "logps/rejected": -104.5545654296875, + "loss": 0.62, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07660255581140518, + "rewards/margins": 0.4346340596675873, + "rewards/rejected": -0.5112366080284119, + "step": 11511 + }, + { + "epoch": 0.67, + "learning_rate": 2.595262715423373e-08, + "logits/chosen": -1.9206688404083252, + "logits/rejected": -1.9123320579528809, + "logps/chosen": -8.535188680980355e-05, + "logps/rejected": -95.62312316894531, + "loss": 0.6542, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0979089185857447e-06, + "rewards/margins": 0.13618221879005432, + "rewards/rejected": -0.136180117726326, + "step": 11512 + }, + { + "epoch": 0.67, + "learning_rate": 2.5944365073822268e-08, + "logits/chosen": -1.8141183853149414, + "logits/rejected": -1.7932591438293457, + "logps/chosen": -172.5684356689453, + "logps/rejected": -400.72540283203125, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9701706171035767, + "rewards/margins": 3.5756211280822754, + "rewards/rejected": -1.6054505109786987, + "step": 11513 + }, + { + "epoch": 0.67, + "learning_rate": 2.5936103847984626e-08, + "logits/chosen": -1.9914685487747192, + "logits/rejected": -1.9898474216461182, + "logps/chosen": -5.394063949584961, + "logps/rejected": -113.33381652832031, + "loss": 0.5822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17670126259326935, + "rewards/margins": 0.23765422403812408, + "rewards/rejected": -0.41435548663139343, + "step": 11514 + }, + { + "epoch": 0.67, + "learning_rate": 2.59278434770143e-08, + "logits/chosen": -1.958235502243042, + "logits/rejected": -1.9602856636047363, + "logps/chosen": -0.20707271993160248, + "logps/rejected": -168.2977752685547, + "loss": 0.3293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015283719403669238, + "rewards/margins": 4.613289833068848, + "rewards/rejected": -4.614818096160889, + "step": 11515 + }, + { + "epoch": 0.67, + "learning_rate": 2.5919583961204683e-08, + "logits/chosen": -2.024653911590576, + "logits/rejected": -2.0239429473876953, + "logps/chosen": -245.41729736328125, + "logps/rejected": -460.8577880859375, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2977356910705566, + "rewards/margins": 4.024444580078125, + "rewards/rejected": -1.726709008216858, + "step": 11516 + }, + { + "epoch": 0.67, + "learning_rate": 2.5911325300849275e-08, + "logits/chosen": -1.814548134803772, + "logits/rejected": -1.8101420402526855, + "logps/chosen": -133.26295471191406, + "logps/rejected": -206.92453002929688, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1067978143692017, + "rewards/margins": 0.8012878894805908, + "rewards/rejected": 0.3055099546909332, + "step": 11517 + }, + { + "epoch": 0.67, + "learning_rate": 2.5903067496241404e-08, + "logits/chosen": -2.136732816696167, + "logits/rejected": -2.136345863342285, + "logps/chosen": -31.086284637451172, + "logps/rejected": -271.5906982421875, + "loss": 0.1974, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8462325930595398, + "rewards/margins": 3.2266387939453125, + "rewards/rejected": -2.380406141281128, + "step": 11518 + }, + { + "epoch": 0.67, + "learning_rate": 2.5894810547674467e-08, + "logits/chosen": -1.7298825979232788, + "logits/rejected": -1.709310531616211, + "logps/chosen": -171.62367248535156, + "logps/rejected": -275.7586669921875, + "loss": 0.1448, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9887192249298096, + "rewards/margins": 1.5481553077697754, + "rewards/rejected": 0.44056397676467896, + "step": 11519 + }, + { + "epoch": 0.67, + "learning_rate": 2.5886554455441733e-08, + "logits/chosen": -2.0688905715942383, + "logits/rejected": -2.05902099609375, + "logps/chosen": -0.6916434168815613, + "logps/rejected": -367.7685546875, + "loss": 0.3429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.039895348250865936, + "rewards/margins": 7.531628608703613, + "rewards/rejected": -7.571524143218994, + "step": 11520 + }, + { + "epoch": 0.67, + "learning_rate": 2.587829921983657e-08, + "logits/chosen": -2.076702833175659, + "logits/rejected": -2.069552183151245, + "logps/chosen": -184.7315673828125, + "logps/rejected": -448.585205078125, + "loss": 0.0997, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1522400379180908, + "rewards/margins": 2.3180785179138184, + "rewards/rejected": -1.165838599205017, + "step": 11521 + }, + { + "epoch": 0.67, + "learning_rate": 2.5870044841152193e-08, + "logits/chosen": -1.8971649408340454, + "logits/rejected": -1.904726266860962, + "logps/chosen": -170.33758544921875, + "logps/rejected": -260.965087890625, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8768768310546875, + "rewards/margins": 2.2898712158203125, + "rewards/rejected": -0.412994384765625, + "step": 11522 + }, + { + "epoch": 0.67, + "learning_rate": 2.5861791319681858e-08, + "logits/chosen": -1.7919949293136597, + "logits/rejected": -1.8487355709075928, + "logps/chosen": -212.131591796875, + "logps/rejected": -376.64984130859375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2172653675079346, + "rewards/margins": 7.394099235534668, + "rewards/rejected": -4.1768341064453125, + "step": 11523 + }, + { + "epoch": 0.67, + "learning_rate": 2.5853538655718767e-08, + "logits/chosen": -1.6701310873031616, + "logits/rejected": -1.6639186143875122, + "logps/chosen": -235.06100463867188, + "logps/rejected": -307.87445068359375, + "loss": 0.4069, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.35186767578125, + "rewards/margins": -0.07567453384399414, + "rewards/rejected": 2.427542209625244, + "step": 11524 + }, + { + "epoch": 0.67, + "learning_rate": 2.5845286849556115e-08, + "logits/chosen": -1.943185806274414, + "logits/rejected": -1.9349616765975952, + "logps/chosen": -240.4424591064453, + "logps/rejected": -304.9022216796875, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8359177112579346, + "rewards/margins": 1.5801010131835938, + "rewards/rejected": 1.2558166980743408, + "step": 11525 + }, + { + "epoch": 0.67, + "learning_rate": 2.5837035901487008e-08, + "logits/chosen": -1.7651326656341553, + "logits/rejected": -1.7678370475769043, + "logps/chosen": -116.98703002929688, + "logps/rejected": -273.25384521484375, + "loss": 0.2026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42352601885795593, + "rewards/margins": 3.8378617763519287, + "rewards/rejected": -3.4143357276916504, + "step": 11526 + }, + { + "epoch": 0.67, + "learning_rate": 2.5828785811804576e-08, + "logits/chosen": -2.054213285446167, + "logits/rejected": -2.0470666885375977, + "logps/chosen": -12.442811965942383, + "logps/rejected": -293.7843017578125, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11579208821058273, + "rewards/margins": 6.098076820373535, + "rewards/rejected": -5.9822845458984375, + "step": 11527 + }, + { + "epoch": 0.67, + "learning_rate": 2.582053658080191e-08, + "logits/chosen": -1.948009967803955, + "logits/rejected": -1.9843122959136963, + "logps/chosen": -209.4473876953125, + "logps/rejected": -231.6709747314453, + "loss": 0.0945, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7820510864257812, + "rewards/margins": 1.7638579607009888, + "rewards/rejected": 1.0181931257247925, + "step": 11528 + }, + { + "epoch": 0.67, + "learning_rate": 2.581228820877208e-08, + "logits/chosen": -1.9268304109573364, + "logits/rejected": -1.9265834093093872, + "logps/chosen": -50.52848434448242, + "logps/rejected": -221.4427947998047, + "loss": 0.1127, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.37079656124115, + "rewards/margins": 2.7469944953918457, + "rewards/rejected": -1.3761978149414062, + "step": 11529 + }, + { + "epoch": 0.67, + "learning_rate": 2.5804040696008068e-08, + "logits/chosen": -1.9561388492584229, + "logits/rejected": -1.9499714374542236, + "logps/chosen": -39.234127044677734, + "logps/rejected": -181.28970336914062, + "loss": 0.1229, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2839592695236206, + "rewards/margins": 3.2942652702331543, + "rewards/rejected": -2.010305881500244, + "step": 11530 + }, + { + "epoch": 0.67, + "learning_rate": 2.5795794042802886e-08, + "logits/chosen": -1.8328418731689453, + "logits/rejected": -1.8967363834381104, + "logps/chosen": -196.11959838867188, + "logps/rejected": -339.57904052734375, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.114096164703369, + "rewards/margins": 4.599630832672119, + "rewards/rejected": -2.48553466796875, + "step": 11531 + }, + { + "epoch": 0.67, + "learning_rate": 2.5787548249449496e-08, + "logits/chosen": -1.7899848222732544, + "logits/rejected": -1.7835248708724976, + "logps/chosen": -9.029129028320312, + "logps/rejected": -91.54922485351562, + "loss": 0.4228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08900213241577148, + "rewards/margins": 1.0817146301269531, + "rewards/rejected": -0.9927124381065369, + "step": 11532 + }, + { + "epoch": 0.67, + "learning_rate": 2.5779303316240842e-08, + "logits/chosen": -2.0790493488311768, + "logits/rejected": -2.075463056564331, + "logps/chosen": -117.4249038696289, + "logps/rejected": -292.9976501464844, + "loss": 0.5677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5291534662246704, + "rewards/margins": 1.325445532798767, + "rewards/rejected": -1.8545989990234375, + "step": 11533 + }, + { + "epoch": 0.67, + "learning_rate": 2.5771059243469796e-08, + "logits/chosen": -1.968636393547058, + "logits/rejected": -1.9520318508148193, + "logps/chosen": -162.0128173828125, + "logps/rejected": -344.3004150390625, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.796807885169983, + "rewards/margins": 0.7764160633087158, + "rewards/rejected": 1.020391821861267, + "step": 11534 + }, + { + "epoch": 0.67, + "learning_rate": 2.5762816031429225e-08, + "logits/chosen": -1.9099851846694946, + "logits/rejected": -1.911512017250061, + "logps/chosen": -32.25279235839844, + "logps/rejected": -77.32643127441406, + "loss": 0.4228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04372291639447212, + "rewards/margins": 1.6499816179275513, + "rewards/rejected": -1.606258749961853, + "step": 11535 + }, + { + "epoch": 0.67, + "learning_rate": 2.575457368041202e-08, + "logits/chosen": -1.9949703216552734, + "logits/rejected": -2.028376579284668, + "logps/chosen": -298.16387939453125, + "logps/rejected": -373.0292053222656, + "loss": 0.1089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.754785180091858, + "rewards/margins": 1.7115997076034546, + "rewards/rejected": 0.04318542405962944, + "step": 11536 + }, + { + "epoch": 0.67, + "learning_rate": 2.5746332190710928e-08, + "logits/chosen": -1.7907378673553467, + "logits/rejected": -1.791308045387268, + "logps/chosen": -0.3270713984966278, + "logps/rejected": -245.87879943847656, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012449214234948158, + "rewards/margins": 5.049700736999512, + "rewards/rejected": -5.0372514724731445, + "step": 11537 + }, + { + "epoch": 0.67, + "learning_rate": 2.5738091562618786e-08, + "logits/chosen": -1.750022053718567, + "logits/rejected": -1.7046101093292236, + "logps/chosen": -279.9477844238281, + "logps/rejected": -580.572265625, + "loss": 0.1156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6545318365097046, + "rewards/margins": 2.394723415374756, + "rewards/rejected": -0.740191638469696, + "step": 11538 + }, + { + "epoch": 0.67, + "learning_rate": 2.5729851796428266e-08, + "logits/chosen": -2.05505633354187, + "logits/rejected": -2.059133768081665, + "logps/chosen": -47.88182830810547, + "logps/rejected": -284.58245849609375, + "loss": 0.4013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12355270236730576, + "rewards/margins": 2.1072471141815186, + "rewards/rejected": -1.9836944341659546, + "step": 11539 + }, + { + "epoch": 0.67, + "learning_rate": 2.5721612892432165e-08, + "logits/chosen": -2.0377397537231445, + "logits/rejected": -2.0289409160614014, + "logps/chosen": -0.00011384149547666311, + "logps/rejected": -175.29173278808594, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.869664609141182e-06, + "rewards/margins": 5.731625556945801, + "rewards/rejected": -5.7316155433654785, + "step": 11540 + }, + { + "epoch": 0.67, + "learning_rate": 2.5713374850923118e-08, + "logits/chosen": -1.7891122102737427, + "logits/rejected": -1.7791249752044678, + "logps/chosen": -215.03890991210938, + "logps/rejected": -408.0831604003906, + "loss": 0.1176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7118316888809204, + "rewards/margins": 2.748220920562744, + "rewards/rejected": -1.0363892316818237, + "step": 11541 + }, + { + "epoch": 0.67, + "learning_rate": 2.5705137672193788e-08, + "logits/chosen": -1.8953009843826294, + "logits/rejected": -1.8818773031234741, + "logps/chosen": -50.89099884033203, + "logps/rejected": -147.99838256835938, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4704704284667969, + "rewards/margins": 1.5976189374923706, + "rewards/rejected": -1.1271485090255737, + "step": 11542 + }, + { + "epoch": 0.67, + "learning_rate": 2.5696901356536814e-08, + "logits/chosen": -2.0058560371398926, + "logits/rejected": -2.005260467529297, + "logps/chosen": -32.186126708984375, + "logps/rejected": -130.42025756835938, + "loss": 0.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6361988186836243, + "rewards/margins": 0.8301289081573486, + "rewards/rejected": -0.19393005967140198, + "step": 11543 + }, + { + "epoch": 0.67, + "learning_rate": 2.56886659042448e-08, + "logits/chosen": -2.009303331375122, + "logits/rejected": -1.9941630363464355, + "logps/chosen": -72.1773681640625, + "logps/rejected": -353.030517578125, + "loss": 0.1603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9806221127510071, + "rewards/margins": 2.5800912380218506, + "rewards/rejected": -1.5994690656661987, + "step": 11544 + }, + { + "epoch": 0.67, + "learning_rate": 2.5680431315610274e-08, + "logits/chosen": -1.7584551572799683, + "logits/rejected": -1.7945952415466309, + "logps/chosen": -165.54367065429688, + "logps/rejected": -283.4419250488281, + "loss": 0.1266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.332937628030777, + "rewards/margins": 2.5895845890045166, + "rewards/rejected": -2.2566468715667725, + "step": 11545 + }, + { + "epoch": 0.67, + "learning_rate": 2.5672197590925786e-08, + "logits/chosen": -1.7697244882583618, + "logits/rejected": -1.808956265449524, + "logps/chosen": -186.5450439453125, + "logps/rejected": -271.6986083984375, + "loss": 0.2659, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.33170485496521, + "rewards/margins": 0.49155592918395996, + "rewards/rejected": 1.84014892578125, + "step": 11546 + }, + { + "epoch": 0.67, + "learning_rate": 2.5663964730483843e-08, + "logits/chosen": -1.8832428455352783, + "logits/rejected": -1.871572494506836, + "logps/chosen": -135.46168518066406, + "logps/rejected": -193.0447998046875, + "loss": 0.2395, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2962052822113037, + "rewards/margins": 0.7667664289474487, + "rewards/rejected": 1.529438853263855, + "step": 11547 + }, + { + "epoch": 0.67, + "learning_rate": 2.5655732734576928e-08, + "logits/chosen": -1.6815170049667358, + "logits/rejected": -1.677381157875061, + "logps/chosen": -175.0515594482422, + "logps/rejected": -356.63067626953125, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8405990600585938, + "rewards/margins": 4.138575553894043, + "rewards/rejected": -2.2979767322540283, + "step": 11548 + }, + { + "epoch": 0.67, + "learning_rate": 2.564750160349745e-08, + "logits/chosen": -1.994402527809143, + "logits/rejected": -1.9955945014953613, + "logps/chosen": -14.589841842651367, + "logps/rejected": -101.70137023925781, + "loss": 0.3844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38451024889945984, + "rewards/margins": 1.0152581930160522, + "rewards/rejected": -0.6307479739189148, + "step": 11549 + }, + { + "epoch": 0.67, + "learning_rate": 2.5639271337537838e-08, + "logits/chosen": -1.9347741603851318, + "logits/rejected": -1.920620322227478, + "logps/chosen": -0.009275918826460838, + "logps/rejected": -354.8250732421875, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006892586243338883, + "rewards/margins": 9.5916748046875, + "rewards/rejected": -9.592364311218262, + "step": 11550 + }, + { + "epoch": 0.67, + "learning_rate": 2.5631041936990466e-08, + "logits/chosen": -1.877563238143921, + "logits/rejected": -1.8654431104660034, + "logps/chosen": -216.5, + "logps/rejected": -409.8358459472656, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6281189918518066, + "rewards/margins": 0.7123291492462158, + "rewards/rejected": 1.9157898426055908, + "step": 11551 + }, + { + "epoch": 0.67, + "learning_rate": 2.5622813402147702e-08, + "logits/chosen": -1.9186375141143799, + "logits/rejected": -1.8937621116638184, + "logps/chosen": -184.43670654296875, + "logps/rejected": -277.4036865234375, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3529999256134033, + "rewards/margins": 2.556204319000244, + "rewards/rejected": 0.796795666217804, + "step": 11552 + }, + { + "epoch": 0.67, + "learning_rate": 2.5614585733301835e-08, + "logits/chosen": -2.038189172744751, + "logits/rejected": -2.042935609817505, + "logps/chosen": -13.956292152404785, + "logps/rejected": -99.27694702148438, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03349428251385689, + "rewards/margins": 1.7405673265457153, + "rewards/rejected": -1.7740615606307983, + "step": 11553 + }, + { + "epoch": 0.67, + "learning_rate": 2.5606358930745143e-08, + "logits/chosen": -1.7968800067901611, + "logits/rejected": -1.84384286403656, + "logps/chosen": -231.02011108398438, + "logps/rejected": -338.0614013671875, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.780146837234497, + "rewards/margins": 4.440507888793945, + "rewards/rejected": -2.660360813140869, + "step": 11554 + }, + { + "epoch": 0.67, + "learning_rate": 2.5598132994769943e-08, + "logits/chosen": -1.7470691204071045, + "logits/rejected": -1.7382432222366333, + "logps/chosen": -9.069743156433105, + "logps/rejected": -163.334716796875, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22111396491527557, + "rewards/margins": 2.9400761127471924, + "rewards/rejected": -2.7189621925354004, + "step": 11555 + }, + { + "epoch": 0.67, + "learning_rate": 2.5589907925668402e-08, + "logits/chosen": -2.051327705383301, + "logits/rejected": -2.0423851013183594, + "logps/chosen": -43.42367935180664, + "logps/rejected": -300.05755615234375, + "loss": 0.2723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4674438536167145, + "rewards/margins": 1.8121154308319092, + "rewards/rejected": -1.344671607017517, + "step": 11556 + }, + { + "epoch": 0.67, + "learning_rate": 2.558168372373275e-08, + "logits/chosen": -1.8371611833572388, + "logits/rejected": -1.837424635887146, + "logps/chosen": -0.004836472682654858, + "logps/rejected": -139.70664978027344, + "loss": 0.6028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002777591871563345, + "rewards/margins": 0.35456690192222595, + "rewards/rejected": -0.3548446595668793, + "step": 11557 + }, + { + "epoch": 0.67, + "learning_rate": 2.5573460389255104e-08, + "logits/chosen": -1.866371989250183, + "logits/rejected": -1.8695634603500366, + "logps/chosen": -21.99285316467285, + "logps/rejected": -218.06216430664062, + "loss": 0.3304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.818215012550354, + "rewards/margins": 0.7731969952583313, + "rewards/rejected": 0.04501800611615181, + "step": 11558 + }, + { + "epoch": 0.67, + "learning_rate": 2.556523792252766e-08, + "logits/chosen": -1.8283313512802124, + "logits/rejected": -1.8175945281982422, + "logps/chosen": -157.0839080810547, + "logps/rejected": -238.2682342529297, + "loss": 0.4225, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5043472051620483, + "rewards/margins": -0.07382512092590332, + "rewards/rejected": 1.5781723260879517, + "step": 11559 + }, + { + "epoch": 0.67, + "learning_rate": 2.5557016323842474e-08, + "logits/chosen": -1.8680088520050049, + "logits/rejected": -1.863950490951538, + "logps/chosen": -151.78636169433594, + "logps/rejected": -397.2052001953125, + "loss": 0.0888, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1747452020645142, + "rewards/margins": 5.5385637283325195, + "rewards/rejected": -4.363818645477295, + "step": 11560 + }, + { + "epoch": 0.67, + "learning_rate": 2.5548795593491644e-08, + "logits/chosen": -2.074254274368286, + "logits/rejected": -2.0629079341888428, + "logps/chosen": -49.65984344482422, + "logps/rejected": -192.64682006835938, + "loss": 0.3954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044802095741033554, + "rewards/margins": 1.7182807922363281, + "rewards/rejected": -1.7630828619003296, + "step": 11561 + }, + { + "epoch": 0.67, + "learning_rate": 2.5540575731767166e-08, + "logits/chosen": -1.8750005960464478, + "logits/rejected": -1.876893401145935, + "logps/chosen": -3.600084164645523e-05, + "logps/rejected": -155.14617919921875, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.821349410936818e-07, + "rewards/margins": 5.182026386260986, + "rewards/rejected": -5.18202543258667, + "step": 11562 + }, + { + "epoch": 0.67, + "learning_rate": 2.553235673896112e-08, + "logits/chosen": -1.892357349395752, + "logits/rejected": -1.8964735269546509, + "logps/chosen": -5.865833759307861, + "logps/rejected": -142.38645935058594, + "loss": 0.2769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.700193464756012, + "rewards/margins": 1.6830213069915771, + "rewards/rejected": -0.9828277826309204, + "step": 11563 + }, + { + "epoch": 0.67, + "learning_rate": 2.5524138615365424e-08, + "logits/chosen": -2.076230525970459, + "logits/rejected": -2.074087142944336, + "logps/chosen": -5.2145843505859375, + "logps/rejected": -303.0855407714844, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08173594623804092, + "rewards/margins": 4.039041996002197, + "rewards/rejected": -3.957305908203125, + "step": 11564 + }, + { + "epoch": 0.67, + "learning_rate": 2.5515921361272052e-08, + "logits/chosen": -2.031346321105957, + "logits/rejected": -2.030649185180664, + "logps/chosen": -75.59999084472656, + "logps/rejected": -248.832763671875, + "loss": 0.1233, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.140641927719116, + "rewards/margins": 1.5677186250686646, + "rewards/rejected": 0.5729233026504517, + "step": 11565 + }, + { + "epoch": 0.67, + "learning_rate": 2.5507704976972917e-08, + "logits/chosen": -2.004941463470459, + "logits/rejected": -1.9576472043991089, + "logps/chosen": -164.96426391601562, + "logps/rejected": -472.7065734863281, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3086395263671875, + "rewards/margins": 5.852075576782227, + "rewards/rejected": -2.54343581199646, + "step": 11566 + }, + { + "epoch": 0.67, + "learning_rate": 2.5499489462759926e-08, + "logits/chosen": -2.083513021469116, + "logits/rejected": -2.0532948970794678, + "logps/chosen": -1.814176082611084, + "logps/rejected": -198.1457061767578, + "loss": 0.3373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12418573349714279, + "rewards/margins": 3.0082480907440186, + "rewards/rejected": -2.8840622901916504, + "step": 11567 + }, + { + "epoch": 0.67, + "learning_rate": 2.549127481892489e-08, + "logits/chosen": -1.6007733345031738, + "logits/rejected": -1.5834468603134155, + "logps/chosen": -3.175083875656128, + "logps/rejected": -300.7907409667969, + "loss": 0.393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1765163391828537, + "rewards/margins": 4.956045627593994, + "rewards/rejected": -5.132562160491943, + "step": 11568 + }, + { + "epoch": 0.67, + "learning_rate": 2.548306104575966e-08, + "logits/chosen": -1.8540157079696655, + "logits/rejected": -1.8606915473937988, + "logps/chosen": -0.006970728747546673, + "logps/rejected": -213.11521911621094, + "loss": 0.3432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006470453809015453, + "rewards/margins": 3.9831771850585938, + "rewards/rejected": -3.9838242530822754, + "step": 11569 + }, + { + "epoch": 0.67, + "learning_rate": 2.5474848143556033e-08, + "logits/chosen": -1.9297956228256226, + "logits/rejected": -1.8871370553970337, + "logps/chosen": -208.02682495117188, + "logps/rejected": -503.52288818359375, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.265390157699585, + "rewards/margins": 4.239017009735107, + "rewards/rejected": -1.973626732826233, + "step": 11570 + }, + { + "epoch": 0.67, + "learning_rate": 2.5466636112605776e-08, + "logits/chosen": -1.7526904344558716, + "logits/rejected": -1.7517186403274536, + "logps/chosen": -11.012253761291504, + "logps/rejected": -155.02099609375, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24190950393676758, + "rewards/margins": 1.6271389722824097, + "rewards/rejected": -1.385229468345642, + "step": 11571 + }, + { + "epoch": 0.67, + "learning_rate": 2.5458424953200598e-08, + "logits/chosen": -1.8069143295288086, + "logits/rejected": -1.7957690954208374, + "logps/chosen": -193.74591064453125, + "logps/rejected": -440.6581726074219, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2757599353790283, + "rewards/margins": 3.309060573577881, + "rewards/rejected": -1.033300757408142, + "step": 11572 + }, + { + "epoch": 0.67, + "learning_rate": 2.5450214665632207e-08, + "logits/chosen": -1.8160161972045898, + "logits/rejected": -1.804731845855713, + "logps/chosen": -165.68594360351562, + "logps/rejected": -270.21563720703125, + "loss": 0.1471, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8316376209259033, + "rewards/margins": 1.3169128894805908, + "rewards/rejected": 1.5147247314453125, + "step": 11573 + }, + { + "epoch": 0.67, + "learning_rate": 2.5442005250192276e-08, + "logits/chosen": -2.088942527770996, + "logits/rejected": -2.077721118927002, + "logps/chosen": -26.24299430847168, + "logps/rejected": -260.59295654296875, + "loss": 0.2845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2224559783935547, + "rewards/margins": 3.5281293392181396, + "rewards/rejected": -3.305673360824585, + "step": 11574 + }, + { + "epoch": 0.67, + "learning_rate": 2.543379670717245e-08, + "logits/chosen": -1.7604293823242188, + "logits/rejected": -1.729179859161377, + "logps/chosen": -280.8078918457031, + "logps/rejected": -570.67626953125, + "loss": 0.1209, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9110596179962158, + "rewards/margins": 2.026538133621216, + "rewards/rejected": -0.115478515625, + "step": 11575 + }, + { + "epoch": 0.67, + "learning_rate": 2.5425589036864346e-08, + "logits/chosen": -2.0451748371124268, + "logits/rejected": -2.0308876037597656, + "logps/chosen": -5.829244037158787e-05, + "logps/rejected": -160.23825073242188, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0967022490149247e-06, + "rewards/margins": 2.7551822662353516, + "rewards/rejected": -2.755183458328247, + "step": 11576 + }, + { + "epoch": 0.67, + "learning_rate": 2.5417382239559494e-08, + "logits/chosen": -1.9580875635147095, + "logits/rejected": -1.956412672996521, + "logps/chosen": -125.73123168945312, + "logps/rejected": -438.7284240722656, + "loss": 0.2053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3628692626953125, + "rewards/margins": 4.602090358734131, + "rewards/rejected": -4.239221096038818, + "step": 11577 + }, + { + "epoch": 0.67, + "learning_rate": 2.54091763155495e-08, + "logits/chosen": -1.9395211935043335, + "logits/rejected": -1.9425772428512573, + "logps/chosen": -51.14519119262695, + "logps/rejected": -159.4097137451172, + "loss": 0.5791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007348251529037952, + "rewards/margins": 0.5215778350830078, + "rewards/rejected": -0.5289260745048523, + "step": 11578 + }, + { + "epoch": 0.67, + "learning_rate": 2.5400971265125836e-08, + "logits/chosen": -1.9877288341522217, + "logits/rejected": -1.97265625, + "logps/chosen": -63.77425765991211, + "logps/rejected": -197.4473419189453, + "loss": 0.2207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1341220885515213, + "rewards/margins": 3.469414234161377, + "rewards/rejected": -3.335292100906372, + "step": 11579 + }, + { + "epoch": 0.67, + "learning_rate": 2.539276708858002e-08, + "logits/chosen": -2.097102642059326, + "logits/rejected": -2.1021625995635986, + "logps/chosen": -29.319812774658203, + "logps/rejected": -220.30831909179688, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013477325439453125, + "rewards/margins": 4.463869571685791, + "rewards/rejected": -4.477346897125244, + "step": 11580 + }, + { + "epoch": 0.67, + "learning_rate": 2.538456378620344e-08, + "logits/chosen": -1.770821213722229, + "logits/rejected": -1.771385908126831, + "logps/chosen": -0.1582360714673996, + "logps/rejected": -235.7189178466797, + "loss": 0.3368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030878841876983643, + "rewards/margins": 3.6775341033935547, + "rewards/rejected": -3.646655321121216, + "step": 11581 + }, + { + "epoch": 0.67, + "learning_rate": 2.5376361358287606e-08, + "logits/chosen": -1.810714840888977, + "logits/rejected": -1.7986445426940918, + "logps/chosen": -9.242833137512207, + "logps/rejected": -299.2762451171875, + "loss": 0.2594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41885778307914734, + "rewards/margins": 5.167102813720703, + "rewards/rejected": -4.7482452392578125, + "step": 11582 + }, + { + "epoch": 0.67, + "learning_rate": 2.5368159805123845e-08, + "logits/chosen": -1.7889450788497925, + "logits/rejected": -1.7680752277374268, + "logps/chosen": -369.68536376953125, + "logps/rejected": -518.545654296875, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.020404100418091, + "rewards/margins": 3.7686004638671875, + "rewards/rejected": -0.7481964230537415, + "step": 11583 + }, + { + "epoch": 0.67, + "learning_rate": 2.5359959127003528e-08, + "logits/chosen": -1.8408538103103638, + "logits/rejected": -1.841429352760315, + "logps/chosen": -3.8801543712615967, + "logps/rejected": -154.89263916015625, + "loss": 0.4494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10402987152338028, + "rewards/margins": 1.225849986076355, + "rewards/rejected": -1.121820092201233, + "step": 11584 + }, + { + "epoch": 0.67, + "learning_rate": 2.535175932421799e-08, + "logits/chosen": -2.165247917175293, + "logits/rejected": -2.1470603942871094, + "logps/chosen": -163.68943786621094, + "logps/rejected": -284.981689453125, + "loss": 0.0844, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.427964925765991, + "rewards/margins": 1.862919807434082, + "rewards/rejected": 0.565045177936554, + "step": 11585 + }, + { + "epoch": 0.67, + "learning_rate": 2.534356039705856e-08, + "logits/chosen": -1.851168155670166, + "logits/rejected": -1.8529337644577026, + "logps/chosen": -38.18751525878906, + "logps/rejected": -124.42280578613281, + "loss": 0.3821, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1330028623342514, + "rewards/margins": 1.2784862518310547, + "rewards/rejected": -1.4114891290664673, + "step": 11586 + }, + { + "epoch": 0.67, + "learning_rate": 2.5335362345816445e-08, + "logits/chosen": -1.8649550676345825, + "logits/rejected": -1.8496177196502686, + "logps/chosen": -164.68763732910156, + "logps/rejected": -437.67169189453125, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3558273315429688, + "rewards/margins": 4.842350959777832, + "rewards/rejected": -2.486523389816284, + "step": 11587 + }, + { + "epoch": 0.67, + "learning_rate": 2.5327165170782904e-08, + "logits/chosen": -1.764256238937378, + "logits/rejected": -1.789008378982544, + "logps/chosen": -201.09909057617188, + "logps/rejected": -423.6009216308594, + "loss": 0.1098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9794708490371704, + "rewards/margins": 5.106890678405762, + "rewards/rejected": -4.127419948577881, + "step": 11588 + }, + { + "epoch": 0.67, + "learning_rate": 2.5318968872249148e-08, + "logits/chosen": -1.915392279624939, + "logits/rejected": -1.9108021259307861, + "logps/chosen": -10.875131607055664, + "logps/rejected": -117.81405639648438, + "loss": 0.6045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1756555587053299, + "rewards/margins": 0.13290195167064667, + "rewards/rejected": 0.04275360330939293, + "step": 11589 + }, + { + "epoch": 0.67, + "learning_rate": 2.5310773450506362e-08, + "logits/chosen": -1.8237859010696411, + "logits/rejected": -1.8297308683395386, + "logps/chosen": -269.6479797363281, + "logps/rejected": -356.0456848144531, + "loss": 0.1581, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.587289571762085, + "rewards/margins": 1.0138764381408691, + "rewards/rejected": 2.573413133621216, + "step": 11590 + }, + { + "epoch": 0.67, + "learning_rate": 2.530257890584566e-08, + "logits/chosen": -1.839029312133789, + "logits/rejected": -1.8463318347930908, + "logps/chosen": -159.27294921875, + "logps/rejected": -320.4234619140625, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3888611793518066, + "rewards/margins": 4.271707057952881, + "rewards/rejected": -0.8828460574150085, + "step": 11591 + }, + { + "epoch": 0.67, + "learning_rate": 2.529438523855816e-08, + "logits/chosen": -1.9068869352340698, + "logits/rejected": -1.9156006574630737, + "logps/chosen": -7.15503454208374, + "logps/rejected": -237.71685791015625, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28530725836753845, + "rewards/margins": 4.666133403778076, + "rewards/rejected": -4.380825996398926, + "step": 11592 + }, + { + "epoch": 0.67, + "learning_rate": 2.528619244893494e-08, + "logits/chosen": -1.610555648803711, + "logits/rejected": -1.6012247800827026, + "logps/chosen": -155.84344482421875, + "logps/rejected": -276.0004577636719, + "loss": 0.2238, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2289505004882812, + "rewards/margins": 0.7285964488983154, + "rewards/rejected": 1.5003540515899658, + "step": 11593 + }, + { + "epoch": 0.67, + "learning_rate": 2.5278000537267076e-08, + "logits/chosen": -1.9001399278640747, + "logits/rejected": -1.8956419229507446, + "logps/chosen": -164.0218505859375, + "logps/rejected": -219.95745849609375, + "loss": 0.4183, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2549896240234375, + "rewards/margins": -0.11969757080078125, + "rewards/rejected": 1.3746871948242188, + "step": 11594 + }, + { + "epoch": 0.67, + "learning_rate": 2.5269809503845534e-08, + "logits/chosen": -2.0368757247924805, + "logits/rejected": -2.03460693359375, + "logps/chosen": -2.3998069763183594, + "logps/rejected": -123.80253601074219, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23094086349010468, + "rewards/margins": 3.2502858638763428, + "rewards/rejected": -3.0193450450897217, + "step": 11595 + }, + { + "epoch": 0.67, + "learning_rate": 2.5261619348961318e-08, + "logits/chosen": -1.6307587623596191, + "logits/rejected": -1.6220370531082153, + "logps/chosen": -38.66170120239258, + "logps/rejected": -312.33453369140625, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5230778455734253, + "rewards/margins": 3.642261028289795, + "rewards/rejected": -2.119183301925659, + "step": 11596 + }, + { + "epoch": 0.67, + "learning_rate": 2.5253430072905425e-08, + "logits/chosen": -1.816543459892273, + "logits/rejected": -1.8178234100341797, + "logps/chosen": -141.6900634765625, + "logps/rejected": -292.958984375, + "loss": 0.085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4324768781661987, + "rewards/margins": 2.3268373012542725, + "rewards/rejected": -0.894360363483429, + "step": 11597 + }, + { + "epoch": 0.67, + "learning_rate": 2.524524167596872e-08, + "logits/chosen": -1.8530726432800293, + "logits/rejected": -1.8580238819122314, + "logps/chosen": -62.17411422729492, + "logps/rejected": -108.89999389648438, + "loss": 1.1414, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3587474822998047, + "rewards/margins": -0.25830042362213135, + "rewards/rejected": -1.1004470586776733, + "step": 11598 + }, + { + "epoch": 0.67, + "learning_rate": 2.5237054158442152e-08, + "logits/chosen": -1.8685506582260132, + "logits/rejected": -1.881192922592163, + "logps/chosen": -113.14787292480469, + "logps/rejected": -300.323486328125, + "loss": 0.0483, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6862045526504517, + "rewards/margins": 3.3542404174804688, + "rewards/rejected": -1.668035864830017, + "step": 11599 + }, + { + "epoch": 0.68, + "learning_rate": 2.5228867520616497e-08, + "logits/chosen": -1.943905234336853, + "logits/rejected": -1.941210150718689, + "logps/chosen": -105.27816772460938, + "logps/rejected": -493.2625732421875, + "loss": 0.0668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7981491088867188, + "rewards/margins": 4.928382873535156, + "rewards/rejected": -3.1302337646484375, + "step": 11600 + }, + { + "epoch": 0.68, + "learning_rate": 2.5220681762782682e-08, + "logits/chosen": -1.8759084939956665, + "logits/rejected": -1.8421211242675781, + "logps/chosen": -351.4028625488281, + "logps/rejected": -555.159423828125, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.456341505050659, + "rewards/margins": 2.862408399581909, + "rewards/rejected": -0.40606689453125, + "step": 11601 + }, + { + "epoch": 0.68, + "learning_rate": 2.5212496885231437e-08, + "logits/chosen": -1.8116555213928223, + "logits/rejected": -1.808813214302063, + "logps/chosen": -31.728248596191406, + "logps/rejected": -145.85130310058594, + "loss": 0.2323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6203487515449524, + "rewards/margins": 1.7436673641204834, + "rewards/rejected": -1.1233185529708862, + "step": 11602 + }, + { + "epoch": 0.68, + "learning_rate": 2.5204312888253576e-08, + "logits/chosen": -1.9349225759506226, + "logits/rejected": -1.9160535335540771, + "logps/chosen": -211.21578979492188, + "logps/rejected": -483.565673828125, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4838699102401733, + "rewards/margins": 5.735908508300781, + "rewards/rejected": -4.252038478851318, + "step": 11603 + }, + { + "epoch": 0.68, + "learning_rate": 2.5196129772139764e-08, + "logits/chosen": -1.5119171142578125, + "logits/rejected": -1.5172206163406372, + "logps/chosen": -182.37118530273438, + "logps/rejected": -245.2126007080078, + "loss": 0.1819, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.620739698410034, + "rewards/margins": 0.94629967212677, + "rewards/rejected": 1.6744400262832642, + "step": 11604 + }, + { + "epoch": 0.68, + "learning_rate": 2.5187947537180798e-08, + "logits/chosen": -1.8941717147827148, + "logits/rejected": -1.8909693956375122, + "logps/chosen": -48.73384094238281, + "logps/rejected": -111.62088775634766, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3756221830844879, + "rewards/margins": 1.4248043298721313, + "rewards/rejected": -1.0491821765899658, + "step": 11605 + }, + { + "epoch": 0.68, + "learning_rate": 2.517976618366728e-08, + "logits/chosen": -2.0542972087860107, + "logits/rejected": -2.0463380813598633, + "logps/chosen": -46.87717056274414, + "logps/rejected": -223.98228454589844, + "loss": 0.2727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20956841111183167, + "rewards/margins": 2.136852741241455, + "rewards/rejected": -1.9272842407226562, + "step": 11606 + }, + { + "epoch": 0.68, + "learning_rate": 2.5171585711889875e-08, + "logits/chosen": -1.7731904983520508, + "logits/rejected": -1.771490454673767, + "logps/chosen": -41.94474411010742, + "logps/rejected": -180.5018310546875, + "loss": 0.3759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.128304362297058, + "rewards/margins": 0.3088921308517456, + "rewards/rejected": 0.8194122314453125, + "step": 11607 + }, + { + "epoch": 0.68, + "learning_rate": 2.51634061221392e-08, + "logits/chosen": -1.9170805215835571, + "logits/rejected": -1.9231146574020386, + "logps/chosen": -40.080684661865234, + "logps/rejected": -149.74331665039062, + "loss": 0.3589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3607952296733856, + "rewards/margins": 1.2397960424423218, + "rewards/rejected": -0.8790008425712585, + "step": 11608 + }, + { + "epoch": 0.68, + "learning_rate": 2.5155227414705838e-08, + "logits/chosen": -1.8586065769195557, + "logits/rejected": -1.843528151512146, + "logps/chosen": -0.0027093617245554924, + "logps/rejected": -140.27932739257812, + "loss": 0.4674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002880894811823964, + "rewards/margins": 1.2884103059768677, + "rewards/rejected": -1.285529375076294, + "step": 11609 + }, + { + "epoch": 0.68, + "learning_rate": 2.5147049589880315e-08, + "logits/chosen": -2.1007440090179443, + "logits/rejected": -2.089163064956665, + "logps/chosen": -95.01425170898438, + "logps/rejected": -154.31964111328125, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24015656113624573, + "rewards/margins": 1.0718551874160767, + "rewards/rejected": -0.8316985964775085, + "step": 11610 + }, + { + "epoch": 0.68, + "learning_rate": 2.513887264795316e-08, + "logits/chosen": -1.8830825090408325, + "logits/rejected": -1.8862085342407227, + "logps/chosen": -26.47283935546875, + "logps/rejected": -85.32220458984375, + "loss": 0.4571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02789611928164959, + "rewards/margins": 1.1920623779296875, + "rewards/rejected": -1.2199585437774658, + "step": 11611 + }, + { + "epoch": 0.68, + "learning_rate": 2.5130696589214856e-08, + "logits/chosen": -1.789125919342041, + "logits/rejected": -1.7861346006393433, + "logps/chosen": -86.0652084350586, + "logps/rejected": -275.8102722167969, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8986542224884033, + "rewards/margins": 3.0838043689727783, + "rewards/rejected": -1.185150146484375, + "step": 11612 + }, + { + "epoch": 0.68, + "learning_rate": 2.5122521413955876e-08, + "logits/chosen": -1.8704156875610352, + "logits/rejected": -1.8519173860549927, + "logps/chosen": -6.468055248260498, + "logps/rejected": -278.04656982421875, + "loss": 0.2469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17779412865638733, + "rewards/margins": 6.5910539627075195, + "rewards/rejected": -6.413259983062744, + "step": 11613 + }, + { + "epoch": 0.68, + "learning_rate": 2.5114347122466605e-08, + "logits/chosen": -1.8487567901611328, + "logits/rejected": -1.8191132545471191, + "logps/chosen": -182.18309020996094, + "logps/rejected": -250.77655029296875, + "loss": 0.1185, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2359542846679688, + "rewards/margins": 1.901576280593872, + "rewards/rejected": 0.33437806367874146, + "step": 11614 + }, + { + "epoch": 0.68, + "learning_rate": 2.5106173715037453e-08, + "logits/chosen": -1.9949811697006226, + "logits/rejected": -1.9950460195541382, + "logps/chosen": -19.766895294189453, + "logps/rejected": -254.20989990234375, + "loss": 0.28, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2689962387084961, + "rewards/margins": 5.5231804847717285, + "rewards/rejected": -5.254184246063232, + "step": 11615 + }, + { + "epoch": 0.68, + "learning_rate": 2.5098001191958783e-08, + "logits/chosen": -1.8667858839035034, + "logits/rejected": -1.864059567451477, + "logps/chosen": -2.81972599029541, + "logps/rejected": -125.35494995117188, + "loss": 0.386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18287964165210724, + "rewards/margins": 1.7325081825256348, + "rewards/rejected": -1.549628496170044, + "step": 11616 + }, + { + "epoch": 0.68, + "learning_rate": 2.5089829553520914e-08, + "logits/chosen": -1.9815223217010498, + "logits/rejected": -1.9789185523986816, + "logps/chosen": -23.768909454345703, + "logps/rejected": -207.15182495117188, + "loss": 0.3428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.192328080534935, + "rewards/margins": 2.3426575660705566, + "rewards/rejected": -2.15032958984375, + "step": 11617 + }, + { + "epoch": 0.68, + "learning_rate": 2.508165880001417e-08, + "logits/chosen": -1.9436750411987305, + "logits/rejected": -1.9512676000595093, + "logps/chosen": -0.015292138792574406, + "logps/rejected": -140.14097595214844, + "loss": 0.3645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036926931352354586, + "rewards/margins": 2.881974697113037, + "rewards/rejected": -2.8823440074920654, + "step": 11618 + }, + { + "epoch": 0.68, + "learning_rate": 2.507348893172876e-08, + "logits/chosen": -1.8725671768188477, + "logits/rejected": -1.9257798194885254, + "logps/chosen": -232.41290283203125, + "logps/rejected": -294.95709228515625, + "loss": 0.2134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.314965844154358, + "rewards/margins": 1.0286438465118408, + "rewards/rejected": 0.2863220274448395, + "step": 11619 + }, + { + "epoch": 0.68, + "learning_rate": 2.5065319948954988e-08, + "logits/chosen": -1.946478009223938, + "logits/rejected": -1.9792890548706055, + "logps/chosen": -237.1891632080078, + "logps/rejected": -346.23651123046875, + "loss": 0.0945, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.266963243484497, + "rewards/margins": 1.874568223953247, + "rewards/rejected": 0.39239501953125, + "step": 11620 + }, + { + "epoch": 0.68, + "learning_rate": 2.5057151851983e-08, + "logits/chosen": -1.8922499418258667, + "logits/rejected": -1.8916146755218506, + "logps/chosen": -380.7411804199219, + "logps/rejected": -667.5487670898438, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9458160400390625, + "rewards/margins": 6.134561061859131, + "rewards/rejected": -5.188745021820068, + "step": 11621 + }, + { + "epoch": 0.68, + "learning_rate": 2.5048984641103005e-08, + "logits/chosen": -2.0583393573760986, + "logits/rejected": -2.0625202655792236, + "logps/chosen": -0.005610594525933266, + "logps/rejected": -118.00979614257812, + "loss": 0.7056, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00843114871531725, + "rewards/margins": -0.058498483151197433, + "rewards/rejected": 0.06692963093519211, + "step": 11622 + }, + { + "epoch": 0.68, + "learning_rate": 2.5040818316605084e-08, + "logits/chosen": -1.8943132162094116, + "logits/rejected": -1.9017671346664429, + "logps/chosen": -238.18264770507812, + "logps/rejected": -471.82269287109375, + "loss": 0.0761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0063705444335938, + "rewards/margins": 4.065302848815918, + "rewards/rejected": -3.0589325428009033, + "step": 11623 + }, + { + "epoch": 0.68, + "learning_rate": 2.5032652878779426e-08, + "logits/chosen": -1.974327802658081, + "logits/rejected": -1.9692819118499756, + "logps/chosen": -18.507110595703125, + "logps/rejected": -149.315185546875, + "loss": 0.4763, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5130997896194458, + "rewards/margins": 4.401054382324219, + "rewards/rejected": -4.914154052734375, + "step": 11624 + }, + { + "epoch": 0.68, + "learning_rate": 2.502448832791605e-08, + "logits/chosen": -1.8825839757919312, + "logits/rejected": -1.888920545578003, + "logps/chosen": -51.992286682128906, + "logps/rejected": -231.02735900878906, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22785340249538422, + "rewards/margins": 4.0038743019104, + "rewards/rejected": -3.7760207653045654, + "step": 11625 + }, + { + "epoch": 0.68, + "learning_rate": 2.5016324664305012e-08, + "logits/chosen": -1.9119703769683838, + "logits/rejected": -1.9095380306243896, + "logps/chosen": -49.168601989746094, + "logps/rejected": -206.2737579345703, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5722114443778992, + "rewards/margins": 1.1185646057128906, + "rewards/rejected": -0.5463531613349915, + "step": 11626 + }, + { + "epoch": 0.68, + "learning_rate": 2.5008161888236335e-08, + "logits/chosen": -1.8930375576019287, + "logits/rejected": -1.8692570924758911, + "logps/chosen": -197.21340942382812, + "logps/rejected": -245.345458984375, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.092529296875, + "rewards/margins": 0.7115341424942017, + "rewards/rejected": 0.3809951841831207, + "step": 11627 + }, + { + "epoch": 0.68, + "learning_rate": 2.500000000000001e-08, + "logits/chosen": -1.8447877168655396, + "logits/rejected": -1.8099548816680908, + "logps/chosen": -274.24273681640625, + "logps/rejected": -458.9853210449219, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9049866199493408, + "rewards/margins": 6.380576133728027, + "rewards/rejected": -4.475589275360107, + "step": 11628 + }, + { + "epoch": 0.68, + "learning_rate": 2.4991838999885956e-08, + "logits/chosen": -1.7119215726852417, + "logits/rejected": -1.6909074783325195, + "logps/chosen": -291.6779479980469, + "logps/rejected": -378.6691589355469, + "loss": 0.1942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4934113025665283, + "rewards/margins": 0.8816223740577698, + "rewards/rejected": 0.6117889285087585, + "step": 11629 + }, + { + "epoch": 0.68, + "learning_rate": 2.4983678888184108e-08, + "logits/chosen": -2.0521626472473145, + "logits/rejected": -2.0473365783691406, + "logps/chosen": -52.63401794433594, + "logps/rejected": -251.83859252929688, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2484336942434311, + "rewards/margins": 1.8096672296524048, + "rewards/rejected": -1.5612335205078125, + "step": 11630 + }, + { + "epoch": 0.68, + "learning_rate": 2.4975519665184357e-08, + "logits/chosen": -1.9205498695373535, + "logits/rejected": -1.8850536346435547, + "logps/chosen": -139.9234161376953, + "logps/rejected": -260.63848876953125, + "loss": 0.0477, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8298171758651733, + "rewards/margins": 3.1532974243164062, + "rewards/rejected": -1.323480248451233, + "step": 11631 + }, + { + "epoch": 0.68, + "learning_rate": 2.4967361331176572e-08, + "logits/chosen": -2.0084798336029053, + "logits/rejected": -2.0115299224853516, + "logps/chosen": -75.25241088867188, + "logps/rejected": -173.5113525390625, + "loss": 0.2809, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6747978925704956, + "rewards/margins": 0.604346513748169, + "rewards/rejected": 1.0704513788223267, + "step": 11632 + }, + { + "epoch": 0.68, + "learning_rate": 2.4959203886450543e-08, + "logits/chosen": -1.735898733139038, + "logits/rejected": -1.7319201231002808, + "logps/chosen": -134.447998046875, + "logps/rejected": -270.42083740234375, + "loss": 0.084, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5343643426895142, + "rewards/margins": 2.5853805541992188, + "rewards/rejected": -1.0510162115097046, + "step": 11633 + }, + { + "epoch": 0.68, + "learning_rate": 2.4951047331296077e-08, + "logits/chosen": -1.9109441041946411, + "logits/rejected": -1.8805561065673828, + "logps/chosen": -206.30670166015625, + "logps/rejected": -380.47119140625, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.86248779296875, + "rewards/margins": 4.403918266296387, + "rewards/rejected": -0.541430652141571, + "step": 11634 + }, + { + "epoch": 0.68, + "learning_rate": 2.494289166600294e-08, + "logits/chosen": -2.0317578315734863, + "logits/rejected": -2.01180100440979, + "logps/chosen": -7.626325607299805, + "logps/rejected": -187.60763549804688, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06240959092974663, + "rewards/margins": 3.60654878616333, + "rewards/rejected": -3.5441391468048096, + "step": 11635 + }, + { + "epoch": 0.68, + "learning_rate": 2.4934736890860857e-08, + "logits/chosen": -1.9177956581115723, + "logits/rejected": -1.9111387729644775, + "logps/chosen": -10.63801097869873, + "logps/rejected": -235.16885375976562, + "loss": 0.2589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3411746025085449, + "rewards/margins": 6.447094917297363, + "rewards/rejected": -6.105920314788818, + "step": 11636 + }, + { + "epoch": 0.68, + "learning_rate": 2.4926583006159546e-08, + "logits/chosen": -2.171942710876465, + "logits/rejected": -2.1707170009613037, + "logps/chosen": -0.01159799937158823, + "logps/rejected": -218.29205322265625, + "loss": 0.3175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003211435105185956, + "rewards/margins": 3.330371856689453, + "rewards/rejected": -3.330693006515503, + "step": 11637 + }, + { + "epoch": 0.68, + "learning_rate": 2.491843001218862e-08, + "logits/chosen": -1.7893213033676147, + "logits/rejected": -1.8474462032318115, + "logps/chosen": -256.8634033203125, + "logps/rejected": -546.5054321289062, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.903540015220642, + "rewards/margins": 8.113927841186523, + "rewards/rejected": -6.21038818359375, + "step": 11638 + }, + { + "epoch": 0.68, + "learning_rate": 2.491027790923778e-08, + "logits/chosen": -1.807761788368225, + "logits/rejected": -1.7888412475585938, + "logps/chosen": -192.06314086914062, + "logps/rejected": -264.0492248535156, + "loss": 0.3546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6672897338867188, + "rewards/margins": 0.24414825439453125, + "rewards/rejected": 0.4231414794921875, + "step": 11639 + }, + { + "epoch": 0.68, + "learning_rate": 2.4902126697596576e-08, + "logits/chosen": -1.751112699508667, + "logits/rejected": -1.7332628965377808, + "logps/chosen": -222.28884887695312, + "logps/rejected": -348.78009033203125, + "loss": 0.1265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8328644037246704, + "rewards/margins": 1.5946075916290283, + "rewards/rejected": -0.7617431879043579, + "step": 11640 + }, + { + "epoch": 0.68, + "learning_rate": 2.489397637755462e-08, + "logits/chosen": -2.016233205795288, + "logits/rejected": -2.012403964996338, + "logps/chosen": -28.56280517578125, + "logps/rejected": -130.28704833984375, + "loss": 0.3635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22293472290039062, + "rewards/margins": 1.741686224937439, + "rewards/rejected": -1.5187515020370483, + "step": 11641 + }, + { + "epoch": 0.68, + "learning_rate": 2.4885826949401395e-08, + "logits/chosen": -2.076125144958496, + "logits/rejected": -2.068894386291504, + "logps/chosen": -4.278680801391602, + "logps/rejected": -211.24801635742188, + "loss": 0.2013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5813528299331665, + "rewards/margins": 3.7027015686035156, + "rewards/rejected": -3.1213486194610596, + "step": 11642 + }, + { + "epoch": 0.68, + "learning_rate": 2.4877678413426478e-08, + "logits/chosen": -1.9188780784606934, + "logits/rejected": -1.9109070301055908, + "logps/chosen": -84.32579803466797, + "logps/rejected": -179.41415405273438, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2413688749074936, + "rewards/margins": 0.7141319513320923, + "rewards/rejected": -0.4727630615234375, + "step": 11643 + }, + { + "epoch": 0.68, + "learning_rate": 2.4869530769919295e-08, + "logits/chosen": -2.02160382270813, + "logits/rejected": -2.0149388313293457, + "logps/chosen": -3.0524537563323975, + "logps/rejected": -201.09686279296875, + "loss": 0.3953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035444118082523346, + "rewards/margins": 2.2262320518493652, + "rewards/rejected": -2.261676073074341, + "step": 11644 + }, + { + "epoch": 0.68, + "learning_rate": 2.4861384019169302e-08, + "logits/chosen": -2.000741958618164, + "logits/rejected": -1.9958527088165283, + "logps/chosen": -19.38936424255371, + "logps/rejected": -110.81550598144531, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28724023699760437, + "rewards/margins": 2.3675403594970703, + "rewards/rejected": -2.0803000926971436, + "step": 11645 + }, + { + "epoch": 0.68, + "learning_rate": 2.485323816146591e-08, + "logits/chosen": -1.8642807006835938, + "logits/rejected": -1.8406354188919067, + "logps/chosen": -232.2830047607422, + "logps/rejected": -436.05706787109375, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5519790649414062, + "rewards/margins": 4.89810037612915, + "rewards/rejected": -1.3461211919784546, + "step": 11646 + }, + { + "epoch": 0.68, + "learning_rate": 2.4845093197098533e-08, + "logits/chosen": -1.9686779975891113, + "logits/rejected": -1.963147521018982, + "logps/chosen": -32.0359992980957, + "logps/rejected": -152.43551635742188, + "loss": 0.196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7296661734580994, + "rewards/margins": 2.624202013015747, + "rewards/rejected": -1.8945358991622925, + "step": 11647 + }, + { + "epoch": 0.68, + "learning_rate": 2.483694912635647e-08, + "logits/chosen": -1.8951764106750488, + "logits/rejected": -1.8912684917449951, + "logps/chosen": -3.263723850250244, + "logps/rejected": -111.28581237792969, + "loss": 0.4992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20976042747497559, + "rewards/margins": 0.6812204122543335, + "rewards/rejected": -0.4714599549770355, + "step": 11648 + }, + { + "epoch": 0.68, + "learning_rate": 2.4828805949529057e-08, + "logits/chosen": -1.9798674583435059, + "logits/rejected": -1.9759190082550049, + "logps/chosen": -26.05545425415039, + "logps/rejected": -374.29925537109375, + "loss": 0.1139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9175094962120056, + "rewards/margins": 8.211143493652344, + "rewards/rejected": -7.293633937835693, + "step": 11649 + }, + { + "epoch": 0.68, + "learning_rate": 2.4820663666905584e-08, + "logits/chosen": -2.0214719772338867, + "logits/rejected": -2.024345874786377, + "logps/chosen": -10.9938325881958, + "logps/rejected": -162.08009338378906, + "loss": 0.43, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2791377007961273, + "rewards/margins": 3.4954137802124023, + "rewards/rejected": -3.7745513916015625, + "step": 11650 + }, + { + "epoch": 0.68, + "learning_rate": 2.4812522278775323e-08, + "logits/chosen": -1.9827677011489868, + "logits/rejected": -1.9821194410324097, + "logps/chosen": -9.250399589538574, + "logps/rejected": -181.18544006347656, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05001506954431534, + "rewards/margins": 2.877786636352539, + "rewards/rejected": -2.9278016090393066, + "step": 11651 + }, + { + "epoch": 0.68, + "learning_rate": 2.4804381785427454e-08, + "logits/chosen": -1.939015507698059, + "logits/rejected": -1.9377740621566772, + "logps/chosen": -0.04024428501725197, + "logps/rejected": -214.4580841064453, + "loss": 0.3545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011584118474274874, + "rewards/margins": 2.579044818878174, + "rewards/rejected": -2.5802032947540283, + "step": 11652 + }, + { + "epoch": 0.68, + "learning_rate": 2.479624218715119e-08, + "logits/chosen": -1.8691356182098389, + "logits/rejected": -1.8650758266448975, + "logps/chosen": -4.451705455780029, + "logps/rejected": -156.62757873535156, + "loss": 0.3223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05899052694439888, + "rewards/margins": 2.384852647781372, + "rewards/rejected": -2.325862169265747, + "step": 11653 + }, + { + "epoch": 0.68, + "learning_rate": 2.478810348423569e-08, + "logits/chosen": -1.7011363506317139, + "logits/rejected": -1.7028061151504517, + "logps/chosen": -4.473365783691406, + "logps/rejected": -153.642578125, + "loss": 0.4374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21413221955299377, + "rewards/margins": 1.6416051387786865, + "rewards/rejected": -1.855737328529358, + "step": 11654 + }, + { + "epoch": 0.68, + "learning_rate": 2.47799656769701e-08, + "logits/chosen": -1.907090187072754, + "logits/rejected": -1.9103572368621826, + "logps/chosen": -15.92104434967041, + "logps/rejected": -234.16839599609375, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3786202669143677, + "rewards/margins": 3.821615219116211, + "rewards/rejected": -2.4429948329925537, + "step": 11655 + }, + { + "epoch": 0.68, + "learning_rate": 2.477182876564347e-08, + "logits/chosen": -1.8481496572494507, + "logits/rejected": -1.847475528717041, + "logps/chosen": -7.628821849822998, + "logps/rejected": -72.66844177246094, + "loss": 0.4026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1393633335828781, + "rewards/margins": 1.980548620223999, + "rewards/rejected": -2.1199119091033936, + "step": 11656 + }, + { + "epoch": 0.68, + "learning_rate": 2.4763692750544867e-08, + "logits/chosen": -1.9718847274780273, + "logits/rejected": -1.9714105129241943, + "logps/chosen": -5.371762275695801, + "logps/rejected": -32.20950698852539, + "loss": 0.6311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005783939268440008, + "rewards/margins": 0.025957392528653145, + "rewards/rejected": -0.03174133226275444, + "step": 11657 + }, + { + "epoch": 0.68, + "learning_rate": 2.4755557631963385e-08, + "logits/chosen": -1.794715404510498, + "logits/rejected": -1.7921537160873413, + "logps/chosen": -200.7658233642578, + "logps/rejected": -377.4461975097656, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5242021083831787, + "rewards/margins": 5.773637771606445, + "rewards/rejected": -3.2494354248046875, + "step": 11658 + }, + { + "epoch": 0.68, + "learning_rate": 2.4747423410187956e-08, + "logits/chosen": -2.0873541831970215, + "logits/rejected": -2.0746705532073975, + "logps/chosen": -15.696483612060547, + "logps/rejected": -136.80123901367188, + "loss": 0.3855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09035187214612961, + "rewards/margins": 1.9192107915878296, + "rewards/rejected": -1.8288589715957642, + "step": 11659 + }, + { + "epoch": 0.68, + "learning_rate": 2.4739290085507593e-08, + "logits/chosen": -1.8639353513717651, + "logits/rejected": -1.9070929288864136, + "logps/chosen": -173.54974365234375, + "logps/rejected": -304.9111022949219, + "loss": 0.0428, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.304351806640625, + "rewards/margins": 2.759106397628784, + "rewards/rejected": -0.45475465059280396, + "step": 11660 + }, + { + "epoch": 0.68, + "learning_rate": 2.473115765821117e-08, + "logits/chosen": -1.8431931734085083, + "logits/rejected": -1.8430038690567017, + "logps/chosen": -30.238800048828125, + "logps/rejected": -130.48208618164062, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.65273517370224, + "rewards/margins": 1.644324541091919, + "rewards/rejected": -0.991589367389679, + "step": 11661 + }, + { + "epoch": 0.68, + "learning_rate": 2.4723026128587672e-08, + "logits/chosen": -1.7502254247665405, + "logits/rejected": -1.755474328994751, + "logps/chosen": -20.493000030517578, + "logps/rejected": -98.16808319091797, + "loss": 0.511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30740854144096375, + "rewards/margins": 0.4631359279155731, + "rewards/rejected": -0.15572738647460938, + "step": 11662 + }, + { + "epoch": 0.68, + "learning_rate": 2.4714895496925904e-08, + "logits/chosen": -1.8728222846984863, + "logits/rejected": -1.869229793548584, + "logps/chosen": -0.8738523125648499, + "logps/rejected": -108.86844635009766, + "loss": 0.5825, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07104294747114182, + "rewards/margins": 0.5786625742912292, + "rewards/rejected": -0.6497055292129517, + "step": 11663 + }, + { + "epoch": 0.68, + "learning_rate": 2.4706765763514753e-08, + "logits/chosen": -1.9124196767807007, + "logits/rejected": -2.0298125743865967, + "logps/chosen": -254.22967529296875, + "logps/rejected": -521.6202392578125, + "loss": 0.0688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2510131895542145, + "rewards/margins": 5.219781875610352, + "rewards/rejected": -4.96876859664917, + "step": 11664 + }, + { + "epoch": 0.68, + "learning_rate": 2.469863692864297e-08, + "logits/chosen": -1.9010545015335083, + "logits/rejected": -1.8978526592254639, + "logps/chosen": -167.51844787597656, + "logps/rejected": -319.12567138671875, + "loss": 0.2913, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2555099725723267, + "rewards/margins": 0.5722519159317017, + "rewards/rejected": 0.683258056640625, + "step": 11665 + }, + { + "epoch": 0.68, + "learning_rate": 2.4690508992599395e-08, + "logits/chosen": -1.9124096632003784, + "logits/rejected": -1.9075371026992798, + "logps/chosen": -161.48536682128906, + "logps/rejected": -254.5484161376953, + "loss": 0.1451, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.354872226715088, + "rewards/margins": 1.297134518623352, + "rewards/rejected": 1.0577377080917358, + "step": 11666 + }, + { + "epoch": 0.68, + "learning_rate": 2.4682381955672733e-08, + "logits/chosen": -1.8760967254638672, + "logits/rejected": -1.8625638484954834, + "logps/chosen": -180.05995178222656, + "logps/rejected": -368.012451171875, + "loss": 0.1505, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.159437656402588, + "rewards/margins": 1.1256120204925537, + "rewards/rejected": 2.033825635910034, + "step": 11667 + }, + { + "epoch": 0.68, + "learning_rate": 2.4674255818151703e-08, + "logits/chosen": -1.937107801437378, + "logits/rejected": -1.9322474002838135, + "logps/chosen": -51.102020263671875, + "logps/rejected": -335.2713623046875, + "loss": 0.2243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26370546221733093, + "rewards/margins": 4.053335666656494, + "rewards/rejected": -3.789630174636841, + "step": 11668 + }, + { + "epoch": 0.68, + "learning_rate": 2.4666130580324986e-08, + "logits/chosen": -1.9482632875442505, + "logits/rejected": -1.9476075172424316, + "logps/chosen": -0.006195133086293936, + "logps/rejected": -286.99627685546875, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035111160832457244, + "rewards/margins": 4.375082492828369, + "rewards/rejected": -4.375433444976807, + "step": 11669 + }, + { + "epoch": 0.68, + "learning_rate": 2.465800624248125e-08, + "logits/chosen": -1.6318674087524414, + "logits/rejected": -1.6205003261566162, + "logps/chosen": -50.495948791503906, + "logps/rejected": -225.07321166992188, + "loss": 0.1379, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.145311713218689, + "rewards/margins": 3.66048526763916, + "rewards/rejected": -2.5151734352111816, + "step": 11670 + }, + { + "epoch": 0.68, + "learning_rate": 2.4649882804909073e-08, + "logits/chosen": -2.0131263732910156, + "logits/rejected": -2.0155863761901855, + "logps/chosen": -1.9105945825576782, + "logps/rejected": -212.79550170898438, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021647561341524124, + "rewards/margins": 4.011744499206543, + "rewards/rejected": -3.9900970458984375, + "step": 11671 + }, + { + "epoch": 0.68, + "learning_rate": 2.4641760267897065e-08, + "logits/chosen": -1.7089167833328247, + "logits/rejected": -1.6864168643951416, + "logps/chosen": -267.6886901855469, + "logps/rejected": -549.5744018554688, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.726782202720642, + "rewards/margins": 10.136059761047363, + "rewards/rejected": -8.40927791595459, + "step": 11672 + }, + { + "epoch": 0.68, + "learning_rate": 2.4633638631733766e-08, + "logits/chosen": -1.8278409242630005, + "logits/rejected": -1.8797019720077515, + "logps/chosen": -348.4158935546875, + "logps/rejected": -402.4515380859375, + "loss": 0.1206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35382387042045593, + "rewards/margins": 2.671496629714966, + "rewards/rejected": -2.3176727294921875, + "step": 11673 + }, + { + "epoch": 0.68, + "learning_rate": 2.462551789670773e-08, + "logits/chosen": -1.846535563468933, + "logits/rejected": -1.8455193042755127, + "logps/chosen": -61.99645233154297, + "logps/rejected": -182.6551513671875, + "loss": 0.6104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5079795718193054, + "rewards/margins": 0.8165275454521179, + "rewards/rejected": -1.3245071172714233, + "step": 11674 + }, + { + "epoch": 0.68, + "learning_rate": 2.4617398063107398e-08, + "logits/chosen": -1.799814224243164, + "logits/rejected": -1.8040740489959717, + "logps/chosen": -109.52025604248047, + "logps/rejected": -300.75018310546875, + "loss": 0.0869, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3186218738555908, + "rewards/margins": 3.8567934036254883, + "rewards/rejected": -2.5381715297698975, + "step": 11675 + }, + { + "epoch": 0.68, + "learning_rate": 2.4609279131221238e-08, + "logits/chosen": -1.8781486749649048, + "logits/rejected": -1.8727366924285889, + "logps/chosen": -37.64609909057617, + "logps/rejected": -268.3150329589844, + "loss": 0.1373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6874481439590454, + "rewards/margins": 3.8350038528442383, + "rewards/rejected": -3.1475555896759033, + "step": 11676 + }, + { + "epoch": 0.68, + "learning_rate": 2.4601161101337693e-08, + "logits/chosen": -1.8869898319244385, + "logits/rejected": -1.8828706741333008, + "logps/chosen": -33.70176315307617, + "logps/rejected": -99.74789428710938, + "loss": 0.4367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20293007791042328, + "rewards/margins": 1.1982510089874268, + "rewards/rejected": -0.9953209161758423, + "step": 11677 + }, + { + "epoch": 0.68, + "learning_rate": 2.4593043973745143e-08, + "logits/chosen": -2.002413749694824, + "logits/rejected": -1.9913599491119385, + "logps/chosen": -54.152870178222656, + "logps/rejected": -283.89630126953125, + "loss": 0.2991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20389556884765625, + "rewards/margins": 6.642528057098389, + "rewards/rejected": -6.438632488250732, + "step": 11678 + }, + { + "epoch": 0.68, + "learning_rate": 2.4584927748731966e-08, + "logits/chosen": -1.8514803647994995, + "logits/rejected": -1.8659611940383911, + "logps/chosen": -271.99517822265625, + "logps/rejected": -354.4548034667969, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.643423557281494, + "rewards/margins": 3.135568380355835, + "rewards/rejected": -0.49214479327201843, + "step": 11679 + }, + { + "epoch": 0.68, + "learning_rate": 2.4576812426586434e-08, + "logits/chosen": -1.943757176399231, + "logits/rejected": -1.9304356575012207, + "logps/chosen": -203.9139404296875, + "logps/rejected": -287.77691650390625, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6231704950332642, + "rewards/margins": 0.19257962703704834, + "rewards/rejected": 1.4305908679962158, + "step": 11680 + }, + { + "epoch": 0.68, + "learning_rate": 2.4568698007596922e-08, + "logits/chosen": -1.9635530710220337, + "logits/rejected": -1.9053356647491455, + "logps/chosen": -255.38616943359375, + "logps/rejected": -425.8786926269531, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.195382833480835, + "rewards/margins": 3.0037598609924316, + "rewards/rejected": 0.19162292778491974, + "step": 11681 + }, + { + "epoch": 0.68, + "learning_rate": 2.456058449205164e-08, + "logits/chosen": -1.6872882843017578, + "logits/rejected": -1.664402723312378, + "logps/chosen": -368.34228515625, + "logps/rejected": -644.172607421875, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.910028100013733, + "rewards/margins": 7.8707275390625, + "rewards/rejected": -5.960699558258057, + "step": 11682 + }, + { + "epoch": 0.68, + "learning_rate": 2.455247188023885e-08, + "logits/chosen": -1.774732232093811, + "logits/rejected": -1.761643648147583, + "logps/chosen": -30.26320457458496, + "logps/rejected": -281.26446533203125, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48850497603416443, + "rewards/margins": 4.23029899597168, + "rewards/rejected": -3.7417938709259033, + "step": 11683 + }, + { + "epoch": 0.68, + "learning_rate": 2.45443601724467e-08, + "logits/chosen": -1.811155080795288, + "logits/rejected": -1.794816017150879, + "logps/chosen": -222.04489135742188, + "logps/rejected": -400.90570068359375, + "loss": 0.0673, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7553956508636475, + "rewards/margins": 2.14013671875, + "rewards/rejected": 0.6152588129043579, + "step": 11684 + }, + { + "epoch": 0.68, + "learning_rate": 2.453624936896343e-08, + "logits/chosen": -2.0554821491241455, + "logits/rejected": -2.053220748901367, + "logps/chosen": -0.00011312541028019041, + "logps/rejected": -67.74128723144531, + "loss": 0.6005, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.465935151325539e-05, + "rewards/margins": 0.40400195121765137, + "rewards/rejected": -0.4039672911167145, + "step": 11685 + }, + { + "epoch": 0.68, + "learning_rate": 2.4528139470077118e-08, + "logits/chosen": -1.846573829650879, + "logits/rejected": -1.8392966985702515, + "logps/chosen": -22.944358825683594, + "logps/rejected": -178.9967803955078, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3275810182094574, + "rewards/margins": 2.892235517501831, + "rewards/rejected": -2.564654588699341, + "step": 11686 + }, + { + "epoch": 0.68, + "learning_rate": 2.4520030476075894e-08, + "logits/chosen": -1.9535350799560547, + "logits/rejected": -1.964030385017395, + "logps/chosen": -5.509780406951904, + "logps/rejected": -204.45126342773438, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11040563881397247, + "rewards/margins": 3.8074395656585693, + "rewards/rejected": -3.9178452491760254, + "step": 11687 + }, + { + "epoch": 0.68, + "learning_rate": 2.451192238724782e-08, + "logits/chosen": -1.877833604812622, + "logits/rejected": -1.855399489402771, + "logps/chosen": -237.8157958984375, + "logps/rejected": -369.51190185546875, + "loss": 0.2675, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.365466356277466, + "rewards/margins": 0.4059631824493408, + "rewards/rejected": 1.959503173828125, + "step": 11688 + }, + { + "epoch": 0.68, + "learning_rate": 2.4503815203880956e-08, + "logits/chosen": -1.9197454452514648, + "logits/rejected": -1.9567621946334839, + "logps/chosen": -244.69683837890625, + "logps/rejected": -463.7578125, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.615020751953125, + "rewards/margins": 4.331332206726074, + "rewards/rejected": -2.7163116931915283, + "step": 11689 + }, + { + "epoch": 0.68, + "learning_rate": 2.4495708926263277e-08, + "logits/chosen": -2.0012929439544678, + "logits/rejected": -2.003053903579712, + "logps/chosen": -5.669404983520508, + "logps/rejected": -63.84334945678711, + "loss": 0.6349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.077178955078125, + "rewards/margins": 0.14093360304832458, + "rewards/rejected": -0.06375465542078018, + "step": 11690 + }, + { + "epoch": 0.68, + "learning_rate": 2.4487603554682768e-08, + "logits/chosen": -2.042935848236084, + "logits/rejected": -2.0434393882751465, + "logps/chosen": -0.0894867405295372, + "logps/rejected": -83.91287231445312, + "loss": 0.4304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004610883537679911, + "rewards/margins": 1.5887881517410278, + "rewards/rejected": -1.5933990478515625, + "step": 11691 + }, + { + "epoch": 0.68, + "learning_rate": 2.4479499089427373e-08, + "logits/chosen": -1.8628849983215332, + "logits/rejected": -1.8498355150222778, + "logps/chosen": -207.43228149414062, + "logps/rejected": -319.5614013671875, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.454913377761841, + "rewards/margins": 3.1770875453948975, + "rewards/rejected": 0.2778259217739105, + "step": 11692 + }, + { + "epoch": 0.68, + "learning_rate": 2.447139553078503e-08, + "logits/chosen": -1.824512004852295, + "logits/rejected": -1.830817461013794, + "logps/chosen": -185.75750732421875, + "logps/rejected": -476.39697265625, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.411163330078125, + "rewards/margins": 7.885083198547363, + "rewards/rejected": -5.473919868469238, + "step": 11693 + }, + { + "epoch": 0.68, + "learning_rate": 2.446329287904357e-08, + "logits/chosen": -1.9628618955612183, + "logits/rejected": -1.9607402086257935, + "logps/chosen": -68.5928955078125, + "logps/rejected": -169.35379028320312, + "loss": 0.2317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6682403683662415, + "rewards/margins": 1.9179749488830566, + "rewards/rejected": -1.2497345209121704, + "step": 11694 + }, + { + "epoch": 0.68, + "learning_rate": 2.445519113449086e-08, + "logits/chosen": -1.9583256244659424, + "logits/rejected": -1.9457643032073975, + "logps/chosen": -98.99498748779297, + "logps/rejected": -276.40594482421875, + "loss": 0.0777, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4624474048614502, + "rewards/margins": 3.090740203857422, + "rewards/rejected": -1.6282929182052612, + "step": 11695 + }, + { + "epoch": 0.68, + "learning_rate": 2.4447090297414714e-08, + "logits/chosen": -2.087146043777466, + "logits/rejected": -2.091216802597046, + "logps/chosen": -24.535186767578125, + "logps/rejected": -158.61680603027344, + "loss": 0.1675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.63678377866745, + "rewards/margins": 2.909907102584839, + "rewards/rejected": -2.273123264312744, + "step": 11696 + }, + { + "epoch": 0.68, + "learning_rate": 2.4438990368102915e-08, + "logits/chosen": -1.8685534000396729, + "logits/rejected": -1.8697158098220825, + "logps/chosen": -339.06671142578125, + "logps/rejected": -476.6939697265625, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1250762939453125, + "rewards/margins": 3.6814911365509033, + "rewards/rejected": -0.556414783000946, + "step": 11697 + }, + { + "epoch": 0.68, + "learning_rate": 2.4430891346843224e-08, + "logits/chosen": -1.8716158866882324, + "logits/rejected": -1.8692054748535156, + "logps/chosen": -28.102506637573242, + "logps/rejected": -154.1446990966797, + "loss": 0.21, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5601480603218079, + "rewards/margins": 3.524163246154785, + "rewards/rejected": -2.964015245437622, + "step": 11698 + }, + { + "epoch": 0.68, + "learning_rate": 2.4422793233923312e-08, + "logits/chosen": -1.7740297317504883, + "logits/rejected": -1.787826657295227, + "logps/chosen": -328.37091064453125, + "logps/rejected": -438.2410888671875, + "loss": 0.1609, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.534515380859375, + "rewards/margins": 1.0837891101837158, + "rewards/rejected": 0.45072633028030396, + "step": 11699 + }, + { + "epoch": 0.68, + "learning_rate": 2.4414696029630933e-08, + "logits/chosen": -1.962662696838379, + "logits/rejected": -1.9611515998840332, + "logps/chosen": -0.0008692715200595558, + "logps/rejected": -314.90704345703125, + "loss": 0.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.916837133350782e-05, + "rewards/margins": 4.891149520874023, + "rewards/rejected": -4.891198635101318, + "step": 11700 + }, + { + "epoch": 0.68, + "learning_rate": 2.4406599734253686e-08, + "logits/chosen": -1.8415192365646362, + "logits/rejected": -1.8398412466049194, + "logps/chosen": -259.93939208984375, + "logps/rejected": -436.6892395019531, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.91617751121521, + "rewards/margins": 4.225027561187744, + "rewards/rejected": -1.3088501691818237, + "step": 11701 + }, + { + "epoch": 0.68, + "learning_rate": 2.439850434807922e-08, + "logits/chosen": -1.8797849416732788, + "logits/rejected": -1.8789573907852173, + "logps/chosen": -182.27349853515625, + "logps/rejected": -261.5724182128906, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5514114499092102, + "rewards/margins": 0.37657320499420166, + "rewards/rejected": 0.17483825981616974, + "step": 11702 + }, + { + "epoch": 0.68, + "learning_rate": 2.4390409871395074e-08, + "logits/chosen": -2.0980257987976074, + "logits/rejected": -2.0995335578918457, + "logps/chosen": -11.983349800109863, + "logps/rejected": -164.8781280517578, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15464583039283752, + "rewards/margins": 3.031834125518799, + "rewards/rejected": -2.877188205718994, + "step": 11703 + }, + { + "epoch": 0.68, + "learning_rate": 2.438231630448888e-08, + "logits/chosen": -2.0977399349212646, + "logits/rejected": -2.0980379581451416, + "logps/chosen": -4.963752746582031, + "logps/rejected": -102.79254913330078, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45206862688064575, + "rewards/margins": 1.3278141021728516, + "rewards/rejected": -0.875745415687561, + "step": 11704 + }, + { + "epoch": 0.68, + "learning_rate": 2.43742236476481e-08, + "logits/chosen": -2.1254944801330566, + "logits/rejected": -2.128682851791382, + "logps/chosen": -4.051417350769043, + "logps/rejected": -79.01651000976562, + "loss": 0.3025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7651819586753845, + "rewards/margins": 1.2578692436218262, + "rewards/rejected": -0.4926872253417969, + "step": 11705 + }, + { + "epoch": 0.68, + "learning_rate": 2.4366131901160265e-08, + "logits/chosen": -1.9227044582366943, + "logits/rejected": -1.9454963207244873, + "logps/chosen": -248.03443908691406, + "logps/rejected": -439.94476318359375, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8669174909591675, + "rewards/margins": 4.681880474090576, + "rewards/rejected": -2.814962863922119, + "step": 11706 + }, + { + "epoch": 0.68, + "learning_rate": 2.4358041065312774e-08, + "logits/chosen": -1.7829293012619019, + "logits/rejected": -1.791826605796814, + "logps/chosen": -22.863178253173828, + "logps/rejected": -326.4111328125, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03340473398566246, + "rewards/margins": 5.555786609649658, + "rewards/rejected": -5.522381782531738, + "step": 11707 + }, + { + "epoch": 0.68, + "learning_rate": 2.434995114039313e-08, + "logits/chosen": -1.9349247217178345, + "logits/rejected": -1.9370224475860596, + "logps/chosen": -99.13235473632812, + "logps/rejected": -305.00677490234375, + "loss": 0.1097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.054022192955017, + "rewards/margins": 4.368719577789307, + "rewards/rejected": -3.314697265625, + "step": 11708 + }, + { + "epoch": 0.68, + "learning_rate": 2.4341862126688673e-08, + "logits/chosen": -1.900534987449646, + "logits/rejected": -1.885672926902771, + "logps/chosen": -41.04922103881836, + "logps/rejected": -534.3643798828125, + "loss": 0.4266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49961891770362854, + "rewards/margins": 10.132901191711426, + "rewards/rejected": -10.632519721984863, + "step": 11709 + }, + { + "epoch": 0.68, + "learning_rate": 2.433377402448678e-08, + "logits/chosen": -2.003175973892212, + "logits/rejected": -1.9985097646713257, + "logps/chosen": -5.608111381530762, + "logps/rejected": -274.6894836425781, + "loss": 0.269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2001023292541504, + "rewards/margins": 5.844418525695801, + "rewards/rejected": -5.64431619644165, + "step": 11710 + }, + { + "epoch": 0.68, + "learning_rate": 2.4325686834074784e-08, + "logits/chosen": -1.7991464138031006, + "logits/rejected": -1.795435905456543, + "logps/chosen": -32.39257049560547, + "logps/rejected": -131.76824951171875, + "loss": 0.5064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45421257615089417, + "rewards/margins": 0.27127647399902344, + "rewards/rejected": 0.18293610215187073, + "step": 11711 + }, + { + "epoch": 0.68, + "learning_rate": 2.4317600555739992e-08, + "logits/chosen": -1.9537227153778076, + "logits/rejected": -1.9422500133514404, + "logps/chosen": -12.726993560791016, + "logps/rejected": -196.38116455078125, + "loss": 0.3809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017816925421357155, + "rewards/margins": 3.1467583179473877, + "rewards/rejected": -3.1645753383636475, + "step": 11712 + }, + { + "epoch": 0.68, + "learning_rate": 2.4309515189769637e-08, + "logits/chosen": -1.753708004951477, + "logits/rejected": -1.7608050107955933, + "logps/chosen": -150.53335571289062, + "logps/rejected": -557.1512451171875, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.747328281402588, + "rewards/margins": 11.95028305053711, + "rewards/rejected": -9.202954292297363, + "step": 11713 + }, + { + "epoch": 0.68, + "learning_rate": 2.4301430736450968e-08, + "logits/chosen": -2.069810152053833, + "logits/rejected": -2.0616235733032227, + "logps/chosen": -3.135169390588999e-05, + "logps/rejected": -119.95191955566406, + "loss": 0.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.29151270964212e-07, + "rewards/margins": 1.7211507558822632, + "rewards/rejected": -1.721150279045105, + "step": 11714 + }, + { + "epoch": 0.68, + "learning_rate": 2.4293347196071186e-08, + "logits/chosen": -1.944737195968628, + "logits/rejected": -1.9456161260604858, + "logps/chosen": -19.157413482666016, + "logps/rejected": -248.72865295410156, + "loss": 0.2271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2819484770298004, + "rewards/margins": 4.444849967956543, + "rewards/rejected": -4.162901401519775, + "step": 11715 + }, + { + "epoch": 0.68, + "learning_rate": 2.4285264568917475e-08, + "logits/chosen": -2.062918186187744, + "logits/rejected": -2.105046033859253, + "logps/chosen": -212.35565185546875, + "logps/rejected": -363.3760070800781, + "loss": 0.0551, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.943579077720642, + "rewards/margins": 2.668478488922119, + "rewards/rejected": -0.7248992919921875, + "step": 11716 + }, + { + "epoch": 0.68, + "learning_rate": 2.427718285527693e-08, + "logits/chosen": -2.0806689262390137, + "logits/rejected": -2.0721681118011475, + "logps/chosen": -55.05216979980469, + "logps/rejected": -177.39405822753906, + "loss": 0.241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6586883664131165, + "rewards/margins": 1.7672271728515625, + "rewards/rejected": -1.1085388660430908, + "step": 11717 + }, + { + "epoch": 0.68, + "learning_rate": 2.4269102055436675e-08, + "logits/chosen": -1.882691502571106, + "logits/rejected": -1.8549892902374268, + "logps/chosen": -88.0733413696289, + "logps/rejected": -327.806884765625, + "loss": 0.1704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6755356192588806, + "rewards/margins": 1.742262363433838, + "rewards/rejected": -1.0667266845703125, + "step": 11718 + }, + { + "epoch": 0.68, + "learning_rate": 2.4261022169683783e-08, + "logits/chosen": -1.7836833000183105, + "logits/rejected": -1.8892608880996704, + "logps/chosen": -289.1865234375, + "logps/rejected": -545.4151000976562, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5745790004730225, + "rewards/margins": 5.28602933883667, + "rewards/rejected": -2.7114503383636475, + "step": 11719 + }, + { + "epoch": 0.68, + "learning_rate": 2.425294319830528e-08, + "logits/chosen": -1.8030288219451904, + "logits/rejected": -1.805299162864685, + "logps/chosen": -215.6537628173828, + "logps/rejected": -307.89599609375, + "loss": 0.3238, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5008728504180908, + "rewards/margins": 0.25816047191619873, + "rewards/rejected": 1.242712378501892, + "step": 11720 + }, + { + "epoch": 0.68, + "learning_rate": 2.42448651415882e-08, + "logits/chosen": -1.8687859773635864, + "logits/rejected": -1.8811748027801514, + "logps/chosen": -390.7965087890625, + "logps/rejected": -392.46966552734375, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.841528296470642, + "rewards/margins": 3.069958448410034, + "rewards/rejected": -1.228430151939392, + "step": 11721 + }, + { + "epoch": 0.68, + "learning_rate": 2.4236787999819452e-08, + "logits/chosen": -1.980819582939148, + "logits/rejected": -1.979856014251709, + "logps/chosen": -13.44759464263916, + "logps/rejected": -114.77996063232422, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3731013238430023, + "rewards/margins": 1.1424205303192139, + "rewards/rejected": -0.7693191766738892, + "step": 11722 + }, + { + "epoch": 0.68, + "learning_rate": 2.4228711773286052e-08, + "logits/chosen": -1.8716665506362915, + "logits/rejected": -1.8684996366500854, + "logps/chosen": -292.638916015625, + "logps/rejected": -447.8869934082031, + "loss": 0.1315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.900360107421875, + "rewards/margins": 3.978506565093994, + "rewards/rejected": -3.078146457672119, + "step": 11723 + }, + { + "epoch": 0.68, + "learning_rate": 2.422063646227485e-08, + "logits/chosen": -1.8404057025909424, + "logits/rejected": -1.8395134210586548, + "logps/chosen": -233.91708374023438, + "logps/rejected": -521.9044799804688, + "loss": 0.0363, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1190431118011475, + "rewards/margins": 5.736133098602295, + "rewards/rejected": -3.6170899868011475, + "step": 11724 + }, + { + "epoch": 0.68, + "learning_rate": 2.4212562067072766e-08, + "logits/chosen": -1.8242820501327515, + "logits/rejected": -1.8254446983337402, + "logps/chosen": -2.3352277278900146, + "logps/rejected": -104.92376708984375, + "loss": 0.4081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035204529762268066, + "rewards/margins": 1.7381396293640137, + "rewards/rejected": -1.7029350996017456, + "step": 11725 + }, + { + "epoch": 0.68, + "learning_rate": 2.4204488587966583e-08, + "logits/chosen": -1.9690674543380737, + "logits/rejected": -1.9339405298233032, + "logps/chosen": -22.554105758666992, + "logps/rejected": -201.68161010742188, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2774272859096527, + "rewards/margins": 3.4368479251861572, + "rewards/rejected": -3.1594207286834717, + "step": 11726 + }, + { + "epoch": 0.68, + "learning_rate": 2.4196416025243177e-08, + "logits/chosen": -1.7948462963104248, + "logits/rejected": -1.7973405122756958, + "logps/chosen": -0.07296080887317657, + "logps/rejected": -107.96973419189453, + "loss": 0.7271, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.053480006754398346, + "rewards/margins": -0.18115058541297913, + "rewards/rejected": 0.23463058471679688, + "step": 11727 + }, + { + "epoch": 0.68, + "learning_rate": 2.4188344379189273e-08, + "logits/chosen": -1.8197580575942993, + "logits/rejected": -1.8210848569869995, + "logps/chosen": -53.34357833862305, + "logps/rejected": -284.1141052246094, + "loss": 0.2251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5357017517089844, + "rewards/margins": 5.644591808319092, + "rewards/rejected": -5.108890056610107, + "step": 11728 + }, + { + "epoch": 0.68, + "learning_rate": 2.4180273650091636e-08, + "logits/chosen": -2.00811505317688, + "logits/rejected": -1.9765918254852295, + "logps/chosen": -197.2608642578125, + "logps/rejected": -370.94903564453125, + "loss": 0.1337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.829504370689392, + "rewards/margins": 1.3690154552459717, + "rewards/rejected": 0.460488885641098, + "step": 11729 + }, + { + "epoch": 0.68, + "learning_rate": 2.4172203838236975e-08, + "logits/chosen": -2.013023614883423, + "logits/rejected": -2.0079565048217773, + "logps/chosen": -8.141848957166076e-05, + "logps/rejected": -325.4039001464844, + "loss": 0.3299, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7523285578135983e-06, + "rewards/margins": 8.357409477233887, + "rewards/rejected": -8.35741138458252, + "step": 11730 + }, + { + "epoch": 0.68, + "learning_rate": 2.4164134943911997e-08, + "logits/chosen": -1.8944224119186401, + "logits/rejected": -1.9422990083694458, + "logps/chosen": -317.0321350097656, + "logps/rejected": -475.88848876953125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1071534156799316, + "rewards/margins": 5.991613864898682, + "rewards/rejected": -2.88446044921875, + "step": 11731 + }, + { + "epoch": 0.68, + "learning_rate": 2.41560669674033e-08, + "logits/chosen": -1.9471527338027954, + "logits/rejected": -1.9247174263000488, + "logps/chosen": -110.78106689453125, + "logps/rejected": -283.0242919921875, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8549941778182983, + "rewards/margins": 0.6247177124023438, + "rewards/rejected": 1.2302764654159546, + "step": 11732 + }, + { + "epoch": 0.68, + "learning_rate": 2.414799990899752e-08, + "logits/chosen": -1.7039897441864014, + "logits/rejected": -1.7016332149505615, + "logps/chosen": -35.9387092590332, + "logps/rejected": -174.7610626220703, + "loss": 0.2646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5858063101768494, + "rewards/margins": 1.529473900794983, + "rewards/rejected": -0.9436675906181335, + "step": 11733 + }, + { + "epoch": 0.68, + "learning_rate": 2.4139933768981235e-08, + "logits/chosen": -1.7800614833831787, + "logits/rejected": -1.7810255289077759, + "logps/chosen": -40.08211898803711, + "logps/rejected": -194.37124633789062, + "loss": 0.5941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12711219489574432, + "rewards/margins": 0.18374672532081604, + "rewards/rejected": -0.05663452297449112, + "step": 11734 + }, + { + "epoch": 0.68, + "learning_rate": 2.413186854764102e-08, + "logits/chosen": -1.938103199005127, + "logits/rejected": -1.9404445886611938, + "logps/chosen": -140.35494995117188, + "logps/rejected": -230.1429443359375, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3820114135742188, + "rewards/margins": 1.7096145153045654, + "rewards/rejected": 0.6723968386650085, + "step": 11735 + }, + { + "epoch": 0.68, + "learning_rate": 2.412380424526335e-08, + "logits/chosen": -1.8865844011306763, + "logits/rejected": -1.8670275211334229, + "logps/chosen": -181.7919921875, + "logps/rejected": -372.9570617675781, + "loss": 0.2092, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.031207323074341, + "rewards/margins": 0.7538543939590454, + "rewards/rejected": 1.2773529291152954, + "step": 11736 + }, + { + "epoch": 0.68, + "learning_rate": 2.4115740862134726e-08, + "logits/chosen": -1.9282606840133667, + "logits/rejected": -1.9539287090301514, + "logps/chosen": -236.17697143554688, + "logps/rejected": -205.0769500732422, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8835372924804688, + "rewards/margins": 1.2803375720977783, + "rewards/rejected": 0.6031997799873352, + "step": 11737 + }, + { + "epoch": 0.68, + "learning_rate": 2.4107678398541604e-08, + "logits/chosen": -1.8104889392852783, + "logits/rejected": -1.8066738843917847, + "logps/chosen": -221.44204711914062, + "logps/rejected": -330.45037841796875, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7918106317520142, + "rewards/margins": 0.6041152477264404, + "rewards/rejected": 1.1876953840255737, + "step": 11738 + }, + { + "epoch": 0.68, + "learning_rate": 2.40996168547704e-08, + "logits/chosen": -1.9061254262924194, + "logits/rejected": -1.9046753644943237, + "logps/chosen": -4.906448841094971, + "logps/rejected": -53.16176986694336, + "loss": 0.5256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.268094927072525, + "rewards/margins": 0.3086516559123993, + "rewards/rejected": -0.04055671766400337, + "step": 11739 + }, + { + "epoch": 0.68, + "learning_rate": 2.4091556231107506e-08, + "logits/chosen": -2.0502209663391113, + "logits/rejected": -2.0315749645233154, + "logps/chosen": -0.014640972018241882, + "logps/rejected": -234.0465850830078, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009265745175071061, + "rewards/margins": 4.23097562789917, + "rewards/rejected": -4.230049133300781, + "step": 11740 + }, + { + "epoch": 0.68, + "learning_rate": 2.4083496527839242e-08, + "logits/chosen": -2.1410000324249268, + "logits/rejected": -2.138730764389038, + "logps/chosen": -90.67118072509766, + "logps/rejected": -172.03982543945312, + "loss": 0.1863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7452919483184814, + "rewards/margins": 1.277320146560669, + "rewards/rejected": 0.4679718017578125, + "step": 11741 + }, + { + "epoch": 0.68, + "learning_rate": 2.4075437745251982e-08, + "logits/chosen": -1.859344482421875, + "logits/rejected": -1.8745626211166382, + "logps/chosen": -215.31910705566406, + "logps/rejected": -292.18524169921875, + "loss": 0.1474, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9379119873046875, + "rewards/margins": 1.6355987787246704, + "rewards/rejected": 0.3023132383823395, + "step": 11742 + }, + { + "epoch": 0.68, + "learning_rate": 2.4067379883631965e-08, + "logits/chosen": -1.8481327295303345, + "logits/rejected": -1.8741422891616821, + "logps/chosen": -204.31979370117188, + "logps/rejected": -285.4822692871094, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0082931518554688, + "rewards/margins": 3.1615798473358154, + "rewards/rejected": -0.15328674018383026, + "step": 11743 + }, + { + "epoch": 0.68, + "learning_rate": 2.4059322943265486e-08, + "logits/chosen": -1.9359835386276245, + "logits/rejected": -1.9309941530227661, + "logps/chosen": -56.97299575805664, + "logps/rejected": -249.4058837890625, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7164264917373657, + "rewards/margins": 1.2887928485870361, + "rewards/rejected": -0.5723663568496704, + "step": 11744 + }, + { + "epoch": 0.68, + "learning_rate": 2.4051266924438713e-08, + "logits/chosen": -1.830645203590393, + "logits/rejected": -1.8313945531845093, + "logps/chosen": -21.395414352416992, + "logps/rejected": -132.666015625, + "loss": 0.4423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11499233543872833, + "rewards/margins": 1.3650656938552856, + "rewards/rejected": -1.2500733137130737, + "step": 11745 + }, + { + "epoch": 0.68, + "learning_rate": 2.40432118274379e-08, + "logits/chosen": -1.9642257690429688, + "logits/rejected": -1.942282795906067, + "logps/chosen": -214.73330688476562, + "logps/rejected": -353.4842834472656, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.47123122215271, + "rewards/margins": 4.267279148101807, + "rewards/rejected": -1.7960480451583862, + "step": 11746 + }, + { + "epoch": 0.68, + "learning_rate": 2.4035157652549147e-08, + "logits/chosen": -2.0721192359924316, + "logits/rejected": -2.0553855895996094, + "logps/chosen": -30.804141998291016, + "logps/rejected": -160.62176513671875, + "loss": 0.4055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026213455945253372, + "rewards/margins": 2.072990894317627, + "rewards/rejected": -2.0467774868011475, + "step": 11747 + }, + { + "epoch": 0.68, + "learning_rate": 2.402710440005861e-08, + "logits/chosen": -1.8100899457931519, + "logits/rejected": -1.791774868965149, + "logps/chosen": -129.5394287109375, + "logps/rejected": -237.47842407226562, + "loss": 0.4548, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.024163842201233, + "rewards/margins": 0.1924789547920227, + "rewards/rejected": 0.8316848874092102, + "step": 11748 + }, + { + "epoch": 0.68, + "learning_rate": 2.401905207025237e-08, + "logits/chosen": -1.9927682876586914, + "logits/rejected": -1.9804712533950806, + "logps/chosen": -0.003861166536808014, + "logps/rejected": -327.83984375, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002858545340131968, + "rewards/margins": 4.269959926605225, + "rewards/rejected": -4.270245552062988, + "step": 11749 + }, + { + "epoch": 0.68, + "learning_rate": 2.4011000663416502e-08, + "logits/chosen": -1.9950542449951172, + "logits/rejected": -1.9696542024612427, + "logps/chosen": -3.120781421661377, + "logps/rejected": -553.7263793945312, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07291398197412491, + "rewards/margins": 9.56067943572998, + "rewards/rejected": -9.633593559265137, + "step": 11750 + }, + { + "epoch": 0.68, + "learning_rate": 2.4002950179837007e-08, + "logits/chosen": -2.142529010772705, + "logits/rejected": -2.12914776802063, + "logps/chosen": -3.278218355262652e-05, + "logps/rejected": -208.97039794921875, + "loss": 0.3243, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.814631952536729e-07, + "rewards/margins": 5.627639293670654, + "rewards/rejected": -5.6276397705078125, + "step": 11751 + }, + { + "epoch": 0.68, + "learning_rate": 2.399490061979988e-08, + "logits/chosen": -2.1055943965911865, + "logits/rejected": -2.106231689453125, + "logps/chosen": -21.48011016845703, + "logps/rejected": -118.78795623779297, + "loss": 0.2432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.749005913734436, + "rewards/margins": 1.7617135047912598, + "rewards/rejected": -1.0127075910568237, + "step": 11752 + }, + { + "epoch": 0.68, + "learning_rate": 2.398685198359109e-08, + "logits/chosen": -1.9502118825912476, + "logits/rejected": -1.9352121353149414, + "logps/chosen": -96.66685485839844, + "logps/rejected": -272.1969909667969, + "loss": 0.0647, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8050826787948608, + "rewards/margins": 4.144415378570557, + "rewards/rejected": -2.3393325805664062, + "step": 11753 + }, + { + "epoch": 0.68, + "learning_rate": 2.3978804271496578e-08, + "logits/chosen": -1.9212778806686401, + "logits/rejected": -1.9389855861663818, + "logps/chosen": -164.2578125, + "logps/rejected": -301.1904602050781, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.753657579421997, + "rewards/margins": 2.2788803577423096, + "rewards/rejected": -0.5252227783203125, + "step": 11754 + }, + { + "epoch": 0.68, + "learning_rate": 2.39707574838022e-08, + "logits/chosen": -1.965842843055725, + "logits/rejected": -1.9562163352966309, + "logps/chosen": -54.273292541503906, + "logps/rejected": -140.62045288085938, + "loss": 0.6517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07803306728601456, + "rewards/margins": 0.001980207860469818, + "rewards/rejected": 0.07605285942554474, + "step": 11755 + }, + { + "epoch": 0.68, + "learning_rate": 2.3962711620793847e-08, + "logits/chosen": -1.660703420639038, + "logits/rejected": -1.6479501724243164, + "logps/chosen": -222.91847229003906, + "logps/rejected": -317.4472961425781, + "loss": 0.1237, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4482743740081787, + "rewards/margins": 1.5641648769378662, + "rewards/rejected": 0.8841094970703125, + "step": 11756 + }, + { + "epoch": 0.68, + "learning_rate": 2.3954666682757334e-08, + "logits/chosen": -1.930478572845459, + "logits/rejected": -1.932106852531433, + "logps/chosen": -62.000431060791016, + "logps/rejected": -310.63641357421875, + "loss": 0.2032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8635150790214539, + "rewards/margins": 1.5311603546142578, + "rewards/rejected": -0.667645275592804, + "step": 11757 + }, + { + "epoch": 0.68, + "learning_rate": 2.3946622669978462e-08, + "logits/chosen": -1.870373249053955, + "logits/rejected": -1.8575212955474854, + "logps/chosen": -38.15059280395508, + "logps/rejected": -234.3935546875, + "loss": 0.1906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5300766229629517, + "rewards/margins": 3.7000794410705566, + "rewards/rejected": -3.1700026988983154, + "step": 11758 + }, + { + "epoch": 0.68, + "learning_rate": 2.3938579582743012e-08, + "logits/chosen": -1.98955237865448, + "logits/rejected": -1.9862326383590698, + "logps/chosen": -7.8754377365112305, + "logps/rejected": -291.4676208496094, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06245002895593643, + "rewards/margins": 4.832685470581055, + "rewards/rejected": -4.895135402679443, + "step": 11759 + }, + { + "epoch": 0.68, + "learning_rate": 2.393053742133666e-08, + "logits/chosen": -1.8980211019515991, + "logits/rejected": -1.8813385963439941, + "logps/chosen": -228.71400451660156, + "logps/rejected": -431.6593017578125, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.514352560043335, + "rewards/margins": 3.0391602516174316, + "rewards/rejected": -0.5248077511787415, + "step": 11760 + }, + { + "epoch": 0.68, + "learning_rate": 2.3922496186045175e-08, + "logits/chosen": -1.8086256980895996, + "logits/rejected": -1.823074460029602, + "logps/chosen": -138.34295654296875, + "logps/rejected": -244.54798889160156, + "loss": 0.5154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7011780142784119, + "rewards/margins": 2.871730089187622, + "rewards/rejected": -3.5729081630706787, + "step": 11761 + }, + { + "epoch": 0.68, + "learning_rate": 2.3914455877154165e-08, + "logits/chosen": -1.7913172245025635, + "logits/rejected": -1.794100046157837, + "logps/chosen": -12.589938163757324, + "logps/rejected": -228.9611358642578, + "loss": 0.3766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22591190040111542, + "rewards/margins": 4.065486431121826, + "rewards/rejected": -4.291398525238037, + "step": 11762 + }, + { + "epoch": 0.68, + "learning_rate": 2.39064164949493e-08, + "logits/chosen": -1.9901659488677979, + "logits/rejected": -2.0021612644195557, + "logps/chosen": -55.76279067993164, + "logps/rejected": -225.99404907226562, + "loss": 0.4068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.062453459948301315, + "rewards/margins": 2.020606279373169, + "rewards/rejected": -2.083059787750244, + "step": 11763 + }, + { + "epoch": 0.68, + "learning_rate": 2.3898378039716122e-08, + "logits/chosen": -2.101867914199829, + "logits/rejected": -2.096068859100342, + "logps/chosen": -0.028592459857463837, + "logps/rejected": -145.39413452148438, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02076066844165325, + "rewards/margins": 3.386845827102661, + "rewards/rejected": -3.3660850524902344, + "step": 11764 + }, + { + "epoch": 0.68, + "learning_rate": 2.389034051174027e-08, + "logits/chosen": -2.085148334503174, + "logits/rejected": -2.077226161956787, + "logps/chosen": -0.018228013068437576, + "logps/rejected": -217.7257537841797, + "loss": 0.3601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0043777464888989925, + "rewards/margins": 3.4939348697662354, + "rewards/rejected": -3.4895570278167725, + "step": 11765 + }, + { + "epoch": 0.68, + "learning_rate": 2.3882303911307222e-08, + "logits/chosen": -1.9350491762161255, + "logits/rejected": -1.8910959959030151, + "logps/chosen": -147.720947265625, + "logps/rejected": -252.5641632080078, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.860888659954071, + "rewards/margins": 0.7752227783203125, + "rewards/rejected": 0.08566589653491974, + "step": 11766 + }, + { + "epoch": 0.68, + "learning_rate": 2.3874268238702512e-08, + "logits/chosen": -1.986358404159546, + "logits/rejected": -1.97688889503479, + "logps/chosen": -4.053051452501677e-05, + "logps/rejected": -287.9163818359375, + "loss": 0.3458, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.814624562892277e-07, + "rewards/margins": 5.762119293212891, + "rewards/rejected": -5.762118816375732, + "step": 11767 + }, + { + "epoch": 0.68, + "learning_rate": 2.3866233494211546e-08, + "logits/chosen": -2.161844253540039, + "logits/rejected": -2.163118362426758, + "logps/chosen": -5.8830485343933105, + "logps/rejected": -121.06017303466797, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08561386913061142, + "rewards/margins": 1.465116024017334, + "rewards/rejected": -1.3795021772384644, + "step": 11768 + }, + { + "epoch": 0.68, + "learning_rate": 2.3858199678119845e-08, + "logits/chosen": -2.1720612049102783, + "logits/rejected": -2.160219669342041, + "logps/chosen": -55.2213134765625, + "logps/rejected": -226.16065979003906, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8706962466239929, + "rewards/margins": 1.1117042303085327, + "rewards/rejected": -0.241007998585701, + "step": 11769 + }, + { + "epoch": 0.68, + "learning_rate": 2.3850166790712746e-08, + "logits/chosen": -1.7655161619186401, + "logits/rejected": -1.7788987159729004, + "logps/chosen": -9.037776947021484, + "logps/rejected": -279.22412109375, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10173149406909943, + "rewards/margins": 2.5698742866516113, + "rewards/rejected": -2.4681427478790283, + "step": 11770 + }, + { + "epoch": 0.68, + "learning_rate": 2.3842134832275627e-08, + "logits/chosen": -1.8025758266448975, + "logits/rejected": -1.8213995695114136, + "logps/chosen": -253.79515075683594, + "logps/rejected": -446.09228515625, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4269698858261108, + "rewards/margins": 4.457719326019287, + "rewards/rejected": -3.030749559402466, + "step": 11771 + }, + { + "epoch": 0.69, + "learning_rate": 2.383410380309384e-08, + "logits/chosen": -1.6287800073623657, + "logits/rejected": -1.638952374458313, + "logps/chosen": -232.85760498046875, + "logps/rejected": -607.9348754882812, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1792449951171875, + "rewards/margins": 9.836832046508789, + "rewards/rejected": -7.657586574554443, + "step": 11772 + }, + { + "epoch": 0.69, + "learning_rate": 2.382607370345269e-08, + "logits/chosen": -1.9536898136138916, + "logits/rejected": -1.9526867866516113, + "logps/chosen": -51.15062713623047, + "logps/rejected": -152.3203125, + "loss": 0.3649, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40733107924461365, + "rewards/margins": 1.1970008611679077, + "rewards/rejected": -0.7896698117256165, + "step": 11773 + }, + { + "epoch": 0.69, + "learning_rate": 2.3818044533637417e-08, + "logits/chosen": -1.9839935302734375, + "logits/rejected": -1.988359808921814, + "logps/chosen": -43.60259246826172, + "logps/rejected": -260.51092529296875, + "loss": 0.3755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6359123587608337, + "rewards/margins": 0.8429436087608337, + "rewards/rejected": -0.20703125, + "step": 11774 + }, + { + "epoch": 0.69, + "learning_rate": 2.3810016293933272e-08, + "logits/chosen": -1.8761186599731445, + "logits/rejected": -1.870404839515686, + "logps/chosen": -392.33526611328125, + "logps/rejected": -472.31390380859375, + "loss": 0.2641, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.245764136314392, + "rewards/margins": 0.49729305505752563, + "rewards/rejected": 0.7484710812568665, + "step": 11775 + }, + { + "epoch": 0.69, + "learning_rate": 2.380198898462546e-08, + "logits/chosen": -1.829980731010437, + "logits/rejected": -1.8197195529937744, + "logps/chosen": -60.42443084716797, + "logps/rejected": -122.23838806152344, + "loss": 0.1281, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0303497314453125, + "rewards/margins": 3.083232879638672, + "rewards/rejected": -2.0528831481933594, + "step": 11776 + }, + { + "epoch": 0.69, + "learning_rate": 2.3793962605999156e-08, + "logits/chosen": -1.840583324432373, + "logits/rejected": -1.8451857566833496, + "logps/chosen": -0.9750227928161621, + "logps/rejected": -115.23046112060547, + "loss": 0.4609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08317531645298004, + "rewards/margins": 1.1720974445343018, + "rewards/rejected": -1.088922142982483, + "step": 11777 + }, + { + "epoch": 0.69, + "learning_rate": 2.378593715833948e-08, + "logits/chosen": -1.7257999181747437, + "logits/rejected": -1.684800386428833, + "logps/chosen": -219.8360595703125, + "logps/rejected": -464.1282653808594, + "loss": 0.1095, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8281372785568237, + "rewards/margins": 2.122732639312744, + "rewards/rejected": -0.294595330953598, + "step": 11778 + }, + { + "epoch": 0.69, + "learning_rate": 2.3777912641931537e-08, + "logits/chosen": -1.9663468599319458, + "logits/rejected": -1.9640154838562012, + "logps/chosen": -146.2491455078125, + "logps/rejected": -301.6624755859375, + "loss": 0.5322, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3859314024448395, + "rewards/margins": -0.2017517387866974, + "rewards/rejected": 0.5876831412315369, + "step": 11779 + }, + { + "epoch": 0.69, + "learning_rate": 2.3769889057060406e-08, + "logits/chosen": -1.9599814414978027, + "logits/rejected": -1.9531152248382568, + "logps/chosen": -294.0116271972656, + "logps/rejected": -451.0096130371094, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6850067377090454, + "rewards/margins": 0.14365839958190918, + "rewards/rejected": 1.5413483381271362, + "step": 11780 + }, + { + "epoch": 0.69, + "learning_rate": 2.3761866404011123e-08, + "logits/chosen": -1.9273513555526733, + "logits/rejected": -1.933700680732727, + "logps/chosen": -183.3161163330078, + "logps/rejected": -271.6681213378906, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.214526414871216, + "rewards/margins": 2.997036933898926, + "rewards/rejected": -0.7825103998184204, + "step": 11781 + }, + { + "epoch": 0.69, + "learning_rate": 2.3753844683068702e-08, + "logits/chosen": -1.9482823610305786, + "logits/rejected": -1.9448599815368652, + "logps/chosen": -63.633060455322266, + "logps/rejected": -233.20323181152344, + "loss": 0.252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8966396450996399, + "rewards/margins": 1.3994534015655518, + "rewards/rejected": -0.5028137564659119, + "step": 11782 + }, + { + "epoch": 0.69, + "learning_rate": 2.374582389451807e-08, + "logits/chosen": -1.9235645532608032, + "logits/rejected": -1.9051237106323242, + "logps/chosen": -28.28633689880371, + "logps/rejected": -206.61883544921875, + "loss": 0.3429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43979740142822266, + "rewards/margins": 1.447948694229126, + "rewards/rejected": -1.0081512928009033, + "step": 11783 + }, + { + "epoch": 0.69, + "learning_rate": 2.373780403864423e-08, + "logits/chosen": -1.9916119575500488, + "logits/rejected": -1.997453212738037, + "logps/chosen": -34.510772705078125, + "logps/rejected": -151.51731872558594, + "loss": 0.216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41218337416648865, + "rewards/margins": 2.4580740928649902, + "rewards/rejected": -2.0458908081054688, + "step": 11784 + }, + { + "epoch": 0.69, + "learning_rate": 2.3729785115732036e-08, + "logits/chosen": -1.9623031616210938, + "logits/rejected": -1.9567065238952637, + "logps/chosen": -5.412028986029327e-05, + "logps/rejected": -190.6799774169922, + "loss": 0.3348, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.940318707573169e-07, + "rewards/margins": 5.383408546447754, + "rewards/rejected": -5.3834075927734375, + "step": 11785 + }, + { + "epoch": 0.69, + "learning_rate": 2.3721767126066396e-08, + "logits/chosen": -1.9674068689346313, + "logits/rejected": -1.9571856260299683, + "logps/chosen": -42.037845611572266, + "logps/rejected": -197.6171417236328, + "loss": 0.1276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.036394476890564, + "rewards/margins": 5.603631019592285, + "rewards/rejected": -4.567236423492432, + "step": 11786 + }, + { + "epoch": 0.69, + "learning_rate": 2.371375006993209e-08, + "logits/chosen": -1.6880701780319214, + "logits/rejected": -1.702439785003662, + "logps/chosen": -127.01252746582031, + "logps/rejected": -391.019287109375, + "loss": 0.1613, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3652085065841675, + "rewards/margins": 1.2415268421173096, + "rewards/rejected": 0.12368164211511612, + "step": 11787 + }, + { + "epoch": 0.69, + "learning_rate": 2.3705733947614e-08, + "logits/chosen": -1.8748109340667725, + "logits/rejected": -1.8566845655441284, + "logps/chosen": -250.19680786132812, + "logps/rejected": -391.72998046875, + "loss": 0.1188, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5090699195861816, + "rewards/margins": 1.67669677734375, + "rewards/rejected": 0.8323730826377869, + "step": 11788 + }, + { + "epoch": 0.69, + "learning_rate": 2.3697718759396835e-08, + "logits/chosen": -1.6738736629486084, + "logits/rejected": -1.6806139945983887, + "logps/chosen": -2.051262617111206, + "logps/rejected": -165.4052734375, + "loss": 0.3748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028853893280029297, + "rewards/margins": 2.6939337253570557, + "rewards/rejected": -2.722787618637085, + "step": 11789 + }, + { + "epoch": 0.69, + "learning_rate": 2.368970450556536e-08, + "logits/chosen": -1.9719277620315552, + "logits/rejected": -1.9576072692871094, + "logps/chosen": -106.52754974365234, + "logps/rejected": -334.3105163574219, + "loss": 0.1465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7510597109794617, + "rewards/margins": 3.788275957107544, + "rewards/rejected": -3.0372161865234375, + "step": 11790 + }, + { + "epoch": 0.69, + "learning_rate": 2.3681691186404285e-08, + "logits/chosen": -2.015184164047241, + "logits/rejected": -2.0133609771728516, + "logps/chosen": -4.325303077697754, + "logps/rejected": -179.19793701171875, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35754457116127014, + "rewards/margins": 2.562732696533203, + "rewards/rejected": -2.205188035964966, + "step": 11791 + }, + { + "epoch": 0.69, + "learning_rate": 2.367367880219829e-08, + "logits/chosen": -1.7074449062347412, + "logits/rejected": -1.7056089639663696, + "logps/chosen": -3.397442924324423e-05, + "logps/rejected": -188.74606323242188, + "loss": 0.3387, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.841193342348561e-07, + "rewards/margins": 3.6271016597747803, + "rewards/rejected": -3.627101182937622, + "step": 11792 + }, + { + "epoch": 0.69, + "learning_rate": 2.3665667353231977e-08, + "logits/chosen": -1.9703500270843506, + "logits/rejected": -1.970913052558899, + "logps/chosen": -29.22079849243164, + "logps/rejected": -130.133056640625, + "loss": 0.694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3297182023525238, + "rewards/margins": 0.10325300693511963, + "rewards/rejected": -0.43297120928764343, + "step": 11793 + }, + { + "epoch": 0.69, + "learning_rate": 2.365765683978998e-08, + "logits/chosen": -1.7646406888961792, + "logits/rejected": -1.8059741258621216, + "logps/chosen": -371.85498046875, + "logps/rejected": -366.47900390625, + "loss": 0.077, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.398236036300659, + "rewards/margins": 1.9591399431228638, + "rewards/rejected": 0.439096063375473, + "step": 11794 + }, + { + "epoch": 0.69, + "learning_rate": 2.364964726215687e-08, + "logits/chosen": -1.9444538354873657, + "logits/rejected": -1.9454094171524048, + "logps/chosen": -244.56192016601562, + "logps/rejected": -476.4406433105469, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1099305152893066, + "rewards/margins": 8.652420043945312, + "rewards/rejected": -5.542489528656006, + "step": 11795 + }, + { + "epoch": 0.69, + "learning_rate": 2.3641638620617194e-08, + "logits/chosen": -1.8934086561203003, + "logits/rejected": -1.8955738544464111, + "logps/chosen": -63.11572265625, + "logps/rejected": -211.2301025390625, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022742463275790215, + "rewards/margins": 4.698655128479004, + "rewards/rejected": -4.721397399902344, + "step": 11796 + }, + { + "epoch": 0.69, + "learning_rate": 2.363363091545543e-08, + "logits/chosen": -1.9690825939178467, + "logits/rejected": -1.9448118209838867, + "logps/chosen": -186.85403442382812, + "logps/rejected": -403.3441162109375, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1631317138671875, + "rewards/margins": 4.6087188720703125, + "rewards/rejected": -2.445587158203125, + "step": 11797 + }, + { + "epoch": 0.69, + "learning_rate": 2.362562414695607e-08, + "logits/chosen": -1.9469614028930664, + "logits/rejected": -1.9476704597473145, + "logps/chosen": -213.2980194091797, + "logps/rejected": -277.9434814453125, + "loss": 0.1776, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.21929931640625, + "rewards/margins": 2.028717041015625, + "rewards/rejected": -0.809417724609375, + "step": 11798 + }, + { + "epoch": 0.69, + "learning_rate": 2.3617618315403548e-08, + "logits/chosen": -1.9665614366531372, + "logits/rejected": -1.960883617401123, + "logps/chosen": -16.064668655395508, + "logps/rejected": -161.969970703125, + "loss": 0.2435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4988554120063782, + "rewards/margins": 2.1904265880584717, + "rewards/rejected": -1.6915711164474487, + "step": 11799 + }, + { + "epoch": 0.69, + "learning_rate": 2.360961342108228e-08, + "logits/chosen": -1.7391737699508667, + "logits/rejected": -1.7295924425125122, + "logps/chosen": -40.43524932861328, + "logps/rejected": -191.27749633789062, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5597000122070312, + "rewards/margins": 2.525007724761963, + "rewards/rejected": -1.965307593345642, + "step": 11800 + }, + { + "epoch": 0.69, + "learning_rate": 2.3601609464276645e-08, + "logits/chosen": -1.9744133949279785, + "logits/rejected": -1.958624005317688, + "logps/chosen": -0.28148338198661804, + "logps/rejected": -387.2536926269531, + "loss": 0.315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08555612713098526, + "rewards/margins": 9.598352432250977, + "rewards/rejected": -9.512796401977539, + "step": 11801 + }, + { + "epoch": 0.69, + "learning_rate": 2.3593606445270936e-08, + "logits/chosen": -1.9995311498641968, + "logits/rejected": -2.0015623569488525, + "logps/chosen": -0.008788417093455791, + "logps/rejected": -229.47244262695312, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001966856885701418, + "rewards/margins": 4.20271635055542, + "rewards/rejected": -4.200749397277832, + "step": 11802 + }, + { + "epoch": 0.69, + "learning_rate": 2.358560436434953e-08, + "logits/chosen": -1.9072685241699219, + "logits/rejected": -1.9101965427398682, + "logps/chosen": -6.351454257965088, + "logps/rejected": -146.30203247070312, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35689854621887207, + "rewards/margins": 2.1504197120666504, + "rewards/rejected": -1.7935211658477783, + "step": 11803 + }, + { + "epoch": 0.69, + "learning_rate": 2.3577603221796643e-08, + "logits/chosen": -1.948240041732788, + "logits/rejected": -1.953122854232788, + "logps/chosen": -11.394710540771484, + "logps/rejected": -240.34373474121094, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10084161907434464, + "rewards/margins": 5.279813289642334, + "rewards/rejected": -5.178971767425537, + "step": 11804 + }, + { + "epoch": 0.69, + "learning_rate": 2.356960301789656e-08, + "logits/chosen": -1.913038969039917, + "logits/rejected": -1.9140067100524902, + "logps/chosen": -5.582699775695801, + "logps/rejected": -174.82574462890625, + "loss": 0.2169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6919874548912048, + "rewards/margins": 2.203962564468384, + "rewards/rejected": -1.5119751691818237, + "step": 11805 + }, + { + "epoch": 0.69, + "learning_rate": 2.356160375293342e-08, + "logits/chosen": -1.9658604860305786, + "logits/rejected": -1.9672552347183228, + "logps/chosen": -8.885697364807129, + "logps/rejected": -149.33767700195312, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18286696076393127, + "rewards/margins": 2.238131284713745, + "rewards/rejected": -2.0552642345428467, + "step": 11806 + }, + { + "epoch": 0.69, + "learning_rate": 2.3553605427191482e-08, + "logits/chosen": -1.9484878778457642, + "logits/rejected": -1.941791296005249, + "logps/chosen": -53.693302154541016, + "logps/rejected": -171.93873596191406, + "loss": 0.8139, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.31874582171440125, + "rewards/margins": -0.8601436614990234, + "rewards/rejected": 1.178889513015747, + "step": 11807 + }, + { + "epoch": 0.69, + "learning_rate": 2.3545608040954816e-08, + "logits/chosen": -1.8573120832443237, + "logits/rejected": -1.8836725950241089, + "logps/chosen": -224.33966064453125, + "logps/rejected": -236.65452575683594, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.475290060043335, + "rewards/margins": 3.109147787094116, + "rewards/rejected": -0.6338577270507812, + "step": 11808 + }, + { + "epoch": 0.69, + "learning_rate": 2.3537611594507578e-08, + "logits/chosen": -1.8333454132080078, + "logits/rejected": -1.8200953006744385, + "logps/chosen": -184.0048828125, + "logps/rejected": -304.7190246582031, + "loss": 0.4506, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.726770043373108, + "rewards/margins": -0.2734314203262329, + "rewards/rejected": 2.000201463699341, + "step": 11809 + }, + { + "epoch": 0.69, + "learning_rate": 2.352961608813378e-08, + "logits/chosen": -2.014507532119751, + "logits/rejected": -2.012068748474121, + "logps/chosen": -0.007532584480941296, + "logps/rejected": -163.23019409179688, + "loss": 0.5012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008406850509345531, + "rewards/margins": 0.9610680341720581, + "rewards/rejected": -0.9526611566543579, + "step": 11810 + }, + { + "epoch": 0.69, + "learning_rate": 2.3521621522117536e-08, + "logits/chosen": -1.8995609283447266, + "logits/rejected": -1.9064881801605225, + "logps/chosen": -23.49545669555664, + "logps/rejected": -231.24105834960938, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21042099595069885, + "rewards/margins": 3.1806206703186035, + "rewards/rejected": -2.9701995849609375, + "step": 11811 + }, + { + "epoch": 0.69, + "learning_rate": 2.3513627896742798e-08, + "logits/chosen": -2.0272698402404785, + "logits/rejected": -2.008866786956787, + "logps/chosen": -26.804033279418945, + "logps/rejected": -218.70767211914062, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8217344284057617, + "rewards/margins": 0.49267253279685974, + "rewards/rejected": 0.329061895608902, + "step": 11812 + }, + { + "epoch": 0.69, + "learning_rate": 2.350563521229355e-08, + "logits/chosen": -1.9909900426864624, + "logits/rejected": -1.9921807050704956, + "logps/chosen": -51.33660888671875, + "logps/rejected": -178.9677276611328, + "loss": 0.7382, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4984802305698395, + "rewards/margins": -0.6475540399551392, + "rewards/rejected": 1.1460342407226562, + "step": 11813 + }, + { + "epoch": 0.69, + "learning_rate": 2.3497643469053747e-08, + "logits/chosen": -1.6155705451965332, + "logits/rejected": -1.6236330270767212, + "logps/chosen": -0.5431827306747437, + "logps/rejected": -202.57846069335938, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10653924942016602, + "rewards/margins": 4.135366439819336, + "rewards/rejected": -4.02882719039917, + "step": 11814 + }, + { + "epoch": 0.69, + "learning_rate": 2.3489652667307298e-08, + "logits/chosen": -1.9199934005737305, + "logits/rejected": -1.9213874340057373, + "logps/chosen": -41.43914031982422, + "logps/rejected": -135.08155822753906, + "loss": 0.3677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3180557191371918, + "rewards/margins": 1.3177871704101562, + "rewards/rejected": -0.9997314810752869, + "step": 11815 + }, + { + "epoch": 0.69, + "learning_rate": 2.348166280733804e-08, + "logits/chosen": -1.952015995979309, + "logits/rejected": -1.9577168226242065, + "logps/chosen": -117.47156524658203, + "logps/rejected": -310.2160339355469, + "loss": 0.3497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1964057981967926, + "rewards/margins": 1.7277565002441406, + "rewards/rejected": -1.5313507318496704, + "step": 11816 + }, + { + "epoch": 0.69, + "learning_rate": 2.347367388942984e-08, + "logits/chosen": -1.9474656581878662, + "logits/rejected": -1.932652235031128, + "logps/chosen": -97.1369400024414, + "logps/rejected": -215.80435180664062, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3900901973247528, + "rewards/margins": 1.1923805475234985, + "rewards/rejected": -0.8022903800010681, + "step": 11817 + }, + { + "epoch": 0.69, + "learning_rate": 2.3465685913866502e-08, + "logits/chosen": -1.8350425958633423, + "logits/rejected": -1.8275517225265503, + "logps/chosen": -156.70101928710938, + "logps/rejected": -388.4751281738281, + "loss": 0.0727, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1761322021484375, + "rewards/margins": 2.3510589599609375, + "rewards/rejected": -0.1749267578125, + "step": 11818 + }, + { + "epoch": 0.69, + "learning_rate": 2.3457698880931785e-08, + "logits/chosen": -1.712860345840454, + "logits/rejected": -1.7116183042526245, + "logps/chosen": -168.31639099121094, + "logps/rejected": -247.7276611328125, + "loss": 0.1084, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4275619983673096, + "rewards/margins": 1.9003829956054688, + "rewards/rejected": 0.527178943157196, + "step": 11819 + }, + { + "epoch": 0.69, + "learning_rate": 2.344971279090946e-08, + "logits/chosen": -2.0069727897644043, + "logits/rejected": -1.9972565174102783, + "logps/chosen": -99.1910400390625, + "logps/rejected": -468.8382873535156, + "loss": 0.067, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4664512872695923, + "rewards/margins": 5.908925533294678, + "rewards/rejected": -4.442474365234375, + "step": 11820 + }, + { + "epoch": 0.69, + "learning_rate": 2.3441727644083186e-08, + "logits/chosen": -2.0685455799102783, + "logits/rejected": -2.0603253841400146, + "logps/chosen": -8.39618968963623, + "logps/rejected": -96.915771484375, + "loss": 0.6958, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1620013266801834, + "rewards/margins": -0.13417328894138336, + "rewards/rejected": 0.2961746156215668, + "step": 11821 + }, + { + "epoch": 0.69, + "learning_rate": 2.3433743440736657e-08, + "logits/chosen": -1.9196736812591553, + "logits/rejected": -1.9743605852127075, + "logps/chosen": -239.15411376953125, + "logps/rejected": -213.0155792236328, + "loss": 0.1548, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0167603492736816, + "rewards/margins": 1.2094086408615112, + "rewards/rejected": 0.8073517084121704, + "step": 11822 + }, + { + "epoch": 0.69, + "learning_rate": 2.3425760181153514e-08, + "logits/chosen": -1.736905813217163, + "logits/rejected": -1.725822925567627, + "logps/chosen": -56.65043640136719, + "logps/rejected": -229.67160034179688, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07124900817871094, + "rewards/margins": 3.7516238689422607, + "rewards/rejected": -3.8228728771209717, + "step": 11823 + }, + { + "epoch": 0.69, + "learning_rate": 2.3417777865617373e-08, + "logits/chosen": -1.8119018077850342, + "logits/rejected": -1.7760965824127197, + "logps/chosen": -209.86537170410156, + "logps/rejected": -425.6534118652344, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8282073736190796, + "rewards/margins": 2.5643036365509033, + "rewards/rejected": -0.736096203327179, + "step": 11824 + }, + { + "epoch": 0.69, + "learning_rate": 2.3409796494411755e-08, + "logits/chosen": -2.071958541870117, + "logits/rejected": -2.0645694732666016, + "logps/chosen": -0.002321117091923952, + "logps/rejected": -128.06887817382812, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014047840377315879, + "rewards/margins": 3.1898438930511475, + "rewards/rejected": -3.188439130783081, + "step": 11825 + }, + { + "epoch": 0.69, + "learning_rate": 2.3401816067820268e-08, + "logits/chosen": -2.07783842086792, + "logits/rejected": -2.075817823410034, + "logps/chosen": -5.091673851013184, + "logps/rejected": -72.03004455566406, + "loss": 0.4986, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0963740348815918, + "rewards/margins": 0.9264107942581177, + "rewards/rejected": -1.0227848291397095, + "step": 11826 + }, + { + "epoch": 0.69, + "learning_rate": 2.3393836586126363e-08, + "logits/chosen": -1.9854464530944824, + "logits/rejected": -1.9832541942596436, + "logps/chosen": -19.881879806518555, + "logps/rejected": -119.58353424072266, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3596866726875305, + "rewards/margins": 2.6746716499328613, + "rewards/rejected": -2.3149850368499756, + "step": 11827 + }, + { + "epoch": 0.69, + "learning_rate": 2.3385858049613545e-08, + "logits/chosen": -1.9600390195846558, + "logits/rejected": -1.952893614768982, + "logps/chosen": -25.966068267822266, + "logps/rejected": -172.5516357421875, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018163681030273438, + "rewards/margins": 2.9231204986572266, + "rewards/rejected": -2.9412841796875, + "step": 11828 + }, + { + "epoch": 0.69, + "learning_rate": 2.3377880458565196e-08, + "logits/chosen": -1.8805631399154663, + "logits/rejected": -1.8798253536224365, + "logps/chosen": -0.10875314474105835, + "logps/rejected": -77.5257568359375, + "loss": 0.7345, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.026265356689691544, + "rewards/margins": -0.210355743765831, + "rewards/rejected": 0.23662109673023224, + "step": 11829 + }, + { + "epoch": 0.69, + "learning_rate": 2.336990381326479e-08, + "logits/chosen": -1.9941829442977905, + "logits/rejected": -2.0023834705352783, + "logps/chosen": -10.875571250915527, + "logps/rejected": -182.39076232910156, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13749246299266815, + "rewards/margins": 4.495233535766602, + "rewards/rejected": -4.357740879058838, + "step": 11830 + }, + { + "epoch": 0.69, + "learning_rate": 2.336192811399565e-08, + "logits/chosen": -2.0642800331115723, + "logits/rejected": -2.0689237117767334, + "logps/chosen": -35.27584457397461, + "logps/rejected": -154.786865234375, + "loss": 0.1601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9988929629325867, + "rewards/margins": 3.2161691188812256, + "rewards/rejected": -2.217276096343994, + "step": 11831 + }, + { + "epoch": 0.69, + "learning_rate": 2.3353953361041117e-08, + "logits/chosen": -1.8792368173599243, + "logits/rejected": -1.8455730676651, + "logps/chosen": -212.43881225585938, + "logps/rejected": -320.5184326171875, + "loss": 0.2514, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3696625232696533, + "rewards/margins": 0.5937408804893494, + "rewards/rejected": 0.775921642780304, + "step": 11832 + }, + { + "epoch": 0.69, + "learning_rate": 2.3345979554684508e-08, + "logits/chosen": -1.646785855293274, + "logits/rejected": -1.6415926218032837, + "logps/chosen": -53.24244689941406, + "logps/rejected": -249.57980346679688, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6604785919189453, + "rewards/margins": 2.02459454536438, + "rewards/rejected": -1.3641159534454346, + "step": 11833 + }, + { + "epoch": 0.69, + "learning_rate": 2.3338006695209096e-08, + "logits/chosen": -1.7591290473937988, + "logits/rejected": -1.7549424171447754, + "logps/chosen": -4.217362403869629, + "logps/rejected": -55.28795623779297, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3329765498638153, + "rewards/margins": 1.578334093093872, + "rewards/rejected": -1.2453575134277344, + "step": 11834 + }, + { + "epoch": 0.69, + "learning_rate": 2.333003478289809e-08, + "logits/chosen": -1.981688380241394, + "logits/rejected": -1.9785466194152832, + "logps/chosen": -2.137195110321045, + "logps/rejected": -99.41436767578125, + "loss": 0.9415, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07522501796483994, + "rewards/margins": -0.8907693028450012, + "rewards/rejected": 0.9659942984580994, + "step": 11835 + }, + { + "epoch": 0.69, + "learning_rate": 2.3322063818034704e-08, + "logits/chosen": -2.0763821601867676, + "logits/rejected": -2.063788414001465, + "logps/chosen": -65.2479476928711, + "logps/rejected": -296.14031982421875, + "loss": 0.2302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29111939668655396, + "rewards/margins": 6.675674915313721, + "rewards/rejected": -6.384555339813232, + "step": 11836 + }, + { + "epoch": 0.69, + "learning_rate": 2.3314093800902106e-08, + "logits/chosen": -1.8052881956100464, + "logits/rejected": -1.8035304546356201, + "logps/chosen": -0.20321473479270935, + "logps/rejected": -276.83282470703125, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015221319161355495, + "rewards/margins": 5.623768329620361, + "rewards/rejected": -5.638989448547363, + "step": 11837 + }, + { + "epoch": 0.69, + "learning_rate": 2.330612473178345e-08, + "logits/chosen": -1.9747233390808105, + "logits/rejected": -1.9790769815444946, + "logps/chosen": -142.90834045410156, + "logps/rejected": -224.25338745117188, + "loss": 0.1986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7540069818496704, + "rewards/margins": 1.0954331159591675, + "rewards/rejected": -0.3414261043071747, + "step": 11838 + }, + { + "epoch": 0.69, + "learning_rate": 2.32981566109618e-08, + "logits/chosen": -1.6565660238265991, + "logits/rejected": -1.6543066501617432, + "logps/chosen": -213.32305908203125, + "logps/rejected": -368.0081481933594, + "loss": 0.1111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9205902218818665, + "rewards/margins": 2.3405609130859375, + "rewards/rejected": -1.4199707508087158, + "step": 11839 + }, + { + "epoch": 0.69, + "learning_rate": 2.329018943872024e-08, + "logits/chosen": -1.8395291566848755, + "logits/rejected": -1.8369486331939697, + "logps/chosen": -232.7275390625, + "logps/rejected": -364.69256591796875, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.669952392578125, + "rewards/margins": 4.357202053070068, + "rewards/rejected": -1.687249779701233, + "step": 11840 + }, + { + "epoch": 0.69, + "learning_rate": 2.32822232153418e-08, + "logits/chosen": -1.870235800743103, + "logits/rejected": -1.820792317390442, + "logps/chosen": -208.064208984375, + "logps/rejected": -408.41595458984375, + "loss": 0.0453, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.842489719390869, + "rewards/margins": 2.7085938453674316, + "rewards/rejected": 0.1338958740234375, + "step": 11841 + }, + { + "epoch": 0.69, + "learning_rate": 2.327425794110949e-08, + "logits/chosen": -1.984069585800171, + "logits/rejected": -1.9697434902191162, + "logps/chosen": -39.874385833740234, + "logps/rejected": -166.8426513671875, + "loss": 0.3996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7866302728652954, + "rewards/margins": 0.45732423663139343, + "rewards/rejected": 0.329306036233902, + "step": 11842 + }, + { + "epoch": 0.69, + "learning_rate": 2.3266293616306283e-08, + "logits/chosen": -1.793675422668457, + "logits/rejected": -1.840217113494873, + "logps/chosen": -238.1957244873047, + "logps/rejected": -481.70318603515625, + "loss": 0.0731, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4332138299942017, + "rewards/margins": 2.7702255249023438, + "rewards/rejected": -1.337011694908142, + "step": 11843 + }, + { + "epoch": 0.69, + "learning_rate": 2.3258330241215053e-08, + "logits/chosen": -1.8340494632720947, + "logits/rejected": -1.8384215831756592, + "logps/chosen": -23.10903549194336, + "logps/rejected": -251.15313720703125, + "loss": 0.1183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9472156763076782, + "rewards/margins": 3.7684988975524902, + "rewards/rejected": -2.8212831020355225, + "step": 11844 + }, + { + "epoch": 0.69, + "learning_rate": 2.3250367816118783e-08, + "logits/chosen": -1.8640897274017334, + "logits/rejected": -1.8668630123138428, + "logps/chosen": -54.522552490234375, + "logps/rejected": -173.70928955078125, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3910026550292969, + "rewards/margins": 1.077802300453186, + "rewards/rejected": -0.6867996454238892, + "step": 11845 + }, + { + "epoch": 0.69, + "learning_rate": 2.324240634130027e-08, + "logits/chosen": -1.807321548461914, + "logits/rejected": -1.841637134552002, + "logps/chosen": -193.65045166015625, + "logps/rejected": -311.9697265625, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6840851306915283, + "rewards/margins": 6.724356651306152, + "rewards/rejected": -4.040271282196045, + "step": 11846 + }, + { + "epoch": 0.69, + "learning_rate": 2.323444581704239e-08, + "logits/chosen": -1.9454758167266846, + "logits/rejected": -1.9365274906158447, + "logps/chosen": -70.75314331054688, + "logps/rejected": -288.03802490234375, + "loss": 0.0674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.597132921218872, + "rewards/margins": 4.7313432693481445, + "rewards/rejected": -3.1342103481292725, + "step": 11847 + }, + { + "epoch": 0.69, + "learning_rate": 2.3226486243627886e-08, + "logits/chosen": -1.99204421043396, + "logits/rejected": -1.9817405939102173, + "logps/chosen": -29.882434844970703, + "logps/rejected": -172.799560546875, + "loss": 0.213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7990108728408813, + "rewards/margins": 2.4120266437530518, + "rewards/rejected": -1.6130157709121704, + "step": 11848 + }, + { + "epoch": 0.69, + "learning_rate": 2.3218527621339588e-08, + "logits/chosen": -1.8484348058700562, + "logits/rejected": -1.8468279838562012, + "logps/chosen": -217.5507354736328, + "logps/rejected": -396.4446105957031, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.859152317047119, + "rewards/margins": 4.289746284484863, + "rewards/rejected": -1.4305938482284546, + "step": 11849 + }, + { + "epoch": 0.69, + "learning_rate": 2.3210569950460173e-08, + "logits/chosen": -1.9106324911117554, + "logits/rejected": -1.9374881982803345, + "logps/chosen": -153.41131591796875, + "logps/rejected": -319.7286376953125, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3517639636993408, + "rewards/margins": 2.3763794898986816, + "rewards/rejected": -1.0246155261993408, + "step": 11850 + }, + { + "epoch": 0.69, + "learning_rate": 2.3202613231272356e-08, + "logits/chosen": -1.7273318767547607, + "logits/rejected": -1.7240228652954102, + "logps/chosen": -0.2278805822134018, + "logps/rejected": -158.10513305664062, + "loss": 0.4461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06765743345022202, + "rewards/margins": 1.267414927482605, + "rewards/rejected": -1.19975745677948, + "step": 11851 + }, + { + "epoch": 0.69, + "learning_rate": 2.31946574640588e-08, + "logits/chosen": -1.923593521118164, + "logits/rejected": -1.9212905168533325, + "logps/chosen": -281.1754455566406, + "logps/rejected": -452.8546142578125, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7634552121162415, + "rewards/margins": 2.8758208751678467, + "rewards/rejected": -2.11236572265625, + "step": 11852 + }, + { + "epoch": 0.69, + "learning_rate": 2.318670264910214e-08, + "logits/chosen": -1.8473957777023315, + "logits/rejected": -1.8465385437011719, + "logps/chosen": -267.19189453125, + "logps/rejected": -469.2471008300781, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.547149658203125, + "rewards/margins": 3.8891143798828125, + "rewards/rejected": -2.3419647216796875, + "step": 11853 + }, + { + "epoch": 0.69, + "learning_rate": 2.3178748786684948e-08, + "logits/chosen": -2.2107129096984863, + "logits/rejected": -2.1998982429504395, + "logps/chosen": -0.08972825855016708, + "logps/rejected": -99.29164123535156, + "loss": 0.5089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004105748143047094, + "rewards/margins": 0.9370822906494141, + "rewards/rejected": -0.9411880373954773, + "step": 11854 + }, + { + "epoch": 0.69, + "learning_rate": 2.3170795877089793e-08, + "logits/chosen": -1.783280849456787, + "logits/rejected": -1.7460476160049438, + "logps/chosen": -228.15142822265625, + "logps/rejected": -432.6016540527344, + "loss": 0.2275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44613343477249146, + "rewards/margins": 2.792279005050659, + "rewards/rejected": -2.3461456298828125, + "step": 11855 + }, + { + "epoch": 0.69, + "learning_rate": 2.3162843920599208e-08, + "logits/chosen": -1.7090158462524414, + "logits/rejected": -1.6893280744552612, + "logps/chosen": -136.0261688232422, + "logps/rejected": -322.1612854003906, + "loss": 0.1058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9494460821151733, + "rewards/margins": 2.1518540382385254, + "rewards/rejected": -0.2024078369140625, + "step": 11856 + }, + { + "epoch": 0.69, + "learning_rate": 2.31548929174957e-08, + "logits/chosen": -1.8859652280807495, + "logits/rejected": -1.8748700618743896, + "logps/chosen": -15.183311462402344, + "logps/rejected": -241.5590362548828, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03316850587725639, + "rewards/margins": 3.428394079208374, + "rewards/rejected": -3.3952255249023438, + "step": 11857 + }, + { + "epoch": 0.69, + "learning_rate": 2.3146942868061685e-08, + "logits/chosen": -1.7829177379608154, + "logits/rejected": -1.7881336212158203, + "logps/chosen": -81.0137939453125, + "logps/rejected": -267.15704345703125, + "loss": 0.4212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3103538453578949, + "rewards/margins": 1.9782601594924927, + "rewards/rejected": -2.28861403465271, + "step": 11858 + }, + { + "epoch": 0.69, + "learning_rate": 2.313899377257962e-08, + "logits/chosen": -1.8317930698394775, + "logits/rejected": -1.8259583711624146, + "logps/chosen": -25.803821563720703, + "logps/rejected": -345.484375, + "loss": 0.2275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36369457840919495, + "rewards/margins": 6.218550205230713, + "rewards/rejected": -5.854855537414551, + "step": 11859 + }, + { + "epoch": 0.69, + "learning_rate": 2.313104563133188e-08, + "logits/chosen": -2.0954885482788086, + "logits/rejected": -2.083129405975342, + "logps/chosen": -20.5523681640625, + "logps/rejected": -164.57626342773438, + "loss": 0.4092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04268608242273331, + "rewards/margins": 1.1065661907196045, + "rewards/rejected": -1.1492522954940796, + "step": 11860 + }, + { + "epoch": 0.69, + "learning_rate": 2.3123098444600837e-08, + "logits/chosen": -1.967379093170166, + "logits/rejected": -1.9631197452545166, + "logps/chosen": -18.329914093017578, + "logps/rejected": -234.5165252685547, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.155089259147644, + "rewards/margins": 3.687750816345215, + "rewards/rejected": -2.5326614379882812, + "step": 11861 + }, + { + "epoch": 0.69, + "learning_rate": 2.3115152212668815e-08, + "logits/chosen": -1.6544088125228882, + "logits/rejected": -1.6577969789505005, + "logps/chosen": -251.48147583007812, + "logps/rejected": -340.97320556640625, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.575604259967804, + "rewards/margins": 0.9562561511993408, + "rewards/rejected": -0.3806518614292145, + "step": 11862 + }, + { + "epoch": 0.69, + "learning_rate": 2.3107206935818063e-08, + "logits/chosen": -1.9141956567764282, + "logits/rejected": -1.89534592628479, + "logps/chosen": -6.379683494567871, + "logps/rejected": -109.78024291992188, + "loss": 0.3975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24982643127441406, + "rewards/margins": 1.2946140766143799, + "rewards/rejected": -1.0447876453399658, + "step": 11863 + }, + { + "epoch": 0.69, + "learning_rate": 2.3099262614330906e-08, + "logits/chosen": -1.7681936025619507, + "logits/rejected": -1.7615585327148438, + "logps/chosen": -8.74189281463623, + "logps/rejected": -157.41818237304688, + "loss": 0.2426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3680294156074524, + "rewards/margins": 2.501459836959839, + "rewards/rejected": -2.1334304809570312, + "step": 11864 + }, + { + "epoch": 0.69, + "learning_rate": 2.3091319248489503e-08, + "logits/chosen": -1.8899385929107666, + "logits/rejected": -1.891427755355835, + "logps/chosen": -36.71692657470703, + "logps/rejected": -240.36778259277344, + "loss": 0.19, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6683517694473267, + "rewards/margins": 3.006430149078369, + "rewards/rejected": -2.338078260421753, + "step": 11865 + }, + { + "epoch": 0.69, + "learning_rate": 2.308337683857609e-08, + "logits/chosen": -1.9342727661132812, + "logits/rejected": -1.9293969869613647, + "logps/chosen": -0.618985652923584, + "logps/rejected": -191.43942260742188, + "loss": 0.3533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04866918921470642, + "rewards/margins": 3.0579991340637207, + "rewards/rejected": -3.10666823387146, + "step": 11866 + }, + { + "epoch": 0.69, + "learning_rate": 2.3075435384872756e-08, + "logits/chosen": -1.9710626602172852, + "logits/rejected": -1.9743198156356812, + "logps/chosen": -9.370081901550293, + "logps/rejected": -173.87802124023438, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16569338738918304, + "rewards/margins": 2.914039373397827, + "rewards/rejected": -2.7483460903167725, + "step": 11867 + }, + { + "epoch": 0.69, + "learning_rate": 2.3067494887661693e-08, + "logits/chosen": -1.989176630973816, + "logits/rejected": -1.9675244092941284, + "logps/chosen": -298.76263427734375, + "logps/rejected": -526.1243896484375, + "loss": 0.0506, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.06549072265625, + "rewards/margins": 3.539764404296875, + "rewards/rejected": -1.474273681640625, + "step": 11868 + }, + { + "epoch": 0.69, + "learning_rate": 2.3059555347224934e-08, + "logits/chosen": -1.9130321741104126, + "logits/rejected": -1.906829833984375, + "logps/chosen": -0.2021549791097641, + "logps/rejected": -299.3573913574219, + "loss": 0.3245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009754758328199387, + "rewards/margins": 9.217742919921875, + "rewards/rejected": -9.227498054504395, + "step": 11869 + }, + { + "epoch": 0.69, + "learning_rate": 2.305161676384457e-08, + "logits/chosen": -1.7801347970962524, + "logits/rejected": -1.7892595529556274, + "logps/chosen": -269.7152404785156, + "logps/rejected": -426.1647644042969, + "loss": 0.0898, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4784698486328125, + "rewards/margins": 1.7395567893981934, + "rewards/rejected": 0.7389129996299744, + "step": 11870 + }, + { + "epoch": 0.69, + "learning_rate": 2.3043679137802564e-08, + "logits/chosen": -2.0291192531585693, + "logits/rejected": -2.030881881713867, + "logps/chosen": -0.0012318385997787118, + "logps/rejected": -109.8800048828125, + "loss": 0.3752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9242766938987188e-05, + "rewards/margins": 2.530790328979492, + "rewards/rejected": -2.530771017074585, + "step": 11871 + }, + { + "epoch": 0.69, + "learning_rate": 2.3035742469380964e-08, + "logits/chosen": -1.8197060823440552, + "logits/rejected": -1.8111469745635986, + "logps/chosen": -4.667238235473633, + "logps/rejected": -218.28305053710938, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025891590863466263, + "rewards/margins": 2.9443700313568115, + "rewards/rejected": -2.918478488922119, + "step": 11872 + }, + { + "epoch": 0.69, + "learning_rate": 2.3027806758861672e-08, + "logits/chosen": -1.9155231714248657, + "logits/rejected": -1.9280917644500732, + "logps/chosen": -236.25038146972656, + "logps/rejected": -493.6838684082031, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7822952270507812, + "rewards/margins": 7.516120910644531, + "rewards/rejected": -5.73382568359375, + "step": 11873 + }, + { + "epoch": 0.69, + "learning_rate": 2.301987200652662e-08, + "logits/chosen": -1.843779444694519, + "logits/rejected": -1.8659754991531372, + "logps/chosen": -208.0457305908203, + "logps/rejected": -333.4598388671875, + "loss": 0.0392, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0421295166015625, + "rewards/margins": 3.5150086879730225, + "rewards/rejected": -2.47287917137146, + "step": 11874 + }, + { + "epoch": 0.69, + "learning_rate": 2.3011938212657688e-08, + "logits/chosen": -1.9392017126083374, + "logits/rejected": -1.9352465867996216, + "logps/chosen": -75.64950561523438, + "logps/rejected": -233.1563720703125, + "loss": 0.2021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4532325863838196, + "rewards/margins": 3.280780792236328, + "rewards/rejected": -2.8275482654571533, + "step": 11875 + }, + { + "epoch": 0.69, + "learning_rate": 2.300400537753674e-08, + "logits/chosen": -1.930617332458496, + "logits/rejected": -1.920506477355957, + "logps/chosen": -0.00043471428216435015, + "logps/rejected": -140.04525756835938, + "loss": 0.5234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00024298527569044381, + "rewards/margins": 0.7468967437744141, + "rewards/rejected": -0.7466537356376648, + "step": 11876 + }, + { + "epoch": 0.69, + "learning_rate": 2.2996073501445556e-08, + "logits/chosen": -1.9695392847061157, + "logits/rejected": -1.9731700420379639, + "logps/chosen": -0.0012339483946561813, + "logps/rejected": -151.6549072265625, + "loss": 0.4112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1799741059803637e-06, + "rewards/margins": 1.7767926454544067, + "rewards/rejected": -1.7767914533615112, + "step": 11877 + }, + { + "epoch": 0.69, + "learning_rate": 2.298814258466593e-08, + "logits/chosen": -1.86762273311615, + "logits/rejected": -1.8610342741012573, + "logps/chosen": -23.843608856201172, + "logps/rejected": -174.32965087890625, + "loss": 0.5152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22917881608009338, + "rewards/margins": 1.003883957862854, + "rewards/rejected": -1.233062744140625, + "step": 11878 + }, + { + "epoch": 0.69, + "learning_rate": 2.298021262747961e-08, + "logits/chosen": -1.6398842334747314, + "logits/rejected": -1.6149446964263916, + "logps/chosen": -191.0574951171875, + "logps/rejected": -295.40655517578125, + "loss": 0.1891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4364120960235596, + "rewards/margins": 1.530378818511963, + "rewards/rejected": -0.09396667778491974, + "step": 11879 + }, + { + "epoch": 0.69, + "learning_rate": 2.2972283630168306e-08, + "logits/chosen": -1.7923270463943481, + "logits/rejected": -1.7430050373077393, + "logps/chosen": -184.96632385253906, + "logps/rejected": -292.1148986816406, + "loss": 0.092, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8931655883789062, + "rewards/margins": 1.802470326423645, + "rewards/rejected": 1.0906952619552612, + "step": 11880 + }, + { + "epoch": 0.69, + "learning_rate": 2.2964355593013713e-08, + "logits/chosen": -1.879754900932312, + "logits/rejected": -1.8853203058242798, + "logps/chosen": -0.6334624290466309, + "logps/rejected": -196.9806365966797, + "loss": 0.3272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12772689759731293, + "rewards/margins": 3.402334690093994, + "rewards/rejected": -3.2746078968048096, + "step": 11881 + }, + { + "epoch": 0.69, + "learning_rate": 2.2956428516297433e-08, + "logits/chosen": -1.8165208101272583, + "logits/rejected": -1.8122581243515015, + "logps/chosen": -14.970439910888672, + "logps/rejected": -171.21823120117188, + "loss": 0.4457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.060196783393621445, + "rewards/margins": 1.1860800981521606, + "rewards/rejected": -1.24627685546875, + "step": 11882 + }, + { + "epoch": 0.69, + "learning_rate": 2.29485024003011e-08, + "logits/chosen": -1.9165436029434204, + "logits/rejected": -1.909917950630188, + "logps/chosen": -0.13177360594272614, + "logps/rejected": -136.2573699951172, + "loss": 0.3898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005629824008792639, + "rewards/margins": 2.2807250022888184, + "rewards/rejected": -2.2863547801971436, + "step": 11883 + }, + { + "epoch": 0.69, + "learning_rate": 2.2940577245306288e-08, + "logits/chosen": -1.7946457862854004, + "logits/rejected": -1.8357192277908325, + "logps/chosen": -178.10693359375, + "logps/rejected": -212.62010192871094, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5210357904434204, + "rewards/margins": 0.11291348934173584, + "rewards/rejected": 1.4081223011016846, + "step": 11884 + }, + { + "epoch": 0.69, + "learning_rate": 2.2932653051594543e-08, + "logits/chosen": -1.8827420473098755, + "logits/rejected": -1.8934296369552612, + "logps/chosen": -277.97845458984375, + "logps/rejected": -402.71490478515625, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6393494606018066, + "rewards/margins": 1.4503144025802612, + "rewards/rejected": 1.1890350580215454, + "step": 11885 + }, + { + "epoch": 0.69, + "learning_rate": 2.292472981944733e-08, + "logits/chosen": -1.7665997743606567, + "logits/rejected": -1.785884976387024, + "logps/chosen": -154.76522827148438, + "logps/rejected": -291.07330322265625, + "loss": 0.0579, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9417587518692017, + "rewards/margins": 2.4750258922576904, + "rewards/rejected": -0.5332671999931335, + "step": 11886 + }, + { + "epoch": 0.69, + "learning_rate": 2.2916807549146194e-08, + "logits/chosen": -1.9374324083328247, + "logits/rejected": -1.834578037261963, + "logps/chosen": -337.49932861328125, + "logps/rejected": -1015.372802734375, + "loss": 0.0692, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.022027611732483, + "rewards/margins": 12.398358345031738, + "rewards/rejected": -11.376330375671387, + "step": 11887 + }, + { + "epoch": 0.69, + "learning_rate": 2.290888624097251e-08, + "logits/chosen": -2.0129659175872803, + "logits/rejected": -2.0025699138641357, + "logps/chosen": -1.1875975131988525, + "logps/rejected": -289.0823974609375, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027621507178992033, + "rewards/margins": 4.682790756225586, + "rewards/rejected": -4.685553073883057, + "step": 11888 + }, + { + "epoch": 0.69, + "learning_rate": 2.2900965895207724e-08, + "logits/chosen": -1.8698269128799438, + "logits/rejected": -1.8609663248062134, + "logps/chosen": -26.732460021972656, + "logps/rejected": -190.33880615234375, + "loss": 0.3901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31568223237991333, + "rewards/margins": 1.0012078285217285, + "rewards/rejected": -0.6855255365371704, + "step": 11889 + }, + { + "epoch": 0.69, + "learning_rate": 2.2893046512133147e-08, + "logits/chosen": -1.587402582168579, + "logits/rejected": -1.5709991455078125, + "logps/chosen": -239.69619750976562, + "logps/rejected": -512.6144409179688, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5409530401229858, + "rewards/margins": 4.42678689956665, + "rewards/rejected": -2.885833740234375, + "step": 11890 + }, + { + "epoch": 0.69, + "learning_rate": 2.2885128092030187e-08, + "logits/chosen": -1.905853271484375, + "logits/rejected": -1.901157021522522, + "logps/chosen": -16.315332412719727, + "logps/rejected": -186.93624877929688, + "loss": 0.4282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1239139586687088, + "rewards/margins": 1.1606861352920532, + "rewards/rejected": -1.0367721319198608, + "step": 11891 + }, + { + "epoch": 0.69, + "learning_rate": 2.2877210635180095e-08, + "logits/chosen": -1.655179738998413, + "logits/rejected": -1.6506054401397705, + "logps/chosen": -218.523681640625, + "logps/rejected": -389.3305358886719, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7335739135742188, + "rewards/margins": 3.5018997192382812, + "rewards/rejected": -1.7683258056640625, + "step": 11892 + }, + { + "epoch": 0.69, + "learning_rate": 2.2869294141864154e-08, + "logits/chosen": -1.8989558219909668, + "logits/rejected": -1.8915852308273315, + "logps/chosen": -54.84710693359375, + "logps/rejected": -189.79319763183594, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03100128285586834, + "rewards/margins": 2.488668918609619, + "rewards/rejected": -2.457667589187622, + "step": 11893 + }, + { + "epoch": 0.69, + "learning_rate": 2.286137861236359e-08, + "logits/chosen": -1.8161876201629639, + "logits/rejected": -1.8035322427749634, + "logps/chosen": -304.92449951171875, + "logps/rejected": -428.8580017089844, + "loss": 0.0876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6484527587890625, + "rewards/margins": 1.8306610584259033, + "rewards/rejected": -0.18220825493335724, + "step": 11894 + }, + { + "epoch": 0.69, + "learning_rate": 2.285346404695963e-08, + "logits/chosen": -1.8488529920578003, + "logits/rejected": -1.831835389137268, + "logps/chosen": -106.05509948730469, + "logps/rejected": -238.6405487060547, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.573033094406128, + "rewards/margins": 3.0173721313476562, + "rewards/rejected": -0.44433900713920593, + "step": 11895 + }, + { + "epoch": 0.69, + "learning_rate": 2.28455504459334e-08, + "logits/chosen": -2.0717740058898926, + "logits/rejected": -2.071475028991699, + "logps/chosen": -3.9774231910705566, + "logps/rejected": -224.89874267578125, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003543996950611472, + "rewards/margins": 4.268718719482422, + "rewards/rejected": -4.265174865722656, + "step": 11896 + }, + { + "epoch": 0.69, + "learning_rate": 2.283763780956604e-08, + "logits/chosen": -2.046056032180786, + "logits/rejected": -2.0373547077178955, + "logps/chosen": -19.52735137939453, + "logps/rejected": -136.399169921875, + "loss": 0.5128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4852376878261566, + "rewards/margins": 0.13319146633148193, + "rewards/rejected": 0.3520462214946747, + "step": 11897 + }, + { + "epoch": 0.69, + "learning_rate": 2.2829726138138648e-08, + "logits/chosen": -1.9689983129501343, + "logits/rejected": -1.9865182638168335, + "logps/chosen": -217.8887939453125, + "logps/rejected": -290.0252685546875, + "loss": 0.0954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5685272216796875, + "rewards/margins": 2.2856597900390625, + "rewards/rejected": -1.717132568359375, + "step": 11898 + }, + { + "epoch": 0.69, + "learning_rate": 2.2821815431932313e-08, + "logits/chosen": -1.8737409114837646, + "logits/rejected": -1.802420735359192, + "logps/chosen": -173.78079223632812, + "logps/rejected": -435.4563903808594, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4015228748321533, + "rewards/margins": 3.7914886474609375, + "rewards/rejected": -0.38996583223342896, + "step": 11899 + }, + { + "epoch": 0.69, + "learning_rate": 2.2813905691228014e-08, + "logits/chosen": -1.9683730602264404, + "logits/rejected": -1.9658135175704956, + "logps/chosen": -49.80861282348633, + "logps/rejected": -288.86834716796875, + "loss": 0.1343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8119354248046875, + "rewards/margins": 4.665654182434082, + "rewards/rejected": -3.8537185192108154, + "step": 11900 + }, + { + "epoch": 0.69, + "learning_rate": 2.280599691630677e-08, + "logits/chosen": -1.9168967008590698, + "logits/rejected": -1.9353240728378296, + "logps/chosen": -185.9141845703125, + "logps/rejected": -533.8353271484375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.655163526535034, + "rewards/margins": 6.576083183288574, + "rewards/rejected": -3.920919895172119, + "step": 11901 + }, + { + "epoch": 0.69, + "learning_rate": 2.279808910744953e-08, + "logits/chosen": -2.042006492614746, + "logits/rejected": -2.036473274230957, + "logps/chosen": -34.395015716552734, + "logps/rejected": -204.5396270751953, + "loss": 0.2437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5758342742919922, + "rewards/margins": 2.6809871196746826, + "rewards/rejected": -2.1051528453826904, + "step": 11902 + }, + { + "epoch": 0.69, + "learning_rate": 2.279018226493723e-08, + "logits/chosen": -2.030496120452881, + "logits/rejected": -2.024334669113159, + "logps/chosen": -22.152542114257812, + "logps/rejected": -149.09326171875, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0086605548858643, + "rewards/margins": 0.8391185998916626, + "rewards/rejected": 0.16954194009304047, + "step": 11903 + }, + { + "epoch": 0.69, + "learning_rate": 2.278227638905077e-08, + "logits/chosen": -1.8811568021774292, + "logits/rejected": -1.8795299530029297, + "logps/chosen": -74.35338592529297, + "logps/rejected": -189.51519775390625, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31755754351615906, + "rewards/margins": 1.6095223426818848, + "rewards/rejected": -1.2919647693634033, + "step": 11904 + }, + { + "epoch": 0.69, + "learning_rate": 2.2774371480070946e-08, + "logits/chosen": -1.8312945365905762, + "logits/rejected": -1.8261339664459229, + "logps/chosen": -13.03829288482666, + "logps/rejected": -127.0571060180664, + "loss": 0.5944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39638423919677734, + "rewards/margins": 1.0344736576080322, + "rewards/rejected": -1.4308578968048096, + "step": 11905 + }, + { + "epoch": 0.69, + "learning_rate": 2.2766467538278665e-08, + "logits/chosen": -1.8370070457458496, + "logits/rejected": -1.8122615814208984, + "logps/chosen": -177.76429748535156, + "logps/rejected": -287.53424072265625, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1574738025665283, + "rewards/margins": 2.0359132289886475, + "rewards/rejected": 0.12156067043542862, + "step": 11906 + }, + { + "epoch": 0.69, + "learning_rate": 2.275856456395465e-08, + "logits/chosen": -1.8721405267715454, + "logits/rejected": -1.8737740516662598, + "logps/chosen": -192.1229248046875, + "logps/rejected": -424.31243896484375, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9410278797149658, + "rewards/margins": 2.929708957672119, + "rewards/rejected": -0.9886810183525085, + "step": 11907 + }, + { + "epoch": 0.69, + "learning_rate": 2.2750662557379696e-08, + "logits/chosen": -1.9983340501785278, + "logits/rejected": -2.0114521980285645, + "logps/chosen": -85.59690856933594, + "logps/rejected": -338.52374267578125, + "loss": 0.1424, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.495690941810608, + "rewards/margins": 1.9998657703399658, + "rewards/rejected": -0.5041748285293579, + "step": 11908 + }, + { + "epoch": 0.69, + "learning_rate": 2.2742761518834465e-08, + "logits/chosen": -1.8574631214141846, + "logits/rejected": -1.8577557802200317, + "logps/chosen": -0.018009744584560394, + "logps/rejected": -172.5486297607422, + "loss": 0.3362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004761180025525391, + "rewards/margins": 4.075422763824463, + "rewards/rejected": -4.07589864730835, + "step": 11909 + }, + { + "epoch": 0.69, + "learning_rate": 2.27348614485997e-08, + "logits/chosen": -1.9176926612854004, + "logits/rejected": -1.9149423837661743, + "logps/chosen": -5.8650159189710394e-05, + "logps/rejected": -96.1007080078125, + "loss": 0.4331, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0966967920467141e-06, + "rewards/margins": 1.6530911922454834, + "rewards/rejected": -1.6530922651290894, + "step": 11910 + }, + { + "epoch": 0.69, + "learning_rate": 2.2726962346956014e-08, + "logits/chosen": -1.8166865110397339, + "logits/rejected": -1.8192697763442993, + "logps/chosen": -47.38234329223633, + "logps/rejected": -185.79959106445312, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3367820680141449, + "rewards/margins": 1.385846734046936, + "rewards/rejected": -1.0490646362304688, + "step": 11911 + }, + { + "epoch": 0.69, + "learning_rate": 2.2719064214184047e-08, + "logits/chosen": -1.8296135663986206, + "logits/rejected": -1.8449530601501465, + "logps/chosen": -166.03675842285156, + "logps/rejected": -451.8080749511719, + "loss": 0.1066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5709137320518494, + "rewards/margins": 4.4935455322265625, + "rewards/rejected": -3.9226319789886475, + "step": 11912 + }, + { + "epoch": 0.69, + "learning_rate": 2.2711167050564334e-08, + "logits/chosen": -1.8162834644317627, + "logits/rejected": -1.807895302772522, + "logps/chosen": -144.86094665527344, + "logps/rejected": -322.9980773925781, + "loss": 0.1792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.482818603515625, + "rewards/margins": 1.387359619140625, + "rewards/rejected": -0.904541015625, + "step": 11913 + }, + { + "epoch": 0.69, + "learning_rate": 2.2703270856377482e-08, + "logits/chosen": -1.9894781112670898, + "logits/rejected": -1.9805264472961426, + "logps/chosen": -35.060237884521484, + "logps/rejected": -154.3828125, + "loss": 0.307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11106415092945099, + "rewards/margins": 3.031385898590088, + "rewards/rejected": -2.9203217029571533, + "step": 11914 + }, + { + "epoch": 0.69, + "learning_rate": 2.2695375631903956e-08, + "logits/chosen": -1.9418110847473145, + "logits/rejected": -1.9390949010849, + "logps/chosen": -48.01524353027344, + "logps/rejected": -104.27544403076172, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46454963088035583, + "rewards/margins": 1.8605526685714722, + "rewards/rejected": -1.396003007888794, + "step": 11915 + }, + { + "epoch": 0.69, + "learning_rate": 2.2687481377424244e-08, + "logits/chosen": -1.956061601638794, + "logits/rejected": -1.9305680990219116, + "logps/chosen": -126.64936828613281, + "logps/rejected": -230.8274688720703, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4339218139648438, + "rewards/margins": 2.4736571311950684, + "rewards/rejected": -0.03973541408777237, + "step": 11916 + }, + { + "epoch": 0.69, + "learning_rate": 2.2679588093218798e-08, + "logits/chosen": -1.8945449590682983, + "logits/rejected": -1.8915514945983887, + "logps/chosen": -12.657087326049805, + "logps/rejected": -201.70945739746094, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13513584434986115, + "rewards/margins": 3.7654809951782227, + "rewards/rejected": -3.630345106124878, + "step": 11917 + }, + { + "epoch": 0.69, + "learning_rate": 2.267169577956804e-08, + "logits/chosen": -1.932502031326294, + "logits/rejected": -1.9789237976074219, + "logps/chosen": -157.7307891845703, + "logps/rejected": -267.445556640625, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6060227155685425, + "rewards/margins": 0.36395418643951416, + "rewards/rejected": 1.2420685291290283, + "step": 11918 + }, + { + "epoch": 0.69, + "learning_rate": 2.266380443675231e-08, + "logits/chosen": -1.9345844984054565, + "logits/rejected": -1.942165732383728, + "logps/chosen": -7.4729790687561035, + "logps/rejected": -55.44884490966797, + "loss": 0.5374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2756209969520569, + "rewards/margins": 0.5824092626571655, + "rewards/rejected": -0.30678826570510864, + "step": 11919 + }, + { + "epoch": 0.69, + "learning_rate": 2.2655914065051955e-08, + "logits/chosen": -1.765764832496643, + "logits/rejected": -1.8166859149932861, + "logps/chosen": -138.9866485595703, + "logps/rejected": -306.36041259765625, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4223343133926392, + "rewards/margins": 2.3755874633789062, + "rewards/rejected": -0.9532532095909119, + "step": 11920 + }, + { + "epoch": 0.69, + "learning_rate": 2.264802466474729e-08, + "logits/chosen": -1.797829031944275, + "logits/rejected": -1.9147577285766602, + "logps/chosen": -371.792724609375, + "logps/rejected": -284.13995361328125, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.783111572265625, + "rewards/margins": 5.875735282897949, + "rewards/rejected": -3.0926239490509033, + "step": 11921 + }, + { + "epoch": 0.69, + "learning_rate": 2.2640136236118583e-08, + "logits/chosen": -1.8950384855270386, + "logits/rejected": -1.878161907196045, + "logps/chosen": -174.3773193359375, + "logps/rejected": -283.56585693359375, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8507416248321533, + "rewards/margins": 3.1824493408203125, + "rewards/rejected": -1.3317078351974487, + "step": 11922 + }, + { + "epoch": 0.69, + "learning_rate": 2.263224877944609e-08, + "logits/chosen": -1.9319231510162354, + "logits/rejected": -1.928457260131836, + "logps/chosen": -13.793041229248047, + "logps/rejected": -49.020530700683594, + "loss": 0.439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0066207884810864925, + "rewards/margins": 1.6349220275878906, + "rewards/rejected": -1.6283012628555298, + "step": 11923 + }, + { + "epoch": 0.69, + "learning_rate": 2.2624362295009963e-08, + "logits/chosen": -2.120710611343384, + "logits/rejected": -2.119755744934082, + "logps/chosen": -62.93838882446289, + "logps/rejected": -310.81365966796875, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48058968782424927, + "rewards/margins": 1.3846607208251953, + "rewards/rejected": -0.904071033000946, + "step": 11924 + }, + { + "epoch": 0.69, + "learning_rate": 2.2616476783090405e-08, + "logits/chosen": -1.8989810943603516, + "logits/rejected": -1.897103190422058, + "logps/chosen": -100.89689636230469, + "logps/rejected": -305.425537109375, + "loss": 0.14, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.312464952468872, + "rewards/margins": 2.488185167312622, + "rewards/rejected": -1.17572021484375, + "step": 11925 + }, + { + "epoch": 0.69, + "learning_rate": 2.2608592243967535e-08, + "logits/chosen": -1.5805379152297974, + "logits/rejected": -1.578659176826477, + "logps/chosen": -29.621849060058594, + "logps/rejected": -157.59286499023438, + "loss": 0.4945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5650133490562439, + "rewards/margins": 2.788889169692993, + "rewards/rejected": -3.353902578353882, + "step": 11926 + }, + { + "epoch": 0.69, + "learning_rate": 2.260070867792147e-08, + "logits/chosen": -1.855546474456787, + "logits/rejected": -1.846703052520752, + "logps/chosen": -41.81809997558594, + "logps/rejected": -279.81689453125, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10167503356933594, + "rewards/margins": 2.299471616744995, + "rewards/rejected": -2.197796583175659, + "step": 11927 + }, + { + "epoch": 0.69, + "learning_rate": 2.2592826085232224e-08, + "logits/chosen": -1.9104387760162354, + "logits/rejected": -1.9185203313827515, + "logps/chosen": -21.996946334838867, + "logps/rejected": -183.46615600585938, + "loss": 0.4819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3915308117866516, + "rewards/margins": 0.10120031237602234, + "rewards/rejected": 0.2903304994106293, + "step": 11928 + }, + { + "epoch": 0.69, + "learning_rate": 2.2584944466179894e-08, + "logits/chosen": -2.1251673698425293, + "logits/rejected": -2.1231234073638916, + "logps/chosen": -0.0015690565342083573, + "logps/rejected": -134.0386962890625, + "loss": 0.3834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0011087472084909678, + "rewards/margins": 2.526604652404785, + "rewards/rejected": -2.525496006011963, + "step": 11929 + }, + { + "epoch": 0.69, + "learning_rate": 2.2577063821044422e-08, + "logits/chosen": -1.833879828453064, + "logits/rejected": -1.8252918720245361, + "logps/chosen": -108.68830871582031, + "logps/rejected": -278.7104797363281, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27984926104545593, + "rewards/margins": 2.243560791015625, + "rewards/rejected": -1.9637116193771362, + "step": 11930 + }, + { + "epoch": 0.69, + "learning_rate": 2.2569184150105797e-08, + "logits/chosen": -1.8774993419647217, + "logits/rejected": -1.8454532623291016, + "logps/chosen": -218.33070373535156, + "logps/rejected": -381.0288391113281, + "loss": 0.0576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6102371215820312, + "rewards/margins": 4.081608772277832, + "rewards/rejected": -3.4713714122772217, + "step": 11931 + }, + { + "epoch": 0.69, + "learning_rate": 2.2561305453643898e-08, + "logits/chosen": -1.954360842704773, + "logits/rejected": -1.9475822448730469, + "logps/chosen": -0.34936222434043884, + "logps/rejected": -140.6154022216797, + "loss": 0.3671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027013912796974182, + "rewards/margins": 3.5485925674438477, + "rewards/rejected": -3.57560658454895, + "step": 11932 + }, + { + "epoch": 0.69, + "learning_rate": 2.255342773193868e-08, + "logits/chosen": -1.901071310043335, + "logits/rejected": -1.8942350149154663, + "logps/chosen": -161.26123046875, + "logps/rejected": -420.14892578125, + "loss": 0.0724, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1364609003067017, + "rewards/margins": 3.4734482765197754, + "rewards/rejected": -2.336987257003784, + "step": 11933 + }, + { + "epoch": 0.69, + "learning_rate": 2.2545550985269946e-08, + "logits/chosen": -1.9923460483551025, + "logits/rejected": -1.9888685941696167, + "logps/chosen": -0.05265234038233757, + "logps/rejected": -113.56206512451172, + "loss": 0.514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003661364084109664, + "rewards/margins": 0.7937881946563721, + "rewards/rejected": -0.79744952917099, + "step": 11934 + }, + { + "epoch": 0.69, + "learning_rate": 2.253767521391754e-08, + "logits/chosen": -1.703501582145691, + "logits/rejected": -1.699033498764038, + "logps/chosen": -54.856998443603516, + "logps/rejected": -257.116455078125, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0365447998046875, + "rewards/margins": 0.456826776266098, + "rewards/rejected": -0.4933715760707855, + "step": 11935 + }, + { + "epoch": 0.69, + "learning_rate": 2.2529800418161237e-08, + "logits/chosen": -1.8023406267166138, + "logits/rejected": -1.8360518217086792, + "logps/chosen": -159.48141479492188, + "logps/rejected": -311.9955139160156, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4900786876678467, + "rewards/margins": 2.6627256870269775, + "rewards/rejected": -0.17264710366725922, + "step": 11936 + }, + { + "epoch": 0.69, + "learning_rate": 2.2521926598280815e-08, + "logits/chosen": -1.8091943264007568, + "logits/rejected": -1.8149629831314087, + "logps/chosen": -148.60418701171875, + "logps/rejected": -242.00714111328125, + "loss": 0.0948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.72503662109375, + "rewards/margins": 2.043055772781372, + "rewards/rejected": -0.3180191218852997, + "step": 11937 + }, + { + "epoch": 0.69, + "learning_rate": 2.251405375455595e-08, + "logits/chosen": -1.9296318292617798, + "logits/rejected": -1.9301667213439941, + "logps/chosen": -11.453782081604004, + "logps/rejected": -33.96338653564453, + "loss": 0.681, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2048574537038803, + "rewards/margins": -0.14279384911060333, + "rewards/rejected": 0.34765130281448364, + "step": 11938 + }, + { + "epoch": 0.69, + "learning_rate": 2.2506181887266345e-08, + "logits/chosen": -2.0002522468566895, + "logits/rejected": -1.997756838798523, + "logps/chosen": -32.572479248046875, + "logps/rejected": -149.271728515625, + "loss": 0.1919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8755630850791931, + "rewards/margins": 2.7450668811798096, + "rewards/rejected": -1.8695038557052612, + "step": 11939 + }, + { + "epoch": 0.69, + "learning_rate": 2.249831099669165e-08, + "logits/chosen": -1.7941768169403076, + "logits/rejected": -1.781106948852539, + "logps/chosen": -270.60711669921875, + "logps/rejected": -354.9649658203125, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1897735595703125, + "rewards/margins": 3.4338011741638184, + "rewards/rejected": -0.24402771890163422, + "step": 11940 + }, + { + "epoch": 0.69, + "learning_rate": 2.249044108311147e-08, + "logits/chosen": -1.9544858932495117, + "logits/rejected": -1.9576687812805176, + "logps/chosen": -0.14187650382518768, + "logps/rejected": -263.0866394042969, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013842226937413216, + "rewards/margins": 4.570183753967285, + "rewards/rejected": -4.584025859832764, + "step": 11941 + }, + { + "epoch": 0.69, + "learning_rate": 2.2482572146805407e-08, + "logits/chosen": -1.8508034944534302, + "logits/rejected": -1.836875557899475, + "logps/chosen": -0.0005944914883002639, + "logps/rejected": -259.72845458984375, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00022819466539658606, + "rewards/margins": 4.825692176818848, + "rewards/rejected": -4.825463771820068, + "step": 11942 + }, + { + "epoch": 0.7, + "learning_rate": 2.247470418805296e-08, + "logits/chosen": -1.9228644371032715, + "logits/rejected": -1.9299017190933228, + "logps/chosen": -31.00986671447754, + "logps/rejected": -160.89036560058594, + "loss": 0.5858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10790081322193146, + "rewards/margins": 0.23713207244873047, + "rewards/rejected": -0.129231259226799, + "step": 11943 + }, + { + "epoch": 0.7, + "learning_rate": 2.2466837207133665e-08, + "logits/chosen": -1.8557188510894775, + "logits/rejected": -1.8337112665176392, + "logps/chosen": -254.21542358398438, + "logps/rejected": -451.2179870605469, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.451345920562744, + "rewards/margins": 3.8354249000549316, + "rewards/rejected": -0.3840789794921875, + "step": 11944 + }, + { + "epoch": 0.7, + "learning_rate": 2.2458971204326992e-08, + "logits/chosen": -2.0876688957214355, + "logits/rejected": -2.0662336349487305, + "logps/chosen": -7.337566375732422, + "logps/rejected": -293.53997802734375, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18353433907032013, + "rewards/margins": 3.918361186981201, + "rewards/rejected": -3.7348268032073975, + "step": 11945 + }, + { + "epoch": 0.7, + "learning_rate": 2.24511061799124e-08, + "logits/chosen": -1.874260425567627, + "logits/rejected": -1.8623820543289185, + "logps/chosen": -8.40652084350586, + "logps/rejected": -244.3949432373047, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11347360908985138, + "rewards/margins": 3.898947238922119, + "rewards/rejected": -3.785473585128784, + "step": 11946 + }, + { + "epoch": 0.7, + "learning_rate": 2.2443242134169234e-08, + "logits/chosen": -1.975469708442688, + "logits/rejected": -1.9669134616851807, + "logps/chosen": -0.0009658503113314509, + "logps/rejected": -116.85806274414062, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0004849830293096602, + "rewards/margins": 2.467379570007324, + "rewards/rejected": -2.4668946266174316, + "step": 11947 + }, + { + "epoch": 0.7, + "learning_rate": 2.2435379067376935e-08, + "logits/chosen": -1.891980528831482, + "logits/rejected": -1.8832077980041504, + "logps/chosen": -21.768266677856445, + "logps/rejected": -175.21665954589844, + "loss": 0.4313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2151266187429428, + "rewards/margins": 0.9287359714508057, + "rewards/rejected": -0.7136093378067017, + "step": 11948 + }, + { + "epoch": 0.7, + "learning_rate": 2.2427516979814794e-08, + "logits/chosen": -1.8541381359100342, + "logits/rejected": -1.852515459060669, + "logps/chosen": -0.005427752621471882, + "logps/rejected": -138.06192016601562, + "loss": 0.3814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002404284750809893, + "rewards/margins": 2.2840194702148438, + "rewards/rejected": -2.2837791442871094, + "step": 11949 + }, + { + "epoch": 0.7, + "learning_rate": 2.2419655871762134e-08, + "logits/chosen": -2.0615808963775635, + "logits/rejected": -2.060209274291992, + "logps/chosen": -170.2561798095703, + "logps/rejected": -311.70269775390625, + "loss": 0.1481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5024200677871704, + "rewards/margins": 2.034506320953369, + "rewards/rejected": -1.5320862531661987, + "step": 11950 + }, + { + "epoch": 0.7, + "learning_rate": 2.2411795743498168e-08, + "logits/chosen": -1.90921151638031, + "logits/rejected": -1.8932676315307617, + "logps/chosen": -177.24554443359375, + "logps/rejected": -239.03369140625, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9214112758636475, + "rewards/margins": 0.15584111213684082, + "rewards/rejected": 2.7655701637268066, + "step": 11951 + }, + { + "epoch": 0.7, + "learning_rate": 2.240393659530221e-08, + "logits/chosen": -1.991942048072815, + "logits/rejected": -1.9838811159133911, + "logps/chosen": -6.506487846374512, + "logps/rejected": -101.26742553710938, + "loss": 0.4244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08308468014001846, + "rewards/margins": 1.314234733581543, + "rewards/rejected": -1.2311500310897827, + "step": 11952 + }, + { + "epoch": 0.7, + "learning_rate": 2.2396078427453386e-08, + "logits/chosen": -1.887995719909668, + "logits/rejected": -1.8748104572296143, + "logps/chosen": -0.0243745855987072, + "logps/rejected": -127.88362121582031, + "loss": 0.4958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023265810683369637, + "rewards/margins": 0.9688789248466492, + "rewards/rejected": -0.9456130862236023, + "step": 11953 + }, + { + "epoch": 0.7, + "learning_rate": 2.2388221240230888e-08, + "logits/chosen": -1.8872522115707397, + "logits/rejected": -1.9409847259521484, + "logps/chosen": -178.95347595214844, + "logps/rejected": -297.2951354980469, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.170135498046875, + "rewards/margins": 2.6135284900665283, + "rewards/rejected": -0.44339296221733093, + "step": 11954 + }, + { + "epoch": 0.7, + "learning_rate": 2.238036503391383e-08, + "logits/chosen": -1.865749478340149, + "logits/rejected": -1.8577533960342407, + "logps/chosen": -150.67233276367188, + "logps/rejected": -286.7325439453125, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.454809546470642, + "rewards/margins": 2.6094939708709717, + "rewards/rejected": -1.1546844244003296, + "step": 11955 + }, + { + "epoch": 0.7, + "learning_rate": 2.237250980878133e-08, + "logits/chosen": -1.9146462678909302, + "logits/rejected": -1.9166967868804932, + "logps/chosen": -0.3115963041782379, + "logps/rejected": -103.26555633544922, + "loss": 0.4102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02722066082060337, + "rewards/margins": 1.6862491369247437, + "rewards/rejected": -1.7134697437286377, + "step": 11956 + }, + { + "epoch": 0.7, + "learning_rate": 2.2364655565112402e-08, + "logits/chosen": -1.995322823524475, + "logits/rejected": -1.98506498336792, + "logps/chosen": -36.520362854003906, + "logps/rejected": -249.08248901367188, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11279334872961044, + "rewards/margins": 5.800667762756348, + "rewards/rejected": -5.913461208343506, + "step": 11957 + }, + { + "epoch": 0.7, + "learning_rate": 2.235680230318609e-08, + "logits/chosen": -1.9166195392608643, + "logits/rejected": -1.9108225107192993, + "logps/chosen": -0.00022803436149843037, + "logps/rejected": -86.57417297363281, + "loss": 0.4837, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.857671102508903e-06, + "rewards/margins": 1.1418373584747314, + "rewards/rejected": -1.1418472528457642, + "step": 11958 + }, + { + "epoch": 0.7, + "learning_rate": 2.234895002328137e-08, + "logits/chosen": -1.8951174020767212, + "logits/rejected": -1.9023473262786865, + "logps/chosen": -173.68695068359375, + "logps/rejected": -281.69500732421875, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2814483642578125, + "rewards/margins": 2.872589111328125, + "rewards/rejected": 0.4088592529296875, + "step": 11959 + }, + { + "epoch": 0.7, + "learning_rate": 2.2341098725677226e-08, + "logits/chosen": -1.8793331384658813, + "logits/rejected": -1.8778448104858398, + "logps/chosen": -206.1444091796875, + "logps/rejected": -427.5273742675781, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.333792209625244, + "rewards/margins": 3.441000461578369, + "rewards/rejected": -1.107208251953125, + "step": 11960 + }, + { + "epoch": 0.7, + "learning_rate": 2.2333248410652533e-08, + "logits/chosen": -1.9740917682647705, + "logits/rejected": -1.9705568552017212, + "logps/chosen": -5.780670642852783, + "logps/rejected": -168.47744750976562, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03233771398663521, + "rewards/margins": 1.6960583925247192, + "rewards/rejected": -1.663720726966858, + "step": 11961 + }, + { + "epoch": 0.7, + "learning_rate": 2.2325399078486183e-08, + "logits/chosen": -1.799597144126892, + "logits/rejected": -1.8022136688232422, + "logps/chosen": -0.00011908695887541398, + "logps/rejected": -23.657344818115234, + "loss": 0.7389, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.353664048219798e-06, + "rewards/margins": -0.17800718545913696, + "rewards/rejected": 0.17800083756446838, + "step": 11962 + }, + { + "epoch": 0.7, + "learning_rate": 2.2317550729457036e-08, + "logits/chosen": -2.117060661315918, + "logits/rejected": -2.1182644367218018, + "logps/chosen": -34.49040603637695, + "logps/rejected": -181.11065673828125, + "loss": 0.7134, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0877784714102745, + "rewards/margins": -0.1379905641078949, + "rewards/rejected": 0.05021209642291069, + "step": 11963 + }, + { + "epoch": 0.7, + "learning_rate": 2.230970336384389e-08, + "logits/chosen": -1.952713131904602, + "logits/rejected": -1.9510011672973633, + "logps/chosen": -34.05586624145508, + "logps/rejected": -151.11412048339844, + "loss": 0.5917, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1459503173828125, + "rewards/margins": 0.03239135444164276, + "rewards/rejected": 0.11355896294116974, + "step": 11964 + }, + { + "epoch": 0.7, + "learning_rate": 2.2301856981925555e-08, + "logits/chosen": -1.8811954259872437, + "logits/rejected": -1.9081436395645142, + "logps/chosen": -228.53346252441406, + "logps/rejected": -403.7376403808594, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.576991319656372, + "rewards/margins": 6.6129560470581055, + "rewards/rejected": -4.0359649658203125, + "step": 11965 + }, + { + "epoch": 0.7, + "learning_rate": 2.22940115839807e-08, + "logits/chosen": -1.980536699295044, + "logits/rejected": -1.9785724878311157, + "logps/chosen": -57.064788818359375, + "logps/rejected": -217.93441772460938, + "loss": 0.8182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2451149225234985, + "rewards/margins": 1.152060627937317, + "rewards/rejected": -2.3971755504608154, + "step": 11966 + }, + { + "epoch": 0.7, + "learning_rate": 2.228616717028812e-08, + "logits/chosen": -1.9849551916122437, + "logits/rejected": -1.9801164865493774, + "logps/chosen": -0.0005698382155969739, + "logps/rejected": -73.84107971191406, + "loss": 0.4832, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6191160057132947e-06, + "rewards/margins": 1.1185976266860962, + "rewards/rejected": -1.11859929561615, + "step": 11967 + }, + { + "epoch": 0.7, + "learning_rate": 2.2278323741126426e-08, + "logits/chosen": -1.826214075088501, + "logits/rejected": -1.819593906402588, + "logps/chosen": -33.194091796875, + "logps/rejected": -310.4302673339844, + "loss": 0.2091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.566297173500061, + "rewards/margins": 5.8120245933532715, + "rewards/rejected": -5.2457275390625, + "step": 11968 + }, + { + "epoch": 0.7, + "learning_rate": 2.227048129677429e-08, + "logits/chosen": -2.121178150177002, + "logits/rejected": -2.098297119140625, + "logps/chosen": -127.69589233398438, + "logps/rejected": -328.36236572265625, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3716781735420227, + "rewards/margins": 3.3774337768554688, + "rewards/rejected": -3.005755662918091, + "step": 11969 + }, + { + "epoch": 0.7, + "learning_rate": 2.2262639837510265e-08, + "logits/chosen": -1.8305621147155762, + "logits/rejected": -1.8532055616378784, + "logps/chosen": -239.18238830566406, + "logps/rejected": -416.7990417480469, + "loss": 0.1231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3665847778320312, + "rewards/margins": 1.4697799682617188, + "rewards/rejected": -0.1031951904296875, + "step": 11970 + }, + { + "epoch": 0.7, + "learning_rate": 2.2254799363612985e-08, + "logits/chosen": -1.9066495895385742, + "logits/rejected": -1.9151381254196167, + "logps/chosen": -75.15706634521484, + "logps/rejected": -228.89886474609375, + "loss": 0.136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4465149641036987, + "rewards/margins": 2.2952606678009033, + "rewards/rejected": -0.8487457633018494, + "step": 11971 + }, + { + "epoch": 0.7, + "learning_rate": 2.2246959875360933e-08, + "logits/chosen": -1.7723921537399292, + "logits/rejected": -1.7401084899902344, + "logps/chosen": -244.25711059570312, + "logps/rejected": -395.01116943359375, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.10418701171875, + "rewards/margins": 1.897058129310608, + "rewards/rejected": 1.207128882408142, + "step": 11972 + }, + { + "epoch": 0.7, + "learning_rate": 2.223912137303264e-08, + "logits/chosen": -1.9917863607406616, + "logits/rejected": -1.9877679347991943, + "logps/chosen": -167.19515991210938, + "logps/rejected": -197.1365966796875, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1303284168243408, + "rewards/margins": 0.40289002656936646, + "rewards/rejected": 0.7274383902549744, + "step": 11973 + }, + { + "epoch": 0.7, + "learning_rate": 2.223128385690651e-08, + "logits/chosen": -1.9371612071990967, + "logits/rejected": -1.9313756227493286, + "logps/chosen": -178.96835327148438, + "logps/rejected": -411.3571472167969, + "loss": 0.0413, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.367687940597534, + "rewards/margins": 3.031463623046875, + "rewards/rejected": 0.33622437715530396, + "step": 11974 + }, + { + "epoch": 0.7, + "learning_rate": 2.2223447327261043e-08, + "logits/chosen": -1.6949931383132935, + "logits/rejected": -1.7106596231460571, + "logps/chosen": -244.98329162597656, + "logps/rejected": -377.27801513671875, + "loss": 0.1432, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6362746953964233, + "rewards/margins": 1.8592239618301392, + "rewards/rejected": -0.22294922173023224, + "step": 11975 + }, + { + "epoch": 0.7, + "learning_rate": 2.2215611784374582e-08, + "logits/chosen": -1.8630633354187012, + "logits/rejected": -1.8570411205291748, + "logps/chosen": -1.2443472146987915, + "logps/rejected": -56.18404769897461, + "loss": 0.5876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013254642486572266, + "rewards/margins": 0.36550435423851013, + "rewards/rejected": -0.3787589967250824, + "step": 11976 + }, + { + "epoch": 0.7, + "learning_rate": 2.2207777228525497e-08, + "logits/chosen": -2.0056800842285156, + "logits/rejected": -2.0166969299316406, + "logps/chosen": -26.067901611328125, + "logps/rejected": -174.34811401367188, + "loss": 0.4099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2152305692434311, + "rewards/margins": 1.343996524810791, + "rewards/rejected": -1.1287659406661987, + "step": 11977 + }, + { + "epoch": 0.7, + "learning_rate": 2.219994365999211e-08, + "logits/chosen": -1.8748669624328613, + "logits/rejected": -1.8713693618774414, + "logps/chosen": -0.00016545748803764582, + "logps/rejected": -84.97433471679688, + "loss": 0.4305, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9591270504170097e-05, + "rewards/margins": 1.6705435514450073, + "rewards/rejected": -1.6705139875411987, + "step": 11978 + }, + { + "epoch": 0.7, + "learning_rate": 2.2192111079052728e-08, + "logits/chosen": -2.13622784614563, + "logits/rejected": -2.1305718421936035, + "logps/chosen": -0.01008701417595148, + "logps/rejected": -117.20332336425781, + "loss": 0.4411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012041805166518316, + "rewards/margins": 1.7041070461273193, + "rewards/rejected": -1.7042274475097656, + "step": 11979 + }, + { + "epoch": 0.7, + "learning_rate": 2.218427948598557e-08, + "logits/chosen": -1.8366844654083252, + "logits/rejected": -1.8265963792800903, + "logps/chosen": -2.3364851585938595e-05, + "logps/rejected": -187.15074157714844, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.4107056851316884e-07, + "rewards/margins": 3.3332815170288086, + "rewards/rejected": -3.3332810401916504, + "step": 11980 + }, + { + "epoch": 0.7, + "learning_rate": 2.217644888106886e-08, + "logits/chosen": -2.062690258026123, + "logits/rejected": -2.05898118019104, + "logps/chosen": -53.18608093261719, + "logps/rejected": -159.731201171875, + "loss": 1.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4217700958251953, + "rewards/margins": 0.23273897171020508, + "rewards/rejected": -2.6545090675354004, + "step": 11981 + }, + { + "epoch": 0.7, + "learning_rate": 2.216861926458079e-08, + "logits/chosen": -1.851614236831665, + "logits/rejected": -1.845397710800171, + "logps/chosen": -8.778209686279297, + "logps/rejected": -165.09742736816406, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13824252784252167, + "rewards/margins": 3.5117318630218506, + "rewards/rejected": -3.3734893798828125, + "step": 11982 + }, + { + "epoch": 0.7, + "learning_rate": 2.2160790636799497e-08, + "logits/chosen": -2.1134443283081055, + "logits/rejected": -2.11301851272583, + "logps/chosen": -5.614689507638104e-05, + "logps/rejected": -201.25946044921875, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.655944950281992e-07, + "rewards/margins": 3.0543553829193115, + "rewards/rejected": -3.054356336593628, + "step": 11983 + }, + { + "epoch": 0.7, + "learning_rate": 2.2152962998003118e-08, + "logits/chosen": -1.904063105583191, + "logits/rejected": -1.917997121810913, + "logps/chosen": -233.33741760253906, + "logps/rejected": -588.1656494140625, + "loss": 0.0655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7601730227470398, + "rewards/margins": 5.040849208831787, + "rewards/rejected": -4.280676364898682, + "step": 11984 + }, + { + "epoch": 0.7, + "learning_rate": 2.214513634846969e-08, + "logits/chosen": -1.8330141305923462, + "logits/rejected": -1.8270938396453857, + "logps/chosen": -67.25314331054688, + "logps/rejected": -185.35183715820312, + "loss": 0.2375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7384933829307556, + "rewards/margins": 2.0162339210510254, + "rewards/rejected": -1.277740478515625, + "step": 11985 + }, + { + "epoch": 0.7, + "learning_rate": 2.2137310688477268e-08, + "logits/chosen": -2.066958427429199, + "logits/rejected": -2.064858913421631, + "logps/chosen": -2.1815189029439352e-05, + "logps/rejected": -61.74662780761719, + "loss": 0.4002, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.152488592510053e-07, + "rewards/margins": 2.032482624053955, + "rewards/rejected": -2.0324819087982178, + "step": 11986 + }, + { + "epoch": 0.7, + "learning_rate": 2.2129486018303868e-08, + "logits/chosen": -2.187976598739624, + "logits/rejected": -2.1762261390686035, + "logps/chosen": -0.21161574125289917, + "logps/rejected": -176.972412109375, + "loss": 0.3883, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00564652681350708, + "rewards/margins": 2.4521522521972656, + "rewards/rejected": -2.4465057849884033, + "step": 11987 + }, + { + "epoch": 0.7, + "learning_rate": 2.212166233822747e-08, + "logits/chosen": -1.967639684677124, + "logits/rejected": -1.968165397644043, + "logps/chosen": -2.2185113430023193, + "logps/rejected": -59.039947509765625, + "loss": 0.5607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000534272228833288, + "rewards/margins": 0.5291861295700073, + "rewards/rejected": -0.5286518335342407, + "step": 11988 + }, + { + "epoch": 0.7, + "learning_rate": 2.211383964852595e-08, + "logits/chosen": -2.0194289684295654, + "logits/rejected": -2.017697811126709, + "logps/chosen": -0.00010227940219920129, + "logps/rejected": -122.3697509765625, + "loss": 0.4637, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7178218715562252e-06, + "rewards/margins": 1.0907849073410034, + "rewards/rejected": -1.0907821655273438, + "step": 11989 + }, + { + "epoch": 0.7, + "learning_rate": 2.2106017949477296e-08, + "logits/chosen": -1.918055772781372, + "logits/rejected": -1.9135050773620605, + "logps/chosen": -48.66069412231445, + "logps/rejected": -186.63397216796875, + "loss": 0.6467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3528415858745575, + "rewards/margins": 0.40629079937934875, + "rewards/rejected": -0.7591323852539062, + "step": 11990 + }, + { + "epoch": 0.7, + "learning_rate": 2.2098197241359306e-08, + "logits/chosen": -1.8355352878570557, + "logits/rejected": -1.8226234912872314, + "logps/chosen": -25.33095359802246, + "logps/rejected": -297.08282470703125, + "loss": 0.2136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6263414621353149, + "rewards/margins": 3.7496628761291504, + "rewards/rejected": -3.123321533203125, + "step": 11991 + }, + { + "epoch": 0.7, + "learning_rate": 2.2090377524449855e-08, + "logits/chosen": -2.06333589553833, + "logits/rejected": -2.0556206703186035, + "logps/chosen": -13.700340270996094, + "logps/rejected": -293.84857177734375, + "loss": 0.2668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21958903968334198, + "rewards/margins": 6.748501777648926, + "rewards/rejected": -6.528912544250488, + "step": 11992 + }, + { + "epoch": 0.7, + "learning_rate": 2.2082558799026678e-08, + "logits/chosen": -2.1207985877990723, + "logits/rejected": -2.112074851989746, + "logps/chosen": -43.60435104370117, + "logps/rejected": -316.533203125, + "loss": 0.1457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5812854766845703, + "rewards/margins": 6.9578070640563965, + "rewards/rejected": -6.376521587371826, + "step": 11993 + }, + { + "epoch": 0.7, + "learning_rate": 2.2074741065367608e-08, + "logits/chosen": -2.0049121379852295, + "logits/rejected": -1.9950246810913086, + "logps/chosen": -108.03019714355469, + "logps/rejected": -222.13616943359375, + "loss": 0.2285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8217506408691406, + "rewards/margins": 1.8509835004806519, + "rewards/rejected": -1.0292328596115112, + "step": 11994 + }, + { + "epoch": 0.7, + "learning_rate": 2.2066924323750313e-08, + "logits/chosen": -1.717041015625, + "logits/rejected": -1.7253785133361816, + "logps/chosen": -208.69891357421875, + "logps/rejected": -249.0373992919922, + "loss": 0.1622, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3816406726837158, + "rewards/margins": 2.1966049671173096, + "rewards/rejected": -0.8149642944335938, + "step": 11995 + }, + { + "epoch": 0.7, + "learning_rate": 2.20591085744525e-08, + "logits/chosen": -1.9431736469268799, + "logits/rejected": -1.9729530811309814, + "logps/chosen": -175.84442138671875, + "logps/rejected": -306.618896484375, + "loss": 0.0788, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.037736654281616, + "rewards/margins": 1.9501938819885254, + "rewards/rejected": 0.08754272758960724, + "step": 11996 + }, + { + "epoch": 0.7, + "learning_rate": 2.2051293817751826e-08, + "logits/chosen": -1.7138129472732544, + "logits/rejected": -1.721923589706421, + "logps/chosen": -158.03782653808594, + "logps/rejected": -403.966064453125, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8607239723205566, + "rewards/margins": 2.86328125, + "rewards/rejected": 0.9974426627159119, + "step": 11997 + }, + { + "epoch": 0.7, + "learning_rate": 2.204348005392592e-08, + "logits/chosen": -1.983915090560913, + "logits/rejected": -1.9838478565216064, + "logps/chosen": -0.0002670084359124303, + "logps/rejected": -173.17184448242188, + "loss": 0.3549, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.839495431951946e-06, + "rewards/margins": 3.716404676437378, + "rewards/rejected": -3.71640944480896, + "step": 11998 + }, + { + "epoch": 0.7, + "learning_rate": 2.203566728325234e-08, + "logits/chosen": -1.6194531917572021, + "logits/rejected": -1.5062341690063477, + "logps/chosen": -241.0820770263672, + "logps/rejected": -647.27490234375, + "loss": 0.1166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9676742553710938, + "rewards/margins": 4.057268142700195, + "rewards/rejected": -3.0895936489105225, + "step": 11999 + }, + { + "epoch": 0.7, + "learning_rate": 2.2027855506008646e-08, + "logits/chosen": -1.7532178163528442, + "logits/rejected": -1.7450778484344482, + "logps/chosen": -113.55833435058594, + "logps/rejected": -263.2696838378906, + "loss": 0.2362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5160247683525085, + "rewards/margins": 1.9596009254455566, + "rewards/rejected": -1.4435760974884033, + "step": 12000 + }, + { + "epoch": 0.7, + "learning_rate": 2.2020044722472354e-08, + "logits/chosen": -1.937348484992981, + "logits/rejected": -1.878636121749878, + "logps/chosen": -175.99435424804688, + "logps/rejected": -301.9298095703125, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2915481328964233, + "rewards/margins": 0.6350356936454773, + "rewards/rejected": 0.656512439250946, + "step": 12001 + }, + { + "epoch": 0.7, + "learning_rate": 2.201223493292093e-08, + "logits/chosen": -1.6936954259872437, + "logits/rejected": -1.6946911811828613, + "logps/chosen": -37.6242561340332, + "logps/rejected": -233.26397705078125, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08776511996984482, + "rewards/margins": 3.9696836471557617, + "rewards/rejected": -3.881918430328369, + "step": 12002 + }, + { + "epoch": 0.7, + "learning_rate": 2.2004426137631856e-08, + "logits/chosen": -1.9399604797363281, + "logits/rejected": -1.9291956424713135, + "logps/chosen": -205.2740478515625, + "logps/rejected": -282.12347412109375, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8556747436523438, + "rewards/margins": 2.986943006515503, + "rewards/rejected": 0.868731677532196, + "step": 12003 + }, + { + "epoch": 0.7, + "learning_rate": 2.199661833688248e-08, + "logits/chosen": -2.0186595916748047, + "logits/rejected": -2.0239906311035156, + "logps/chosen": -63.88926696777344, + "logps/rejected": -226.84901428222656, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3770034909248352, + "rewards/margins": 2.6445159912109375, + "rewards/rejected": -2.267512559890747, + "step": 12004 + }, + { + "epoch": 0.7, + "learning_rate": 2.1988811530950197e-08, + "logits/chosen": -1.9291996955871582, + "logits/rejected": -1.8845596313476562, + "logps/chosen": -188.8089141845703, + "logps/rejected": -514.8179931640625, + "loss": 0.1099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1814422607421875, + "rewards/margins": 2.514697313308716, + "rewards/rejected": -1.3332550525665283, + "step": 12005 + }, + { + "epoch": 0.7, + "learning_rate": 2.1981005720112344e-08, + "logits/chosen": -1.692315936088562, + "logits/rejected": -1.6940703392028809, + "logps/chosen": -56.844425201416016, + "logps/rejected": -340.4245910644531, + "loss": 0.1043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4854061603546143, + "rewards/margins": 2.918203353881836, + "rewards/rejected": -1.4327973127365112, + "step": 12006 + }, + { + "epoch": 0.7, + "learning_rate": 2.197320090464625e-08, + "logits/chosen": -1.7638169527053833, + "logits/rejected": -1.7570217847824097, + "logps/chosen": -0.0050642467103898525, + "logps/rejected": -207.7093048095703, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026384182274341583, + "rewards/margins": 4.541935920715332, + "rewards/rejected": -4.542199611663818, + "step": 12007 + }, + { + "epoch": 0.7, + "learning_rate": 2.196539708482911e-08, + "logits/chosen": -1.838031530380249, + "logits/rejected": -1.8520358800888062, + "logps/chosen": -58.91420364379883, + "logps/rejected": -175.38735961914062, + "loss": 0.4351, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10596352070569992, + "rewards/margins": 1.6118252277374268, + "rewards/rejected": -1.7177886962890625, + "step": 12008 + }, + { + "epoch": 0.7, + "learning_rate": 2.195759426093824e-08, + "logits/chosen": -2.0603866577148438, + "logits/rejected": -2.1185600757598877, + "logps/chosen": -208.79061889648438, + "logps/rejected": -383.9255065917969, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2065231800079346, + "rewards/margins": 6.3065290451049805, + "rewards/rejected": -3.100006103515625, + "step": 12009 + }, + { + "epoch": 0.7, + "learning_rate": 2.1949792433250776e-08, + "logits/chosen": -2.0509092807769775, + "logits/rejected": -2.047201633453369, + "logps/chosen": -30.72085952758789, + "logps/rejected": -195.45643615722656, + "loss": 0.1989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43000832200050354, + "rewards/margins": 5.304982662200928, + "rewards/rejected": -4.874974250793457, + "step": 12010 + }, + { + "epoch": 0.7, + "learning_rate": 2.1941991602043903e-08, + "logits/chosen": -1.7280488014221191, + "logits/rejected": -1.73152756690979, + "logps/chosen": -144.5110626220703, + "logps/rejected": -256.82232666015625, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.309823751449585, + "rewards/margins": 2.591754198074341, + "rewards/rejected": -0.281930536031723, + "step": 12011 + }, + { + "epoch": 0.7, + "learning_rate": 2.1934191767594708e-08, + "logits/chosen": -1.793443202972412, + "logits/rejected": -1.7979923486709595, + "logps/chosen": -235.9964599609375, + "logps/rejected": -306.80975341796875, + "loss": 0.315, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.427331566810608, + "rewards/margins": 0.2752227783203125, + "rewards/rejected": 1.1521087884902954, + "step": 12012 + }, + { + "epoch": 0.7, + "learning_rate": 2.192639293018033e-08, + "logits/chosen": -1.9202375411987305, + "logits/rejected": -1.9186296463012695, + "logps/chosen": -0.0005611769738607109, + "logps/rejected": -98.08396911621094, + "loss": 0.4659, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.854747648001648e-06, + "rewards/margins": 1.2476062774658203, + "rewards/rejected": -1.247615098953247, + "step": 12013 + }, + { + "epoch": 0.7, + "learning_rate": 2.191859509007779e-08, + "logits/chosen": -1.7698711156845093, + "logits/rejected": -1.7722644805908203, + "logps/chosen": -0.18967223167419434, + "logps/rejected": -162.30665588378906, + "loss": 0.3854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004793429281562567, + "rewards/margins": 2.542029857635498, + "rewards/rejected": -2.546823263168335, + "step": 12014 + }, + { + "epoch": 0.7, + "learning_rate": 2.1910798247564128e-08, + "logits/chosen": -1.9186683893203735, + "logits/rejected": -1.916027545928955, + "logps/chosen": -32.842899322509766, + "logps/rejected": -242.79531860351562, + "loss": 0.3599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01614990271627903, + "rewards/margins": 2.6448118686676025, + "rewards/rejected": -2.660961866378784, + "step": 12015 + }, + { + "epoch": 0.7, + "learning_rate": 2.1903002402916277e-08, + "logits/chosen": -1.819393515586853, + "logits/rejected": -1.8401877880096436, + "logps/chosen": -212.15872192382812, + "logps/rejected": -305.7292785644531, + "loss": 0.0933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.51287841796875, + "rewards/margins": 2.1770477294921875, + "rewards/rejected": -0.6641693115234375, + "step": 12016 + }, + { + "epoch": 0.7, + "learning_rate": 2.1895207556411256e-08, + "logits/chosen": -1.9672659635543823, + "logits/rejected": -1.944058895111084, + "logps/chosen": -213.67649841308594, + "logps/rejected": -263.43914794921875, + "loss": 0.3396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.15203857421875, + "rewards/margins": 0.673297107219696, + "rewards/rejected": 0.47874146699905396, + "step": 12017 + }, + { + "epoch": 0.7, + "learning_rate": 2.1887413708325914e-08, + "logits/chosen": -1.7416877746582031, + "logits/rejected": -1.731508493423462, + "logps/chosen": -3.4792914390563965, + "logps/rejected": -198.3838348388672, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.160407155752182, + "rewards/margins": 2.0293376445770264, + "rewards/rejected": -2.189744710922241, + "step": 12018 + }, + { + "epoch": 0.7, + "learning_rate": 2.1879620858937154e-08, + "logits/chosen": -1.9535661935806274, + "logits/rejected": -1.9567333459854126, + "logps/chosen": -134.4029541015625, + "logps/rejected": -256.0059814453125, + "loss": 0.1907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.149938941001892, + "rewards/margins": 1.40777587890625, + "rewards/rejected": -0.2578369081020355, + "step": 12019 + }, + { + "epoch": 0.7, + "learning_rate": 2.1871829008521807e-08, + "logits/chosen": -1.8723349571228027, + "logits/rejected": -1.8931382894515991, + "logps/chosen": -235.11468505859375, + "logps/rejected": -399.58599853515625, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9960724115371704, + "rewards/margins": 2.004525661468506, + "rewards/rejected": -1.008453369140625, + "step": 12020 + }, + { + "epoch": 0.7, + "learning_rate": 2.186403815735669e-08, + "logits/chosen": -2.000620126724243, + "logits/rejected": -1.9767292737960815, + "logps/chosen": -66.87802124023438, + "logps/rejected": -304.36346435546875, + "loss": 0.2662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1591842621564865, + "rewards/margins": 5.361427307128906, + "rewards/rejected": -5.520611763000488, + "step": 12021 + }, + { + "epoch": 0.7, + "learning_rate": 2.1856248305718583e-08, + "logits/chosen": -1.655013918876648, + "logits/rejected": -1.6606299877166748, + "logps/chosen": -44.843238830566406, + "logps/rejected": -124.57942199707031, + "loss": 0.3831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42240068316459656, + "rewards/margins": 0.6675407886505127, + "rewards/rejected": -0.24514007568359375, + "step": 12022 + }, + { + "epoch": 0.7, + "learning_rate": 2.1848459453884182e-08, + "logits/chosen": -1.747101068496704, + "logits/rejected": -1.7406554222106934, + "logps/chosen": -207.69155883789062, + "logps/rejected": -260.821044921875, + "loss": 0.3631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.472930908203125, + "rewards/margins": 0.9915329217910767, + "rewards/rejected": -0.5186020135879517, + "step": 12023 + }, + { + "epoch": 0.7, + "learning_rate": 2.1840671602130207e-08, + "logits/chosen": -1.7899880409240723, + "logits/rejected": -1.7916302680969238, + "logps/chosen": -62.8109016418457, + "logps/rejected": -487.2286376953125, + "loss": 0.1559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8574405908584595, + "rewards/margins": 11.048114776611328, + "rewards/rejected": -10.190673828125, + "step": 12024 + }, + { + "epoch": 0.7, + "learning_rate": 2.1832884750733322e-08, + "logits/chosen": -1.8177592754364014, + "logits/rejected": -1.8206562995910645, + "logps/chosen": -10.616435050964355, + "logps/rejected": -82.76416015625, + "loss": 0.4006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22401581704616547, + "rewards/margins": 1.9015275239944458, + "rewards/rejected": -2.1255433559417725, + "step": 12025 + }, + { + "epoch": 0.7, + "learning_rate": 2.1825098899970164e-08, + "logits/chosen": -1.8248850107192993, + "logits/rejected": -1.8331780433654785, + "logps/chosen": -185.13966369628906, + "logps/rejected": -228.0919189453125, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0943315029144287, + "rewards/margins": 1.2213456630706787, + "rewards/rejected": 0.87298583984375, + "step": 12026 + }, + { + "epoch": 0.7, + "learning_rate": 2.1817314050117292e-08, + "logits/chosen": -1.912353277206421, + "logits/rejected": -1.9133155345916748, + "logps/chosen": -335.7443542480469, + "logps/rejected": -644.3447265625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1134705543518066, + "rewards/margins": 6.403460502624512, + "rewards/rejected": -3.289990186691284, + "step": 12027 + }, + { + "epoch": 0.7, + "learning_rate": 2.1809530201451286e-08, + "logits/chosen": -1.7963331937789917, + "logits/rejected": -1.8171635866165161, + "logps/chosen": -260.7940673828125, + "logps/rejected": -480.73443603515625, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.54547119140625, + "rewards/margins": 4.739496231079102, + "rewards/rejected": -3.1940248012542725, + "step": 12028 + }, + { + "epoch": 0.7, + "learning_rate": 2.1801747354248655e-08, + "logits/chosen": -2.034076690673828, + "logits/rejected": -2.0326640605926514, + "logps/chosen": -36.98532485961914, + "logps/rejected": -244.04693603515625, + "loss": 0.3136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08237991482019424, + "rewards/margins": 5.739298343658447, + "rewards/rejected": -5.656918525695801, + "step": 12029 + }, + { + "epoch": 0.7, + "learning_rate": 2.1793965508785916e-08, + "logits/chosen": -1.8828630447387695, + "logits/rejected": -1.9521019458770752, + "logps/chosen": -164.49252319335938, + "logps/rejected": -335.1734619140625, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9400681257247925, + "rewards/margins": 3.497056722640991, + "rewards/rejected": -1.5569885969161987, + "step": 12030 + }, + { + "epoch": 0.7, + "learning_rate": 2.1786184665339453e-08, + "logits/chosen": -1.874974012374878, + "logits/rejected": -1.8696366548538208, + "logps/chosen": -31.38031768798828, + "logps/rejected": -206.24395751953125, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2856590449810028, + "rewards/margins": 2.8933801651000977, + "rewards/rejected": -2.6077210903167725, + "step": 12031 + }, + { + "epoch": 0.7, + "learning_rate": 2.177840482418576e-08, + "logits/chosen": -1.9292197227478027, + "logits/rejected": -1.9185118675231934, + "logps/chosen": -6.723307160427794e-05, + "logps/rejected": -120.73898315429688, + "loss": 0.5636, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.90715951475795e-07, + "rewards/margins": 0.5730409622192383, + "rewards/rejected": -0.573040783405304, + "step": 12032 + }, + { + "epoch": 0.7, + "learning_rate": 2.1770625985601148e-08, + "logits/chosen": -1.7693830728530884, + "logits/rejected": -1.745474100112915, + "logps/chosen": -141.00584411621094, + "logps/rejected": -364.04010009765625, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6948776245117188, + "rewards/margins": 3.0299696922302246, + "rewards/rejected": -1.3350921869277954, + "step": 12033 + }, + { + "epoch": 0.7, + "learning_rate": 2.176284814986201e-08, + "logits/chosen": -1.7803330421447754, + "logits/rejected": -1.8058007955551147, + "logps/chosen": -264.75384521484375, + "logps/rejected": -317.51690673828125, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3721559047698975, + "rewards/margins": 2.0874695777893066, + "rewards/rejected": 0.28468629717826843, + "step": 12034 + }, + { + "epoch": 0.7, + "learning_rate": 2.1755071317244595e-08, + "logits/chosen": -1.9821261167526245, + "logits/rejected": -1.9886540174484253, + "logps/chosen": -99.58332824707031, + "logps/rejected": -209.5152587890625, + "loss": 0.2546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19267502427101135, + "rewards/margins": 5.416619300842285, + "rewards/rejected": -5.223944187164307, + "step": 12035 + }, + { + "epoch": 0.7, + "learning_rate": 2.1747295488025236e-08, + "logits/chosen": -1.6619548797607422, + "logits/rejected": -1.6723414659500122, + "logps/chosen": -186.95193481445312, + "logps/rejected": -350.0734558105469, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8567780256271362, + "rewards/margins": 2.218588352203369, + "rewards/rejected": -0.3618102967739105, + "step": 12036 + }, + { + "epoch": 0.7, + "learning_rate": 2.1739520662480126e-08, + "logits/chosen": -2.1161251068115234, + "logits/rejected": -2.1104214191436768, + "logps/chosen": -228.33544921875, + "logps/rejected": -454.59185791015625, + "loss": 0.0898, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2887604236602783, + "rewards/margins": 1.8705109357833862, + "rewards/rejected": 0.4182495176792145, + "step": 12037 + }, + { + "epoch": 0.7, + "learning_rate": 2.1731746840885477e-08, + "logits/chosen": -2.0004067420959473, + "logits/rejected": -1.9962568283081055, + "logps/chosen": -277.52490234375, + "logps/rejected": -388.15985107421875, + "loss": 0.1386, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2211365699768066, + "rewards/margins": 1.2490570545196533, + "rewards/rejected": 0.9720794558525085, + "step": 12038 + }, + { + "epoch": 0.7, + "learning_rate": 2.172397402351746e-08, + "logits/chosen": -1.7124241590499878, + "logits/rejected": -1.7133407592773438, + "logps/chosen": -172.3319091796875, + "logps/rejected": -282.7000732421875, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2104339599609375, + "rewards/margins": 0.38900452852249146, + "rewards/rejected": 0.821429431438446, + "step": 12039 + }, + { + "epoch": 0.7, + "learning_rate": 2.1716202210652213e-08, + "logits/chosen": -1.9768273830413818, + "logits/rejected": -1.978936791419983, + "logps/chosen": -4.471245288848877, + "logps/rejected": -133.57974243164062, + "loss": 0.7749, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.19827213883399963, + "rewards/margins": -0.12175388634204865, + "rewards/rejected": -0.07651825249195099, + "step": 12040 + }, + { + "epoch": 0.7, + "learning_rate": 2.1708431402565795e-08, + "logits/chosen": -1.9376616477966309, + "logits/rejected": -1.9308842420578003, + "logps/chosen": -167.77272033691406, + "logps/rejected": -389.74383544921875, + "loss": 0.0749, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.546421766281128, + "rewards/margins": 2.046797037124634, + "rewards/rejected": 0.499624639749527, + "step": 12041 + }, + { + "epoch": 0.7, + "learning_rate": 2.1700661599534286e-08, + "logits/chosen": -1.8482195138931274, + "logits/rejected": -1.8560296297073364, + "logps/chosen": -14.758310317993164, + "logps/rejected": -257.88665771484375, + "loss": 0.3169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12431736290454865, + "rewards/margins": 6.720893383026123, + "rewards/rejected": -6.59657621383667, + "step": 12042 + }, + { + "epoch": 0.7, + "learning_rate": 2.1692892801833707e-08, + "logits/chosen": -2.027890920639038, + "logits/rejected": -2.026433229446411, + "logps/chosen": -0.14664196968078613, + "logps/rejected": -84.77324676513672, + "loss": 0.6716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00924144871532917, + "rewards/margins": 0.10213016718626022, + "rewards/rejected": -0.11137161403894424, + "step": 12043 + }, + { + "epoch": 0.7, + "learning_rate": 2.1685125009740034e-08, + "logits/chosen": -2.0183355808258057, + "logits/rejected": -2.0223186016082764, + "logps/chosen": -0.021640755236148834, + "logps/rejected": -59.772377014160156, + "loss": 0.5591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017670545494183898, + "rewards/margins": 0.6212340593338013, + "rewards/rejected": -0.6230010986328125, + "step": 12044 + }, + { + "epoch": 0.7, + "learning_rate": 2.1677358223529246e-08, + "logits/chosen": -2.0365657806396484, + "logits/rejected": -2.063138484954834, + "logps/chosen": -186.45626831054688, + "logps/rejected": -245.10231018066406, + "loss": 0.4579, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8048034906387329, + "rewards/margins": -0.17563778162002563, + "rewards/rejected": 0.9804412722587585, + "step": 12045 + }, + { + "epoch": 0.7, + "learning_rate": 2.1669592443477212e-08, + "logits/chosen": -2.088095188140869, + "logits/rejected": -2.073306083679199, + "logps/chosen": -2.8967551770620048e-05, + "logps/rejected": -210.48580932617188, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5761878791618074e-08, + "rewards/margins": 4.06317138671875, + "rewards/rejected": -4.06317138671875, + "step": 12046 + }, + { + "epoch": 0.7, + "learning_rate": 2.166182766985984e-08, + "logits/chosen": -1.8495419025421143, + "logits/rejected": -1.855955958366394, + "logps/chosen": -60.83979797363281, + "logps/rejected": -220.98773193359375, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9270744323730469, + "rewards/margins": 5.287792205810547, + "rewards/rejected": -3.3607177734375, + "step": 12047 + }, + { + "epoch": 0.7, + "learning_rate": 2.1654063902952964e-08, + "logits/chosen": -1.7098594903945923, + "logits/rejected": -1.674980878829956, + "logps/chosen": -199.23556518554688, + "logps/rejected": -293.7575988769531, + "loss": 0.2183, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5319366455078125, + "rewards/margins": 1.2277618646621704, + "rewards/rejected": 0.3041748106479645, + "step": 12048 + }, + { + "epoch": 0.7, + "learning_rate": 2.164630114303241e-08, + "logits/chosen": -1.961594820022583, + "logits/rejected": -1.9551154375076294, + "logps/chosen": -44.54751968383789, + "logps/rejected": -137.67198181152344, + "loss": 0.5527, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3506103456020355, + "rewards/margins": -0.05096742510795593, + "rewards/rejected": 0.40157777070999146, + "step": 12049 + }, + { + "epoch": 0.7, + "learning_rate": 2.1638539390373896e-08, + "logits/chosen": -1.9576388597488403, + "logits/rejected": -1.9636436700820923, + "logps/chosen": -0.06907054036855698, + "logps/rejected": -289.25250244140625, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0034347944892942905, + "rewards/margins": 4.1229448318481445, + "rewards/rejected": -4.119510173797607, + "step": 12050 + }, + { + "epoch": 0.7, + "learning_rate": 2.1630778645253227e-08, + "logits/chosen": -1.9551752805709839, + "logits/rejected": -1.9251716136932373, + "logps/chosen": -281.3223571777344, + "logps/rejected": -400.2631530761719, + "loss": 0.0275, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.247091770172119, + "rewards/margins": 3.8950836658477783, + "rewards/rejected": -0.647991955280304, + "step": 12051 + }, + { + "epoch": 0.7, + "learning_rate": 2.1623018907946055e-08, + "logits/chosen": -1.8799841403961182, + "logits/rejected": -1.8862969875335693, + "logps/chosen": -118.34663391113281, + "logps/rejected": -479.2167663574219, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8101562857627869, + "rewards/margins": 6.908682346343994, + "rewards/rejected": -6.0985260009765625, + "step": 12052 + }, + { + "epoch": 0.7, + "learning_rate": 2.1615260178728073e-08, + "logits/chosen": -1.8634752035140991, + "logits/rejected": -1.9162472486495972, + "logps/chosen": -175.80572509765625, + "logps/rejected": -269.18157958984375, + "loss": 0.1113, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3487579822540283, + "rewards/margins": 1.8284332752227783, + "rewards/rejected": -0.47967529296875, + "step": 12053 + }, + { + "epoch": 0.7, + "learning_rate": 2.160750245787486e-08, + "logits/chosen": -1.8541725873947144, + "logits/rejected": -1.8423962593078613, + "logps/chosen": -174.10247802734375, + "logps/rejected": -246.73822021484375, + "loss": 0.211, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.677484154701233, + "rewards/margins": 1.2265808582305908, + "rewards/rejected": 0.4509033262729645, + "step": 12054 + }, + { + "epoch": 0.7, + "learning_rate": 2.1599745745662085e-08, + "logits/chosen": -1.754618525505066, + "logits/rejected": -1.7674869298934937, + "logps/chosen": -128.1553955078125, + "logps/rejected": -190.70330810546875, + "loss": 0.1572, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0216705799102783, + "rewards/margins": 1.5059783458709717, + "rewards/rejected": 0.5156921744346619, + "step": 12055 + }, + { + "epoch": 0.7, + "learning_rate": 2.1591990042365232e-08, + "logits/chosen": -1.8015458583831787, + "logits/rejected": -1.792069911956787, + "logps/chosen": -130.15611267089844, + "logps/rejected": -334.6287841796875, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010464477352797985, + "rewards/margins": 2.8340089321136475, + "rewards/rejected": -2.844473361968994, + "step": 12056 + }, + { + "epoch": 0.7, + "learning_rate": 2.158423534825986e-08, + "logits/chosen": -1.9198611974716187, + "logits/rejected": -1.9153294563293457, + "logps/chosen": -81.55035400390625, + "logps/rejected": -192.8748321533203, + "loss": 0.1905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4892822206020355, + "rewards/margins": 4.0993242263793945, + "rewards/rejected": -3.610041856765747, + "step": 12057 + }, + { + "epoch": 0.7, + "learning_rate": 2.1576481663621444e-08, + "logits/chosen": -1.876194953918457, + "logits/rejected": -1.875818133354187, + "logps/chosen": -135.51495361328125, + "logps/rejected": -353.91656494140625, + "loss": 0.0485, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2401764392852783, + "rewards/margins": 2.9718170166015625, + "rewards/rejected": -0.731640636920929, + "step": 12058 + }, + { + "epoch": 0.7, + "learning_rate": 2.1568728988725448e-08, + "logits/chosen": -1.937288522720337, + "logits/rejected": -1.9294579029083252, + "logps/chosen": -4.260532379150391, + "logps/rejected": -32.82652282714844, + "loss": 0.6054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02669978141784668, + "rewards/margins": 0.1811027079820633, + "rewards/rejected": -0.1544029265642166, + "step": 12059 + }, + { + "epoch": 0.7, + "learning_rate": 2.1560977323847258e-08, + "logits/chosen": -1.792628288269043, + "logits/rejected": -1.791953444480896, + "logps/chosen": -65.9750747680664, + "logps/rejected": -236.04061889648438, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09861373901367188, + "rewards/margins": 3.023879289627075, + "rewards/rejected": -3.122493028640747, + "step": 12060 + }, + { + "epoch": 0.7, + "learning_rate": 2.155322666926226e-08, + "logits/chosen": -2.131457567214966, + "logits/rejected": -2.122743606567383, + "logps/chosen": -0.7452384233474731, + "logps/rejected": -218.9901123046875, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07134466618299484, + "rewards/margins": 4.055643558502197, + "rewards/rejected": -3.9842987060546875, + "step": 12061 + }, + { + "epoch": 0.7, + "learning_rate": 2.1545477025245796e-08, + "logits/chosen": -2.159895181655884, + "logits/rejected": -2.146717071533203, + "logps/chosen": -3.213228225708008, + "logps/rejected": -83.65811920166016, + "loss": 0.5178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13302741944789886, + "rewards/margins": 0.6520406603813171, + "rewards/rejected": -0.5190132260322571, + "step": 12062 + }, + { + "epoch": 0.7, + "learning_rate": 2.1537728392073178e-08, + "logits/chosen": -1.9021764993667603, + "logits/rejected": -1.9157453775405884, + "logps/chosen": -141.96978759765625, + "logps/rejected": -351.83843994140625, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.638449192047119, + "rewards/margins": 5.815496921539307, + "rewards/rejected": -3.1770477294921875, + "step": 12063 + }, + { + "epoch": 0.7, + "learning_rate": 2.152998077001969e-08, + "logits/chosen": -1.9469424486160278, + "logits/rejected": -1.9445631504058838, + "logps/chosen": -1.303072452545166, + "logps/rejected": -57.77079391479492, + "loss": 0.4541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01549389399588108, + "rewards/margins": 1.1552642583847046, + "rewards/rejected": -1.1707581281661987, + "step": 12064 + }, + { + "epoch": 0.7, + "learning_rate": 2.1522234159360525e-08, + "logits/chosen": -1.9282827377319336, + "logits/rejected": -1.9154280424118042, + "logps/chosen": -0.5240449905395508, + "logps/rejected": -103.2119369506836, + "loss": 0.621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21721045672893524, + "rewards/margins": 0.08341985940933228, + "rewards/rejected": 0.13379059731960297, + "step": 12065 + }, + { + "epoch": 0.7, + "learning_rate": 2.151448856037091e-08, + "logits/chosen": -1.8876699209213257, + "logits/rejected": -1.890631079673767, + "logps/chosen": -1.3766859769821167, + "logps/rejected": -81.063720703125, + "loss": 0.459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13519513607025146, + "rewards/margins": 1.9298614263534546, + "rewards/rejected": -2.065056562423706, + "step": 12066 + }, + { + "epoch": 0.7, + "learning_rate": 2.1506743973325993e-08, + "logits/chosen": -1.8309487104415894, + "logits/rejected": -1.8239299058914185, + "logps/chosen": -21.435081481933594, + "logps/rejected": -182.2739715576172, + "loss": 0.3012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24195729196071625, + "rewards/margins": 2.048488140106201, + "rewards/rejected": -1.8065308332443237, + "step": 12067 + }, + { + "epoch": 0.7, + "learning_rate": 2.1499000398500927e-08, + "logits/chosen": -1.8065226078033447, + "logits/rejected": -1.8196767568588257, + "logps/chosen": -68.7962417602539, + "logps/rejected": -327.05517578125, + "loss": 0.2368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5210372805595398, + "rewards/margins": 5.289018154144287, + "rewards/rejected": -4.767981052398682, + "step": 12068 + }, + { + "epoch": 0.7, + "learning_rate": 2.1491257836170755e-08, + "logits/chosen": -1.9546293020248413, + "logits/rejected": -1.939113974571228, + "logps/chosen": -3.027888851647731e-05, + "logps/rejected": -269.7987976074219, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.099329433098319e-06, + "rewards/margins": 3.6111772060394287, + "rewards/rejected": -3.6111741065979004, + "step": 12069 + }, + { + "epoch": 0.7, + "learning_rate": 2.1483516286610565e-08, + "logits/chosen": -1.7008904218673706, + "logits/rejected": -1.6975840330123901, + "logps/chosen": -236.45933532714844, + "logps/rejected": -361.30206298828125, + "loss": 0.0562, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8067153692245483, + "rewards/margins": 2.7708237171173096, + "rewards/rejected": -0.9641082882881165, + "step": 12070 + }, + { + "epoch": 0.7, + "learning_rate": 2.147577575009536e-08, + "logits/chosen": -1.6230307817459106, + "logits/rejected": -1.6297367811203003, + "logps/chosen": -68.96904754638672, + "logps/rejected": -434.1645812988281, + "loss": 0.1816, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1855919361114502, + "rewards/margins": 4.423348426818848, + "rewards/rejected": -3.2377564907073975, + "step": 12071 + }, + { + "epoch": 0.7, + "learning_rate": 2.146803622690015e-08, + "logits/chosen": -1.998563289642334, + "logits/rejected": -1.998097538948059, + "logps/chosen": -4.001765251159668, + "logps/rejected": -126.95128631591797, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005715131759643555, + "rewards/margins": 2.5917999744415283, + "rewards/rejected": -2.597515106201172, + "step": 12072 + }, + { + "epoch": 0.7, + "learning_rate": 2.146029771729982e-08, + "logits/chosen": -2.0038130283355713, + "logits/rejected": -2.0042989253997803, + "logps/chosen": -0.0010713854571804404, + "logps/rejected": -89.86534118652344, + "loss": 0.3834, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.95487654209137e-06, + "rewards/margins": 2.063041925430298, + "rewards/rejected": -2.0630478858947754, + "step": 12073 + }, + { + "epoch": 0.7, + "learning_rate": 2.1452560221569356e-08, + "logits/chosen": -1.9667682647705078, + "logits/rejected": -1.9618844985961914, + "logps/chosen": -27.589244842529297, + "logps/rejected": -226.55984497070312, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25144901871681213, + "rewards/margins": 2.708730697631836, + "rewards/rejected": -2.4572815895080566, + "step": 12074 + }, + { + "epoch": 0.7, + "learning_rate": 2.144482373998357e-08, + "logits/chosen": -1.8403598070144653, + "logits/rejected": -1.893515944480896, + "logps/chosen": -228.0823211669922, + "logps/rejected": -298.45965576171875, + "loss": 0.0687, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5328826904296875, + "rewards/margins": 2.035174608230591, + "rewards/rejected": 0.49770814180374146, + "step": 12075 + }, + { + "epoch": 0.7, + "learning_rate": 2.1437088272817345e-08, + "logits/chosen": -1.6136873960494995, + "logits/rejected": -1.6503545045852661, + "logps/chosen": -200.4312286376953, + "logps/rejected": -421.56011962890625, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.362400770187378, + "rewards/margins": 5.7216901779174805, + "rewards/rejected": -3.3592896461486816, + "step": 12076 + }, + { + "epoch": 0.7, + "learning_rate": 2.1429353820345423e-08, + "logits/chosen": -1.846107006072998, + "logits/rejected": -1.838726282119751, + "logps/chosen": -12.125781059265137, + "logps/rejected": -210.78439331054688, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39072486758232117, + "rewards/margins": 3.2380852699279785, + "rewards/rejected": -2.847360372543335, + "step": 12077 + }, + { + "epoch": 0.7, + "learning_rate": 2.1421620382842643e-08, + "logits/chosen": -2.0618529319763184, + "logits/rejected": -2.129239082336426, + "logps/chosen": -219.59344482421875, + "logps/rejected": -382.53387451171875, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.381890892982483, + "rewards/margins": 5.956427097320557, + "rewards/rejected": -4.574536323547363, + "step": 12078 + }, + { + "epoch": 0.7, + "learning_rate": 2.1413887960583682e-08, + "logits/chosen": -1.8126243352890015, + "logits/rejected": -1.8009189367294312, + "logps/chosen": -152.59555053710938, + "logps/rejected": -354.7601318359375, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4455933570861816, + "rewards/margins": 3.9963624477386475, + "rewards/rejected": -1.5507690906524658, + "step": 12079 + }, + { + "epoch": 0.7, + "learning_rate": 2.1406156553843253e-08, + "logits/chosen": -1.930138111114502, + "logits/rejected": -1.9246548414230347, + "logps/chosen": -1.2253131866455078, + "logps/rejected": -152.1903533935547, + "loss": 0.4027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10153839737176895, + "rewards/margins": 1.5956103801727295, + "rewards/rejected": -1.4940719604492188, + "step": 12080 + }, + { + "epoch": 0.7, + "learning_rate": 2.139842616289601e-08, + "logits/chosen": -2.164149045944214, + "logits/rejected": -2.163294792175293, + "logps/chosen": -21.02383041381836, + "logps/rejected": -164.853759765625, + "loss": 0.4522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07588654011487961, + "rewards/margins": 1.3834701776504517, + "rewards/rejected": -1.3075836896896362, + "step": 12081 + }, + { + "epoch": 0.7, + "learning_rate": 2.139069678801657e-08, + "logits/chosen": -1.9689130783081055, + "logits/rejected": -1.9635090827941895, + "logps/chosen": -57.78124237060547, + "logps/rejected": -200.3114471435547, + "loss": 0.24, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0076957941055298, + "rewards/margins": 1.2532386779785156, + "rewards/rejected": -0.24554291367530823, + "step": 12082 + }, + { + "epoch": 0.7, + "learning_rate": 2.138296842947955e-08, + "logits/chosen": -2.048459768295288, + "logits/rejected": -2.0329172611236572, + "logps/chosen": -5.084135055541992, + "logps/rejected": -197.69723510742188, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24945373833179474, + "rewards/margins": 2.977108955383301, + "rewards/rejected": -2.7276551723480225, + "step": 12083 + }, + { + "epoch": 0.7, + "learning_rate": 2.137524108755945e-08, + "logits/chosen": -1.7360888719558716, + "logits/rejected": -1.7394194602966309, + "logps/chosen": -16.40765953063965, + "logps/rejected": -152.495849609375, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3356046676635742, + "rewards/margins": 1.824416995048523, + "rewards/rejected": -1.4888123273849487, + "step": 12084 + }, + { + "epoch": 0.7, + "learning_rate": 2.1367514762530803e-08, + "logits/chosen": -2.0681540966033936, + "logits/rejected": -2.066316843032837, + "logps/chosen": -7.448827266693115, + "logps/rejected": -176.24920654296875, + "loss": 0.3503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08088717609643936, + "rewards/margins": 1.9892947673797607, + "rewards/rejected": -1.9084075689315796, + "step": 12085 + }, + { + "epoch": 0.7, + "learning_rate": 2.1359789454668092e-08, + "logits/chosen": -2.027250051498413, + "logits/rejected": -2.024231195449829, + "logps/chosen": -37.92607116699219, + "logps/rejected": -342.7198791503906, + "loss": 0.1744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46553918719291687, + "rewards/margins": 4.8693413734436035, + "rewards/rejected": -4.403802394866943, + "step": 12086 + }, + { + "epoch": 0.7, + "learning_rate": 2.135206516424577e-08, + "logits/chosen": -1.6949520111083984, + "logits/rejected": -1.7013856172561646, + "logps/chosen": -80.09119415283203, + "logps/rejected": -198.83456420898438, + "loss": 0.1522, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3586395978927612, + "rewards/margins": 1.9921586513519287, + "rewards/rejected": -0.6335189938545227, + "step": 12087 + }, + { + "epoch": 0.7, + "learning_rate": 2.1344341891538208e-08, + "logits/chosen": -1.905585765838623, + "logits/rejected": -1.8989049196243286, + "logps/chosen": -0.03273959085345268, + "logps/rejected": -71.35568237304688, + "loss": 0.5003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004668069537729025, + "rewards/margins": 0.8905414938926697, + "rewards/rejected": -0.8858734369277954, + "step": 12088 + }, + { + "epoch": 0.7, + "learning_rate": 2.133661963681979e-08, + "logits/chosen": -1.894652009010315, + "logits/rejected": -1.8810179233551025, + "logps/chosen": -242.31614685058594, + "logps/rejected": -394.9638366699219, + "loss": 0.1124, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.065577745437622, + "rewards/margins": 1.8783921003341675, + "rewards/rejected": 0.18718567490577698, + "step": 12089 + }, + { + "epoch": 0.7, + "learning_rate": 2.1328898400364852e-08, + "logits/chosen": -1.8660954236984253, + "logits/rejected": -1.8521674871444702, + "logps/chosen": -0.0037079816684126854, + "logps/rejected": -207.56387329101562, + "loss": 0.3404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001458478975109756, + "rewards/margins": 4.561071872711182, + "rewards/rejected": -4.559613227844238, + "step": 12090 + }, + { + "epoch": 0.7, + "learning_rate": 2.1321178182447707e-08, + "logits/chosen": -1.9626179933547974, + "logits/rejected": -1.9665285348892212, + "logps/chosen": -37.22587585449219, + "logps/rejected": -236.4700927734375, + "loss": 0.4748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08383064717054367, + "rewards/margins": 1.308311104774475, + "rewards/rejected": -1.3921416997909546, + "step": 12091 + }, + { + "epoch": 0.7, + "learning_rate": 2.131345898334256e-08, + "logits/chosen": -1.9211626052856445, + "logits/rejected": -1.927248239517212, + "logps/chosen": -208.13922119140625, + "logps/rejected": -328.9952392578125, + "loss": 0.0871, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.512678623199463, + "rewards/margins": 2.207780599594116, + "rewards/rejected": 0.30489808320999146, + "step": 12092 + }, + { + "epoch": 0.7, + "learning_rate": 2.1305740803323707e-08, + "logits/chosen": -2.021439790725708, + "logits/rejected": -2.017695903778076, + "logps/chosen": -8.278908729553223, + "logps/rejected": -156.1792449951172, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29070836305618286, + "rewards/margins": 1.941983938217163, + "rewards/rejected": -1.651275634765625, + "step": 12093 + }, + { + "epoch": 0.7, + "learning_rate": 2.1298023642665282e-08, + "logits/chosen": -1.7654812335968018, + "logits/rejected": -1.7628613710403442, + "logps/chosen": -185.07437133789062, + "logps/rejected": -336.2366027832031, + "loss": 0.3901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.704925537109375, + "rewards/margins": 0.057656824588775635, + "rewards/rejected": 0.6472687125205994, + "step": 12094 + }, + { + "epoch": 0.7, + "learning_rate": 2.1290307501641468e-08, + "logits/chosen": -1.9979625940322876, + "logits/rejected": -2.003572702407837, + "logps/chosen": -93.54904174804688, + "logps/rejected": -167.86630249023438, + "loss": 0.8974, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8261284232139587, + "rewards/margins": -0.28861314058303833, + "rewards/rejected": -0.5375152826309204, + "step": 12095 + }, + { + "epoch": 0.7, + "learning_rate": 2.1282592380526337e-08, + "logits/chosen": -1.979157567024231, + "logits/rejected": -1.9675730466842651, + "logps/chosen": -0.04977644979953766, + "logps/rejected": -190.33746337890625, + "loss": 0.3507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06369484215974808, + "rewards/margins": 3.0912859439849854, + "rewards/rejected": -3.0275909900665283, + "step": 12096 + }, + { + "epoch": 0.7, + "learning_rate": 2.1274878279594026e-08, + "logits/chosen": -1.6650058031082153, + "logits/rejected": -1.666761875152588, + "logps/chosen": -38.17351531982422, + "logps/rejected": -164.58656311035156, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3814888000488281, + "rewards/margins": 1.4687340259552002, + "rewards/rejected": -1.087245225906372, + "step": 12097 + }, + { + "epoch": 0.7, + "learning_rate": 2.126716519911853e-08, + "logits/chosen": -1.821999430656433, + "logits/rejected": -1.8236874341964722, + "logps/chosen": -69.3994369506836, + "logps/rejected": -214.18170166015625, + "loss": 0.1142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1997650861740112, + "rewards/margins": 4.425000190734863, + "rewards/rejected": -3.2252349853515625, + "step": 12098 + }, + { + "epoch": 0.7, + "learning_rate": 2.125945313937388e-08, + "logits/chosen": -1.97446870803833, + "logits/rejected": -1.9841417074203491, + "logps/chosen": -265.38360595703125, + "logps/rejected": -406.414794921875, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.702551245689392, + "rewards/margins": 3.485085964202881, + "rewards/rejected": -1.7825348377227783, + "step": 12099 + }, + { + "epoch": 0.7, + "learning_rate": 2.1251742100634033e-08, + "logits/chosen": -2.0518839359283447, + "logits/rejected": -2.0196309089660645, + "logps/chosen": -248.78378295898438, + "logps/rejected": -465.377197265625, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0137603282928467, + "rewards/margins": 3.9408416748046875, + "rewards/rejected": -0.927081286907196, + "step": 12100 + }, + { + "epoch": 0.7, + "learning_rate": 2.1244032083172953e-08, + "logits/chosen": -1.79989492893219, + "logits/rejected": -1.8272062540054321, + "logps/chosen": -171.6020050048828, + "logps/rejected": -151.10791015625, + "loss": 0.1603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9704697132110596, + "rewards/margins": 1.4203277826309204, + "rewards/rejected": 0.5501419305801392, + "step": 12101 + }, + { + "epoch": 0.7, + "learning_rate": 2.123632308726449e-08, + "logits/chosen": -1.897284746170044, + "logits/rejected": -1.9016810655593872, + "logps/chosen": -38.83690643310547, + "logps/rejected": -175.1329803466797, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5223060846328735, + "rewards/margins": 0.8223488330841064, + "rewards/rejected": -0.3000427186489105, + "step": 12102 + }, + { + "epoch": 0.7, + "learning_rate": 2.1228615113182535e-08, + "logits/chosen": -1.9528177976608276, + "logits/rejected": -1.937354564666748, + "logps/chosen": -32.826942443847656, + "logps/rejected": -295.0057067871094, + "loss": 0.288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5654708743095398, + "rewards/margins": 1.3032150268554688, + "rewards/rejected": -0.737744152545929, + "step": 12103 + }, + { + "epoch": 0.7, + "learning_rate": 2.1220908161200907e-08, + "logits/chosen": -1.7745012044906616, + "logits/rejected": -1.7730765342712402, + "logps/chosen": -1.2456116676330566, + "logps/rejected": -39.82882308959961, + "loss": 0.5578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02117178402841091, + "rewards/margins": 0.4345720410346985, + "rewards/rejected": -0.413400262594223, + "step": 12104 + }, + { + "epoch": 0.7, + "learning_rate": 2.121320223159339e-08, + "logits/chosen": -1.782326340675354, + "logits/rejected": -1.8254953622817993, + "logps/chosen": -228.63418579101562, + "logps/rejected": -254.4530029296875, + "loss": 0.191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.961474597454071, + "rewards/margins": 1.1020889282226562, + "rewards/rejected": -0.140614315867424, + "step": 12105 + }, + { + "epoch": 0.7, + "learning_rate": 2.1205497324633765e-08, + "logits/chosen": -1.9753808975219727, + "logits/rejected": -1.9643831253051758, + "logps/chosen": -168.9598846435547, + "logps/rejected": -241.4252471923828, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8424026370048523, + "rewards/margins": 0.2721816897392273, + "rewards/rejected": 0.570220947265625, + "step": 12106 + }, + { + "epoch": 0.7, + "learning_rate": 2.11977934405957e-08, + "logits/chosen": -2.1119275093078613, + "logits/rejected": -2.1085989475250244, + "logps/chosen": -10.824398040771484, + "logps/rejected": -114.61670684814453, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5033547282218933, + "rewards/margins": 1.3798600435256958, + "rewards/rejected": -0.8765053153038025, + "step": 12107 + }, + { + "epoch": 0.7, + "learning_rate": 2.1190090579752907e-08, + "logits/chosen": -1.9474318027496338, + "logits/rejected": -1.9491626024246216, + "logps/chosen": -24.479782104492188, + "logps/rejected": -124.84004974365234, + "loss": 0.4803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3979850709438324, + "rewards/margins": 0.5504745244979858, + "rewards/rejected": -0.15248946845531464, + "step": 12108 + }, + { + "epoch": 0.7, + "learning_rate": 2.1182388742379014e-08, + "logits/chosen": -1.9932491779327393, + "logits/rejected": -1.9687241315841675, + "logps/chosen": -7.343199831666425e-05, + "logps/rejected": -199.63885498046875, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0394181521842256e-05, + "rewards/margins": 3.5489041805267334, + "rewards/rejected": -3.5488739013671875, + "step": 12109 + }, + { + "epoch": 0.7, + "learning_rate": 2.1174687928747654e-08, + "logits/chosen": -1.973806619644165, + "logits/rejected": -1.9824292659759521, + "logps/chosen": -191.91976928710938, + "logps/rejected": -276.3780517578125, + "loss": 0.1408, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9100738763809204, + "rewards/margins": 1.3339812755584717, + "rewards/rejected": 0.576092541217804, + "step": 12110 + }, + { + "epoch": 0.7, + "learning_rate": 2.1166988139132348e-08, + "logits/chosen": -1.7102998495101929, + "logits/rejected": -1.7037068605422974, + "logps/chosen": -15.506884574890137, + "logps/rejected": -143.79156494140625, + "loss": 0.5329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15149736404418945, + "rewards/margins": 0.49482470750808716, + "rewards/rejected": -0.3433273434638977, + "step": 12111 + }, + { + "epoch": 0.7, + "learning_rate": 2.1159289373806688e-08, + "logits/chosen": -1.9452871084213257, + "logits/rejected": -1.9450209140777588, + "logps/chosen": -2.015425682067871, + "logps/rejected": -218.171142578125, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0041446806862950325, + "rewards/margins": 2.2144253253936768, + "rewards/rejected": -2.2185699939727783, + "step": 12112 + }, + { + "epoch": 0.7, + "learning_rate": 2.115159163304413e-08, + "logits/chosen": -1.905185341835022, + "logits/rejected": -1.8954969644546509, + "logps/chosen": -148.43280029296875, + "logps/rejected": -400.48504638671875, + "loss": 0.0535, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2627700567245483, + "rewards/margins": 3.3608412742614746, + "rewards/rejected": -2.098071336746216, + "step": 12113 + }, + { + "epoch": 0.7, + "learning_rate": 2.114389491711817e-08, + "logits/chosen": -1.8243438005447388, + "logits/rejected": -1.8106815814971924, + "logps/chosen": -9.789422035217285, + "logps/rejected": -305.2445983886719, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05898456647992134, + "rewards/margins": 7.544375419616699, + "rewards/rejected": -7.603360176086426, + "step": 12114 + }, + { + "epoch": 0.71, + "learning_rate": 2.1136199226302175e-08, + "logits/chosen": -1.8275882005691528, + "logits/rejected": -1.8120695352554321, + "logps/chosen": -267.7243347167969, + "logps/rejected": -358.31103515625, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7789947986602783, + "rewards/margins": 2.6542389392852783, + "rewards/rejected": 0.124755859375, + "step": 12115 + }, + { + "epoch": 0.71, + "learning_rate": 2.1128504560869613e-08, + "logits/chosen": -1.8540561199188232, + "logits/rejected": -1.84208345413208, + "logps/chosen": -223.72064208984375, + "logps/rejected": -466.79364013671875, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6367523670196533, + "rewards/margins": 2.937033176422119, + "rewards/rejected": -0.30028077960014343, + "step": 12116 + }, + { + "epoch": 0.71, + "learning_rate": 2.1120810921093773e-08, + "logits/chosen": -1.7418173551559448, + "logits/rejected": -1.7409337759017944, + "logps/chosen": -8.629096984863281, + "logps/rejected": -161.76539611816406, + "loss": 0.3614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11532659828662872, + "rewards/margins": 1.8895301818847656, + "rewards/rejected": -1.7742035388946533, + "step": 12117 + }, + { + "epoch": 0.71, + "learning_rate": 2.1113118307248006e-08, + "logits/chosen": -1.9211453199386597, + "logits/rejected": -1.9027541875839233, + "logps/chosen": -142.1021728515625, + "logps/rejected": -306.2352294921875, + "loss": 0.1262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7320221662521362, + "rewards/margins": 2.325244188308716, + "rewards/rejected": -0.5932220816612244, + "step": 12118 + }, + { + "epoch": 0.71, + "learning_rate": 2.1105426719605545e-08, + "logits/chosen": -1.9073690176010132, + "logits/rejected": -1.9082578420639038, + "logps/chosen": -233.790283203125, + "logps/rejected": -332.0091552734375, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7888214588165283, + "rewards/margins": 4.2118072509765625, + "rewards/rejected": -1.4229859113693237, + "step": 12119 + }, + { + "epoch": 0.71, + "learning_rate": 2.1097736158439706e-08, + "logits/chosen": -1.968261957168579, + "logits/rejected": -1.9644736051559448, + "logps/chosen": -12.039643287658691, + "logps/rejected": -62.08487319946289, + "loss": 0.8064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5918194055557251, + "rewards/margins": 0.31390380859375, + "rewards/rejected": -0.9057232141494751, + "step": 12120 + }, + { + "epoch": 0.71, + "learning_rate": 2.1090046624023632e-08, + "logits/chosen": -1.784692406654358, + "logits/rejected": -1.7840383052825928, + "logps/chosen": -0.009387684985995293, + "logps/rejected": -37.30565643310547, + "loss": 0.6578, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7170651669148356e-05, + "rewards/margins": 0.11942228674888611, + "rewards/rejected": -0.11946945637464523, + "step": 12121 + }, + { + "epoch": 0.71, + "learning_rate": 2.1082358116630516e-08, + "logits/chosen": -1.5846734046936035, + "logits/rejected": -1.6415573358535767, + "logps/chosen": -351.73260498046875, + "logps/rejected": -431.4266662597656, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5979859828948975, + "rewards/margins": 4.043026924133301, + "rewards/rejected": -1.4450409412384033, + "step": 12122 + }, + { + "epoch": 0.71, + "learning_rate": 2.1074670636533493e-08, + "logits/chosen": -2.0188958644866943, + "logits/rejected": -2.0142014026641846, + "logps/chosen": -47.0504150390625, + "logps/rejected": -173.57723999023438, + "loss": 0.2046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6991233825683594, + "rewards/margins": 2.817925214767456, + "rewards/rejected": -2.1188018321990967, + "step": 12123 + }, + { + "epoch": 0.71, + "learning_rate": 2.106698418400566e-08, + "logits/chosen": -1.7970938682556152, + "logits/rejected": -1.8042864799499512, + "logps/chosen": -261.45257568359375, + "logps/rejected": -433.4630432128906, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5531678199768066, + "rewards/margins": 2.73319411277771, + "rewards/rejected": 0.8199737668037415, + "step": 12124 + }, + { + "epoch": 0.71, + "learning_rate": 2.1059298759320087e-08, + "logits/chosen": -1.5849874019622803, + "logits/rejected": -1.578845500946045, + "logps/chosen": -204.78575134277344, + "logps/rejected": -489.204833984375, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.704463243484497, + "rewards/margins": 8.313277244567871, + "rewards/rejected": -5.608813762664795, + "step": 12125 + }, + { + "epoch": 0.71, + "learning_rate": 2.1051614362749776e-08, + "logits/chosen": -1.7452322244644165, + "logits/rejected": -1.714591383934021, + "logps/chosen": -13.242947578430176, + "logps/rejected": -255.1289520263672, + "loss": 0.2621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13587485253810883, + "rewards/margins": 5.322360038757324, + "rewards/rejected": -5.186485290527344, + "step": 12126 + }, + { + "epoch": 0.71, + "learning_rate": 2.104393099456772e-08, + "logits/chosen": -2.076585054397583, + "logits/rejected": -2.0687551498413086, + "logps/chosen": -57.51676940917969, + "logps/rejected": -232.22332763671875, + "loss": 0.2582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4206138551235199, + "rewards/margins": 2.2416739463806152, + "rewards/rejected": -1.8210601806640625, + "step": 12127 + }, + { + "epoch": 0.71, + "learning_rate": 2.1036248655046872e-08, + "logits/chosen": -1.9244489669799805, + "logits/rejected": -1.920859694480896, + "logps/chosen": -4.895965099334717, + "logps/rejected": -44.010658264160156, + "loss": 0.5031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4629833698272705, + "rewards/margins": 0.5297764539718628, + "rewards/rejected": -0.0667930617928505, + "step": 12128 + }, + { + "epoch": 0.71, + "learning_rate": 2.1028567344460176e-08, + "logits/chosen": -1.8236643075942993, + "logits/rejected": -1.8540784120559692, + "logps/chosen": -206.1710662841797, + "logps/rejected": -556.6851806640625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4483933448791504, + "rewards/margins": 9.081132888793945, + "rewards/rejected": -6.632739543914795, + "step": 12129 + }, + { + "epoch": 0.71, + "learning_rate": 2.1020887063080456e-08, + "logits/chosen": -1.9091533422470093, + "logits/rejected": -1.908677577972412, + "logps/chosen": -4.873987197875977, + "logps/rejected": -201.79049682617188, + "loss": 0.4418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0705324187874794, + "rewards/margins": 1.4923887252807617, + "rewards/rejected": -1.562921166419983, + "step": 12130 + }, + { + "epoch": 0.71, + "learning_rate": 2.1013207811180584e-08, + "logits/chosen": -1.9891743659973145, + "logits/rejected": -1.9845430850982666, + "logps/chosen": -66.43805694580078, + "logps/rejected": -209.28543090820312, + "loss": 0.3213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4074241816997528, + "rewards/margins": 1.2875527143478394, + "rewards/rejected": -0.8801285028457642, + "step": 12131 + }, + { + "epoch": 0.71, + "learning_rate": 2.1005529589033354e-08, + "logits/chosen": -1.6809749603271484, + "logits/rejected": -1.7562676668167114, + "logps/chosen": -242.56040954589844, + "logps/rejected": -300.28802490234375, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.466291904449463, + "rewards/margins": 2.8254501819610596, + "rewards/rejected": -0.35915833711624146, + "step": 12132 + }, + { + "epoch": 0.71, + "learning_rate": 2.0997852396911565e-08, + "logits/chosen": -2.061748743057251, + "logits/rejected": -2.057046413421631, + "logps/chosen": -0.0004012006684206426, + "logps/rejected": -113.26913452148438, + "loss": 0.431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.601956864760723e-05, + "rewards/margins": 1.533918023109436, + "rewards/rejected": -1.5339020490646362, + "step": 12133 + }, + { + "epoch": 0.71, + "learning_rate": 2.0990176235087888e-08, + "logits/chosen": -1.7522882223129272, + "logits/rejected": -1.7587841749191284, + "logps/chosen": -5.2582597732543945, + "logps/rejected": -156.87220764160156, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024729205295443535, + "rewards/margins": 4.630248546600342, + "rewards/rejected": -4.6055192947387695, + "step": 12134 + }, + { + "epoch": 0.71, + "learning_rate": 2.098250110383509e-08, + "logits/chosen": -1.9073402881622314, + "logits/rejected": -1.8973901271820068, + "logps/chosen": -175.48092651367188, + "logps/rejected": -271.9194641113281, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5591720342636108, + "rewards/margins": 0.04532623291015625, + "rewards/rejected": 1.5138458013534546, + "step": 12135 + }, + { + "epoch": 0.71, + "learning_rate": 2.0974827003425767e-08, + "logits/chosen": -2.031944751739502, + "logits/rejected": -2.003535032272339, + "logps/chosen": -134.97195434570312, + "logps/rejected": -233.68511962890625, + "loss": 0.1995, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7950302362442017, + "rewards/margins": 1.2173430919647217, + "rewards/rejected": 0.5776870846748352, + "step": 12136 + }, + { + "epoch": 0.71, + "learning_rate": 2.0967153934132585e-08, + "logits/chosen": -1.6877038478851318, + "logits/rejected": -1.7113951444625854, + "logps/chosen": -213.56814575195312, + "logps/rejected": -315.91412353515625, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7154510021209717, + "rewards/margins": 2.7743377685546875, + "rewards/rejected": -0.05888671800494194, + "step": 12137 + }, + { + "epoch": 0.71, + "learning_rate": 2.0959481896228082e-08, + "logits/chosen": -2.076840400695801, + "logits/rejected": -2.075233221054077, + "logps/chosen": -24.053890228271484, + "logps/rejected": -43.76439666748047, + "loss": 0.509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2516048550605774, + "rewards/margins": 0.5861911773681641, + "rewards/rejected": -0.33458635210990906, + "step": 12138 + }, + { + "epoch": 0.71, + "learning_rate": 2.0951810889984867e-08, + "logits/chosen": -1.9019320011138916, + "logits/rejected": -1.9272462129592896, + "logps/chosen": -281.06353759765625, + "logps/rejected": -456.0954895019531, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.944482445716858, + "rewards/margins": 5.171469211578369, + "rewards/rejected": -3.2269866466522217, + "step": 12139 + }, + { + "epoch": 0.71, + "learning_rate": 2.0944140915675403e-08, + "logits/chosen": -1.9831780195236206, + "logits/rejected": -1.9747021198272705, + "logps/chosen": -0.0010168890003114939, + "logps/rejected": -178.5123291015625, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.653570820461027e-05, + "rewards/margins": 2.408390998840332, + "rewards/rejected": -2.4084274768829346, + "step": 12140 + }, + { + "epoch": 0.71, + "learning_rate": 2.093647197357218e-08, + "logits/chosen": -1.8084986209869385, + "logits/rejected": -1.7942169904708862, + "logps/chosen": -170.859130859375, + "logps/rejected": -250.77667236328125, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9799317121505737, + "rewards/margins": 0.4176666736602783, + "rewards/rejected": 1.5622650384902954, + "step": 12141 + }, + { + "epoch": 0.71, + "learning_rate": 2.0928804063947635e-08, + "logits/chosen": -2.0740416049957275, + "logits/rejected": -2.0811257362365723, + "logps/chosen": -64.07964324951172, + "logps/rejected": -335.05096435546875, + "loss": 0.1709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6157218813896179, + "rewards/margins": 6.793102264404297, + "rewards/rejected": -6.177380561828613, + "step": 12142 + }, + { + "epoch": 0.71, + "learning_rate": 2.092113718707417e-08, + "logits/chosen": -1.876088261604309, + "logits/rejected": -1.87447988986969, + "logps/chosen": -7.057088805595413e-05, + "logps/rejected": -128.65435791015625, + "loss": 0.4967, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245172474133142e-07, + "rewards/margins": 1.0463114976882935, + "rewards/rejected": -1.0463119745254517, + "step": 12143 + }, + { + "epoch": 0.71, + "learning_rate": 2.091347134322418e-08, + "logits/chosen": -1.8321951627731323, + "logits/rejected": -1.8230714797973633, + "logps/chosen": -253.2158966064453, + "logps/rejected": -396.4777526855469, + "loss": 0.2007, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3223953247070312, + "rewards/margins": 0.7412490844726562, + "rewards/rejected": 2.581146240234375, + "step": 12144 + }, + { + "epoch": 0.71, + "learning_rate": 2.090580653266994e-08, + "logits/chosen": -1.9051384925842285, + "logits/rejected": -1.9050681591033936, + "logps/chosen": -53.005592346191406, + "logps/rejected": -338.2052307128906, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1881698369979858, + "rewards/margins": 3.623426914215088, + "rewards/rejected": -2.4352569580078125, + "step": 12145 + }, + { + "epoch": 0.71, + "learning_rate": 2.0898142755683768e-08, + "logits/chosen": -1.922261357307434, + "logits/rejected": -1.9203428030014038, + "logps/chosen": -0.7085760235786438, + "logps/rejected": -109.16085052490234, + "loss": 0.8776, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05768435075879097, + "rewards/margins": -0.6310646533966064, + "rewards/rejected": 0.5733802914619446, + "step": 12146 + }, + { + "epoch": 0.71, + "learning_rate": 2.089048001253792e-08, + "logits/chosen": -1.8287791013717651, + "logits/rejected": -1.914775013923645, + "logps/chosen": -261.7098693847656, + "logps/rejected": -545.7572631835938, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39386293292045593, + "rewards/margins": 5.812997341156006, + "rewards/rejected": -5.419134616851807, + "step": 12147 + }, + { + "epoch": 0.71, + "learning_rate": 2.0882818303504628e-08, + "logits/chosen": -1.8557331562042236, + "logits/rejected": -1.8695836067199707, + "logps/chosen": -11.295167922973633, + "logps/rejected": -157.61273193359375, + "loss": 0.4578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015623855404555798, + "rewards/margins": 1.2157093286514282, + "rewards/rejected": -1.2000855207443237, + "step": 12148 + }, + { + "epoch": 0.71, + "learning_rate": 2.0875157628856044e-08, + "logits/chosen": -2.165311813354492, + "logits/rejected": -2.170215129852295, + "logps/chosen": -22.179462432861328, + "logps/rejected": -106.152099609375, + "loss": 0.4138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7565967440605164, + "rewards/margins": 0.4543033540248871, + "rewards/rejected": 0.3022933900356293, + "step": 12149 + }, + { + "epoch": 0.71, + "learning_rate": 2.086749798886432e-08, + "logits/chosen": -1.9331982135772705, + "logits/rejected": -1.925320029258728, + "logps/chosen": -40.772865295410156, + "logps/rejected": -227.9823455810547, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0289306640625, + "rewards/margins": 2.3028855323791504, + "rewards/rejected": -2.2739548683166504, + "step": 12150 + }, + { + "epoch": 0.71, + "learning_rate": 2.0859839383801575e-08, + "logits/chosen": -1.656531810760498, + "logits/rejected": -1.6434860229492188, + "logps/chosen": -224.0111846923828, + "logps/rejected": -365.4431457519531, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3456146717071533, + "rewards/margins": 1.1639740467071533, + "rewards/rejected": 1.181640625, + "step": 12151 + }, + { + "epoch": 0.71, + "learning_rate": 2.085218181393989e-08, + "logits/chosen": -1.9379099607467651, + "logits/rejected": -1.9264345169067383, + "logps/chosen": -11.387837409973145, + "logps/rejected": -216.286865234375, + "loss": 0.3722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06334209442138672, + "rewards/margins": 3.688969612121582, + "rewards/rejected": -3.7523117065429688, + "step": 12152 + }, + { + "epoch": 0.71, + "learning_rate": 2.0844525279551247e-08, + "logits/chosen": -1.9526996612548828, + "logits/rejected": -1.9430310726165771, + "logps/chosen": -2.4318560463143513e-05, + "logps/rejected": -129.11386108398438, + "loss": 0.4394, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.344505175728045e-08, + "rewards/margins": 1.4587111473083496, + "rewards/rejected": -1.4587112665176392, + "step": 12153 + }, + { + "epoch": 0.71, + "learning_rate": 2.0836869780907717e-08, + "logits/chosen": -2.032745838165283, + "logits/rejected": -2.032397508621216, + "logps/chosen": -26.604511260986328, + "logps/rejected": -125.31844329833984, + "loss": 0.6065, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2096046507358551, + "rewards/margins": -0.020263671875, + "rewards/rejected": 0.2298683226108551, + "step": 12154 + }, + { + "epoch": 0.71, + "learning_rate": 2.0829215318281206e-08, + "logits/chosen": -1.8353524208068848, + "logits/rejected": -1.8806487321853638, + "logps/chosen": -205.31430053710938, + "logps/rejected": -421.812255859375, + "loss": 0.1736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3204406797885895, + "rewards/margins": 4.5635528564453125, + "rewards/rejected": -4.243112087249756, + "step": 12155 + }, + { + "epoch": 0.71, + "learning_rate": 2.0821561891943678e-08, + "logits/chosen": -2.0375783443450928, + "logits/rejected": -2.0386030673980713, + "logps/chosen": -234.54995727539062, + "logps/rejected": -437.164306640625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4716522693634033, + "rewards/margins": 5.512875556945801, + "rewards/rejected": -3.0412232875823975, + "step": 12156 + }, + { + "epoch": 0.71, + "learning_rate": 2.0813909502166965e-08, + "logits/chosen": -1.7780736684799194, + "logits/rejected": -1.7699459791183472, + "logps/chosen": -4.652349948883057, + "logps/rejected": -119.18058776855469, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1480794996023178, + "rewards/margins": 2.3223135471343994, + "rewards/rejected": -2.17423415184021, + "step": 12157 + }, + { + "epoch": 0.71, + "learning_rate": 2.0806258149222983e-08, + "logits/chosen": -1.9276080131530762, + "logits/rejected": -1.9426583051681519, + "logps/chosen": -155.40008544921875, + "logps/rejected": -470.34161376953125, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.860922336578369, + "rewards/margins": 7.282803535461426, + "rewards/rejected": -4.421881198883057, + "step": 12158 + }, + { + "epoch": 0.71, + "learning_rate": 2.0798607833383498e-08, + "logits/chosen": -1.9175443649291992, + "logits/rejected": -1.8914215564727783, + "logps/chosen": -193.50625610351562, + "logps/rejected": -336.19830322265625, + "loss": 0.1234, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.881140112876892, + "rewards/margins": 1.4374877214431763, + "rewards/rejected": 0.44365236163139343, + "step": 12159 + }, + { + "epoch": 0.71, + "learning_rate": 2.0790958554920295e-08, + "logits/chosen": -1.7932989597320557, + "logits/rejected": -1.7955472469329834, + "logps/chosen": -0.0001512738090241328, + "logps/rejected": -161.37142944335938, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.527154038034496e-06, + "rewards/margins": 3.452411413192749, + "rewards/rejected": -3.452414035797119, + "step": 12160 + }, + { + "epoch": 0.71, + "learning_rate": 2.078331031410513e-08, + "logits/chosen": -1.9068502187728882, + "logits/rejected": -1.886609435081482, + "logps/chosen": -236.84320068359375, + "logps/rejected": -390.33221435546875, + "loss": 0.1454, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.122546434402466, + "rewards/margins": 1.5209412574768066, + "rewards/rejected": 0.601605236530304, + "step": 12161 + }, + { + "epoch": 0.71, + "learning_rate": 2.0775663111209714e-08, + "logits/chosen": -1.8855767250061035, + "logits/rejected": -1.8399279117584229, + "logps/chosen": -209.89547729492188, + "logps/rejected": -295.3979187011719, + "loss": 0.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.99754798412323, + "rewards/margins": 1.8874374628067017, + "rewards/rejected": 0.11011047661304474, + "step": 12162 + }, + { + "epoch": 0.71, + "learning_rate": 2.0768016946505674e-08, + "logits/chosen": -1.912269115447998, + "logits/rejected": -1.9022754430770874, + "logps/chosen": -0.00012957790750078857, + "logps/rejected": -226.22149658203125, + "loss": 0.4043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.429117128282087e-05, + "rewards/margins": 2.0421011447906494, + "rewards/rejected": -2.0420868396759033, + "step": 12163 + }, + { + "epoch": 0.71, + "learning_rate": 2.076037182026466e-08, + "logits/chosen": -1.9053559303283691, + "logits/rejected": -1.9056316614151, + "logps/chosen": -9.964482307434082, + "logps/rejected": -55.83083724975586, + "loss": 0.4978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03368797525763512, + "rewards/margins": 0.6038417816162109, + "rewards/rejected": -0.5701538324356079, + "step": 12164 + }, + { + "epoch": 0.71, + "learning_rate": 2.075272773275827e-08, + "logits/chosen": -1.906856894493103, + "logits/rejected": -1.907649278640747, + "logps/chosen": -227.65216064453125, + "logps/rejected": -491.9350891113281, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6987245082855225, + "rewards/margins": 6.0879669189453125, + "rewards/rejected": -2.389242649078369, + "step": 12165 + }, + { + "epoch": 0.71, + "learning_rate": 2.0745084684258052e-08, + "logits/chosen": -1.845040202140808, + "logits/rejected": -1.82685387134552, + "logps/chosen": -186.62799072265625, + "logps/rejected": -270.9779052734375, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.93619704246521, + "rewards/margins": 3.0500550270080566, + "rewards/rejected": -0.11385803669691086, + "step": 12166 + }, + { + "epoch": 0.71, + "learning_rate": 2.0737442675035543e-08, + "logits/chosen": -1.918920874595642, + "logits/rejected": -1.9194917678833008, + "logps/chosen": -11.501023292541504, + "logps/rejected": -293.1209411621094, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4131811261177063, + "rewards/margins": 5.946421146392822, + "rewards/rejected": -5.533239841461182, + "step": 12167 + }, + { + "epoch": 0.71, + "learning_rate": 2.0729801705362194e-08, + "logits/chosen": -1.9877665042877197, + "logits/rejected": -1.9572397470474243, + "logps/chosen": -260.3238525390625, + "logps/rejected": -536.054931640625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.643322706222534, + "rewards/margins": 7.285138130187988, + "rewards/rejected": -4.641815185546875, + "step": 12168 + }, + { + "epoch": 0.71, + "learning_rate": 2.0722161775509466e-08, + "logits/chosen": -2.0021984577178955, + "logits/rejected": -1.9907996654510498, + "logps/chosen": -0.02102642133831978, + "logps/rejected": -125.92826080322266, + "loss": 0.4891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013393682427704334, + "rewards/margins": 1.0687159299850464, + "rewards/rejected": -1.055322289466858, + "step": 12169 + }, + { + "epoch": 0.71, + "learning_rate": 2.0714522885748766e-08, + "logits/chosen": -1.9635279178619385, + "logits/rejected": -1.9596190452575684, + "logps/chosen": -102.17589569091797, + "logps/rejected": -252.61279296875, + "loss": 0.1668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4424842596054077, + "rewards/margins": 1.874590277671814, + "rewards/rejected": -0.43210601806640625, + "step": 12170 + }, + { + "epoch": 0.71, + "learning_rate": 2.070688503635148e-08, + "logits/chosen": -1.8924014568328857, + "logits/rejected": -1.891305685043335, + "logps/chosen": -14.233896255493164, + "logps/rejected": -232.533203125, + "loss": 0.2743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.452592670917511, + "rewards/margins": 2.894819736480713, + "rewards/rejected": -2.4422271251678467, + "step": 12171 + }, + { + "epoch": 0.71, + "learning_rate": 2.0699248227588907e-08, + "logits/chosen": -1.932939052581787, + "logits/rejected": -1.9128919839859009, + "logps/chosen": -195.4385986328125, + "logps/rejected": -384.0420837402344, + "loss": 0.0825, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2150681018829346, + "rewards/margins": 2.282139539718628, + "rewards/rejected": -0.06707153469324112, + "step": 12172 + }, + { + "epoch": 0.71, + "learning_rate": 2.069161245973236e-08, + "logits/chosen": -1.9183814525604248, + "logits/rejected": -1.931558609008789, + "logps/chosen": -219.7193145751953, + "logps/rejected": -590.8453979492188, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.737481713294983, + "rewards/margins": 6.610803127288818, + "rewards/rejected": -4.873321533203125, + "step": 12173 + }, + { + "epoch": 0.71, + "learning_rate": 2.0683977733053105e-08, + "logits/chosen": -1.8583800792694092, + "logits/rejected": -1.8432040214538574, + "logps/chosen": -179.52163696289062, + "logps/rejected": -228.46578979492188, + "loss": 0.3071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6372894644737244, + "rewards/margins": 1.1405563354492188, + "rewards/rejected": -0.5032669305801392, + "step": 12174 + }, + { + "epoch": 0.71, + "learning_rate": 2.0676344047822375e-08, + "logits/chosen": -2.0137217044830322, + "logits/rejected": -2.0184671878814697, + "logps/chosen": -4.3842453956604, + "logps/rejected": -166.1070556640625, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19118285179138184, + "rewards/margins": 2.585875988006592, + "rewards/rejected": -2.39469313621521, + "step": 12175 + }, + { + "epoch": 0.71, + "learning_rate": 2.0668711404311307e-08, + "logits/chosen": -1.8795486688613892, + "logits/rejected": -1.896071195602417, + "logps/chosen": -147.89186096191406, + "logps/rejected": -315.5328369140625, + "loss": 0.2575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9486923217773438, + "rewards/margins": 0.8537948727607727, + "rewards/rejected": 0.09489746391773224, + "step": 12176 + }, + { + "epoch": 0.71, + "learning_rate": 2.0661079802791127e-08, + "logits/chosen": -1.9710458517074585, + "logits/rejected": -1.9658279418945312, + "logps/chosen": -0.0067418706603348255, + "logps/rejected": -195.76451110839844, + "loss": 0.3365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005203019827604294, + "rewards/margins": 3.2560181617736816, + "rewards/rejected": -3.2565383911132812, + "step": 12177 + }, + { + "epoch": 0.71, + "learning_rate": 2.065344924353288e-08, + "logits/chosen": -1.8859317302703857, + "logits/rejected": -1.855461597442627, + "logps/chosen": -183.838623046875, + "logps/rejected": -504.89190673828125, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.875115990638733, + "rewards/margins": 6.415091037750244, + "rewards/rejected": -4.539975166320801, + "step": 12178 + }, + { + "epoch": 0.71, + "learning_rate": 2.0645819726807685e-08, + "logits/chosen": -1.9231681823730469, + "logits/rejected": -1.92255699634552, + "logps/chosen": -8.99651050567627, + "logps/rejected": -251.1835174560547, + "loss": 0.2627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33881378173828125, + "rewards/margins": 2.9997804164886475, + "rewards/rejected": -2.660966634750366, + "step": 12179 + }, + { + "epoch": 0.71, + "learning_rate": 2.063819125288652e-08, + "logits/chosen": -1.9093852043151855, + "logits/rejected": -1.9090663194656372, + "logps/chosen": -87.7793960571289, + "logps/rejected": -317.3751525878906, + "loss": 0.1979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.540362536907196, + "rewards/margins": 4.0016632080078125, + "rewards/rejected": -3.4613006114959717, + "step": 12180 + }, + { + "epoch": 0.71, + "learning_rate": 2.063056382204047e-08, + "logits/chosen": -1.675598382949829, + "logits/rejected": -1.663439393043518, + "logps/chosen": -256.14361572265625, + "logps/rejected": -506.00164794921875, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.611132860183716, + "rewards/margins": 2.93804931640625, + "rewards/rejected": -0.32691651582717896, + "step": 12181 + }, + { + "epoch": 0.71, + "learning_rate": 2.0622937434540434e-08, + "logits/chosen": -1.6974414587020874, + "logits/rejected": -1.694005012512207, + "logps/chosen": -0.007831458002328873, + "logps/rejected": -352.9525146484375, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007536190678365529, + "rewards/margins": 5.312524795532227, + "rewards/rejected": -5.3132781982421875, + "step": 12182 + }, + { + "epoch": 0.71, + "learning_rate": 2.0615312090657354e-08, + "logits/chosen": -1.9014499187469482, + "logits/rejected": -1.8872199058532715, + "logps/chosen": -22.97608184814453, + "logps/rejected": -188.43060302734375, + "loss": 0.4699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.357090562582016, + "rewards/margins": 1.8322924375534058, + "rewards/rejected": -2.189383029937744, + "step": 12183 + }, + { + "epoch": 0.71, + "learning_rate": 2.060768779066213e-08, + "logits/chosen": -1.7867337465286255, + "logits/rejected": -1.8074052333831787, + "logps/chosen": -205.7486572265625, + "logps/rejected": -387.26763916015625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3135101795196533, + "rewards/margins": 4.581808567047119, + "rewards/rejected": -2.268298387527466, + "step": 12184 + }, + { + "epoch": 0.71, + "learning_rate": 2.0600064534825606e-08, + "logits/chosen": -2.0127639770507812, + "logits/rejected": -2.0148766040802, + "logps/chosen": -0.007209984585642815, + "logps/rejected": -268.94683837890625, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003191730473190546, + "rewards/margins": 5.260267734527588, + "rewards/rejected": -5.257075786590576, + "step": 12185 + }, + { + "epoch": 0.71, + "learning_rate": 2.059244232341862e-08, + "logits/chosen": -1.7857354879379272, + "logits/rejected": -1.792025089263916, + "logps/chosen": -0.10129483789205551, + "logps/rejected": -173.9674835205078, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002606145339086652, + "rewards/margins": 3.122283935546875, + "rewards/rejected": -3.119677782058716, + "step": 12186 + }, + { + "epoch": 0.71, + "learning_rate": 2.0584821156711913e-08, + "logits/chosen": -1.9859464168548584, + "logits/rejected": -1.9806181192398071, + "logps/chosen": -75.851318359375, + "logps/rejected": -209.7744140625, + "loss": 0.4581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004493713495321572, + "rewards/margins": 0.7824882864952087, + "rewards/rejected": -0.7829376459121704, + "step": 12187 + }, + { + "epoch": 0.71, + "learning_rate": 2.0577201034976244e-08, + "logits/chosen": -1.9471118450164795, + "logits/rejected": -1.9470210075378418, + "logps/chosen": -34.54569625854492, + "logps/rejected": -69.63099670410156, + "loss": 0.6558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28410759568214417, + "rewards/margins": 0.17579230666160583, + "rewards/rejected": -0.45989990234375, + "step": 12188 + }, + { + "epoch": 0.71, + "learning_rate": 2.0569581958482313e-08, + "logits/chosen": -1.9950170516967773, + "logits/rejected": -1.9941786527633667, + "logps/chosen": -7.71959114074707, + "logps/rejected": -104.70356750488281, + "loss": 0.4109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2846445143222809, + "rewards/margins": 1.536564826965332, + "rewards/rejected": -1.2519203424453735, + "step": 12189 + }, + { + "epoch": 0.71, + "learning_rate": 2.0561963927500813e-08, + "logits/chosen": -1.9350947141647339, + "logits/rejected": -1.9351539611816406, + "logps/chosen": -50.37006378173828, + "logps/rejected": -153.94769287109375, + "loss": 0.4344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3070968687534332, + "rewards/margins": 0.6334946155548096, + "rewards/rejected": -0.32639771699905396, + "step": 12190 + }, + { + "epoch": 0.71, + "learning_rate": 2.055434694230233e-08, + "logits/chosen": -1.8449714183807373, + "logits/rejected": -1.864754557609558, + "logps/chosen": -198.1314697265625, + "logps/rejected": -317.3875732421875, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3515260219573975, + "rewards/margins": 3.7982819080352783, + "rewards/rejected": -0.446755975484848, + "step": 12191 + }, + { + "epoch": 0.71, + "learning_rate": 2.054673100315748e-08, + "logits/chosen": -1.8526475429534912, + "logits/rejected": -1.855986475944519, + "logps/chosen": -30.67691421508789, + "logps/rejected": -175.41641235351562, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0729824304580688, + "rewards/margins": 0.8703274130821228, + "rewards/rejected": 0.20265503227710724, + "step": 12192 + }, + { + "epoch": 0.71, + "learning_rate": 2.0539116110336818e-08, + "logits/chosen": -1.9716582298278809, + "logits/rejected": -1.974609375, + "logps/chosen": -3.0589852333068848, + "logps/rejected": -130.244140625, + "loss": 0.4085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.154265359044075, + "rewards/margins": 1.597730040550232, + "rewards/rejected": -1.4434646368026733, + "step": 12193 + }, + { + "epoch": 0.71, + "learning_rate": 2.0531502264110873e-08, + "logits/chosen": -1.9330071210861206, + "logits/rejected": -1.92366361618042, + "logps/chosen": -115.77461242675781, + "logps/rejected": -176.57058715820312, + "loss": 0.1483, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5790191888809204, + "rewards/margins": 1.8214324712753296, + "rewards/rejected": -0.24241332709789276, + "step": 12194 + }, + { + "epoch": 0.71, + "learning_rate": 2.0523889464750084e-08, + "logits/chosen": -2.1144306659698486, + "logits/rejected": -2.105529308319092, + "logps/chosen": -21.303478240966797, + "logps/rejected": -318.663330078125, + "loss": 0.2598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38572007417678833, + "rewards/margins": 4.76516056060791, + "rewards/rejected": -4.3794403076171875, + "step": 12195 + }, + { + "epoch": 0.71, + "learning_rate": 2.0516277712524954e-08, + "logits/chosen": -1.959601640701294, + "logits/rejected": -1.9582678079605103, + "logps/chosen": -34.82105255126953, + "logps/rejected": -100.67562866210938, + "loss": 0.5087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07753753662109375, + "rewards/margins": 0.4901931881904602, + "rewards/rejected": -0.41265565156936646, + "step": 12196 + }, + { + "epoch": 0.71, + "learning_rate": 2.0508667007705847e-08, + "logits/chosen": -1.9414024353027344, + "logits/rejected": -1.9344089031219482, + "logps/chosen": -0.3679574429988861, + "logps/rejected": -177.04629516601562, + "loss": 0.4157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022593187168240547, + "rewards/margins": 1.8594229221343994, + "rewards/rejected": -1.8820160627365112, + "step": 12197 + }, + { + "epoch": 0.71, + "learning_rate": 2.050105735056316e-08, + "logits/chosen": -1.9923815727233887, + "logits/rejected": -2.073117256164551, + "logps/chosen": -132.0551300048828, + "logps/rejected": -300.07275390625, + "loss": 0.1695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008152770809829235, + "rewards/margins": 2.887193202972412, + "rewards/rejected": -2.879040479660034, + "step": 12198 + }, + { + "epoch": 0.71, + "learning_rate": 2.049344874136718e-08, + "logits/chosen": -1.816629409790039, + "logits/rejected": -1.8699215650558472, + "logps/chosen": -234.08688354492188, + "logps/rejected": -367.71484375, + "loss": 0.0991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6385620832443237, + "rewards/margins": 1.8944885730743408, + "rewards/rejected": -0.2559265196323395, + "step": 12199 + }, + { + "epoch": 0.71, + "learning_rate": 2.0485841180388264e-08, + "logits/chosen": -1.784767508506775, + "logits/rejected": -1.7948800325393677, + "logps/chosen": -134.15603637695312, + "logps/rejected": -286.40228271484375, + "loss": 0.1876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.537286400794983, + "rewards/margins": 0.9271484613418579, + "rewards/rejected": 0.610137939453125, + "step": 12200 + }, + { + "epoch": 0.71, + "learning_rate": 2.047823466789662e-08, + "logits/chosen": -1.9440789222717285, + "logits/rejected": -1.9336507320404053, + "logps/chosen": -8.437397003173828, + "logps/rejected": -171.3052215576172, + "loss": 0.3667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2475452423095703, + "rewards/margins": 1.519052505493164, + "rewards/rejected": -1.2715072631835938, + "step": 12201 + }, + { + "epoch": 0.71, + "learning_rate": 2.047062920416249e-08, + "logits/chosen": -1.8654541969299316, + "logits/rejected": -1.8683388233184814, + "logps/chosen": -47.44654083251953, + "logps/rejected": -246.00270080566406, + "loss": 0.2343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4678535461425781, + "rewards/margins": 3.6679344177246094, + "rewards/rejected": -3.2000808715820312, + "step": 12202 + }, + { + "epoch": 0.71, + "learning_rate": 2.046302478945606e-08, + "logits/chosen": -1.9962204694747925, + "logits/rejected": -1.9840606451034546, + "logps/chosen": -58.63688278198242, + "logps/rejected": -165.40139770507812, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8427128195762634, + "rewards/margins": 0.5787968039512634, + "rewards/rejected": 0.263916015625, + "step": 12203 + }, + { + "epoch": 0.71, + "learning_rate": 2.045542142404746e-08, + "logits/chosen": -1.9651157855987549, + "logits/rejected": -1.9740256071090698, + "logps/chosen": -153.20529174804688, + "logps/rejected": -295.7882080078125, + "loss": 0.4017, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.12956702709198, + "rewards/margins": -0.0028731822967529297, + "rewards/rejected": 1.132440209388733, + "step": 12204 + }, + { + "epoch": 0.71, + "learning_rate": 2.0447819108206832e-08, + "logits/chosen": -1.8535817861557007, + "logits/rejected": -1.8794997930526733, + "logps/chosen": -175.727783203125, + "logps/rejected": -269.556640625, + "loss": 0.0672, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2844512462615967, + "rewards/margins": 2.16408371925354, + "rewards/rejected": 0.12036743015050888, + "step": 12205 + }, + { + "epoch": 0.71, + "learning_rate": 2.0440217842204204e-08, + "logits/chosen": -1.8120898008346558, + "logits/rejected": -1.8072690963745117, + "logps/chosen": -12.764093399047852, + "logps/rejected": -99.65437316894531, + "loss": 0.4008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15718518197536469, + "rewards/margins": 1.321711540222168, + "rewards/rejected": -1.164526343345642, + "step": 12206 + }, + { + "epoch": 0.71, + "learning_rate": 2.0432617626309633e-08, + "logits/chosen": -1.848262071609497, + "logits/rejected": -1.8502345085144043, + "logps/chosen": -3.1378631591796875, + "logps/rejected": -193.89340209960938, + "loss": 0.3279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08998682349920273, + "rewards/margins": 4.253523826599121, + "rewards/rejected": -4.343510627746582, + "step": 12207 + }, + { + "epoch": 0.71, + "learning_rate": 2.0425018460793115e-08, + "logits/chosen": -1.8609695434570312, + "logits/rejected": -1.8561313152313232, + "logps/chosen": -0.024298034608364105, + "logps/rejected": -237.19024658203125, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03697267547249794, + "rewards/margins": 4.122946739196777, + "rewards/rejected": -4.085974216461182, + "step": 12208 + }, + { + "epoch": 0.71, + "learning_rate": 2.0417420345924624e-08, + "logits/chosen": -2.1562516689300537, + "logits/rejected": -2.1469566822052, + "logps/chosen": -0.6298521757125854, + "logps/rejected": -224.21456909179688, + "loss": 0.3646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04913128912448883, + "rewards/margins": 3.7782094478607178, + "rewards/rejected": -3.827340841293335, + "step": 12209 + }, + { + "epoch": 0.71, + "learning_rate": 2.0409823281974047e-08, + "logits/chosen": -2.062053680419922, + "logits/rejected": -2.0278072357177734, + "logps/chosen": -126.11553955078125, + "logps/rejected": -358.9325256347656, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.014691114425659, + "rewards/margins": 0.41530752182006836, + "rewards/rejected": 1.5993835926055908, + "step": 12210 + }, + { + "epoch": 0.71, + "learning_rate": 2.0402227269211293e-08, + "logits/chosen": -2.0006377696990967, + "logits/rejected": -2.006134033203125, + "logps/chosen": -32.231224060058594, + "logps/rejected": -182.53321838378906, + "loss": 0.1713, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0424816608428955, + "rewards/margins": 2.7261624336242676, + "rewards/rejected": -1.683680772781372, + "step": 12211 + }, + { + "epoch": 0.71, + "learning_rate": 2.03946323079062e-08, + "logits/chosen": -1.8390331268310547, + "logits/rejected": -1.8315727710723877, + "logps/chosen": -184.43612670898438, + "logps/rejected": -232.3732452392578, + "loss": 0.5168, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.0619125366210938, + "rewards/margins": -0.5611329078674316, + "rewards/rejected": 2.6230454444885254, + "step": 12212 + }, + { + "epoch": 0.71, + "learning_rate": 2.0387038398328604e-08, + "logits/chosen": -1.6847902536392212, + "logits/rejected": -1.6883833408355713, + "logps/chosen": -158.94448852539062, + "logps/rejected": -184.11497497558594, + "loss": 0.4781, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.4343079328536987, + "rewards/margins": -0.1474364995956421, + "rewards/rejected": 1.5817444324493408, + "step": 12213 + }, + { + "epoch": 0.71, + "learning_rate": 2.037944554074822e-08, + "logits/chosen": -2.0454277992248535, + "logits/rejected": -2.02113676071167, + "logps/chosen": -18.06982421875, + "logps/rejected": -247.49200439453125, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21744728088378906, + "rewards/margins": 3.4792943000793457, + "rewards/rejected": -3.2618470191955566, + "step": 12214 + }, + { + "epoch": 0.71, + "learning_rate": 2.0371853735434856e-08, + "logits/chosen": -1.6986485719680786, + "logits/rejected": -1.6429930925369263, + "logps/chosen": -224.6928253173828, + "logps/rejected": -236.95230102539062, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.296574354171753, + "rewards/margins": 2.8226377964019775, + "rewards/rejected": 0.4739364683628082, + "step": 12215 + }, + { + "epoch": 0.71, + "learning_rate": 2.0364262982658153e-08, + "logits/chosen": -1.909700870513916, + "logits/rejected": -1.9039424657821655, + "logps/chosen": -0.019122524186968803, + "logps/rejected": -247.40408325195312, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001353099592961371, + "rewards/margins": 8.395524978637695, + "rewards/rejected": -8.396878242492676, + "step": 12216 + }, + { + "epoch": 0.71, + "learning_rate": 2.035667328268782e-08, + "logits/chosen": -1.8737006187438965, + "logits/rejected": -1.8608285188674927, + "logps/chosen": -171.99591064453125, + "logps/rejected": -344.1189270019531, + "loss": 0.0848, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8008179664611816, + "rewards/margins": 1.9497040510177612, + "rewards/rejected": 0.8511139154434204, + "step": 12217 + }, + { + "epoch": 0.71, + "learning_rate": 2.0349084635793405e-08, + "logits/chosen": -1.9847288131713867, + "logits/rejected": -1.968634843826294, + "logps/chosen": -7.718687057495117, + "logps/rejected": -263.98736572265625, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04173612594604492, + "rewards/margins": 1.8920453786849976, + "rewards/rejected": -1.9337815046310425, + "step": 12218 + }, + { + "epoch": 0.71, + "learning_rate": 2.0341497042244587e-08, + "logits/chosen": -1.8554106950759888, + "logits/rejected": -1.8397294282913208, + "logps/chosen": -311.7387390136719, + "logps/rejected": -631.5687866210938, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.844046115875244, + "rewards/margins": 7.776937961578369, + "rewards/rejected": -4.932891845703125, + "step": 12219 + }, + { + "epoch": 0.71, + "learning_rate": 2.0333910502310846e-08, + "logits/chosen": -1.961883544921875, + "logits/rejected": -1.9694730043411255, + "logps/chosen": -262.7627868652344, + "logps/rejected": -467.26300048828125, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.474447727203369, + "rewards/margins": 6.919278144836426, + "rewards/rejected": -4.444830417633057, + "step": 12220 + }, + { + "epoch": 0.71, + "learning_rate": 2.0326325016261743e-08, + "logits/chosen": -1.8644163608551025, + "logits/rejected": -1.8617767095565796, + "logps/chosen": -33.89647674560547, + "logps/rejected": -121.1302719116211, + "loss": 1.066, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1043914556503296, + "rewards/margins": -0.015053510665893555, + "rewards/rejected": -1.089337944984436, + "step": 12221 + }, + { + "epoch": 0.71, + "learning_rate": 2.031874058436668e-08, + "logits/chosen": -1.9266513586044312, + "logits/rejected": -1.9112015962600708, + "logps/chosen": -182.68881225585938, + "logps/rejected": -327.39300537109375, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3522369861602783, + "rewards/margins": 4.012075901031494, + "rewards/rejected": -1.6598389148712158, + "step": 12222 + }, + { + "epoch": 0.71, + "learning_rate": 2.031115720689518e-08, + "logits/chosen": -1.6534520387649536, + "logits/rejected": -1.641645073890686, + "logps/chosen": -4.5308518409729, + "logps/rejected": -243.36508178710938, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07966559380292892, + "rewards/margins": 3.404362916946411, + "rewards/rejected": -3.4840285778045654, + "step": 12223 + }, + { + "epoch": 0.71, + "learning_rate": 2.0303574884116577e-08, + "logits/chosen": -1.7693761587142944, + "logits/rejected": -1.7637614011764526, + "logps/chosen": -27.60798454284668, + "logps/rejected": -239.52972412109375, + "loss": 0.2026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5823694467544556, + "rewards/margins": 3.568039894104004, + "rewards/rejected": -2.985670566558838, + "step": 12224 + }, + { + "epoch": 0.71, + "learning_rate": 2.0295993616300256e-08, + "logits/chosen": -1.9599322080612183, + "logits/rejected": -1.9702526330947876, + "logps/chosen": -122.08061218261719, + "logps/rejected": -416.0592956542969, + "loss": 0.2076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5038711428642273, + "rewards/margins": 7.934507846832275, + "rewards/rejected": -7.430636882781982, + "step": 12225 + }, + { + "epoch": 0.71, + "learning_rate": 2.0288413403715542e-08, + "logits/chosen": -1.7719088792800903, + "logits/rejected": -1.7387815713882446, + "logps/chosen": -107.8056640625, + "logps/rejected": -150.45565795898438, + "loss": 0.2471, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.55113685131073, + "rewards/margins": 1.0123977661132812, + "rewards/rejected": 0.538739025592804, + "step": 12226 + }, + { + "epoch": 0.71, + "learning_rate": 2.0280834246631716e-08, + "logits/chosen": -2.0326101779937744, + "logits/rejected": -2.0010457038879395, + "logps/chosen": -207.16854858398438, + "logps/rejected": -325.519287109375, + "loss": 0.1163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.83256995677948, + "rewards/margins": 1.53044593334198, + "rewards/rejected": 0.3021240234375, + "step": 12227 + }, + { + "epoch": 0.71, + "learning_rate": 2.0273256145318052e-08, + "logits/chosen": -1.983072280883789, + "logits/rejected": -1.9862972497940063, + "logps/chosen": -19.026758193969727, + "logps/rejected": -178.4378662109375, + "loss": 0.2487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6271578073501587, + "rewards/margins": 3.359689712524414, + "rewards/rejected": -2.732531785964966, + "step": 12228 + }, + { + "epoch": 0.71, + "learning_rate": 2.026567910004372e-08, + "logits/chosen": -1.9150696992874146, + "logits/rejected": -1.9163005352020264, + "logps/chosen": -0.00012778965174220502, + "logps/rejected": -254.426513671875, + "loss": 0.3185, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5417809815262444e-06, + "rewards/margins": 6.6778974533081055, + "rewards/rejected": -6.6779022216796875, + "step": 12229 + }, + { + "epoch": 0.71, + "learning_rate": 2.0258103111077914e-08, + "logits/chosen": -2.0098438262939453, + "logits/rejected": -2.056244373321533, + "logps/chosen": -145.9505615234375, + "logps/rejected": -493.00323486328125, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.19952392578125, + "rewards/margins": 6.443158149719238, + "rewards/rejected": -5.243634223937988, + "step": 12230 + }, + { + "epoch": 0.71, + "learning_rate": 2.0250528178689762e-08, + "logits/chosen": -1.8699713945388794, + "logits/rejected": -1.8744863271713257, + "logps/chosen": -199.30075073242188, + "logps/rejected": -449.6575927734375, + "loss": 0.0682, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.163705587387085, + "rewards/margins": 3.316586494445801, + "rewards/rejected": -1.1528809070587158, + "step": 12231 + }, + { + "epoch": 0.71, + "learning_rate": 2.024295430314839e-08, + "logits/chosen": -1.9610668420791626, + "logits/rejected": -1.9685453176498413, + "logps/chosen": -238.05673217773438, + "logps/rejected": -463.4161071777344, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6829315423965454, + "rewards/margins": 2.9610838890075684, + "rewards/rejected": -1.2781524658203125, + "step": 12232 + }, + { + "epoch": 0.71, + "learning_rate": 2.0235381484722824e-08, + "logits/chosen": -1.9916428327560425, + "logits/rejected": -1.9847532510757446, + "logps/chosen": -3.444760322570801, + "logps/rejected": -120.885009765625, + "loss": 0.6717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1885869950056076, + "rewards/margins": 0.2900812029838562, + "rewards/rejected": -0.478668212890625, + "step": 12233 + }, + { + "epoch": 0.71, + "learning_rate": 2.02278097236821e-08, + "logits/chosen": -2.0567257404327393, + "logits/rejected": -2.0526628494262695, + "logps/chosen": -38.77312088012695, + "logps/rejected": -233.54440307617188, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0945781469345093, + "rewards/margins": 0.7135401964187622, + "rewards/rejected": 0.3810379207134247, + "step": 12234 + }, + { + "epoch": 0.71, + "learning_rate": 2.0220239020295206e-08, + "logits/chosen": -1.8660353422164917, + "logits/rejected": -1.825490117073059, + "logps/chosen": -236.42721557617188, + "logps/rejected": -484.9375, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.473388671875, + "rewards/margins": 2.392169237136841, + "rewards/rejected": 0.08121948689222336, + "step": 12235 + }, + { + "epoch": 0.71, + "learning_rate": 2.021266937483111e-08, + "logits/chosen": -1.7939168214797974, + "logits/rejected": -1.8040422201156616, + "logps/chosen": -272.73956298828125, + "logps/rejected": -267.33990478515625, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5963897705078125, + "rewards/margins": 3.1853179931640625, + "rewards/rejected": 0.41107177734375, + "step": 12236 + }, + { + "epoch": 0.71, + "learning_rate": 2.0205100787558665e-08, + "logits/chosen": -1.844207763671875, + "logits/rejected": -1.8488825559616089, + "logps/chosen": -210.91041564941406, + "logps/rejected": -401.36962890625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6527726650238037, + "rewards/margins": 5.972120761871338, + "rewards/rejected": -3.319348096847534, + "step": 12237 + }, + { + "epoch": 0.71, + "learning_rate": 2.0197533258746824e-08, + "logits/chosen": -1.8491932153701782, + "logits/rejected": -1.8356586694717407, + "logps/chosen": -0.00013445991498883814, + "logps/rejected": -252.081298828125, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5972218534443527e-06, + "rewards/margins": 7.477479934692383, + "rewards/rejected": -7.477481365203857, + "step": 12238 + }, + { + "epoch": 0.71, + "learning_rate": 2.018996678866436e-08, + "logits/chosen": -1.887168049812317, + "logits/rejected": -1.883171558380127, + "logps/chosen": -183.0454864501953, + "logps/rejected": -353.9285888671875, + "loss": 0.0876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5641937255859375, + "rewards/margins": 3.5224609375, + "rewards/rejected": -1.9582672119140625, + "step": 12239 + }, + { + "epoch": 0.71, + "learning_rate": 2.0182401377580104e-08, + "logits/chosen": -1.7762285470962524, + "logits/rejected": -1.771486520767212, + "logps/chosen": -10.079931259155273, + "logps/rejected": -156.48529052734375, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0774131789803505, + "rewards/margins": 1.9253746271133423, + "rewards/rejected": -2.0027878284454346, + "step": 12240 + }, + { + "epoch": 0.71, + "learning_rate": 2.0174837025762776e-08, + "logits/chosen": -1.8795604705810547, + "logits/rejected": -1.8627721071243286, + "logps/chosen": -108.77488708496094, + "logps/rejected": -355.3121337890625, + "loss": 0.0452, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7187385559082031, + "rewards/margins": 3.6133065223693848, + "rewards/rejected": -1.894567847251892, + "step": 12241 + }, + { + "epoch": 0.71, + "learning_rate": 2.0167273733481162e-08, + "logits/chosen": -1.8581759929656982, + "logits/rejected": -1.8537321090698242, + "logps/chosen": -202.86849975585938, + "logps/rejected": -362.6405029296875, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.9225404262542725, + "rewards/margins": 3.279071092605591, + "rewards/rejected": 0.6434692740440369, + "step": 12242 + }, + { + "epoch": 0.71, + "learning_rate": 2.0159711501003895e-08, + "logits/chosen": -1.9215035438537598, + "logits/rejected": -1.9226669073104858, + "logps/chosen": -14.023436546325684, + "logps/rejected": -314.02850341796875, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07771167904138565, + "rewards/margins": 7.000486850738525, + "rewards/rejected": -6.9227752685546875, + "step": 12243 + }, + { + "epoch": 0.71, + "learning_rate": 2.0152150328599638e-08, + "logits/chosen": -1.6632808446884155, + "logits/rejected": -1.680134892463684, + "logps/chosen": -184.8634490966797, + "logps/rejected": -230.44021606445312, + "loss": 0.1003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6998398303985596, + "rewards/margins": 1.6264450550079346, + "rewards/rejected": 0.073394775390625, + "step": 12244 + }, + { + "epoch": 0.71, + "learning_rate": 2.0144590216537006e-08, + "logits/chosen": -1.9273020029067993, + "logits/rejected": -1.9248840808868408, + "logps/chosen": -73.32304382324219, + "logps/rejected": -194.56912231445312, + "loss": 0.5002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5239837765693665, + "rewards/margins": 1.6903610229492188, + "rewards/rejected": -2.2143447399139404, + "step": 12245 + }, + { + "epoch": 0.71, + "learning_rate": 2.0137031165084568e-08, + "logits/chosen": -1.8298062086105347, + "logits/rejected": -1.828779935836792, + "logps/chosen": -192.24740600585938, + "logps/rejected": -297.80517578125, + "loss": 0.1732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1053558588027954, + "rewards/margins": 1.2837432622909546, + "rewards/rejected": -0.17838744819164276, + "step": 12246 + }, + { + "epoch": 0.71, + "learning_rate": 2.0129473174510876e-08, + "logits/chosen": -1.9738277196884155, + "logits/rejected": -1.958735704421997, + "logps/chosen": -33.334205627441406, + "logps/rejected": -227.64065551757812, + "loss": 0.4263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06229591369628906, + "rewards/margins": 2.7692203521728516, + "rewards/rejected": -2.7069244384765625, + "step": 12247 + }, + { + "epoch": 0.71, + "learning_rate": 2.0121916245084387e-08, + "logits/chosen": -1.9012322425842285, + "logits/rejected": -1.9593336582183838, + "logps/chosen": -247.83767700195312, + "logps/rejected": -278.1728515625, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.04099440574646, + "rewards/margins": 6.186912536621094, + "rewards/rejected": -4.145918369293213, + "step": 12248 + }, + { + "epoch": 0.71, + "learning_rate": 2.0114360377073587e-08, + "logits/chosen": -2.05800724029541, + "logits/rejected": -2.0560543537139893, + "logps/chosen": -0.18087734282016754, + "logps/rejected": -128.01019287109375, + "loss": 0.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06488671153783798, + "rewards/margins": 1.7552447319030762, + "rewards/rejected": -1.69035804271698, + "step": 12249 + }, + { + "epoch": 0.71, + "learning_rate": 2.0106805570746894e-08, + "logits/chosen": -1.8790849447250366, + "logits/rejected": -1.860888957977295, + "logps/chosen": -212.66567993164062, + "logps/rejected": -300.2077331542969, + "loss": 0.0669, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5419113636016846, + "rewards/margins": 2.536299228668213, + "rewards/rejected": 0.005612182896584272, + "step": 12250 + }, + { + "epoch": 0.71, + "learning_rate": 2.0099251826372703e-08, + "logits/chosen": -1.8586674928665161, + "logits/rejected": -1.8600029945373535, + "logps/chosen": -30.80176544189453, + "logps/rejected": -87.38184356689453, + "loss": 0.6444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0567752830684185, + "rewards/margins": 0.14961890876293182, + "rewards/rejected": -0.20639419555664062, + "step": 12251 + }, + { + "epoch": 0.71, + "learning_rate": 2.0091699144219336e-08, + "logits/chosen": -1.8037971258163452, + "logits/rejected": -1.8034074306488037, + "logps/chosen": -0.00022052862914279103, + "logps/rejected": -212.2342071533203, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.667239282862283e-06, + "rewards/margins": 5.964087963104248, + "rewards/rejected": -5.964097499847412, + "step": 12252 + }, + { + "epoch": 0.71, + "learning_rate": 2.0084147524555113e-08, + "logits/chosen": -2.1313719749450684, + "logits/rejected": -2.127633810043335, + "logps/chosen": -4.1159281730651855, + "logps/rejected": -92.52931213378906, + "loss": 0.3734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06824522465467453, + "rewards/margins": 2.0256779193878174, + "rewards/rejected": -1.9574326276779175, + "step": 12253 + }, + { + "epoch": 0.71, + "learning_rate": 2.0076596967648297e-08, + "logits/chosen": -1.9038337469100952, + "logits/rejected": -1.8998396396636963, + "logps/chosen": -165.39642333984375, + "logps/rejected": -238.47378540039062, + "loss": 0.1736, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.76854407787323, + "rewards/margins": 1.3390412330627441, + "rewards/rejected": 0.4295028746128082, + "step": 12254 + }, + { + "epoch": 0.71, + "learning_rate": 2.0069047473767148e-08, + "logits/chosen": -1.971295952796936, + "logits/rejected": -1.9389594793319702, + "logps/chosen": -267.5618896484375, + "logps/rejected": -430.9852294921875, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4608917236328125, + "rewards/margins": 3.7860107421875, + "rewards/rejected": -0.3251190185546875, + "step": 12255 + }, + { + "epoch": 0.71, + "learning_rate": 2.0061499043179808e-08, + "logits/chosen": -1.8490222692489624, + "logits/rejected": -1.850066900253296, + "logps/chosen": -0.9818524718284607, + "logps/rejected": -125.61786651611328, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04392942786216736, + "rewards/margins": 2.4299774169921875, + "rewards/rejected": -2.3860480785369873, + "step": 12256 + }, + { + "epoch": 0.71, + "learning_rate": 2.0053951676154502e-08, + "logits/chosen": -1.9472905397415161, + "logits/rejected": -2.076204776763916, + "logps/chosen": -246.67723083496094, + "logps/rejected": -272.8848571777344, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2331924438476562, + "rewards/margins": 4.815394401550293, + "rewards/rejected": -2.582202196121216, + "step": 12257 + }, + { + "epoch": 0.71, + "learning_rate": 2.0046405372959297e-08, + "logits/chosen": -1.552691102027893, + "logits/rejected": -1.5625238418579102, + "logps/chosen": -0.00010025295341620222, + "logps/rejected": -151.992431640625, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.492721589282155e-06, + "rewards/margins": 4.505295276641846, + "rewards/rejected": -4.505298614501953, + "step": 12258 + }, + { + "epoch": 0.71, + "learning_rate": 2.0038860133862308e-08, + "logits/chosen": -2.121609926223755, + "logits/rejected": -2.1203689575195312, + "logps/chosen": -12.945503234863281, + "logps/rejected": -220.76040649414062, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14762620627880096, + "rewards/margins": 4.501248836517334, + "rewards/rejected": -4.3536224365234375, + "step": 12259 + }, + { + "epoch": 0.71, + "learning_rate": 2.0031315959131528e-08, + "logits/chosen": -1.871206521987915, + "logits/rejected": -1.8719068765640259, + "logps/chosen": -26.183456420898438, + "logps/rejected": -138.4305419921875, + "loss": 0.4257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29441148042678833, + "rewards/margins": 0.849652886390686, + "rewards/rejected": -0.5552414059638977, + "step": 12260 + }, + { + "epoch": 0.71, + "learning_rate": 2.002377284903503e-08, + "logits/chosen": -1.8215738534927368, + "logits/rejected": -1.824742317199707, + "logps/chosen": -8.751355171203613, + "logps/rejected": -132.37387084960938, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.449942022562027, + "rewards/margins": 2.3955307006835938, + "rewards/rejected": -1.9455887079238892, + "step": 12261 + }, + { + "epoch": 0.71, + "learning_rate": 2.0016230803840738e-08, + "logits/chosen": -1.9484342336654663, + "logits/rejected": -1.947513461112976, + "logps/chosen": -3.9572341442108154, + "logps/rejected": -149.0313720703125, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22819207608699799, + "rewards/margins": 2.354544162750244, + "rewards/rejected": -2.5827362537384033, + "step": 12262 + }, + { + "epoch": 0.71, + "learning_rate": 2.0008689823816592e-08, + "logits/chosen": -2.1217105388641357, + "logits/rejected": -2.117213726043701, + "logps/chosen": -0.0007755592232570052, + "logps/rejected": -90.68070983886719, + "loss": 0.6953, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.053262455272488e-05, + "rewards/margins": -0.07076197117567062, + "rewards/rejected": 0.07072144001722336, + "step": 12263 + }, + { + "epoch": 0.71, + "learning_rate": 2.0001149909230487e-08, + "logits/chosen": -1.8280397653579712, + "logits/rejected": -1.7967805862426758, + "logps/chosen": -288.42645263671875, + "logps/rejected": -566.020263671875, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0861146450042725, + "rewards/margins": 3.500964403152466, + "rewards/rejected": -0.4148498475551605, + "step": 12264 + }, + { + "epoch": 0.71, + "learning_rate": 1.999361106035028e-08, + "logits/chosen": -1.9983149766921997, + "logits/rejected": -1.9975160360336304, + "logps/chosen": -141.92398071289062, + "logps/rejected": -194.43365478515625, + "loss": 0.3803, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4007980823516846, + "rewards/margins": 0.0015213489532470703, + "rewards/rejected": 1.3992767333984375, + "step": 12265 + }, + { + "epoch": 0.71, + "learning_rate": 1.99860732774438e-08, + "logits/chosen": -1.7991282939910889, + "logits/rejected": -1.8621386289596558, + "logps/chosen": -180.3173828125, + "logps/rejected": -296.55804443359375, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.829876661300659, + "rewards/margins": 2.634469509124756, + "rewards/rejected": 0.19540710747241974, + "step": 12266 + }, + { + "epoch": 0.71, + "learning_rate": 1.9978536560778792e-08, + "logits/chosen": -2.1163883209228516, + "logits/rejected": -2.122199058532715, + "logps/chosen": -58.42012405395508, + "logps/rejected": -157.38818359375, + "loss": 0.275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9466251730918884, + "rewards/margins": 0.8720829486846924, + "rewards/rejected": 0.07454223930835724, + "step": 12267 + }, + { + "epoch": 0.71, + "learning_rate": 1.997100091062302e-08, + "logits/chosen": -2.0329625606536865, + "logits/rejected": -2.024672746658325, + "logps/chosen": -37.20826721191406, + "logps/rejected": -217.74400329589844, + "loss": 0.1453, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0998878479003906, + "rewards/margins": 2.6110634803771973, + "rewards/rejected": -1.511175513267517, + "step": 12268 + }, + { + "epoch": 0.71, + "learning_rate": 1.9963466327244182e-08, + "logits/chosen": -1.9844566583633423, + "logits/rejected": -1.957862138748169, + "logps/chosen": -212.37686157226562, + "logps/rejected": -374.9191589355469, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.92576003074646, + "rewards/margins": 2.587329149246216, + "rewards/rejected": 0.338430792093277, + "step": 12269 + }, + { + "epoch": 0.71, + "learning_rate": 1.9955932810909958e-08, + "logits/chosen": -1.9533097743988037, + "logits/rejected": -1.9448530673980713, + "logps/chosen": -14.60846996307373, + "logps/rejected": -107.81196594238281, + "loss": 0.4297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35296279191970825, + "rewards/margins": 0.9368899464607239, + "rewards/rejected": -0.5839271545410156, + "step": 12270 + }, + { + "epoch": 0.71, + "learning_rate": 1.994840036188795e-08, + "logits/chosen": -1.9031459093093872, + "logits/rejected": -1.8915941715240479, + "logps/chosen": -71.316650390625, + "logps/rejected": -392.5118408203125, + "loss": 0.2185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06784515827894211, + "rewards/margins": 6.566713333129883, + "rewards/rejected": -6.498867988586426, + "step": 12271 + }, + { + "epoch": 0.71, + "learning_rate": 1.994086898044576e-08, + "logits/chosen": -1.9802823066711426, + "logits/rejected": -2.024216890335083, + "logps/chosen": -213.6966552734375, + "logps/rejected": -569.140625, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9486907720565796, + "rewards/margins": 12.762664794921875, + "rewards/rejected": -10.813974380493164, + "step": 12272 + }, + { + "epoch": 0.71, + "learning_rate": 1.9933338666850936e-08, + "logits/chosen": -2.0836784839630127, + "logits/rejected": -2.071777105331421, + "logps/chosen": -0.043566226959228516, + "logps/rejected": -169.318359375, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021904821041971445, + "rewards/margins": 3.4485204219818115, + "rewards/rejected": -3.4507110118865967, + "step": 12273 + }, + { + "epoch": 0.71, + "learning_rate": 1.992580942137101e-08, + "logits/chosen": -1.9981002807617188, + "logits/rejected": -1.9724223613739014, + "logps/chosen": -257.74603271484375, + "logps/rejected": -374.67523193359375, + "loss": 0.1182, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4247283935546875, + "rewards/margins": 1.5058624744415283, + "rewards/rejected": -0.08113403618335724, + "step": 12274 + }, + { + "epoch": 0.71, + "learning_rate": 1.9918281244273427e-08, + "logits/chosen": -1.9346346855163574, + "logits/rejected": -1.9359833002090454, + "logps/chosen": -144.1766815185547, + "logps/rejected": -143.76844787597656, + "loss": 0.2136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.74420166015625, + "rewards/margins": 1.074723720550537, + "rewards/rejected": 0.6694778800010681, + "step": 12275 + }, + { + "epoch": 0.71, + "learning_rate": 1.991075413582564e-08, + "logits/chosen": -1.9954146146774292, + "logits/rejected": -1.9986493587493896, + "logps/chosen": -56.77280807495117, + "logps/rejected": -251.79739379882812, + "loss": 0.171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6273910403251648, + "rewards/margins": 3.3847031593322754, + "rewards/rejected": -2.757312059402466, + "step": 12276 + }, + { + "epoch": 0.71, + "learning_rate": 1.990322809629505e-08, + "logits/chosen": -2.0015006065368652, + "logits/rejected": -1.982462763786316, + "logps/chosen": -203.87994384765625, + "logps/rejected": -387.7353210449219, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.532914876937866, + "rewards/margins": 2.7106125354766846, + "rewards/rejected": -0.17769776284694672, + "step": 12277 + }, + { + "epoch": 0.71, + "learning_rate": 1.989570312594903e-08, + "logits/chosen": -1.7350298166275024, + "logits/rejected": -1.7675732374191284, + "logps/chosen": -235.55218505859375, + "logps/rejected": -325.3753967285156, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0791412591934204, + "rewards/margins": 1.7319762706756592, + "rewards/rejected": -0.6528350710868835, + "step": 12278 + }, + { + "epoch": 0.71, + "learning_rate": 1.9888179225054857e-08, + "logits/chosen": -1.9367806911468506, + "logits/rejected": -1.9490572214126587, + "logps/chosen": -241.49542236328125, + "logps/rejected": -323.0181884765625, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6070587635040283, + "rewards/margins": 3.2138671875, + "rewards/rejected": -1.6068085432052612, + "step": 12279 + }, + { + "epoch": 0.71, + "learning_rate": 1.988065639387989e-08, + "logits/chosen": -1.6998589038848877, + "logits/rejected": -1.6857731342315674, + "logps/chosen": -117.29183197021484, + "logps/rejected": -225.4676513671875, + "loss": 0.2601, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5568183660507202, + "rewards/margins": 0.9163123965263367, + "rewards/rejected": 0.6405059695243835, + "step": 12280 + }, + { + "epoch": 0.71, + "learning_rate": 1.9873134632691318e-08, + "logits/chosen": -1.8127689361572266, + "logits/rejected": -1.817443609237671, + "logps/chosen": -45.61347198486328, + "logps/rejected": -205.03939819335938, + "loss": 0.4245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013299561105668545, + "rewards/margins": 0.9964447617530823, + "rewards/rejected": -0.9831451773643494, + "step": 12281 + }, + { + "epoch": 0.71, + "learning_rate": 1.9865613941756387e-08, + "logits/chosen": -1.8822442293167114, + "logits/rejected": -1.8848564624786377, + "logps/chosen": -34.936981201171875, + "logps/rejected": -271.3056640625, + "loss": 0.0399, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.621445894241333, + "rewards/margins": 3.5293073654174805, + "rewards/rejected": -0.9078613519668579, + "step": 12282 + }, + { + "epoch": 0.71, + "learning_rate": 1.9858094321342216e-08, + "logits/chosen": -1.9578291177749634, + "logits/rejected": -1.9558184146881104, + "logps/chosen": -164.72598266601562, + "logps/rejected": -254.36697387695312, + "loss": 0.099, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.128004550933838, + "rewards/margins": 2.3902587890625, + "rewards/rejected": -0.2622543275356293, + "step": 12283 + }, + { + "epoch": 0.71, + "learning_rate": 1.9850575771716015e-08, + "logits/chosen": -1.8990421295166016, + "logits/rejected": -1.8869760036468506, + "logps/chosen": -37.766109466552734, + "logps/rejected": -357.217041015625, + "loss": 0.2562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3612937927246094, + "rewards/margins": 6.335091590881348, + "rewards/rejected": -5.973797798156738, + "step": 12284 + }, + { + "epoch": 0.71, + "learning_rate": 1.9843058293144815e-08, + "logits/chosen": -1.9419416189193726, + "logits/rejected": -1.9133888483047485, + "logps/chosen": -6.711306923534721e-05, + "logps/rejected": -260.3661804199219, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.464186862511269e-07, + "rewards/margins": 6.431501388549805, + "rewards/rejected": -6.431500434875488, + "step": 12285 + }, + { + "epoch": 0.71, + "learning_rate": 1.98355418858957e-08, + "logits/chosen": -1.8603127002716064, + "logits/rejected": -1.8494690656661987, + "logps/chosen": -9.04688835144043, + "logps/rejected": -91.26587677001953, + "loss": 0.5339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.054343607276678085, + "rewards/margins": 0.4324216842651367, + "rewards/rejected": -0.4867652952671051, + "step": 12286 + }, + { + "epoch": 0.72, + "learning_rate": 1.9828026550235695e-08, + "logits/chosen": -1.6589268445968628, + "logits/rejected": -1.654058814048767, + "logps/chosen": -216.8098602294922, + "logps/rejected": -419.6348876953125, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6424195766448975, + "rewards/margins": 2.064138889312744, + "rewards/rejected": 0.5782806277275085, + "step": 12287 + }, + { + "epoch": 0.72, + "learning_rate": 1.982051228643176e-08, + "logits/chosen": -1.9384697675704956, + "logits/rejected": -1.9357531070709229, + "logps/chosen": -39.473426818847656, + "logps/rejected": -154.94520568847656, + "loss": 0.3067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6967666745185852, + "rewards/margins": 1.3295319080352783, + "rewards/rejected": -0.6327652335166931, + "step": 12288 + }, + { + "epoch": 0.72, + "learning_rate": 1.9812999094750882e-08, + "logits/chosen": -1.951553463935852, + "logits/rejected": -1.947935938835144, + "logps/chosen": -13.561349868774414, + "logps/rejected": -87.2490463256836, + "loss": 0.4347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1801169365644455, + "rewards/margins": 1.9782328605651855, + "rewards/rejected": -2.1583497524261475, + "step": 12289 + }, + { + "epoch": 0.72, + "learning_rate": 1.980548697545991e-08, + "logits/chosen": -1.9560843706130981, + "logits/rejected": -1.9381544589996338, + "logps/chosen": -132.90811157226562, + "logps/rejected": -288.57940673828125, + "loss": 0.3848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022186279296875, + "rewards/margins": 1.619073510169983, + "rewards/rejected": -1.641259789466858, + "step": 12290 + }, + { + "epoch": 0.72, + "learning_rate": 1.9797975928825746e-08, + "logits/chosen": -1.9678956270217896, + "logits/rejected": -1.9641034603118896, + "logps/chosen": -121.10153198242188, + "logps/rejected": -168.3152313232422, + "loss": 0.6493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11143264919519424, + "rewards/margins": 0.17525407671928406, + "rewards/rejected": -0.2866867184638977, + "step": 12291 + }, + { + "epoch": 0.72, + "learning_rate": 1.9790465955115208e-08, + "logits/chosen": -1.6757643222808838, + "logits/rejected": -1.746458888053894, + "logps/chosen": -167.38258361816406, + "logps/rejected": -144.154052734375, + "loss": 0.319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.074743628501892, + "rewards/margins": 0.28206324577331543, + "rewards/rejected": 0.7926803827285767, + "step": 12292 + }, + { + "epoch": 0.72, + "learning_rate": 1.9782957054595107e-08, + "logits/chosen": -1.8195687532424927, + "logits/rejected": -1.8218351602554321, + "logps/chosen": -191.739013671875, + "logps/rejected": -472.37188720703125, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4216461181640625, + "rewards/margins": 5.173550605773926, + "rewards/rejected": -3.751904249191284, + "step": 12293 + }, + { + "epoch": 0.72, + "learning_rate": 1.9775449227532165e-08, + "logits/chosen": -1.8909984827041626, + "logits/rejected": -1.8809120655059814, + "logps/chosen": -2.5051982402801514, + "logps/rejected": -112.67842864990234, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007253074552863836, + "rewards/margins": 3.0946357250213623, + "rewards/rejected": -3.0873825550079346, + "step": 12294 + }, + { + "epoch": 0.72, + "learning_rate": 1.9767942474193105e-08, + "logits/chosen": -2.097559690475464, + "logits/rejected": -2.0791618824005127, + "logps/chosen": -246.65179443359375, + "logps/rejected": -348.5186767578125, + "loss": 0.2113, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8613433837890625, + "rewards/margins": 0.8003814220428467, + "rewards/rejected": 1.0609619617462158, + "step": 12295 + }, + { + "epoch": 0.72, + "learning_rate": 1.9760436794844617e-08, + "logits/chosen": -1.9403431415557861, + "logits/rejected": -1.9824416637420654, + "logps/chosen": -214.21334838867188, + "logps/rejected": -384.44012451171875, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.439840793609619, + "rewards/margins": 5.569192886352539, + "rewards/rejected": -3.129351854324341, + "step": 12296 + }, + { + "epoch": 0.72, + "learning_rate": 1.975293218975334e-08, + "logits/chosen": -1.7882553339004517, + "logits/rejected": -1.8146857023239136, + "logps/chosen": -294.144775390625, + "logps/rejected": -442.82330322265625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8403899669647217, + "rewards/margins": 7.987582206726074, + "rewards/rejected": -5.147192478179932, + "step": 12297 + }, + { + "epoch": 0.72, + "learning_rate": 1.9745428659185835e-08, + "logits/chosen": -1.9510548114776611, + "logits/rejected": -1.9422669410705566, + "logps/chosen": -0.916266918182373, + "logps/rejected": -351.178466796875, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12553222477436066, + "rewards/margins": 8.652070045471191, + "rewards/rejected": -8.526537895202637, + "step": 12298 + }, + { + "epoch": 0.72, + "learning_rate": 1.973792620340873e-08, + "logits/chosen": -1.6972062587738037, + "logits/rejected": -1.698229432106018, + "logps/chosen": -1.4955602884292603, + "logps/rejected": -52.94077682495117, + "loss": 0.5675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005385339260101318, + "rewards/margins": 0.3276689052581787, + "rewards/rejected": -0.3222835659980774, + "step": 12299 + }, + { + "epoch": 0.72, + "learning_rate": 1.9730424822688497e-08, + "logits/chosen": -1.9293268918991089, + "logits/rejected": -1.9276329278945923, + "logps/chosen": -30.628414154052734, + "logps/rejected": -250.97702026367188, + "loss": 0.3581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10080032795667648, + "rewards/margins": 3.6107094287872314, + "rewards/rejected": -3.7115097045898438, + "step": 12300 + }, + { + "epoch": 0.72, + "learning_rate": 1.9722924517291656e-08, + "logits/chosen": -2.003321409225464, + "logits/rejected": -2.0127761363983154, + "logps/chosen": -111.72232055664062, + "logps/rejected": -390.61883544921875, + "loss": 0.1629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.80996173620224, + "rewards/margins": 8.648426055908203, + "rewards/rejected": -7.838464260101318, + "step": 12301 + }, + { + "epoch": 0.72, + "learning_rate": 1.9715425287484598e-08, + "logits/chosen": -1.9909101724624634, + "logits/rejected": -1.9237391948699951, + "logps/chosen": -199.2650146484375, + "logps/rejected": -587.0703735351562, + "loss": 0.0508, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1895904541015625, + "rewards/margins": 4.9561309814453125, + "rewards/rejected": -2.76654052734375, + "step": 12302 + }, + { + "epoch": 0.72, + "learning_rate": 1.9707927133533814e-08, + "logits/chosen": -1.7805956602096558, + "logits/rejected": -1.7854195833206177, + "logps/chosen": -214.7587890625, + "logps/rejected": -372.3746643066406, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2007369995117188, + "rewards/margins": 5.280735969543457, + "rewards/rejected": -2.079998731613159, + "step": 12303 + }, + { + "epoch": 0.72, + "learning_rate": 1.9700430055705614e-08, + "logits/chosen": -1.9136710166931152, + "logits/rejected": -1.912002444267273, + "logps/chosen": -29.271907806396484, + "logps/rejected": -110.32186889648438, + "loss": 0.3932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4736446440219879, + "rewards/margins": 0.7358074188232422, + "rewards/rejected": -0.2621627748012543, + "step": 12304 + }, + { + "epoch": 0.72, + "learning_rate": 1.9692934054266346e-08, + "logits/chosen": -1.8669081926345825, + "logits/rejected": -1.8760250806808472, + "logps/chosen": -199.01007080078125, + "logps/rejected": -322.02935791015625, + "loss": 0.0628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1459563970565796, + "rewards/margins": 2.989401340484619, + "rewards/rejected": -1.84344482421875, + "step": 12305 + }, + { + "epoch": 0.72, + "learning_rate": 1.9685439129482312e-08, + "logits/chosen": -1.9172019958496094, + "logits/rejected": -1.902581810951233, + "logps/chosen": -10.978471755981445, + "logps/rejected": -132.14666748046875, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21752290427684784, + "rewards/margins": 1.9371777772903442, + "rewards/rejected": -1.71965491771698, + "step": 12306 + }, + { + "epoch": 0.72, + "learning_rate": 1.9677945281619762e-08, + "logits/chosen": -1.987584114074707, + "logits/rejected": -1.9939157962799072, + "logps/chosen": -10.327705383300781, + "logps/rejected": -190.0261993408203, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3332066535949707, + "rewards/margins": 3.930551767349243, + "rewards/rejected": -3.5973451137542725, + "step": 12307 + }, + { + "epoch": 0.72, + "learning_rate": 1.9670452510944933e-08, + "logits/chosen": -1.766868233680725, + "logits/rejected": -1.7474344968795776, + "logps/chosen": -200.51412963867188, + "logps/rejected": -482.90118408203125, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.91241455078125, + "rewards/margins": 6.56825590133667, + "rewards/rejected": -4.65584135055542, + "step": 12308 + }, + { + "epoch": 0.72, + "learning_rate": 1.966296081772398e-08, + "logits/chosen": -1.8508886098861694, + "logits/rejected": -1.8263736963272095, + "logps/chosen": -163.91534423828125, + "logps/rejected": -286.72869873046875, + "loss": 0.4487, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.993933081626892, + "rewards/margins": -0.2855895757675171, + "rewards/rejected": 2.279522657394409, + "step": 12309 + }, + { + "epoch": 0.72, + "learning_rate": 1.965547020222304e-08, + "logits/chosen": -1.9893664121627808, + "logits/rejected": -1.9718005657196045, + "logps/chosen": -82.31719970703125, + "logps/rejected": -289.7103576660156, + "loss": 0.2578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4357101619243622, + "rewards/margins": 1.751539707183838, + "rewards/rejected": -1.3158295154571533, + "step": 12310 + }, + { + "epoch": 0.72, + "learning_rate": 1.964798066470824e-08, + "logits/chosen": -1.9283539056777954, + "logits/rejected": -1.9462008476257324, + "logps/chosen": -187.84779357910156, + "logps/rejected": -536.525146484375, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5994064807891846, + "rewards/margins": 8.613731384277344, + "rewards/rejected": -7.014325141906738, + "step": 12311 + }, + { + "epoch": 0.72, + "learning_rate": 1.9640492205445648e-08, + "logits/chosen": -1.8221818208694458, + "logits/rejected": -1.8196828365325928, + "logps/chosen": -70.03556823730469, + "logps/rejected": -207.10641479492188, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.046311188489198685, + "rewards/margins": 1.8154792785644531, + "rewards/rejected": -1.7691681385040283, + "step": 12312 + }, + { + "epoch": 0.72, + "learning_rate": 1.963300482470126e-08, + "logits/chosen": -1.7423276901245117, + "logits/rejected": -1.7524324655532837, + "logps/chosen": -157.70437622070312, + "logps/rejected": -332.2159118652344, + "loss": 0.131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5019043684005737, + "rewards/margins": 1.5728638172149658, + "rewards/rejected": -0.07095947116613388, + "step": 12313 + }, + { + "epoch": 0.72, + "learning_rate": 1.962551852274108e-08, + "logits/chosen": -1.9512302875518799, + "logits/rejected": -1.9478280544281006, + "logps/chosen": -49.37196731567383, + "logps/rejected": -196.0345458984375, + "loss": 0.1154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5355175733566284, + "rewards/margins": 3.031362533569336, + "rewards/rejected": -1.495845079421997, + "step": 12314 + }, + { + "epoch": 0.72, + "learning_rate": 1.961803329983106e-08, + "logits/chosen": -1.8618569374084473, + "logits/rejected": -1.8619478940963745, + "logps/chosen": -32.57554244995117, + "logps/rejected": -252.16259765625, + "loss": 0.412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24114838242530823, + "rewards/margins": 1.377709984779358, + "rewards/rejected": -1.136561632156372, + "step": 12315 + }, + { + "epoch": 0.72, + "learning_rate": 1.9610549156237134e-08, + "logits/chosen": -1.7807825803756714, + "logits/rejected": -1.7929236888885498, + "logps/chosen": -224.58193969726562, + "logps/rejected": -465.7469482421875, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1410706043243408, + "rewards/margins": 4.764208793640137, + "rewards/rejected": -3.623138427734375, + "step": 12316 + }, + { + "epoch": 0.72, + "learning_rate": 1.960306609222511e-08, + "logits/chosen": -1.6183255910873413, + "logits/rejected": -1.6215178966522217, + "logps/chosen": -13.541177749633789, + "logps/rejected": -79.07491302490234, + "loss": 0.5323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2115146666765213, + "rewards/margins": 0.26727408170700073, + "rewards/rejected": -0.055759429931640625, + "step": 12317 + }, + { + "epoch": 0.72, + "learning_rate": 1.959558410806091e-08, + "logits/chosen": -1.9736741781234741, + "logits/rejected": -1.9460670948028564, + "logps/chosen": -187.52194213867188, + "logps/rejected": -301.14990234375, + "loss": 0.1758, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0525543689727783, + "rewards/margins": 2.560922145843506, + "rewards/rejected": -1.508367896080017, + "step": 12318 + }, + { + "epoch": 0.72, + "learning_rate": 1.9588103204010264e-08, + "logits/chosen": -1.8063076734542847, + "logits/rejected": -1.8201959133148193, + "logps/chosen": -304.8230895996094, + "logps/rejected": -469.7704772949219, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4655059576034546, + "rewards/margins": 4.160748481750488, + "rewards/rejected": -2.695242404937744, + "step": 12319 + }, + { + "epoch": 0.72, + "learning_rate": 1.9580623380338967e-08, + "logits/chosen": -1.9402644634246826, + "logits/rejected": -1.9354233741760254, + "logps/chosen": -44.099464416503906, + "logps/rejected": -178.2685546875, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01205978449434042, + "rewards/margins": 2.497417688369751, + "rewards/rejected": -2.509477376937866, + "step": 12320 + }, + { + "epoch": 0.72, + "learning_rate": 1.9573144637312694e-08, + "logits/chosen": -2.158851146697998, + "logits/rejected": -2.1590182781219482, + "logps/chosen": -26.62251091003418, + "logps/rejected": -260.2351989746094, + "loss": 0.153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1716012954711914, + "rewards/margins": 3.51166033744812, + "rewards/rejected": -2.3400590419769287, + "step": 12321 + }, + { + "epoch": 0.72, + "learning_rate": 1.9565666975197192e-08, + "logits/chosen": -2.0186686515808105, + "logits/rejected": -2.004042863845825, + "logps/chosen": -62.11335754394531, + "logps/rejected": -360.6700744628906, + "loss": 0.0589, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8730636835098267, + "rewards/margins": 4.704826354980469, + "rewards/rejected": -2.8317627906799316, + "step": 12322 + }, + { + "epoch": 0.72, + "learning_rate": 1.955819039425805e-08, + "logits/chosen": -1.9498766660690308, + "logits/rejected": -1.9496710300445557, + "logps/chosen": -53.6765022277832, + "logps/rejected": -206.77676391601562, + "loss": 1.0159, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.49847108125686646, + "rewards/margins": -0.8579056262969971, + "rewards/rejected": 0.3594345152378082, + "step": 12323 + }, + { + "epoch": 0.72, + "learning_rate": 1.955071489476091e-08, + "logits/chosen": -1.8014017343521118, + "logits/rejected": -1.7870397567749023, + "logps/chosen": -43.632728576660156, + "logps/rejected": -250.31787109375, + "loss": 0.0678, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.226860761642456, + "rewards/margins": 2.703453779220581, + "rewards/rejected": -0.476593017578125, + "step": 12324 + }, + { + "epoch": 0.72, + "learning_rate": 1.954324047697129e-08, + "logits/chosen": -1.8696086406707764, + "logits/rejected": -1.8666980266571045, + "logps/chosen": -24.24182891845703, + "logps/rejected": -184.45718383789062, + "loss": 0.1878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6187868118286133, + "rewards/margins": 3.012622356414795, + "rewards/rejected": -2.3938355445861816, + "step": 12325 + }, + { + "epoch": 0.72, + "learning_rate": 1.9535767141154764e-08, + "logits/chosen": -1.9010601043701172, + "logits/rejected": -1.8891301155090332, + "logps/chosen": -4.294564247131348, + "logps/rejected": -241.75643920898438, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22449813783168793, + "rewards/margins": 3.123370409011841, + "rewards/rejected": -2.8988723754882812, + "step": 12326 + }, + { + "epoch": 0.72, + "learning_rate": 1.9528294887576817e-08, + "logits/chosen": -1.8491648435592651, + "logits/rejected": -1.8653839826583862, + "logps/chosen": -248.6495819091797, + "logps/rejected": -430.43017578125, + "loss": 0.0578, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2110397815704346, + "rewards/margins": 2.887782335281372, + "rewards/rejected": -0.6767425537109375, + "step": 12327 + }, + { + "epoch": 0.72, + "learning_rate": 1.9520823716502883e-08, + "logits/chosen": -1.9849001169204712, + "logits/rejected": -2.0004706382751465, + "logps/chosen": -142.7644500732422, + "logps/rejected": -354.2646179199219, + "loss": 0.0631, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7856292724609375, + "rewards/margins": 4.760339736938477, + "rewards/rejected": -2.97471022605896, + "step": 12328 + }, + { + "epoch": 0.72, + "learning_rate": 1.951335362819837e-08, + "logits/chosen": -1.9190139770507812, + "logits/rejected": -1.916327714920044, + "logps/chosen": -0.00020121420675422996, + "logps/rejected": -228.28176879882812, + "loss": 0.3549, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.940332918427885e-07, + "rewards/margins": 4.070903778076172, + "rewards/rejected": -4.070904731750488, + "step": 12329 + }, + { + "epoch": 0.72, + "learning_rate": 1.9505884622928665e-08, + "logits/chosen": -1.8378406763076782, + "logits/rejected": -1.8185961246490479, + "logps/chosen": -268.42437744140625, + "logps/rejected": -438.8662109375, + "loss": 0.0899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9099395871162415, + "rewards/margins": 3.9895567893981934, + "rewards/rejected": -3.0796172618865967, + "step": 12330 + }, + { + "epoch": 0.72, + "learning_rate": 1.949841670095912e-08, + "logits/chosen": -1.8113948106765747, + "logits/rejected": -1.8046684265136719, + "logps/chosen": -1.8007433414459229, + "logps/rejected": -100.49630737304688, + "loss": 0.5937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0017092227935791016, + "rewards/margins": 0.3226023316383362, + "rewards/rejected": -0.3208931088447571, + "step": 12331 + }, + { + "epoch": 0.72, + "learning_rate": 1.9490949862554994e-08, + "logits/chosen": -2.1339051723480225, + "logits/rejected": -2.1296019554138184, + "logps/chosen": -6.644925594329834, + "logps/rejected": -226.99191284179688, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08709245175123215, + "rewards/margins": 4.6215500831604, + "rewards/rejected": -4.708642482757568, + "step": 12332 + }, + { + "epoch": 0.72, + "learning_rate": 1.948348410798157e-08, + "logits/chosen": -1.7501291036605835, + "logits/rejected": -1.699488639831543, + "logps/chosen": -269.2726745605469, + "logps/rejected": -645.0725708007812, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.729483127593994, + "rewards/margins": 4.848202705383301, + "rewards/rejected": -2.1187195777893066, + "step": 12333 + }, + { + "epoch": 0.72, + "learning_rate": 1.9476019437504053e-08, + "logits/chosen": -1.9929964542388916, + "logits/rejected": -1.987053632736206, + "logps/chosen": -3.153196096420288, + "logps/rejected": -126.89898681640625, + "loss": 0.4918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02622663974761963, + "rewards/margins": 0.9508271813392639, + "rewards/rejected": -0.9770538210868835, + "step": 12334 + }, + { + "epoch": 0.72, + "learning_rate": 1.9468555851387664e-08, + "logits/chosen": -1.8665761947631836, + "logits/rejected": -1.859955906867981, + "logps/chosen": -0.072167307138443, + "logps/rejected": -132.69422912597656, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06954126805067062, + "rewards/margins": 0.5840523838996887, + "rewards/rejected": -0.5145111083984375, + "step": 12335 + }, + { + "epoch": 0.72, + "learning_rate": 1.946109334989749e-08, + "logits/chosen": -1.8142634630203247, + "logits/rejected": -1.8053244352340698, + "logps/chosen": -0.003977994434535503, + "logps/rejected": -275.70355224609375, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025220849784091115, + "rewards/margins": 4.292984962463379, + "rewards/rejected": -4.293237209320068, + "step": 12336 + }, + { + "epoch": 0.72, + "learning_rate": 1.9453631933298665e-08, + "logits/chosen": -2.0962865352630615, + "logits/rejected": -2.0894670486450195, + "logps/chosen": -47.13346862792969, + "logps/rejected": -206.54010009765625, + "loss": 0.1477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8116718530654907, + "rewards/margins": 4.125173091888428, + "rewards/rejected": -3.3135011196136475, + "step": 12337 + }, + { + "epoch": 0.72, + "learning_rate": 1.9446171601856252e-08, + "logits/chosen": -1.8175700902938843, + "logits/rejected": -1.8185398578643799, + "logps/chosen": -0.023471731692552567, + "logps/rejected": -133.4001922607422, + "loss": 0.4665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008953860960900784, + "rewards/margins": 1.2782806158065796, + "rewards/rejected": -1.2693268060684204, + "step": 12338 + }, + { + "epoch": 0.72, + "learning_rate": 1.9438712355835296e-08, + "logits/chosen": -1.8766669034957886, + "logits/rejected": -1.8755356073379517, + "logps/chosen": -50.62479782104492, + "logps/rejected": -274.02056884765625, + "loss": 0.1523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8927494287490845, + "rewards/margins": 3.9277472496032715, + "rewards/rejected": -3.0349977016448975, + "step": 12339 + }, + { + "epoch": 0.72, + "learning_rate": 1.9431254195500735e-08, + "logits/chosen": -2.0515358448028564, + "logits/rejected": -2.043370008468628, + "logps/chosen": -171.99998474121094, + "logps/rejected": -334.4164123535156, + "loss": 0.4565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5529571771621704, + "rewards/margins": 0.023870885372161865, + "rewards/rejected": 0.5290862917900085, + "step": 12340 + }, + { + "epoch": 0.72, + "learning_rate": 1.9423797121117586e-08, + "logits/chosen": -1.8393254280090332, + "logits/rejected": -1.8524478673934937, + "logps/chosen": -250.1525421142578, + "logps/rejected": -483.4541320800781, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7906875610351562, + "rewards/margins": 5.575532913208008, + "rewards/rejected": -3.7848451137542725, + "step": 12341 + }, + { + "epoch": 0.72, + "learning_rate": 1.9416341132950714e-08, + "logits/chosen": -1.9954453706741333, + "logits/rejected": -1.9895974397659302, + "logps/chosen": -76.97935485839844, + "logps/rejected": -211.9342803955078, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2358085662126541, + "rewards/margins": 2.6322152614593506, + "rewards/rejected": -2.396406650543213, + "step": 12342 + }, + { + "epoch": 0.72, + "learning_rate": 1.940888623126502e-08, + "logits/chosen": -1.6618132591247559, + "logits/rejected": -1.6568689346313477, + "logps/chosen": -60.8975830078125, + "logps/rejected": -236.4271697998047, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3852089047431946, + "rewards/margins": 2.5576393604278564, + "rewards/rejected": -2.1724305152893066, + "step": 12343 + }, + { + "epoch": 0.72, + "learning_rate": 1.9401432416325288e-08, + "logits/chosen": -2.046132802963257, + "logits/rejected": -2.0515217781066895, + "logps/chosen": -65.14640808105469, + "logps/rejected": -220.70510864257812, + "loss": 0.3417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23842696845531464, + "rewards/margins": 1.6270256042480469, + "rewards/rejected": -1.3885986804962158, + "step": 12344 + }, + { + "epoch": 0.72, + "learning_rate": 1.939397968839639e-08, + "logits/chosen": -2.0046491622924805, + "logits/rejected": -2.0054399967193604, + "logps/chosen": -8.65448237163946e-05, + "logps/rejected": -18.12529182434082, + "loss": 0.6935, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.0622071588149993e-06, + "rewards/margins": -0.024986684322357178, + "rewards/rejected": 0.024988746270537376, + "step": 12345 + }, + { + "epoch": 0.72, + "learning_rate": 1.9386528047743024e-08, + "logits/chosen": -1.9459549188613892, + "logits/rejected": -1.9314168691635132, + "logps/chosen": -3.7312052882043645e-05, + "logps/rejected": -205.03555297851562, + "loss": 0.3817, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.5322878981532995e-06, + "rewards/margins": 2.8385167121887207, + "rewards/rejected": -2.838510274887085, + "step": 12346 + }, + { + "epoch": 0.72, + "learning_rate": 1.9379077494629925e-08, + "logits/chosen": -2.006279468536377, + "logits/rejected": -2.059887170791626, + "logps/chosen": -208.31875610351562, + "logps/rejected": -470.4958801269531, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.661215305328369, + "rewards/margins": 8.805898666381836, + "rewards/rejected": -6.144683837890625, + "step": 12347 + }, + { + "epoch": 0.72, + "learning_rate": 1.937162802932178e-08, + "logits/chosen": -1.9128360748291016, + "logits/rejected": -1.9145293235778809, + "logps/chosen": -9.207131385803223, + "logps/rejected": -72.39051818847656, + "loss": 0.7313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19140982627868652, + "rewards/margins": 0.07757362723350525, + "rewards/rejected": -0.2689834535121918, + "step": 12348 + }, + { + "epoch": 0.72, + "learning_rate": 1.9364179652083224e-08, + "logits/chosen": -1.9789891242980957, + "logits/rejected": -1.9800773859024048, + "logps/chosen": -0.0422307513654232, + "logps/rejected": -124.42362976074219, + "loss": 0.6031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0117472680285573, + "rewards/margins": 0.398151695728302, + "rewards/rejected": -0.386404424905777, + "step": 12349 + }, + { + "epoch": 0.72, + "learning_rate": 1.9356732363178885e-08, + "logits/chosen": -1.9061118364334106, + "logits/rejected": -1.901814579963684, + "logps/chosen": -16.76165199279785, + "logps/rejected": -103.40646362304688, + "loss": 0.6732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3323880136013031, + "rewards/margins": 0.1287936270236969, + "rewards/rejected": -0.461181640625, + "step": 12350 + }, + { + "epoch": 0.72, + "learning_rate": 1.934928616287328e-08, + "logits/chosen": -1.8477908372879028, + "logits/rejected": -1.8452757596969604, + "logps/chosen": -0.0016445985529571772, + "logps/rejected": -157.01846313476562, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6492454960825853e-05, + "rewards/margins": 4.417901515960693, + "rewards/rejected": -4.417884826660156, + "step": 12351 + }, + { + "epoch": 0.72, + "learning_rate": 1.9341841051430968e-08, + "logits/chosen": -1.9279727935791016, + "logits/rejected": -1.9253239631652832, + "logps/chosen": -18.800153732299805, + "logps/rejected": -80.4860610961914, + "loss": 0.4283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18753337860107422, + "rewards/margins": 1.0265157222747803, + "rewards/rejected": -0.8389824032783508, + "step": 12352 + }, + { + "epoch": 0.72, + "learning_rate": 1.9334397029116424e-08, + "logits/chosen": -1.685701608657837, + "logits/rejected": -1.6730411052703857, + "logps/chosen": -364.7158508300781, + "logps/rejected": -415.4568176269531, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8749420642852783, + "rewards/margins": 2.1059815883636475, + "rewards/rejected": -0.23103943467140198, + "step": 12353 + }, + { + "epoch": 0.72, + "learning_rate": 1.9326954096194114e-08, + "logits/chosen": -2.0279316902160645, + "logits/rejected": -2.006504774093628, + "logps/chosen": -55.8634033203125, + "logps/rejected": -288.5101318359375, + "loss": 0.5657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8588463068008423, + "rewards/margins": 3.0971550941467285, + "rewards/rejected": -3.9560012817382812, + "step": 12354 + }, + { + "epoch": 0.72, + "learning_rate": 1.9319512252928416e-08, + "logits/chosen": -1.7445991039276123, + "logits/rejected": -1.752284049987793, + "logps/chosen": -203.5832061767578, + "logps/rejected": -405.2562561035156, + "loss": 0.1398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3030303716659546, + "rewards/margins": 2.133312940597534, + "rewards/rejected": -0.8302826285362244, + "step": 12355 + }, + { + "epoch": 0.72, + "learning_rate": 1.9312071499583722e-08, + "logits/chosen": -1.4513212442398071, + "logits/rejected": -1.456332802772522, + "logps/chosen": -0.00010132415627595037, + "logps/rejected": -203.06581115722656, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5138765547817457e-06, + "rewards/margins": 6.030961990356445, + "rewards/rejected": -6.03096342086792, + "step": 12356 + }, + { + "epoch": 0.72, + "learning_rate": 1.930463183642435e-08, + "logits/chosen": -1.6673691272735596, + "logits/rejected": -1.6628704071044922, + "logps/chosen": -199.73260498046875, + "logps/rejected": -256.3836669921875, + "loss": 0.0917, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2798447608947754, + "rewards/margins": 1.7822067737579346, + "rewards/rejected": 0.49763795733451843, + "step": 12357 + }, + { + "epoch": 0.72, + "learning_rate": 1.9297193263714628e-08, + "logits/chosen": -1.9012829065322876, + "logits/rejected": -1.7881979942321777, + "logps/chosen": -250.1798858642578, + "logps/rejected": -498.25543212890625, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2205123901367188, + "rewards/margins": 3.8357253074645996, + "rewards/rejected": -0.6152130365371704, + "step": 12358 + }, + { + "epoch": 0.72, + "learning_rate": 1.9289755781718747e-08, + "logits/chosen": -1.8565603494644165, + "logits/rejected": -1.896467924118042, + "logps/chosen": -145.2350616455078, + "logps/rejected": -423.60699462890625, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0372543334960938, + "rewards/margins": 6.051652431488037, + "rewards/rejected": -4.014398097991943, + "step": 12359 + }, + { + "epoch": 0.72, + "learning_rate": 1.9282319390700997e-08, + "logits/chosen": -1.878574252128601, + "logits/rejected": -1.8758606910705566, + "logps/chosen": -50.09092712402344, + "logps/rejected": -185.9254608154297, + "loss": 0.2508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1587074249982834, + "rewards/margins": 2.5010368824005127, + "rewards/rejected": -2.342329502105713, + "step": 12360 + }, + { + "epoch": 0.72, + "learning_rate": 1.9274884090925498e-08, + "logits/chosen": -1.9629418849945068, + "logits/rejected": -1.9549181461334229, + "logps/chosen": -16.394868850708008, + "logps/rejected": -248.38961791992188, + "loss": 0.2281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7246080636978149, + "rewards/margins": 2.229444980621338, + "rewards/rejected": -1.5048370361328125, + "step": 12361 + }, + { + "epoch": 0.72, + "learning_rate": 1.9267449882656427e-08, + "logits/chosen": -1.7324870824813843, + "logits/rejected": -1.7225998640060425, + "logps/chosen": -68.00709533691406, + "logps/rejected": -309.8625183105469, + "loss": 0.1534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6548050045967102, + "rewards/margins": 3.8093459606170654, + "rewards/rejected": -3.154541015625, + "step": 12362 + }, + { + "epoch": 0.72, + "learning_rate": 1.926001676615783e-08, + "logits/chosen": -1.757416009902954, + "logits/rejected": -1.8139992952346802, + "logps/chosen": -281.098876953125, + "logps/rejected": -345.38238525390625, + "loss": 0.0547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8983154296875, + "rewards/margins": 4.789984226226807, + "rewards/rejected": -3.8916687965393066, + "step": 12363 + }, + { + "epoch": 0.72, + "learning_rate": 1.9252584741693846e-08, + "logits/chosen": -1.9298789501190186, + "logits/rejected": -1.9210307598114014, + "logps/chosen": -0.35024017095565796, + "logps/rejected": -183.1131591796875, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026109134778380394, + "rewards/margins": 2.6688246726989746, + "rewards/rejected": -2.6427154541015625, + "step": 12364 + }, + { + "epoch": 0.72, + "learning_rate": 1.924515380952843e-08, + "logits/chosen": -1.9180952310562134, + "logits/rejected": -1.9149997234344482, + "logps/chosen": -166.2906036376953, + "logps/rejected": -236.77774047851562, + "loss": 0.2485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.545574963092804, + "rewards/margins": 1.1910247802734375, + "rewards/rejected": -0.6454498171806335, + "step": 12365 + }, + { + "epoch": 0.72, + "learning_rate": 1.923772396992559e-08, + "logits/chosen": -1.9834541082382202, + "logits/rejected": -1.9871079921722412, + "logps/chosen": -14.542825698852539, + "logps/rejected": -245.241455078125, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11207618564367294, + "rewards/margins": 5.135269641876221, + "rewards/rejected": -5.023193359375, + "step": 12366 + }, + { + "epoch": 0.72, + "learning_rate": 1.9230295223149267e-08, + "logits/chosen": -2.017738103866577, + "logits/rejected": -2.026353120803833, + "logps/chosen": -8.250360488891602, + "logps/rejected": -112.70218658447266, + "loss": 0.435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1374838799238205, + "rewards/margins": 1.0714751482009888, + "rewards/rejected": -0.9339912533760071, + "step": 12367 + }, + { + "epoch": 0.72, + "learning_rate": 1.922286756946337e-08, + "logits/chosen": -1.8578482866287231, + "logits/rejected": -1.857901692390442, + "logps/chosen": -30.393718719482422, + "logps/rejected": -168.1143798828125, + "loss": 0.7702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7915794253349304, + "rewards/margins": 0.3034859299659729, + "rewards/rejected": -1.0950653553009033, + "step": 12368 + }, + { + "epoch": 0.72, + "learning_rate": 1.9215441009131778e-08, + "logits/chosen": -2.1144237518310547, + "logits/rejected": -2.1055290699005127, + "logps/chosen": -36.233497619628906, + "logps/rejected": -176.60507202148438, + "loss": 0.4113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0106964111328125, + "rewards/margins": 1.4501618146896362, + "rewards/rejected": -1.4394654035568237, + "step": 12369 + }, + { + "epoch": 0.72, + "learning_rate": 1.9208015542418294e-08, + "logits/chosen": -1.8653186559677124, + "logits/rejected": -1.7618122100830078, + "logps/chosen": -230.15765380859375, + "logps/rejected": -536.7278442382812, + "loss": 0.1656, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3066482543945312, + "rewards/margins": 1.9399734735488892, + "rewards/rejected": -0.6333252191543579, + "step": 12370 + }, + { + "epoch": 0.72, + "learning_rate": 1.9200591169586716e-08, + "logits/chosen": -1.9784512519836426, + "logits/rejected": -1.9822965860366821, + "logps/chosen": -242.65744018554688, + "logps/rejected": -453.1476135253906, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.211813449859619, + "rewards/margins": 3.32657790184021, + "rewards/rejected": -1.1147644519805908, + "step": 12371 + }, + { + "epoch": 0.72, + "learning_rate": 1.9193167890900792e-08, + "logits/chosen": -1.8469877243041992, + "logits/rejected": -1.8463692665100098, + "logps/chosen": -0.367648184299469, + "logps/rejected": -47.8521614074707, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012403376400470734, + "rewards/margins": 0.3317197263240814, + "rewards/rejected": -0.34412309527397156, + "step": 12372 + }, + { + "epoch": 0.72, + "learning_rate": 1.9185745706624258e-08, + "logits/chosen": -1.9828864336013794, + "logits/rejected": -1.9700722694396973, + "logps/chosen": -0.28972524404525757, + "logps/rejected": -224.0646209716797, + "loss": 0.3546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018997156992554665, + "rewards/margins": 4.17685079574585, + "rewards/rejected": -4.157853603363037, + "step": 12373 + }, + { + "epoch": 0.72, + "learning_rate": 1.9178324617020746e-08, + "logits/chosen": -1.750417709350586, + "logits/rejected": -1.7492570877075195, + "logps/chosen": -54.147972106933594, + "logps/rejected": -143.50352478027344, + "loss": 0.2275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.354775309562683, + "rewards/margins": 1.3394783735275269, + "rewards/rejected": 0.01529693603515625, + "step": 12374 + }, + { + "epoch": 0.72, + "learning_rate": 1.917090462235391e-08, + "logits/chosen": -1.656151294708252, + "logits/rejected": -1.6624550819396973, + "logps/chosen": -1.2161856889724731, + "logps/rejected": -100.8264389038086, + "loss": 0.5742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06641288846731186, + "rewards/margins": 0.41972482204437256, + "rewards/rejected": -0.3533119261264801, + "step": 12375 + }, + { + "epoch": 0.72, + "learning_rate": 1.9163485722887342e-08, + "logits/chosen": -1.710223913192749, + "logits/rejected": -1.7143278121948242, + "logps/chosen": -253.03750610351562, + "logps/rejected": -333.3995666503906, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6293976306915283, + "rewards/margins": 0.23504018783569336, + "rewards/rejected": 2.394357442855835, + "step": 12376 + }, + { + "epoch": 0.72, + "learning_rate": 1.9156067918884616e-08, + "logits/chosen": -1.9050204753875732, + "logits/rejected": -1.8581302165985107, + "logps/chosen": -177.874267578125, + "logps/rejected": -321.50091552734375, + "loss": 0.1247, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.127188205718994, + "rewards/margins": 1.4252381324768066, + "rewards/rejected": 0.7019500732421875, + "step": 12377 + }, + { + "epoch": 0.72, + "learning_rate": 1.9148651210609216e-08, + "logits/chosen": -1.9566822052001953, + "logits/rejected": -1.9532814025878906, + "logps/chosen": -0.0781567394733429, + "logps/rejected": -41.339012145996094, + "loss": 0.6759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003007410792633891, + "rewards/margins": 0.03687753900885582, + "rewards/rejected": -0.03988495096564293, + "step": 12378 + }, + { + "epoch": 0.72, + "learning_rate": 1.9141235598324633e-08, + "logits/chosen": -1.6986711025238037, + "logits/rejected": -1.6855595111846924, + "logps/chosen": -210.01806640625, + "logps/rejected": -321.99652099609375, + "loss": 0.1539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9451431632041931, + "rewards/margins": 1.6268783807754517, + "rewards/rejected": -0.6817352175712585, + "step": 12379 + }, + { + "epoch": 0.72, + "learning_rate": 1.913382108229431e-08, + "logits/chosen": -1.7938019037246704, + "logits/rejected": -1.7788803577423096, + "logps/chosen": -137.61727905273438, + "logps/rejected": -403.3753967285156, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7829086184501648, + "rewards/margins": 1.0082015991210938, + "rewards/rejected": -0.22529296576976776, + "step": 12380 + }, + { + "epoch": 0.72, + "learning_rate": 1.912640766278166e-08, + "logits/chosen": -2.0581002235412598, + "logits/rejected": -2.0477168560028076, + "logps/chosen": -190.7450408935547, + "logps/rejected": -412.5094299316406, + "loss": 0.1029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4512557983398438, + "rewards/margins": 3.2093307971954346, + "rewards/rejected": -1.7580749988555908, + "step": 12381 + }, + { + "epoch": 0.72, + "learning_rate": 1.9118995340049998e-08, + "logits/chosen": -1.9144049882888794, + "logits/rejected": -1.9224135875701904, + "logps/chosen": -14.944083213806152, + "logps/rejected": -129.47537231445312, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4753603935241699, + "rewards/margins": 2.0327463150024414, + "rewards/rejected": -1.557386040687561, + "step": 12382 + }, + { + "epoch": 0.72, + "learning_rate": 1.911158411436271e-08, + "logits/chosen": -1.876436471939087, + "logits/rejected": -1.867634892463684, + "logps/chosen": -37.890525817871094, + "logps/rejected": -157.78634643554688, + "loss": 0.4547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25407907366752625, + "rewards/margins": 0.9805072546005249, + "rewards/rejected": -0.726428210735321, + "step": 12383 + }, + { + "epoch": 0.72, + "learning_rate": 1.910417398598303e-08, + "logits/chosen": -2.001469135284424, + "logits/rejected": -1.9919371604919434, + "logps/chosen": -6.002562046051025, + "logps/rejected": -122.29783630371094, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1873003989458084, + "rewards/margins": 3.3624813556671143, + "rewards/rejected": -3.1751809120178223, + "step": 12384 + }, + { + "epoch": 0.72, + "learning_rate": 1.9096764955174232e-08, + "logits/chosen": -1.8843090534210205, + "logits/rejected": -1.8757485151290894, + "logps/chosen": -248.71768188476562, + "logps/rejected": -332.2301025390625, + "loss": 0.2007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8311614990234375, + "rewards/margins": 0.8093048334121704, + "rewards/rejected": 1.021856665611267, + "step": 12385 + }, + { + "epoch": 0.72, + "learning_rate": 1.9089357022199477e-08, + "logits/chosen": -1.8865034580230713, + "logits/rejected": -1.8887073993682861, + "logps/chosen": -0.6945571303367615, + "logps/rejected": -157.7437286376953, + "loss": 0.4053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011712837032973766, + "rewards/margins": 1.9078688621520996, + "rewards/rejected": -1.919581651687622, + "step": 12386 + }, + { + "epoch": 0.72, + "learning_rate": 1.908195018732197e-08, + "logits/chosen": -1.918489694595337, + "logits/rejected": -1.9291718006134033, + "logps/chosen": -195.36907958984375, + "logps/rejected": -333.0158386230469, + "loss": 0.084, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7646607160568237, + "rewards/margins": 2.195089817047119, + "rewards/rejected": -0.430429071187973, + "step": 12387 + }, + { + "epoch": 0.72, + "learning_rate": 1.907454445080485e-08, + "logits/chosen": -1.6589431762695312, + "logits/rejected": -1.652921199798584, + "logps/chosen": -186.93402099609375, + "logps/rejected": -272.45257568359375, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.209797665476799, + "rewards/margins": 1.1035934686660767, + "rewards/rejected": -1.313391089439392, + "step": 12388 + }, + { + "epoch": 0.72, + "learning_rate": 1.906713981291117e-08, + "logits/chosen": -1.7388639450073242, + "logits/rejected": -1.7182493209838867, + "logps/chosen": -245.83987426757812, + "logps/rejected": -337.0460205078125, + "loss": 0.1814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6592620611190796, + "rewards/margins": 1.5557464361190796, + "rewards/rejected": 0.103515625, + "step": 12389 + }, + { + "epoch": 0.72, + "learning_rate": 1.9059736273903987e-08, + "logits/chosen": -1.8163676261901855, + "logits/rejected": -1.810562252998352, + "logps/chosen": -9.375149726867676, + "logps/rejected": -101.31505584716797, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1499929428100586, + "rewards/margins": 0.7116705179214478, + "rewards/rejected": -0.5616775751113892, + "step": 12390 + }, + { + "epoch": 0.72, + "learning_rate": 1.9052333834046325e-08, + "logits/chosen": -2.0615763664245605, + "logits/rejected": -2.054391622543335, + "logps/chosen": -41.25669860839844, + "logps/rejected": -128.96240234375, + "loss": 0.429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12941475212574005, + "rewards/margins": 1.7330867052078247, + "rewards/rejected": -1.8625015020370483, + "step": 12391 + }, + { + "epoch": 0.72, + "learning_rate": 1.9044932493601155e-08, + "logits/chosen": -1.9128665924072266, + "logits/rejected": -1.9066784381866455, + "logps/chosen": -0.9913895130157471, + "logps/rejected": -179.2086944580078, + "loss": 0.4683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029159653931856155, + "rewards/margins": 1.236515760421753, + "rewards/rejected": -1.26567542552948, + "step": 12392 + }, + { + "epoch": 0.72, + "learning_rate": 1.903753225283139e-08, + "logits/chosen": -1.9067896604537964, + "logits/rejected": -1.9187514781951904, + "logps/chosen": -50.59400939941406, + "logps/rejected": -269.1216735839844, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3349010646343231, + "rewards/margins": 2.10248064994812, + "rewards/rejected": -1.7675796747207642, + "step": 12393 + }, + { + "epoch": 0.72, + "learning_rate": 1.903013311199993e-08, + "logits/chosen": -1.5465120077133179, + "logits/rejected": -1.5557855367660522, + "logps/chosen": -189.66392517089844, + "logps/rejected": -359.7283020019531, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1375138759613037, + "rewards/margins": 2.9640915393829346, + "rewards/rejected": 0.17342224717140198, + "step": 12394 + }, + { + "epoch": 0.72, + "learning_rate": 1.902273507136963e-08, + "logits/chosen": -1.856324315071106, + "logits/rejected": -1.8605706691741943, + "logps/chosen": -15.87817096710205, + "logps/rejected": -168.71820068359375, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11865625530481339, + "rewards/margins": 2.287416934967041, + "rewards/rejected": -2.1687607765197754, + "step": 12395 + }, + { + "epoch": 0.72, + "learning_rate": 1.9015338131203323e-08, + "logits/chosen": -1.9873003959655762, + "logits/rejected": -1.9512763023376465, + "logps/chosen": -147.58526611328125, + "logps/rejected": -311.2791748046875, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0646514892578125, + "rewards/margins": 1.2909637689590454, + "rewards/rejected": -0.22631226480007172, + "step": 12396 + }, + { + "epoch": 0.72, + "learning_rate": 1.9007942291763756e-08, + "logits/chosen": -1.8889060020446777, + "logits/rejected": -1.9116071462631226, + "logps/chosen": -182.671142578125, + "logps/rejected": -341.81610107421875, + "loss": 0.1013, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0409836769104004, + "rewards/margins": 1.6951035261154175, + "rewards/rejected": 0.3458801209926605, + "step": 12397 + }, + { + "epoch": 0.72, + "learning_rate": 1.9000547553313672e-08, + "logits/chosen": -1.8476701974868774, + "logits/rejected": -1.8294070959091187, + "logps/chosen": -221.17294311523438, + "logps/rejected": -344.2310485839844, + "loss": 0.2396, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4398880004882812, + "rewards/margins": 0.561076283454895, + "rewards/rejected": 1.8788117170333862, + "step": 12398 + }, + { + "epoch": 0.72, + "learning_rate": 1.899315391611578e-08, + "logits/chosen": -1.7774651050567627, + "logits/rejected": -1.7978887557983398, + "logps/chosen": -184.7962646484375, + "logps/rejected": -375.79852294921875, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.55615234375, + "rewards/margins": 5.315744400024414, + "rewards/rejected": -3.759591817855835, + "step": 12399 + }, + { + "epoch": 0.72, + "learning_rate": 1.8985761380432747e-08, + "logits/chosen": -2.087074041366577, + "logits/rejected": -2.056039810180664, + "logps/chosen": -0.0038296568673104048, + "logps/rejected": -302.1857604980469, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010298291221261024, + "rewards/margins": 7.46783447265625, + "rewards/rejected": -7.457536220550537, + "step": 12400 + }, + { + "epoch": 0.72, + "learning_rate": 1.897836994652714e-08, + "logits/chosen": -2.048311710357666, + "logits/rejected": -2.0406394004821777, + "logps/chosen": -19.032724380493164, + "logps/rejected": -34.52353286743164, + "loss": 0.6374, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.13342933356761932, + "rewards/margins": -0.0015968382358551025, + "rewards/rejected": 0.13502617180347443, + "step": 12401 + }, + { + "epoch": 0.72, + "learning_rate": 1.897097961466161e-08, + "logits/chosen": -1.914204716682434, + "logits/rejected": -1.8886584043502808, + "logps/chosen": -314.15875244140625, + "logps/rejected": -523.115234375, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.429620385169983, + "rewards/margins": 2.7790863513946533, + "rewards/rejected": -1.3494659662246704, + "step": 12402 + }, + { + "epoch": 0.72, + "learning_rate": 1.8963590385098644e-08, + "logits/chosen": -2.0096373558044434, + "logits/rejected": -1.9964213371276855, + "logps/chosen": -15.15756893157959, + "logps/rejected": -153.75112915039062, + "loss": 0.4371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23268194496631622, + "rewards/margins": 0.5543387532234192, + "rewards/rejected": -0.3216567933559418, + "step": 12403 + }, + { + "epoch": 0.72, + "learning_rate": 1.8956202258100785e-08, + "logits/chosen": -1.7499223947525024, + "logits/rejected": -1.7826130390167236, + "logps/chosen": -200.98582458496094, + "logps/rejected": -317.0027160644531, + "loss": 0.2239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8706573843955994, + "rewards/margins": 0.9900268912315369, + "rewards/rejected": -0.1193695068359375, + "step": 12404 + }, + { + "epoch": 0.72, + "learning_rate": 1.8948815233930436e-08, + "logits/chosen": -1.9014482498168945, + "logits/rejected": -1.892143964767456, + "logps/chosen": -69.62027740478516, + "logps/rejected": -249.42691040039062, + "loss": 0.7222, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.19551239907741547, + "rewards/margins": -0.2695968747138977, + "rewards/rejected": 0.07408447563648224, + "step": 12405 + }, + { + "epoch": 0.72, + "learning_rate": 1.8941429312850092e-08, + "logits/chosen": -1.8366191387176514, + "logits/rejected": -1.8263286352157593, + "logps/chosen": -187.9115447998047, + "logps/rejected": -228.5728759765625, + "loss": 0.2815, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2725465297698975, + "rewards/margins": 0.3443634510040283, + "rewards/rejected": 2.928183078765869, + "step": 12406 + }, + { + "epoch": 0.72, + "learning_rate": 1.8934044495122088e-08, + "logits/chosen": -1.904759407043457, + "logits/rejected": -1.9010564088821411, + "logps/chosen": -27.97549057006836, + "logps/rejected": -113.99569702148438, + "loss": 0.3276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3097400665283203, + "rewards/margins": 1.528360366821289, + "rewards/rejected": -1.2186203002929688, + "step": 12407 + }, + { + "epoch": 0.72, + "learning_rate": 1.8926660781008786e-08, + "logits/chosen": -1.836387276649475, + "logits/rejected": -1.8277395963668823, + "logps/chosen": -201.2033233642578, + "logps/rejected": -366.726318359375, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.272058129310608, + "rewards/margins": 4.216907024383545, + "rewards/rejected": -2.9448487758636475, + "step": 12408 + }, + { + "epoch": 0.72, + "learning_rate": 1.8919278170772484e-08, + "logits/chosen": -1.8449748754501343, + "logits/rejected": -1.8508455753326416, + "logps/chosen": -2.819169044494629, + "logps/rejected": -172.15780639648438, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01994485966861248, + "rewards/margins": 5.909438133239746, + "rewards/rejected": -5.889493465423584, + "step": 12409 + }, + { + "epoch": 0.72, + "learning_rate": 1.8911896664675453e-08, + "logits/chosen": -1.779842495918274, + "logits/rejected": -1.7566825151443481, + "logps/chosen": -244.984130859375, + "logps/rejected": -531.0313110351562, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7742127180099487, + "rewards/margins": 6.978113174438477, + "rewards/rejected": -5.203900337219238, + "step": 12410 + }, + { + "epoch": 0.72, + "learning_rate": 1.890451626297994e-08, + "logits/chosen": -1.9235538244247437, + "logits/rejected": -1.9225362539291382, + "logps/chosen": -0.00013887110981158912, + "logps/rejected": -88.10652160644531, + "loss": 0.5178, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1706404115539044e-06, + "rewards/margins": 0.7774840593338013, + "rewards/rejected": -0.7774872183799744, + "step": 12411 + }, + { + "epoch": 0.72, + "learning_rate": 1.8897136965948095e-08, + "logits/chosen": -2.054551362991333, + "logits/rejected": -2.0416030883789062, + "logps/chosen": -43.685340881347656, + "logps/rejected": -189.70718383789062, + "loss": 0.4608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02097015455365181, + "rewards/margins": 1.2125930786132812, + "rewards/rejected": -1.1916229724884033, + "step": 12412 + }, + { + "epoch": 0.72, + "learning_rate": 1.888975877384209e-08, + "logits/chosen": -1.6577997207641602, + "logits/rejected": -1.6692569255828857, + "logps/chosen": -245.0345001220703, + "logps/rejected": -435.4964599609375, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5264175534248352, + "rewards/margins": 3.0089187622070312, + "rewards/rejected": -2.482501268386841, + "step": 12413 + }, + { + "epoch": 0.72, + "learning_rate": 1.8882381686924026e-08, + "logits/chosen": -1.9421464204788208, + "logits/rejected": -1.9361586570739746, + "logps/chosen": -59.923946380615234, + "logps/rejected": -189.85498046875, + "loss": 0.1952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1469646692276, + "rewards/margins": 1.827383041381836, + "rewards/rejected": -0.6804184317588806, + "step": 12414 + }, + { + "epoch": 0.72, + "learning_rate": 1.8875005705456e-08, + "logits/chosen": -1.8903144598007202, + "logits/rejected": -1.8849879503250122, + "logps/chosen": -10.521441459655762, + "logps/rejected": -121.51020812988281, + "loss": 0.4251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07459373772144318, + "rewards/margins": 1.4984050989151, + "rewards/rejected": -1.5729988813400269, + "step": 12415 + }, + { + "epoch": 0.72, + "learning_rate": 1.8867630829699998e-08, + "logits/chosen": -1.7325297594070435, + "logits/rejected": -1.725995659828186, + "logps/chosen": -58.812232971191406, + "logps/rejected": -194.90371704101562, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33695754408836365, + "rewards/margins": 1.1651008129119873, + "rewards/rejected": -0.828143298625946, + "step": 12416 + }, + { + "epoch": 0.72, + "learning_rate": 1.8860257059918033e-08, + "logits/chosen": -1.9243160486221313, + "logits/rejected": -1.9078396558761597, + "logps/chosen": -212.320068359375, + "logps/rejected": -396.7156066894531, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6345887184143066, + "rewards/margins": 4.1566925048828125, + "rewards/rejected": -1.5221039056777954, + "step": 12417 + }, + { + "epoch": 0.72, + "learning_rate": 1.8852884396372058e-08, + "logits/chosen": -1.953352451324463, + "logits/rejected": -1.9427474737167358, + "logps/chosen": -33.285430908203125, + "logps/rejected": -539.1158447265625, + "loss": 0.1832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5852470397949219, + "rewards/margins": 11.021288871765137, + "rewards/rejected": -10.436041831970215, + "step": 12418 + }, + { + "epoch": 0.72, + "learning_rate": 1.8845512839324007e-08, + "logits/chosen": -1.7520692348480225, + "logits/rejected": -1.7661609649658203, + "logps/chosen": -193.34429931640625, + "logps/rejected": -306.2742614746094, + "loss": 0.1359, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6144561767578125, + "rewards/margins": 1.4901123046875, + "rewards/rejected": 0.1243438720703125, + "step": 12419 + }, + { + "epoch": 0.72, + "learning_rate": 1.8838142389035693e-08, + "logits/chosen": -1.7204564809799194, + "logits/rejected": -1.715532898902893, + "logps/chosen": -0.1430143117904663, + "logps/rejected": -208.25421142578125, + "loss": 0.3462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005362872965633869, + "rewards/margins": 4.364412307739258, + "rewards/rejected": -4.369775295257568, + "step": 12420 + }, + { + "epoch": 0.72, + "learning_rate": 1.8830773045769034e-08, + "logits/chosen": -1.9480657577514648, + "logits/rejected": -1.939779281616211, + "logps/chosen": -1.9109169244766235, + "logps/rejected": -146.43008422851562, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05970185995101929, + "rewards/margins": 2.2521846294403076, + "rewards/rejected": -2.3118865489959717, + "step": 12421 + }, + { + "epoch": 0.72, + "learning_rate": 1.8823404809785758e-08, + "logits/chosen": -1.9485973119735718, + "logits/rejected": -1.954824686050415, + "logps/chosen": -190.83526611328125, + "logps/rejected": -301.03594970703125, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.019099473953247, + "rewards/margins": 2.5168471336364746, + "rewards/rejected": 0.5022522211074829, + "step": 12422 + }, + { + "epoch": 0.72, + "learning_rate": 1.8816037681347664e-08, + "logits/chosen": -1.7465473413467407, + "logits/rejected": -1.747750997543335, + "logps/chosen": -22.518827438354492, + "logps/rejected": -108.61065673828125, + "loss": 0.3909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17163467407226562, + "rewards/margins": 1.0772788524627686, + "rewards/rejected": -0.9056442379951477, + "step": 12423 + }, + { + "epoch": 0.72, + "learning_rate": 1.8808671660716408e-08, + "logits/chosen": -2.0371155738830566, + "logits/rejected": -2.0280611515045166, + "logps/chosen": -0.008067220449447632, + "logps/rejected": -156.68763732910156, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02001386694610119, + "rewards/margins": 3.534686803817749, + "rewards/rejected": -3.5146729946136475, + "step": 12424 + }, + { + "epoch": 0.72, + "learning_rate": 1.880130674815375e-08, + "logits/chosen": -1.930249810218811, + "logits/rejected": -1.924666404724121, + "logps/chosen": -80.80550384521484, + "logps/rejected": -238.9983673095703, + "loss": 0.2542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09101714938879013, + "rewards/margins": 4.349551677703857, + "rewards/rejected": -4.2585344314575195, + "step": 12425 + }, + { + "epoch": 0.72, + "learning_rate": 1.8793942943921265e-08, + "logits/chosen": -1.7468692064285278, + "logits/rejected": -1.7474303245544434, + "logps/chosen": -18.079866409301758, + "logps/rejected": -73.34259033203125, + "loss": 0.8011, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.30378514528274536, + "rewards/margins": -0.02964574098587036, + "rewards/rejected": -0.274139404296875, + "step": 12426 + }, + { + "epoch": 0.72, + "learning_rate": 1.8786580248280593e-08, + "logits/chosen": -1.9578979015350342, + "logits/rejected": -1.956559181213379, + "logps/chosen": -0.0012254337780177593, + "logps/rejected": -94.20484924316406, + "loss": 0.5376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.806927634286694e-05, + "rewards/margins": 0.7446930408477783, + "rewards/rejected": -0.7447311282157898, + "step": 12427 + }, + { + "epoch": 0.72, + "learning_rate": 1.8779218661493228e-08, + "logits/chosen": -1.8825690746307373, + "logits/rejected": -1.8937978744506836, + "logps/chosen": -150.48367309570312, + "logps/rejected": -247.90823364257812, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3387451171875, + "rewards/margins": 3.3518998622894287, + "rewards/rejected": -2.0131547451019287, + "step": 12428 + }, + { + "epoch": 0.72, + "learning_rate": 1.877185818382076e-08, + "logits/chosen": -1.8597444295883179, + "logits/rejected": -1.842513084411621, + "logps/chosen": -223.42703247070312, + "logps/rejected": -394.24444580078125, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0000885725021362, + "rewards/margins": 3.071948528289795, + "rewards/rejected": -2.071859836578369, + "step": 12429 + }, + { + "epoch": 0.72, + "learning_rate": 1.8764498815524653e-08, + "logits/chosen": -1.928688645362854, + "logits/rejected": -1.9317280054092407, + "logps/chosen": -16.36658477783203, + "logps/rejected": -96.16098022460938, + "loss": 0.5759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055933188647031784, + "rewards/margins": 0.3160528242588043, + "rewards/rejected": -0.26011964678764343, + "step": 12430 + }, + { + "epoch": 0.72, + "learning_rate": 1.875714055686633e-08, + "logits/chosen": -1.9041576385498047, + "logits/rejected": -1.9119187593460083, + "logps/chosen": -65.9388198852539, + "logps/rejected": -205.6283721923828, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6313850283622742, + "rewards/margins": 1.1319999694824219, + "rewards/rejected": -0.5006149411201477, + "step": 12431 + }, + { + "epoch": 0.72, + "learning_rate": 1.874978340810719e-08, + "logits/chosen": -2.0069782733917236, + "logits/rejected": -2.0071816444396973, + "logps/chosen": -85.63516235351562, + "logps/rejected": -379.2488708496094, + "loss": 0.078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4354759454727173, + "rewards/margins": 4.357762813568115, + "rewards/rejected": -2.9222869873046875, + "step": 12432 + }, + { + "epoch": 0.72, + "learning_rate": 1.874242736950861e-08, + "logits/chosen": -1.7968050241470337, + "logits/rejected": -1.8060314655303955, + "logps/chosen": -135.50894165039062, + "logps/rejected": -458.6280822753906, + "loss": 0.0471, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3038650751113892, + "rewards/margins": 4.467503547668457, + "rewards/rejected": -3.1636383533477783, + "step": 12433 + }, + { + "epoch": 0.72, + "learning_rate": 1.8735072441331924e-08, + "logits/chosen": -1.6516187191009521, + "logits/rejected": -1.7066786289215088, + "logps/chosen": -347.4933166503906, + "logps/rejected": -320.9577331542969, + "loss": 0.0974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4562164545059204, + "rewards/margins": 2.033038377761841, + "rewards/rejected": -0.5768219232559204, + "step": 12434 + }, + { + "epoch": 0.72, + "learning_rate": 1.872771862383838e-08, + "logits/chosen": -1.9261786937713623, + "logits/rejected": -1.9192441701889038, + "logps/chosen": -25.533523559570312, + "logps/rejected": -226.27220153808594, + "loss": 0.2385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3223686218261719, + "rewards/margins": 3.7365593910217285, + "rewards/rejected": -3.4141907691955566, + "step": 12435 + }, + { + "epoch": 0.72, + "learning_rate": 1.8720365917289237e-08, + "logits/chosen": -1.9854764938354492, + "logits/rejected": -1.9238289594650269, + "logps/chosen": -194.88414001464844, + "logps/rejected": -344.728515625, + "loss": 0.1123, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6676620244979858, + "rewards/margins": 2.982344150543213, + "rewards/rejected": -1.3146820068359375, + "step": 12436 + }, + { + "epoch": 0.72, + "learning_rate": 1.8713014321945707e-08, + "logits/chosen": -1.6480696201324463, + "logits/rejected": -1.6445610523223877, + "logps/chosen": -27.786882400512695, + "logps/rejected": -197.2487030029297, + "loss": 0.1987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8256646990776062, + "rewards/margins": 3.179102659225464, + "rewards/rejected": -2.353437900543213, + "step": 12437 + }, + { + "epoch": 0.72, + "learning_rate": 1.8705663838068964e-08, + "logits/chosen": -1.638127326965332, + "logits/rejected": -1.6137398481369019, + "logps/chosen": -185.789794921875, + "logps/rejected": -251.49520874023438, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3798096179962158, + "rewards/margins": 0.5909393429756165, + "rewards/rejected": 0.7888702750205994, + "step": 12438 + }, + { + "epoch": 0.72, + "learning_rate": 1.86983144659201e-08, + "logits/chosen": -1.868504285812378, + "logits/rejected": -1.8533648252487183, + "logps/chosen": -193.53921508789062, + "logps/rejected": -383.6809997558594, + "loss": 0.1359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.417398065328598, + "rewards/margins": 3.854327440261841, + "rewards/rejected": -3.43692946434021, + "step": 12439 + }, + { + "epoch": 0.72, + "learning_rate": 1.8690966205760222e-08, + "logits/chosen": -1.8423559665679932, + "logits/rejected": -1.8303970098495483, + "logps/chosen": -44.096595764160156, + "logps/rejected": -168.11880493164062, + "loss": 0.1067, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.734259009361267, + "rewards/margins": 2.394810438156128, + "rewards/rejected": -0.6605514883995056, + "step": 12440 + }, + { + "epoch": 0.72, + "learning_rate": 1.868361905785037e-08, + "logits/chosen": -2.012528419494629, + "logits/rejected": -2.013084650039673, + "logps/chosen": -69.0351333618164, + "logps/rejected": -187.91571044921875, + "loss": 0.174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2021797895431519, + "rewards/margins": 1.4191994667053223, + "rewards/rejected": -0.21701966226100922, + "step": 12441 + }, + { + "epoch": 0.72, + "learning_rate": 1.8676273022451577e-08, + "logits/chosen": -1.9768545627593994, + "logits/rejected": -1.971723198890686, + "logps/chosen": -30.825603485107422, + "logps/rejected": -171.1059112548828, + "loss": 0.5143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4964519441127777, + "rewards/margins": 0.24987296760082245, + "rewards/rejected": 0.24657897651195526, + "step": 12442 + }, + { + "epoch": 0.72, + "learning_rate": 1.866892809982475e-08, + "logits/chosen": -1.9880547523498535, + "logits/rejected": -1.9544179439544678, + "logps/chosen": -201.58575439453125, + "logps/rejected": -607.163818359375, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9874099493026733, + "rewards/margins": 9.581830978393555, + "rewards/rejected": -7.59442138671875, + "step": 12443 + }, + { + "epoch": 0.72, + "learning_rate": 1.8661584290230885e-08, + "logits/chosen": -1.9874647855758667, + "logits/rejected": -1.9795031547546387, + "logps/chosen": -19.54994773864746, + "logps/rejected": -145.9868927001953, + "loss": 0.2145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6029486060142517, + "rewards/margins": 4.0258026123046875, + "rewards/rejected": -3.422853946685791, + "step": 12444 + }, + { + "epoch": 0.72, + "learning_rate": 1.8654241593930824e-08, + "logits/chosen": -1.915612816810608, + "logits/rejected": -1.9172415733337402, + "logps/chosen": -38.8867301940918, + "logps/rejected": -189.21621704101562, + "loss": 0.278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.489056795835495, + "rewards/margins": 1.765166163444519, + "rewards/rejected": -1.2761093378067017, + "step": 12445 + }, + { + "epoch": 0.72, + "learning_rate": 1.8646900011185447e-08, + "logits/chosen": -1.8859686851501465, + "logits/rejected": -1.8912081718444824, + "logps/chosen": -0.04123930260539055, + "logps/rejected": -81.64578247070312, + "loss": 0.3518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0020095163490623236, + "rewards/margins": 2.3376665115356445, + "rewards/rejected": -2.3356568813323975, + "step": 12446 + }, + { + "epoch": 0.72, + "learning_rate": 1.8639559542255513e-08, + "logits/chosen": -1.9402241706848145, + "logits/rejected": -1.9355252981185913, + "logps/chosen": -0.49283257126808167, + "logps/rejected": -145.07101440429688, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02098560892045498, + "rewards/margins": 3.704989433288574, + "rewards/rejected": -3.7259750366210938, + "step": 12447 + }, + { + "epoch": 0.72, + "learning_rate": 1.8632220187401836e-08, + "logits/chosen": -1.9913679361343384, + "logits/rejected": -1.9864310026168823, + "logps/chosen": -43.3394889831543, + "logps/rejected": -296.83087158203125, + "loss": 0.1787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6189910769462585, + "rewards/margins": 4.063382148742676, + "rewards/rejected": -3.4443910121917725, + "step": 12448 + }, + { + "epoch": 0.72, + "learning_rate": 1.8624881946885163e-08, + "logits/chosen": -2.1015169620513916, + "logits/rejected": -2.145385503768921, + "logps/chosen": -215.95814514160156, + "logps/rejected": -391.42340087890625, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9530136585235596, + "rewards/margins": 3.8428757190704346, + "rewards/rejected": -1.889862060546875, + "step": 12449 + }, + { + "epoch": 0.72, + "learning_rate": 1.861754482096613e-08, + "logits/chosen": -2.034350633621216, + "logits/rejected": -2.02474045753479, + "logps/chosen": -229.9250030517578, + "logps/rejected": -383.62646484375, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7936477661132812, + "rewards/margins": 2.4124159812927246, + "rewards/rejected": 0.3812316954135895, + "step": 12450 + }, + { + "epoch": 0.72, + "learning_rate": 1.8610208809905415e-08, + "logits/chosen": -2.0306708812713623, + "logits/rejected": -2.016782522201538, + "logps/chosen": -11.792823791503906, + "logps/rejected": -278.2595520019531, + "loss": 0.2076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.645703136920929, + "rewards/margins": 7.280282974243164, + "rewards/rejected": -6.634579658508301, + "step": 12451 + }, + { + "epoch": 0.72, + "learning_rate": 1.8602873913963625e-08, + "logits/chosen": -1.7752265930175781, + "logits/rejected": -1.7324548959732056, + "logps/chosen": -195.160888671875, + "logps/rejected": -335.5722961425781, + "loss": 0.1433, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1340363025665283, + "rewards/margins": 1.1950256824493408, + "rewards/rejected": 0.9390106201171875, + "step": 12452 + }, + { + "epoch": 0.72, + "learning_rate": 1.8595540133401355e-08, + "logits/chosen": -2.0598433017730713, + "logits/rejected": -2.0643975734710693, + "logps/chosen": -15.68559741973877, + "logps/rejected": -73.20765686035156, + "loss": 0.7207, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5664410591125488, + "rewards/margins": -0.6029070615768433, + "rewards/rejected": 1.169348120689392, + "step": 12453 + }, + { + "epoch": 0.72, + "learning_rate": 1.8588207468479094e-08, + "logits/chosen": -1.821045994758606, + "logits/rejected": -1.7862308025360107, + "logps/chosen": -237.328857421875, + "logps/rejected": -593.69091796875, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9814698696136475, + "rewards/margins": 8.584375381469727, + "rewards/rejected": -5.6029052734375, + "step": 12454 + }, + { + "epoch": 0.72, + "learning_rate": 1.8580875919457357e-08, + "logits/chosen": -1.9366804361343384, + "logits/rejected": -1.9379116296768188, + "logps/chosen": -0.00011002764949807897, + "logps/rejected": -163.17881774902344, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.69401130026381e-06, + "rewards/margins": 3.911780834197998, + "rewards/rejected": -3.911778211593628, + "step": 12455 + }, + { + "epoch": 0.72, + "learning_rate": 1.8573545486596597e-08, + "logits/chosen": -1.9294672012329102, + "logits/rejected": -1.9359016418457031, + "logps/chosen": -5.901020050048828, + "logps/rejected": -187.8406982421875, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.060112860053777695, + "rewards/margins": 2.2837061882019043, + "rewards/rejected": -2.223593235015869, + "step": 12456 + }, + { + "epoch": 0.72, + "learning_rate": 1.856621617015724e-08, + "logits/chosen": -1.9875304698944092, + "logits/rejected": -1.9798128604888916, + "logps/chosen": -156.85678100585938, + "logps/rejected": -391.66680908203125, + "loss": 0.1111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3700913190841675, + "rewards/margins": 1.7925889492034912, + "rewards/rejected": -0.42249757051467896, + "step": 12457 + }, + { + "epoch": 0.72, + "learning_rate": 1.8558887970399628e-08, + "logits/chosen": -1.9379078149795532, + "logits/rejected": -1.930424451828003, + "logps/chosen": -0.0032102225814014673, + "logps/rejected": -102.05117797851562, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000288554176222533, + "rewards/margins": 2.9869048595428467, + "rewards/rejected": -2.9871933460235596, + "step": 12458 + }, + { + "epoch": 0.73, + "learning_rate": 1.8551560887584105e-08, + "logits/chosen": -2.054021120071411, + "logits/rejected": -2.054990530014038, + "logps/chosen": -21.471025466918945, + "logps/rejected": -152.34689331054688, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4505014419555664, + "rewards/margins": 0.8495371341705322, + "rewards/rejected": -0.39903566241264343, + "step": 12459 + }, + { + "epoch": 0.73, + "learning_rate": 1.8544234921970973e-08, + "logits/chosen": -1.848831057548523, + "logits/rejected": -1.8396352529525757, + "logps/chosen": -84.14582824707031, + "logps/rejected": -232.98748779296875, + "loss": 0.0854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7573639154434204, + "rewards/margins": 4.418377876281738, + "rewards/rejected": -2.6610138416290283, + "step": 12460 + }, + { + "epoch": 0.73, + "learning_rate": 1.8536910073820506e-08, + "logits/chosen": -1.9160447120666504, + "logits/rejected": -1.9288972616195679, + "logps/chosen": -41.33256530761719, + "logps/rejected": -199.0360870361328, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18329773843288422, + "rewards/margins": 2.0291380882263184, + "rewards/rejected": -1.8458404541015625, + "step": 12461 + }, + { + "epoch": 0.73, + "learning_rate": 1.8529586343392856e-08, + "logits/chosen": -1.8625171184539795, + "logits/rejected": -1.8637839555740356, + "logps/chosen": -195.02383422851562, + "logps/rejected": -344.0291748046875, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.871667504310608, + "rewards/margins": 4.215795993804932, + "rewards/rejected": -2.344128370285034, + "step": 12462 + }, + { + "epoch": 0.73, + "learning_rate": 1.8522263730948273e-08, + "logits/chosen": -1.786051630973816, + "logits/rejected": -1.7871744632720947, + "logps/chosen": -30.558326721191406, + "logps/rejected": -189.0131072998047, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040030479431152344, + "rewards/margins": 3.800656795501709, + "rewards/rejected": -3.7606263160705566, + "step": 12463 + }, + { + "epoch": 0.73, + "learning_rate": 1.851494223674684e-08, + "logits/chosen": -1.9928656816482544, + "logits/rejected": -1.9879462718963623, + "logps/chosen": -47.36936950683594, + "logps/rejected": -226.46864318847656, + "loss": 0.1722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.880719006061554, + "rewards/margins": 1.8333511352539062, + "rewards/rejected": -0.9526321291923523, + "step": 12464 + }, + { + "epoch": 0.73, + "learning_rate": 1.8507621861048683e-08, + "logits/chosen": -1.8345049619674683, + "logits/rejected": -1.8351655006408691, + "logps/chosen": -87.84846496582031, + "logps/rejected": -309.9033203125, + "loss": 0.1817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8418335318565369, + "rewards/margins": 3.559436082839966, + "rewards/rejected": -2.717602491378784, + "step": 12465 + }, + { + "epoch": 0.73, + "learning_rate": 1.8500302604113815e-08, + "logits/chosen": -1.8540409803390503, + "logits/rejected": -1.84940767288208, + "logps/chosen": -16.44610023498535, + "logps/rejected": -206.66815185546875, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19850865006446838, + "rewards/margins": 3.3550808429718018, + "rewards/rejected": -3.156572103500366, + "step": 12466 + }, + { + "epoch": 0.73, + "learning_rate": 1.849298446620231e-08, + "logits/chosen": -1.870428442955017, + "logits/rejected": -1.8752890825271606, + "logps/chosen": -82.46381378173828, + "logps/rejected": -382.76434326171875, + "loss": 0.2378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3881874084472656, + "rewards/margins": 4.644303321838379, + "rewards/rejected": -4.256115913391113, + "step": 12467 + }, + { + "epoch": 0.73, + "learning_rate": 1.84856674475741e-08, + "logits/chosen": -1.9673835039138794, + "logits/rejected": -1.9387778043746948, + "logps/chosen": -285.11920166015625, + "logps/rejected": -468.9160461425781, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.66522216796875, + "rewards/margins": 2.210763454437256, + "rewards/rejected": 0.454458624124527, + "step": 12468 + }, + { + "epoch": 0.73, + "learning_rate": 1.8478351548489135e-08, + "logits/chosen": -1.701446294784546, + "logits/rejected": -1.699318766593933, + "logps/chosen": -31.0533390045166, + "logps/rejected": -124.34589385986328, + "loss": 0.1741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9384927749633789, + "rewards/margins": 1.99343740940094, + "rewards/rejected": -1.054944634437561, + "step": 12469 + }, + { + "epoch": 0.73, + "learning_rate": 1.847103676920731e-08, + "logits/chosen": -1.9717072248458862, + "logits/rejected": -1.9657936096191406, + "logps/chosen": -71.9184341430664, + "logps/rejected": -143.87960815429688, + "loss": 0.5254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5497749447822571, + "rewards/margins": 0.1188911497592926, + "rewards/rejected": 0.4308837950229645, + "step": 12470 + }, + { + "epoch": 0.73, + "learning_rate": 1.8463723109988482e-08, + "logits/chosen": -2.0732433795928955, + "logits/rejected": -2.07556414604187, + "logps/chosen": -1.0547702312469482, + "logps/rejected": -156.35787963867188, + "loss": 0.3912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04184481501579285, + "rewards/margins": 2.0931131839752197, + "rewards/rejected": -2.134958028793335, + "step": 12471 + }, + { + "epoch": 0.73, + "learning_rate": 1.845641057109249e-08, + "logits/chosen": -1.8100388050079346, + "logits/rejected": -1.8020960092544556, + "logps/chosen": -30.620586395263672, + "logps/rejected": -251.06297302246094, + "loss": 0.2805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2592323422431946, + "rewards/margins": 2.6115469932556152, + "rewards/rejected": -2.3523147106170654, + "step": 12472 + }, + { + "epoch": 0.73, + "learning_rate": 1.8449099152779075e-08, + "logits/chosen": -1.9567372798919678, + "logits/rejected": -1.9552137851715088, + "logps/chosen": -18.402416229248047, + "logps/rejected": -89.63874816894531, + "loss": 0.583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08282814174890518, + "rewards/margins": 0.5668689608573914, + "rewards/rejected": -0.6496971249580383, + "step": 12473 + }, + { + "epoch": 0.73, + "learning_rate": 1.844178885530799e-08, + "logits/chosen": -1.7776696681976318, + "logits/rejected": -1.7841416597366333, + "logps/chosen": -335.3081359863281, + "logps/rejected": -422.5380859375, + "loss": 0.5328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3922271728515625, + "rewards/margins": 0.18597108125686646, + "rewards/rejected": -0.578198254108429, + "step": 12474 + }, + { + "epoch": 0.73, + "learning_rate": 1.843447967893893e-08, + "logits/chosen": -1.849657416343689, + "logits/rejected": -1.8739749193191528, + "logps/chosen": -205.34353637695312, + "logps/rejected": -380.92510986328125, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5288331508636475, + "rewards/margins": 2.4221558570861816, + "rewards/rejected": 0.10667724907398224, + "step": 12475 + }, + { + "epoch": 0.73, + "learning_rate": 1.842717162393157e-08, + "logits/chosen": -1.8925387859344482, + "logits/rejected": -1.8913456201553345, + "logps/chosen": -33.3010368347168, + "logps/rejected": -215.97653198242188, + "loss": 0.3298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.375043123960495, + "rewards/margins": 1.747226357460022, + "rewards/rejected": -1.3721832036972046, + "step": 12476 + }, + { + "epoch": 0.73, + "learning_rate": 1.84198646905455e-08, + "logits/chosen": -1.8597065210342407, + "logits/rejected": -1.7693431377410889, + "logps/chosen": -348.47454833984375, + "logps/rejected": -529.7673950195312, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.612689256668091, + "rewards/margins": 3.312884569168091, + "rewards/rejected": -0.7001953125, + "step": 12477 + }, + { + "epoch": 0.73, + "learning_rate": 1.8412558879040313e-08, + "logits/chosen": -1.7868661880493164, + "logits/rejected": -1.7949057817459106, + "logps/chosen": -32.829315185546875, + "logps/rejected": -155.0816650390625, + "loss": 0.4367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5963398218154907, + "rewards/margins": 0.36518594622612, + "rewards/rejected": 0.23115387558937073, + "step": 12478 + }, + { + "epoch": 0.73, + "learning_rate": 1.8405254189675545e-08, + "logits/chosen": -1.992687702178955, + "logits/rejected": -1.9672075510025024, + "logps/chosen": -206.945068359375, + "logps/rejected": -304.4701843261719, + "loss": 0.1398, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0511200428009033, + "rewards/margins": 1.2355163097381592, + "rewards/rejected": 0.8156036734580994, + "step": 12479 + }, + { + "epoch": 0.73, + "learning_rate": 1.839795062271071e-08, + "logits/chosen": -1.8741005659103394, + "logits/rejected": -1.8746877908706665, + "logps/chosen": -11.361529350280762, + "logps/rejected": -228.80929565429688, + "loss": 0.3183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2547730505466461, + "rewards/margins": 5.054296970367432, + "rewards/rejected": -4.799523830413818, + "step": 12480 + }, + { + "epoch": 0.73, + "learning_rate": 1.8390648178405232e-08, + "logits/chosen": -1.8911879062652588, + "logits/rejected": -1.9118555784225464, + "logps/chosen": -288.70220947265625, + "logps/rejected": -321.25067138671875, + "loss": 0.1762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.781768798828125, + "rewards/margins": 1.043035864830017, + "rewards/rejected": 0.7387329339981079, + "step": 12481 + }, + { + "epoch": 0.73, + "learning_rate": 1.8383346857018557e-08, + "logits/chosen": -1.882073163986206, + "logits/rejected": -1.8834127187728882, + "logps/chosen": -0.0003518547164276242, + "logps/rejected": -49.633087158203125, + "loss": 0.5388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00019780742877628654, + "rewards/margins": 0.7216726541519165, + "rewards/rejected": -0.721474826335907, + "step": 12482 + }, + { + "epoch": 0.73, + "learning_rate": 1.8376046658810047e-08, + "logits/chosen": -1.929890751838684, + "logits/rejected": -1.925948977470398, + "logps/chosen": -17.681663513183594, + "logps/rejected": -128.64459228515625, + "loss": 0.2237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7425290942192078, + "rewards/margins": 2.641369342803955, + "rewards/rejected": -1.898840308189392, + "step": 12483 + }, + { + "epoch": 0.73, + "learning_rate": 1.836874758403907e-08, + "logits/chosen": -1.9848289489746094, + "logits/rejected": -1.9678704738616943, + "logps/chosen": -47.144386291503906, + "logps/rejected": -256.4432067871094, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029095077887177467, + "rewards/margins": 2.678144693374634, + "rewards/rejected": -2.6490495204925537, + "step": 12484 + }, + { + "epoch": 0.73, + "learning_rate": 1.8361449632964864e-08, + "logits/chosen": -2.0255162715911865, + "logits/rejected": -2.023359537124634, + "logps/chosen": -11.776467323303223, + "logps/rejected": -189.62051391601562, + "loss": 0.3641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.268752783536911, + "rewards/margins": 1.5192350149154663, + "rewards/rejected": -1.250482201576233, + "step": 12485 + }, + { + "epoch": 0.73, + "learning_rate": 1.8354152805846767e-08, + "logits/chosen": -1.835641860961914, + "logits/rejected": -1.824256181716919, + "logps/chosen": -0.0012604641960933805, + "logps/rejected": -58.80439376831055, + "loss": 0.637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002339669968932867, + "rewards/margins": 0.21551716327667236, + "rewards/rejected": -0.21317748725414276, + "step": 12486 + }, + { + "epoch": 0.73, + "learning_rate": 1.8346857102943947e-08, + "logits/chosen": -2.0816524028778076, + "logits/rejected": -2.0814480781555176, + "logps/chosen": -4.999927043914795, + "logps/rejected": -37.2205810546875, + "loss": 0.5947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1895003765821457, + "rewards/margins": 0.06317667663097382, + "rewards/rejected": 0.12632369995117188, + "step": 12487 + }, + { + "epoch": 0.73, + "learning_rate": 1.8339562524515607e-08, + "logits/chosen": -1.6406747102737427, + "logits/rejected": -1.6371324062347412, + "logps/chosen": -120.48467254638672, + "logps/rejected": -183.30865478515625, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.914350152015686, + "rewards/margins": 3.511136531829834, + "rewards/rejected": -1.5967864990234375, + "step": 12488 + }, + { + "epoch": 0.73, + "learning_rate": 1.833226907082084e-08, + "logits/chosen": -2.1082348823547363, + "logits/rejected": -2.1030709743499756, + "logps/chosen": -3.694572687149048, + "logps/rejected": -196.0300750732422, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.060727573931217194, + "rewards/margins": 4.4889702796936035, + "rewards/rejected": -4.4282426834106445, + "step": 12489 + }, + { + "epoch": 0.73, + "learning_rate": 1.832497674211879e-08, + "logits/chosen": -1.9910626411437988, + "logits/rejected": -1.989490032196045, + "logps/chosen": -8.973010063171387, + "logps/rejected": -176.2178955078125, + "loss": 0.4643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3781636357307434, + "rewards/margins": 0.48244068026542664, + "rewards/rejected": -0.10427703708410263, + "step": 12490 + }, + { + "epoch": 0.73, + "learning_rate": 1.8317685538668533e-08, + "logits/chosen": -2.095789670944214, + "logits/rejected": -2.0978901386260986, + "logps/chosen": -21.997358322143555, + "logps/rejected": -96.21673583984375, + "loss": 0.6013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010803795419633389, + "rewards/margins": 0.29812031984329224, + "rewards/rejected": -0.3089241087436676, + "step": 12491 + }, + { + "epoch": 0.73, + "learning_rate": 1.831039546072904e-08, + "logits/chosen": -1.777933955192566, + "logits/rejected": -1.7714234590530396, + "logps/chosen": -0.23233440518379211, + "logps/rejected": -266.6075744628906, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017876073718070984, + "rewards/margins": 5.035889625549316, + "rewards/rejected": -5.053765773773193, + "step": 12492 + }, + { + "epoch": 0.73, + "learning_rate": 1.830310650855931e-08, + "logits/chosen": -1.7766366004943848, + "logits/rejected": -1.7887117862701416, + "logps/chosen": -214.8502960205078, + "logps/rejected": -394.3824462890625, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8222427368164062, + "rewards/margins": 4.791935920715332, + "rewards/rejected": -1.9696930646896362, + "step": 12493 + }, + { + "epoch": 0.73, + "learning_rate": 1.829581868241828e-08, + "logits/chosen": -2.0501716136932373, + "logits/rejected": -2.0507099628448486, + "logps/chosen": -110.27302551269531, + "logps/rejected": -284.3536071777344, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15659332275390625, + "rewards/margins": 2.887141466140747, + "rewards/rejected": -2.730548143386841, + "step": 12494 + }, + { + "epoch": 0.73, + "learning_rate": 1.8288531982564875e-08, + "logits/chosen": -1.6258872747421265, + "logits/rejected": -1.6200897693634033, + "logps/chosen": -53.4860954284668, + "logps/rejected": -192.7244873046875, + "loss": 0.4102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6415420770645142, + "rewards/margins": 0.22947540879249573, + "rewards/rejected": 0.41206666827201843, + "step": 12495 + }, + { + "epoch": 0.73, + "learning_rate": 1.828124640925791e-08, + "logits/chosen": -1.9485423564910889, + "logits/rejected": -1.9461263418197632, + "logps/chosen": -289.9001770019531, + "logps/rejected": -304.1640625, + "loss": 0.3516, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.299090623855591, + "rewards/margins": 0.01503300666809082, + "rewards/rejected": 2.2840576171875, + "step": 12496 + }, + { + "epoch": 0.73, + "learning_rate": 1.827396196275623e-08, + "logits/chosen": -2.0277087688446045, + "logits/rejected": -2.0215139389038086, + "logps/chosen": -130.02122497558594, + "logps/rejected": -244.53375244140625, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23304520547389984, + "rewards/margins": 4.391723155975342, + "rewards/rejected": -4.624768257141113, + "step": 12497 + }, + { + "epoch": 0.73, + "learning_rate": 1.8266678643318606e-08, + "logits/chosen": -1.9460691213607788, + "logits/rejected": -1.9471287727355957, + "logps/chosen": -20.833147048950195, + "logps/rejected": -170.49398803710938, + "loss": 0.3039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24928168952465057, + "rewards/margins": 1.1818439960479736, + "rewards/rejected": -0.9325622916221619, + "step": 12498 + }, + { + "epoch": 0.73, + "learning_rate": 1.8259396451203802e-08, + "logits/chosen": -1.894644856452942, + "logits/rejected": -1.8877549171447754, + "logps/chosen": -1.9462151527404785, + "logps/rejected": -245.21847534179688, + "loss": 0.3785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.059358835220336914, + "rewards/margins": 2.8031930923461914, + "rewards/rejected": -2.8625519275665283, + "step": 12499 + }, + { + "epoch": 0.73, + "learning_rate": 1.8252115386670475e-08, + "logits/chosen": -1.8558603525161743, + "logits/rejected": -1.863604187965393, + "logps/chosen": -204.0642547607422, + "logps/rejected": -293.42120361328125, + "loss": 0.1374, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6825973987579346, + "rewards/margins": 1.2215378284454346, + "rewards/rejected": 1.4610595703125, + "step": 12500 + }, + { + "epoch": 0.73, + "learning_rate": 1.824483544997731e-08, + "logits/chosen": -1.8723034858703613, + "logits/rejected": -1.839263916015625, + "logps/chosen": -211.64718627929688, + "logps/rejected": -418.4882507324219, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0554795265197754, + "rewards/margins": 3.305720567703247, + "rewards/rejected": -0.25024110078811646, + "step": 12501 + }, + { + "epoch": 0.73, + "learning_rate": 1.8237556641382918e-08, + "logits/chosen": -1.9112465381622314, + "logits/rejected": -1.9068711996078491, + "logps/chosen": -3.323213815689087, + "logps/rejected": -98.40573120117188, + "loss": 0.3941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07552926987409592, + "rewards/margins": 1.4619885683059692, + "rewards/rejected": -1.3864593505859375, + "step": 12502 + }, + { + "epoch": 0.73, + "learning_rate": 1.8230278961145895e-08, + "logits/chosen": -1.8965574502944946, + "logits/rejected": -1.8850032091140747, + "logps/chosen": -161.56961059570312, + "logps/rejected": -494.5970458984375, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.123278856277466, + "rewards/margins": 7.230990409851074, + "rewards/rejected": -5.1077117919921875, + "step": 12503 + }, + { + "epoch": 0.73, + "learning_rate": 1.8223002409524735e-08, + "logits/chosen": -2.1644952297210693, + "logits/rejected": -2.161956787109375, + "logps/chosen": -3.773332118988037, + "logps/rejected": -80.93158721923828, + "loss": 0.4152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11934874206781387, + "rewards/margins": 2.506542444229126, + "rewards/rejected": -2.6258912086486816, + "step": 12504 + }, + { + "epoch": 0.73, + "learning_rate": 1.8215726986778002e-08, + "logits/chosen": -1.8333667516708374, + "logits/rejected": -1.8328633308410645, + "logps/chosen": -206.53228759765625, + "logps/rejected": -364.457763671875, + "loss": 0.2661, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7845399379730225, + "rewards/margins": 0.39235854148864746, + "rewards/rejected": 2.392181396484375, + "step": 12505 + }, + { + "epoch": 0.73, + "learning_rate": 1.82084526931641e-08, + "logits/chosen": -1.9263631105422974, + "logits/rejected": -1.919162392616272, + "logps/chosen": -40.69767379760742, + "logps/rejected": -200.83270263671875, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0030796050559729338, + "rewards/margins": 3.47192120552063, + "rewards/rejected": -3.468841552734375, + "step": 12506 + }, + { + "epoch": 0.73, + "learning_rate": 1.820117952894149e-08, + "logits/chosen": -1.878551959991455, + "logits/rejected": -1.8681464195251465, + "logps/chosen": -64.15159606933594, + "logps/rejected": -199.83888244628906, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7444450259208679, + "rewards/margins": 2.418581485748291, + "rewards/rejected": -1.6741364002227783, + "step": 12507 + }, + { + "epoch": 0.73, + "learning_rate": 1.8193907494368488e-08, + "logits/chosen": -1.9648396968841553, + "logits/rejected": -2.006350517272949, + "logps/chosen": -172.06161499023438, + "logps/rejected": -346.26861572265625, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2512176036834717, + "rewards/margins": 4.065865993499756, + "rewards/rejected": -0.814648449420929, + "step": 12508 + }, + { + "epoch": 0.73, + "learning_rate": 1.818663658970349e-08, + "logits/chosen": -1.9494210481643677, + "logits/rejected": -1.927261471748352, + "logps/chosen": -8.07101058959961, + "logps/rejected": -168.37908935546875, + "loss": 0.2022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4119948446750641, + "rewards/margins": 4.055009365081787, + "rewards/rejected": -3.643014669418335, + "step": 12509 + }, + { + "epoch": 0.73, + "learning_rate": 1.81793668152048e-08, + "logits/chosen": -1.8751623630523682, + "logits/rejected": -1.8599449396133423, + "logps/chosen": -91.13252258300781, + "logps/rejected": -208.1261749267578, + "loss": 0.2013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.377050757408142, + "rewards/margins": 1.3523681163787842, + "rewards/rejected": 0.02468261681497097, + "step": 12510 + }, + { + "epoch": 0.73, + "learning_rate": 1.817209817113063e-08, + "logits/chosen": -2.0378618240356445, + "logits/rejected": -2.0298049449920654, + "logps/chosen": -110.94742584228516, + "logps/rejected": -231.5127410888672, + "loss": 0.4546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06584091484546661, + "rewards/margins": 0.6340171694755554, + "rewards/rejected": -0.56817626953125, + "step": 12511 + }, + { + "epoch": 0.73, + "learning_rate": 1.816483065773922e-08, + "logits/chosen": -1.9921175241470337, + "logits/rejected": -1.9875209331512451, + "logps/chosen": -53.59300231933594, + "logps/rejected": -213.7176055908203, + "loss": 0.1942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0007282495498657, + "rewards/margins": 3.1687674522399902, + "rewards/rejected": -2.168039083480835, + "step": 12512 + }, + { + "epoch": 0.73, + "learning_rate": 1.815756427528875e-08, + "logits/chosen": -1.9295624494552612, + "logits/rejected": -1.930109977722168, + "logps/chosen": -0.0001718933053780347, + "logps/rejected": -51.74633026123047, + "loss": 0.5625, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4393502573948354e-05, + "rewards/margins": 0.6129807233810425, + "rewards/rejected": -0.6129463315010071, + "step": 12513 + }, + { + "epoch": 0.73, + "learning_rate": 1.815029902403737e-08, + "logits/chosen": -1.8165315389633179, + "logits/rejected": -1.8046343326568604, + "logps/chosen": -45.75213623046875, + "logps/rejected": -270.24395751953125, + "loss": 0.2045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.760623574256897, + "rewards/margins": 5.195152759552002, + "rewards/rejected": -4.4345293045043945, + "step": 12514 + }, + { + "epoch": 0.73, + "learning_rate": 1.8143034904243147e-08, + "logits/chosen": -1.9180761575698853, + "logits/rejected": -1.9181982278823853, + "logps/chosen": -134.32574462890625, + "logps/rejected": -288.8885803222656, + "loss": 0.1439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8423981070518494, + "rewards/margins": 1.85691237449646, + "rewards/rejected": -1.0145142078399658, + "step": 12515 + }, + { + "epoch": 0.73, + "learning_rate": 1.813577191616415e-08, + "logits/chosen": -1.9246501922607422, + "logits/rejected": -1.9064754247665405, + "logps/chosen": -85.6441879272461, + "logps/rejected": -535.7260131835938, + "loss": 0.1013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1572258472442627, + "rewards/margins": 10.009697914123535, + "rewards/rejected": -8.852472305297852, + "step": 12516 + }, + { + "epoch": 0.73, + "learning_rate": 1.8128510060058404e-08, + "logits/chosen": -2.0157206058502197, + "logits/rejected": -1.982424020767212, + "logps/chosen": -149.40570068359375, + "logps/rejected": -530.0903930664062, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6222991943359375, + "rewards/margins": 4.487713813781738, + "rewards/rejected": -2.8654143810272217, + "step": 12517 + }, + { + "epoch": 0.73, + "learning_rate": 1.8121249336183897e-08, + "logits/chosen": -1.7941393852233887, + "logits/rejected": -1.796859622001648, + "logps/chosen": -0.033233702182769775, + "logps/rejected": -67.99252319335938, + "loss": 0.5756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0031335868407040834, + "rewards/margins": 0.3669847548007965, + "rewards/rejected": -0.3638511598110199, + "step": 12518 + }, + { + "epoch": 0.73, + "learning_rate": 1.811398974479853e-08, + "logits/chosen": -1.8899489641189575, + "logits/rejected": -1.8837409019470215, + "logps/chosen": -8.885978698730469, + "logps/rejected": -239.18841552734375, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11511268466711044, + "rewards/margins": 3.951805591583252, + "rewards/rejected": -3.8366928100585938, + "step": 12519 + }, + { + "epoch": 0.73, + "learning_rate": 1.810673128616022e-08, + "logits/chosen": -1.6442042589187622, + "logits/rejected": -1.6404954195022583, + "logps/chosen": -170.4755859375, + "logps/rejected": -365.47406005859375, + "loss": 0.1607, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.32267165184021, + "rewards/margins": 1.096750020980835, + "rewards/rejected": 1.225921630859375, + "step": 12520 + }, + { + "epoch": 0.73, + "learning_rate": 1.8099473960526823e-08, + "logits/chosen": -1.8108972311019897, + "logits/rejected": -1.7736127376556396, + "logps/chosen": -232.83322143554688, + "logps/rejected": -564.564208984375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1914641857147217, + "rewards/margins": 6.7516937255859375, + "rewards/rejected": -3.560229539871216, + "step": 12521 + }, + { + "epoch": 0.73, + "learning_rate": 1.8092217768156164e-08, + "logits/chosen": -1.9512743949890137, + "logits/rejected": -1.946894884109497, + "logps/chosen": -0.38710567355155945, + "logps/rejected": -179.34127807617188, + "loss": 0.3469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00800087209790945, + "rewards/margins": 4.6976494789123535, + "rewards/rejected": -4.689648628234863, + "step": 12522 + }, + { + "epoch": 0.73, + "learning_rate": 1.8084962709305983e-08, + "logits/chosen": -1.980239987373352, + "logits/rejected": -1.9741392135620117, + "logps/chosen": -262.63671875, + "logps/rejected": -367.45355224609375, + "loss": 0.112, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2021241188049316, + "rewards/margins": 1.7092286348342896, + "rewards/rejected": 0.4928955137729645, + "step": 12523 + }, + { + "epoch": 0.73, + "learning_rate": 1.8077708784234068e-08, + "logits/chosen": -1.5694890022277832, + "logits/rejected": -1.5535268783569336, + "logps/chosen": -23.54462432861328, + "logps/rejected": -166.60504150390625, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3218395411968231, + "rewards/margins": 3.687084674835205, + "rewards/rejected": -3.3652451038360596, + "step": 12524 + }, + { + "epoch": 0.73, + "learning_rate": 1.807045599319807e-08, + "logits/chosen": -1.7471009492874146, + "logits/rejected": -1.7478927373886108, + "logps/chosen": -40.20877456665039, + "logps/rejected": -183.92604064941406, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3356441259384155, + "rewards/margins": 3.569457530975342, + "rewards/rejected": -2.233813524246216, + "step": 12525 + }, + { + "epoch": 0.73, + "learning_rate": 1.8063204336455678e-08, + "logits/chosen": -1.6897629499435425, + "logits/rejected": -1.7640255689620972, + "logps/chosen": -198.286865234375, + "logps/rejected": -358.37841796875, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.964398145675659, + "rewards/margins": 3.8089356422424316, + "rewards/rejected": -0.8445373773574829, + "step": 12526 + }, + { + "epoch": 0.73, + "learning_rate": 1.8055953814264453e-08, + "logits/chosen": -1.9393976926803589, + "logits/rejected": -1.9455238580703735, + "logps/chosen": -0.1376236081123352, + "logps/rejected": -119.19109344482422, + "loss": 0.3707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021361657418310642, + "rewards/margins": 2.943777561187744, + "rewards/rejected": -2.945913791656494, + "step": 12527 + }, + { + "epoch": 0.73, + "learning_rate": 1.8048704426882033e-08, + "logits/chosen": -1.7781010866165161, + "logits/rejected": -1.766349196434021, + "logps/chosen": -278.10992431640625, + "logps/rejected": -326.65142822265625, + "loss": 0.4758, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10732422024011612, + "rewards/margins": -0.01022949069738388, + "rewards/rejected": -0.09709472954273224, + "step": 12528 + }, + { + "epoch": 0.73, + "learning_rate": 1.8041456174565912e-08, + "logits/chosen": -1.9266493320465088, + "logits/rejected": -1.8301498889923096, + "logps/chosen": -248.57952880859375, + "logps/rejected": -682.3880615234375, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.98325514793396, + "rewards/margins": 3.476407051086426, + "rewards/rejected": -0.49315187335014343, + "step": 12529 + }, + { + "epoch": 0.73, + "learning_rate": 1.8034209057573592e-08, + "logits/chosen": -1.9816086292266846, + "logits/rejected": -1.9781179428100586, + "logps/chosen": -6.254010200500488, + "logps/rejected": -220.25633239746094, + "loss": 0.4395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1687653511762619, + "rewards/margins": 2.617781162261963, + "rewards/rejected": -2.786546468734741, + "step": 12530 + }, + { + "epoch": 0.73, + "learning_rate": 1.8026963076162527e-08, + "logits/chosen": -1.768892526626587, + "logits/rejected": -1.822136640548706, + "logps/chosen": -213.0143585205078, + "logps/rejected": -281.090087890625, + "loss": 0.1441, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8153457641601562, + "rewards/margins": 1.2983901500701904, + "rewards/rejected": 0.516955554485321, + "step": 12531 + }, + { + "epoch": 0.73, + "learning_rate": 1.801971823059013e-08, + "logits/chosen": -1.9437752962112427, + "logits/rejected": -1.9433238506317139, + "logps/chosen": -0.47591155767440796, + "logps/rejected": -59.52183532714844, + "loss": 0.632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035155344754457474, + "rewards/margins": 0.16143059730529785, + "rewards/rejected": -0.12627525627613068, + "step": 12532 + }, + { + "epoch": 0.73, + "learning_rate": 1.8012474521113792e-08, + "logits/chosen": -1.9700713157653809, + "logits/rejected": -1.975022792816162, + "logps/chosen": -29.78593635559082, + "logps/rejected": -219.6216583251953, + "loss": 0.2925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2673215866088867, + "rewards/margins": 3.2779510021209717, + "rewards/rejected": -3.010629415512085, + "step": 12533 + }, + { + "epoch": 0.73, + "learning_rate": 1.8005231947990806e-08, + "logits/chosen": -1.752471685409546, + "logits/rejected": -1.764634370803833, + "logps/chosen": -234.62051391601562, + "logps/rejected": -368.6192626953125, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8020782470703125, + "rewards/margins": 3.442910671234131, + "rewards/rejected": -0.6408325433731079, + "step": 12534 + }, + { + "epoch": 0.73, + "learning_rate": 1.799799051147849e-08, + "logits/chosen": -1.9524486064910889, + "logits/rejected": -1.9540948867797852, + "logps/chosen": -66.83279418945312, + "logps/rejected": -168.73016357421875, + "loss": 0.3265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.219737246632576, + "rewards/margins": 1.7156035900115967, + "rewards/rejected": -1.9353408813476562, + "step": 12535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7990750211834088e-08, + "logits/chosen": -2.0196335315704346, + "logits/rejected": -2.029083251953125, + "logps/chosen": -39.11106872558594, + "logps/rejected": -129.17703247070312, + "loss": 0.2497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7272476553916931, + "rewards/margins": 2.22257924079895, + "rewards/rejected": -1.4953316450119019, + "step": 12536 + }, + { + "epoch": 0.73, + "learning_rate": 1.798351104931483e-08, + "logits/chosen": -2.0678489208221436, + "logits/rejected": -2.0547797679901123, + "logps/chosen": -106.20271301269531, + "logps/rejected": -187.42605590820312, + "loss": 0.2579, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3332207202911377, + "rewards/margins": 0.9566582441329956, + "rewards/rejected": 0.3765625059604645, + "step": 12537 + }, + { + "epoch": 0.73, + "learning_rate": 1.797627302417785e-08, + "logits/chosen": -1.8323408365249634, + "logits/rejected": -1.8337041139602661, + "logps/chosen": -0.005171208642423153, + "logps/rejected": -143.3199920654297, + "loss": 0.3811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004913985612802207, + "rewards/margins": 2.760087490081787, + "rewards/rejected": -2.7605788707733154, + "step": 12538 + }, + { + "epoch": 0.73, + "learning_rate": 1.79690361366803e-08, + "logits/chosen": -1.795803427696228, + "logits/rejected": -1.788823127746582, + "logps/chosen": -31.090911865234375, + "logps/rejected": -175.09017944335938, + "loss": 0.5152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08076057583093643, + "rewards/margins": 0.6358478665351868, + "rewards/rejected": -0.5550872683525085, + "step": 12539 + }, + { + "epoch": 0.73, + "learning_rate": 1.7961800387079262e-08, + "logits/chosen": -1.7970117330551147, + "logits/rejected": -1.7898143529891968, + "logps/chosen": -52.04916763305664, + "logps/rejected": -316.7774353027344, + "loss": 0.1691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7430923581123352, + "rewards/margins": 5.084272861480713, + "rewards/rejected": -4.341180324554443, + "step": 12540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7954565775631813e-08, + "logits/chosen": -1.6990693807601929, + "logits/rejected": -1.7026420831680298, + "logps/chosen": -17.061403274536133, + "logps/rejected": -266.48394775390625, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19370651245117188, + "rewards/margins": 3.063692569732666, + "rewards/rejected": -2.869986057281494, + "step": 12541 + }, + { + "epoch": 0.73, + "learning_rate": 1.794733230259492e-08, + "logits/chosen": -2.0471909046173096, + "logits/rejected": -2.0376622676849365, + "logps/chosen": -58.14698791503906, + "logps/rejected": -270.46234130859375, + "loss": 0.0971, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4231895208358765, + "rewards/margins": 5.22274112701416, + "rewards/rejected": -3.799551486968994, + "step": 12542 + }, + { + "epoch": 0.73, + "learning_rate": 1.794009996822557e-08, + "logits/chosen": -2.1073966026306152, + "logits/rejected": -2.106034278869629, + "logps/chosen": -14.573474884033203, + "logps/rejected": -225.69180297851562, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27711278200149536, + "rewards/margins": 2.466172218322754, + "rewards/rejected": -2.1890594959259033, + "step": 12543 + }, + { + "epoch": 0.73, + "learning_rate": 1.793286877278069e-08, + "logits/chosen": -2.011213779449463, + "logits/rejected": -2.015148162841797, + "logps/chosen": -11.877445220947266, + "logps/rejected": -122.04057312011719, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4256557524204254, + "rewards/margins": 1.2118549346923828, + "rewards/rejected": -0.7861992120742798, + "step": 12544 + }, + { + "epoch": 0.73, + "learning_rate": 1.7925638716517182e-08, + "logits/chosen": -2.045544147491455, + "logits/rejected": -2.0265817642211914, + "logps/chosen": -13.435901641845703, + "logps/rejected": -234.158935546875, + "loss": 0.2221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6292701959609985, + "rewards/margins": 4.183390140533447, + "rewards/rejected": -3.554119825363159, + "step": 12545 + }, + { + "epoch": 0.73, + "learning_rate": 1.7918409799691852e-08, + "logits/chosen": -2.0337512493133545, + "logits/rejected": -2.025651216506958, + "logps/chosen": -0.2905435264110565, + "logps/rejected": -86.31881713867188, + "loss": 0.5226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024227295070886612, + "rewards/margins": 0.7247858047485352, + "rewards/rejected": -0.7005584836006165, + "step": 12546 + }, + { + "epoch": 0.73, + "learning_rate": 1.7911182022561565e-08, + "logits/chosen": -1.657462477684021, + "logits/rejected": -1.6552445888519287, + "logps/chosen": -133.0050048828125, + "logps/rejected": -210.57821655273438, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.692999243736267, + "rewards/margins": 0.38586270809173584, + "rewards/rejected": 1.3071365356445312, + "step": 12547 + }, + { + "epoch": 0.73, + "learning_rate": 1.7903955385383042e-08, + "logits/chosen": -1.8311986923217773, + "logits/rejected": -1.8226181268692017, + "logps/chosen": -51.004905700683594, + "logps/rejected": -335.646484375, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0007644891738892, + "rewards/margins": 5.6763410568237305, + "rewards/rejected": -4.675576686859131, + "step": 12548 + }, + { + "epoch": 0.73, + "learning_rate": 1.7896729888413036e-08, + "logits/chosen": -1.745278000831604, + "logits/rejected": -1.7538154125213623, + "logps/chosen": -299.4964599609375, + "logps/rejected": -447.2996826171875, + "loss": 0.0392, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.350811719894409, + "rewards/margins": 2.882488965988159, + "rewards/rejected": -0.53167724609375, + "step": 12549 + }, + { + "epoch": 0.73, + "learning_rate": 1.7889505531908184e-08, + "logits/chosen": -2.1308021545410156, + "logits/rejected": -2.144355535507202, + "logps/chosen": -151.31077575683594, + "logps/rejected": -301.11505126953125, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18581543862819672, + "rewards/margins": 0.9225311875343323, + "rewards/rejected": -0.7367157340049744, + "step": 12550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7882282316125184e-08, + "logits/chosen": -1.9190107583999634, + "logits/rejected": -1.918872594833374, + "logps/chosen": -6.945444107055664, + "logps/rejected": -310.93798828125, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029723644256591797, + "rewards/margins": 5.106219291687012, + "rewards/rejected": -5.07649564743042, + "step": 12551 + }, + { + "epoch": 0.73, + "learning_rate": 1.787506024132064e-08, + "logits/chosen": -2.0083765983581543, + "logits/rejected": -2.001990795135498, + "logps/chosen": -97.99678039550781, + "logps/rejected": -204.34002685546875, + "loss": 0.1299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6723381280899048, + "rewards/margins": 2.0772910118103027, + "rewards/rejected": -0.4049530029296875, + "step": 12552 + }, + { + "epoch": 0.73, + "learning_rate": 1.786783930775107e-08, + "logits/chosen": -1.763516902923584, + "logits/rejected": -1.757972240447998, + "logps/chosen": -106.53150939941406, + "logps/rejected": -313.54736328125, + "loss": 0.1591, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0076416730880737, + "rewards/margins": 2.423520088195801, + "rewards/rejected": -1.4158782958984375, + "step": 12553 + }, + { + "epoch": 0.73, + "learning_rate": 1.786061951567303e-08, + "logits/chosen": -2.0073039531707764, + "logits/rejected": -2.009124517440796, + "logps/chosen": -0.6821198463439941, + "logps/rejected": -166.1988983154297, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021791711449623108, + "rewards/margins": 2.510591983795166, + "rewards/rejected": -2.532383680343628, + "step": 12554 + }, + { + "epoch": 0.73, + "learning_rate": 1.7853400865343e-08, + "logits/chosen": -2.019545793533325, + "logits/rejected": -2.018540143966675, + "logps/chosen": -31.970203399658203, + "logps/rejected": -193.0557403564453, + "loss": 0.2436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5636600852012634, + "rewards/margins": 2.5013294219970703, + "rewards/rejected": -1.9376693964004517, + "step": 12555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7846183357017424e-08, + "logits/chosen": -1.8841224908828735, + "logits/rejected": -1.8616316318511963, + "logps/chosen": -46.386016845703125, + "logps/rejected": -388.0452575683594, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1563388854265213, + "rewards/margins": 6.43703031539917, + "rewards/rejected": -6.280691623687744, + "step": 12556 + }, + { + "epoch": 0.73, + "learning_rate": 1.7838966990952687e-08, + "logits/chosen": -1.72312593460083, + "logits/rejected": -1.7228140830993652, + "logps/chosen": -42.52044677734375, + "logps/rejected": -394.2276611328125, + "loss": 0.2096, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5856372714042664, + "rewards/margins": 6.723179817199707, + "rewards/rejected": -6.137542724609375, + "step": 12557 + }, + { + "epoch": 0.73, + "learning_rate": 1.7831751767405157e-08, + "logits/chosen": -1.9259040355682373, + "logits/rejected": -1.9219534397125244, + "logps/chosen": -0.0001310041843680665, + "logps/rejected": -165.94017028808594, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.340098141459748e-06, + "rewards/margins": 3.6250603199005127, + "rewards/rejected": -3.625065565109253, + "step": 12558 + }, + { + "epoch": 0.73, + "learning_rate": 1.782453768663116e-08, + "logits/chosen": -1.9171041250228882, + "logits/rejected": -1.9070919752120972, + "logps/chosen": -15.351587295532227, + "logps/rejected": -226.88644409179688, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08713217079639435, + "rewards/margins": 3.9582245349884033, + "rewards/rejected": -4.045356750488281, + "step": 12559 + }, + { + "epoch": 0.73, + "learning_rate": 1.781732474888698e-08, + "logits/chosen": -1.786848783493042, + "logits/rejected": -1.7923362255096436, + "logps/chosen": -191.53631591796875, + "logps/rejected": -412.0860595703125, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3864043951034546, + "rewards/margins": 3.622897148132324, + "rewards/rejected": -2.236492872238159, + "step": 12560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7810112954428835e-08, + "logits/chosen": -2.0877206325531006, + "logits/rejected": -2.08123779296875, + "logps/chosen": -0.00025009195087477565, + "logps/rejected": -76.84791564941406, + "loss": 0.9423, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0132302122656256e-05, + "rewards/margins": -0.8294016122817993, + "rewards/rejected": 0.8293914794921875, + "step": 12561 + }, + { + "epoch": 0.73, + "learning_rate": 1.7802902303512934e-08, + "logits/chosen": -1.8303505182266235, + "logits/rejected": -1.9086923599243164, + "logps/chosen": -370.652099609375, + "logps/rejected": -552.8202514648438, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1389405727386475, + "rewards/margins": 4.08253812789917, + "rewards/rejected": -1.943597435951233, + "step": 12562 + }, + { + "epoch": 0.73, + "learning_rate": 1.779569279639544e-08, + "logits/chosen": -1.876725673675537, + "logits/rejected": -1.8796929121017456, + "logps/chosen": -122.81626892089844, + "logps/rejected": -227.17837524414062, + "loss": 0.0485, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6025986671447754, + "rewards/margins": 2.429225206375122, + "rewards/rejected": 0.17337341606616974, + "step": 12563 + }, + { + "epoch": 0.73, + "learning_rate": 1.778848443333248e-08, + "logits/chosen": -1.9271667003631592, + "logits/rejected": -1.9168145656585693, + "logps/chosen": -32.937339782714844, + "logps/rejected": -176.23214721679688, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36460572481155396, + "rewards/margins": 2.1063506603240967, + "rewards/rejected": -1.7417449951171875, + "step": 12564 + }, + { + "epoch": 0.73, + "learning_rate": 1.7781277214580082e-08, + "logits/chosen": -1.8690738677978516, + "logits/rejected": -1.8788269758224487, + "logps/chosen": -112.21807861328125, + "logps/rejected": -218.33062744140625, + "loss": 0.2645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1791473627090454, + "rewards/margins": 1.1707123517990112, + "rewards/rejected": 0.008435058407485485, + "step": 12565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7774071140394355e-08, + "logits/chosen": -2.073495864868164, + "logits/rejected": -2.06895112991333, + "logps/chosen": -42.1091423034668, + "logps/rejected": -266.75396728515625, + "loss": 0.2079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49141961336135864, + "rewards/margins": 2.857926607131958, + "rewards/rejected": -2.366507053375244, + "step": 12566 + }, + { + "epoch": 0.73, + "learning_rate": 1.7766866211031234e-08, + "logits/chosen": -2.0425801277160645, + "logits/rejected": -2.0390024185180664, + "logps/chosen": -48.0625, + "logps/rejected": -204.48020935058594, + "loss": 0.1619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7641693353652954, + "rewards/margins": 3.5398759841918945, + "rewards/rejected": -2.7757065296173096, + "step": 12567 + }, + { + "epoch": 0.73, + "learning_rate": 1.7759662426746713e-08, + "logits/chosen": -1.81028413772583, + "logits/rejected": -1.8115872144699097, + "logps/chosen": -0.0101316561922431, + "logps/rejected": -115.48478698730469, + "loss": 0.5806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006198345217853785, + "rewards/margins": 0.6373814344406128, + "rewards/rejected": -0.6380012631416321, + "step": 12568 + }, + { + "epoch": 0.73, + "learning_rate": 1.7752459787796654e-08, + "logits/chosen": -1.9131141901016235, + "logits/rejected": -1.9204710721969604, + "logps/chosen": -258.3464050292969, + "logps/rejected": -496.48333740234375, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.880929708480835, + "rewards/margins": 4.176144599914551, + "rewards/rejected": -1.2952148914337158, + "step": 12569 + }, + { + "epoch": 0.73, + "learning_rate": 1.7745258294436978e-08, + "logits/chosen": -1.9752711057662964, + "logits/rejected": -1.9654461145401, + "logps/chosen": -250.36212158203125, + "logps/rejected": -475.7794189453125, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.074505567550659, + "rewards/margins": 5.341723442077637, + "rewards/rejected": -3.2672181129455566, + "step": 12570 + }, + { + "epoch": 0.73, + "learning_rate": 1.7738057946923518e-08, + "logits/chosen": -1.90669846534729, + "logits/rejected": -1.8973802328109741, + "logps/chosen": -34.45105743408203, + "logps/rejected": -193.34706115722656, + "loss": 0.2126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6296440362930298, + "rewards/margins": 1.0350944995880127, + "rewards/rejected": 0.5945495963096619, + "step": 12571 + }, + { + "epoch": 0.73, + "learning_rate": 1.7730858745512035e-08, + "logits/chosen": -1.911913514137268, + "logits/rejected": -1.8886322975158691, + "logps/chosen": -157.89663696289062, + "logps/rejected": -240.93199157714844, + "loss": 0.2269, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1335647106170654, + "rewards/margins": 0.8243666887283325, + "rewards/rejected": 1.309198021888733, + "step": 12572 + }, + { + "epoch": 0.73, + "learning_rate": 1.7723660690458293e-08, + "logits/chosen": -1.6706135272979736, + "logits/rejected": -1.6691964864730835, + "logps/chosen": -191.1984100341797, + "logps/rejected": -218.31314086914062, + "loss": 0.5666, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.169677734375, + "rewards/margins": -0.28445130586624146, + "rewards/rejected": 0.45412904024124146, + "step": 12573 + }, + { + "epoch": 0.73, + "learning_rate": 1.7716463782018004e-08, + "logits/chosen": -2.028254270553589, + "logits/rejected": -2.0259952545166016, + "logps/chosen": -4.020203590393066, + "logps/rejected": -125.64553833007812, + "loss": 0.4138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06105170398950577, + "rewards/margins": 2.2024967670440674, + "rewards/rejected": -2.2635483741760254, + "step": 12574 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709268020446855e-08, + "logits/chosen": -1.906829595565796, + "logits/rejected": -1.9416478872299194, + "logps/chosen": -189.31591796875, + "logps/rejected": -465.5734558105469, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.000924825668335, + "rewards/margins": 7.271990776062012, + "rewards/rejected": -5.271066188812256, + "step": 12575 + }, + { + "epoch": 0.73, + "learning_rate": 1.7702073406000446e-08, + "logits/chosen": -1.9569191932678223, + "logits/rejected": -1.9521424770355225, + "logps/chosen": -55.70298385620117, + "logps/rejected": -153.7566680908203, + "loss": 1.4114, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.5136559009552002, + "rewards/margins": -1.1724556684494019, + "rewards/rejected": -0.3412002623081207, + "step": 12576 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694879938934366e-08, + "logits/chosen": -2.0420892238616943, + "logits/rejected": -2.0462942123413086, + "logps/chosen": -0.001740762498229742, + "logps/rejected": -280.1055908203125, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.591490091523156e-05, + "rewards/margins": 4.898731708526611, + "rewards/rejected": -4.898797512054443, + "step": 12577 + }, + { + "epoch": 0.73, + "learning_rate": 1.7687687619504177e-08, + "logits/chosen": -1.7326924800872803, + "logits/rejected": -1.6751633882522583, + "logps/chosen": -258.61822509765625, + "logps/rejected": -491.47991943359375, + "loss": 0.1107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7330108880996704, + "rewards/margins": 2.4021148681640625, + "rewards/rejected": -0.6691040396690369, + "step": 12578 + }, + { + "epoch": 0.73, + "learning_rate": 1.7680496447965398e-08, + "logits/chosen": -1.5220869779586792, + "logits/rejected": -1.5206058025360107, + "logps/chosen": -104.13175964355469, + "logps/rejected": -322.29949951171875, + "loss": 0.1003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6208282709121704, + "rewards/margins": 6.3434295654296875, + "rewards/rejected": -4.722601413726807, + "step": 12579 + }, + { + "epoch": 0.73, + "learning_rate": 1.767330642457346e-08, + "logits/chosen": -1.783607840538025, + "logits/rejected": -1.799935221672058, + "logps/chosen": -170.74969482421875, + "logps/rejected": -264.6090087890625, + "loss": 0.1221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8527679443359375, + "rewards/margins": 1.4549713134765625, + "rewards/rejected": 0.397796630859375, + "step": 12580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7666117549583803e-08, + "logits/chosen": -1.8724217414855957, + "logits/rejected": -1.8818564414978027, + "logps/chosen": -302.9150695800781, + "logps/rejected": -539.1931762695312, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6572418212890625, + "rewards/margins": 5.613995552062988, + "rewards/rejected": -2.9567534923553467, + "step": 12581 + }, + { + "epoch": 0.73, + "learning_rate": 1.7658929823251812e-08, + "logits/chosen": -1.9705698490142822, + "logits/rejected": -1.966954231262207, + "logps/chosen": -12.16629409790039, + "logps/rejected": -75.6448745727539, + "loss": 0.4877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10083675384521484, + "rewards/margins": 0.8193945288658142, + "rewards/rejected": -0.7185577750205994, + "step": 12582 + }, + { + "epoch": 0.73, + "learning_rate": 1.765174324583285e-08, + "logits/chosen": -1.9040485620498657, + "logits/rejected": -1.9333521127700806, + "logps/chosen": -226.13096618652344, + "logps/rejected": -341.22430419921875, + "loss": 0.0764, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.155409336090088, + "rewards/margins": 1.881282091140747, + "rewards/rejected": 1.2741272449493408, + "step": 12583 + }, + { + "epoch": 0.73, + "learning_rate": 1.7644557817582185e-08, + "logits/chosen": -2.016965627670288, + "logits/rejected": -2.020439863204956, + "logps/chosen": -271.3639831542969, + "logps/rejected": -393.5756530761719, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.9208953380584717, + "rewards/margins": 4.260693550109863, + "rewards/rejected": -0.3397979736328125, + "step": 12584 + }, + { + "epoch": 0.73, + "learning_rate": 1.7637373538755092e-08, + "logits/chosen": -1.7168422937393188, + "logits/rejected": -1.6605541706085205, + "logps/chosen": -188.95034790039062, + "logps/rejected": -500.3863830566406, + "loss": 0.1097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.245869517326355, + "rewards/margins": 3.8132004737854004, + "rewards/rejected": -2.567331075668335, + "step": 12585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7630190409606787e-08, + "logits/chosen": -1.8548232316970825, + "logits/rejected": -1.8561313152313232, + "logps/chosen": -5.304777005221695e-05, + "logps/rejected": -229.83282470703125, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8000399677475798e-06, + "rewards/margins": 5.4092936515808105, + "rewards/rejected": -5.409295558929443, + "step": 12586 + }, + { + "epoch": 0.73, + "learning_rate": 1.7623008430392483e-08, + "logits/chosen": -1.8468554019927979, + "logits/rejected": -1.8506022691726685, + "logps/chosen": -231.880126953125, + "logps/rejected": -373.0006103515625, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9129822254180908, + "rewards/margins": 3.653338670730591, + "rewards/rejected": -1.7403564453125, + "step": 12587 + }, + { + "epoch": 0.73, + "learning_rate": 1.7615827601367255e-08, + "logits/chosen": -1.9044933319091797, + "logits/rejected": -1.9075261354446411, + "logps/chosen": -15.140850067138672, + "logps/rejected": -54.81247329711914, + "loss": 0.8002, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3697570860385895, + "rewards/margins": -0.11220511794090271, + "rewards/rejected": -0.25755196809768677, + "step": 12588 + }, + { + "epoch": 0.73, + "learning_rate": 1.7608647922786273e-08, + "logits/chosen": -2.1555123329162598, + "logits/rejected": -2.154270887374878, + "logps/chosen": -0.00010657189704943448, + "logps/rejected": -121.09195709228516, + "loss": 0.4726, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.017294031655183e-06, + "rewards/margins": 1.190926194190979, + "rewards/rejected": -1.1909302473068237, + "step": 12589 + }, + { + "epoch": 0.73, + "learning_rate": 1.7601469394904545e-08, + "logits/chosen": -1.8904255628585815, + "logits/rejected": -1.8926719427108765, + "logps/chosen": -30.195293426513672, + "logps/rejected": -150.16555786132812, + "loss": 0.2054, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1073204278945923, + "rewards/margins": 1.6568657159805298, + "rewards/rejected": -0.5495452880859375, + "step": 12590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7594292017977095e-08, + "logits/chosen": -1.8902490139007568, + "logits/rejected": -1.9000147581100464, + "logps/chosen": -294.8167724609375, + "logps/rejected": -434.7486572265625, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7521790266036987, + "rewards/margins": 4.310284614562988, + "rewards/rejected": -2.55810546875, + "step": 12591 + }, + { + "epoch": 0.73, + "learning_rate": 1.7587115792258917e-08, + "logits/chosen": -1.8818172216415405, + "logits/rejected": -1.8492921590805054, + "logps/chosen": -179.3089599609375, + "logps/rejected": -367.39849853515625, + "loss": 0.2151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6338653564453125, + "rewards/margins": 1.3529174327850342, + "rewards/rejected": -0.7190521359443665, + "step": 12592 + }, + { + "epoch": 0.73, + "learning_rate": 1.7579940718004928e-08, + "logits/chosen": -2.1108672618865967, + "logits/rejected": -2.1046769618988037, + "logps/chosen": -74.7971420288086, + "logps/rejected": -259.91058349609375, + "loss": 0.1361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0077415704727173, + "rewards/margins": 4.1901116371154785, + "rewards/rejected": -3.1823699474334717, + "step": 12593 + }, + { + "epoch": 0.73, + "learning_rate": 1.7572766795470044e-08, + "logits/chosen": -1.8370178937911987, + "logits/rejected": -1.836001992225647, + "logps/chosen": -22.512710571289062, + "logps/rejected": -88.53108215332031, + "loss": 0.5984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32789650559425354, + "rewards/margins": 1.3412929773330688, + "rewards/rejected": -1.669189453125, + "step": 12594 + }, + { + "epoch": 0.73, + "learning_rate": 1.7565594024909085e-08, + "logits/chosen": -1.9215863943099976, + "logits/rejected": -1.9238771200180054, + "logps/chosen": -76.84766387939453, + "logps/rejected": -187.36300659179688, + "loss": 0.2061, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0526748895645142, + "rewards/margins": 2.3099989891052246, + "rewards/rejected": -1.25732421875, + "step": 12595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7558422406576883e-08, + "logits/chosen": -1.8156158924102783, + "logits/rejected": -1.8209834098815918, + "logps/chosen": -201.07534790039062, + "logps/rejected": -272.1650695800781, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.182081699371338, + "rewards/margins": 2.401139974594116, + "rewards/rejected": -0.21905823051929474, + "step": 12596 + }, + { + "epoch": 0.73, + "learning_rate": 1.7551251940728208e-08, + "logits/chosen": -1.8759634494781494, + "logits/rejected": -1.8687505722045898, + "logps/chosen": -147.75906372070312, + "logps/rejected": -406.1711120605469, + "loss": 0.0445, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.21164870262146, + "rewards/margins": 2.6970856189727783, + "rewards/rejected": -0.4854370057582855, + "step": 12597 + }, + { + "epoch": 0.73, + "learning_rate": 1.7544082627617802e-08, + "logits/chosen": -2.053344249725342, + "logits/rejected": -2.044743061065674, + "logps/chosen": -14.013993263244629, + "logps/rejected": -197.76382446289062, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3429597020149231, + "rewards/margins": 4.458029747009277, + "rewards/rejected": -4.11506986618042, + "step": 12598 + }, + { + "epoch": 0.73, + "learning_rate": 1.7536914467500323e-08, + "logits/chosen": -1.895823359489441, + "logits/rejected": -1.8855797052383423, + "logps/chosen": -21.043869018554688, + "logps/rejected": -191.2419891357422, + "loss": 0.4225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10490379482507706, + "rewards/margins": 1.6939159631729126, + "rewards/rejected": -1.5890121459960938, + "step": 12599 + }, + { + "epoch": 0.73, + "learning_rate": 1.752974746063044e-08, + "logits/chosen": -1.7189321517944336, + "logits/rejected": -1.7617758512496948, + "logps/chosen": -352.30487060546875, + "logps/rejected": -387.7265930175781, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.671307325363159, + "rewards/margins": 3.88625168800354, + "rewards/rejected": -0.21494446694850922, + "step": 12600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7522581607262755e-08, + "logits/chosen": -2.0178873538970947, + "logits/rejected": -2.006443738937378, + "logps/chosen": -93.56442260742188, + "logps/rejected": -320.18963623046875, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.83489990234375, + "rewards/margins": 4.264819145202637, + "rewards/rejected": -2.429919481277466, + "step": 12601 + }, + { + "epoch": 0.73, + "learning_rate": 1.7515416907651848e-08, + "logits/chosen": -1.913883090019226, + "logits/rejected": -1.9142833948135376, + "logps/chosen": -0.0014355505118146539, + "logps/rejected": -273.21685791015625, + "loss": 0.3682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012419663835316896, + "rewards/margins": 2.9544014930725098, + "rewards/rejected": -2.9545257091522217, + "step": 12602 + }, + { + "epoch": 0.73, + "learning_rate": 1.7508253362052218e-08, + "logits/chosen": -1.8899885416030884, + "logits/rejected": -1.8111765384674072, + "logps/chosen": -251.59420776367188, + "logps/rejected": -401.7099609375, + "loss": 0.156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4977569580078125, + "rewards/margins": 1.9423919916152954, + "rewards/rejected": -0.4446350038051605, + "step": 12603 + }, + { + "epoch": 0.73, + "learning_rate": 1.7501090970718364e-08, + "logits/chosen": -1.6812952756881714, + "logits/rejected": -1.6890642642974854, + "logps/chosen": -164.0857696533203, + "logps/rejected": -224.55186462402344, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.167872667312622, + "rewards/margins": 0.3097594976425171, + "rewards/rejected": 1.858113169670105, + "step": 12604 + }, + { + "epoch": 0.73, + "learning_rate": 1.749392973390472e-08, + "logits/chosen": -1.871835470199585, + "logits/rejected": -1.8699147701263428, + "logps/chosen": -133.24615478515625, + "logps/rejected": -256.95330810546875, + "loss": 0.0975, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6257812976837158, + "rewards/margins": 2.1888856887817383, + "rewards/rejected": -0.5631042718887329, + "step": 12605 + }, + { + "epoch": 0.73, + "learning_rate": 1.7486769651865722e-08, + "logits/chosen": -1.6486741304397583, + "logits/rejected": -1.6617348194122314, + "logps/chosen": -263.6636657714844, + "logps/rejected": -459.7648620605469, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.60740065574646, + "rewards/margins": 3.7165651321411133, + "rewards/rejected": -1.1091644763946533, + "step": 12606 + }, + { + "epoch": 0.73, + "learning_rate": 1.747961072485567e-08, + "logits/chosen": -1.9747921228408813, + "logits/rejected": -1.9741629362106323, + "logps/chosen": -10.977530479431152, + "logps/rejected": -161.65396118164062, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06276064366102219, + "rewards/margins": 2.100440740585327, + "rewards/rejected": -2.037680149078369, + "step": 12607 + }, + { + "epoch": 0.73, + "learning_rate": 1.7472452953128953e-08, + "logits/chosen": -1.9219781160354614, + "logits/rejected": -1.9118151664733887, + "logps/chosen": -199.16677856445312, + "logps/rejected": -341.330810546875, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1544647216796875, + "rewards/margins": 3.9798707962036133, + "rewards/rejected": -1.8254059553146362, + "step": 12608 + }, + { + "epoch": 0.73, + "learning_rate": 1.74652963369398e-08, + "logits/chosen": -2.082628011703491, + "logits/rejected": -2.08329176902771, + "logps/chosen": -40.74504852294922, + "logps/rejected": -205.597412109375, + "loss": 0.5272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4293327331542969, + "rewards/margins": 0.04006728529930115, + "rewards/rejected": 0.3892654478549957, + "step": 12609 + }, + { + "epoch": 0.73, + "learning_rate": 1.745814087654248e-08, + "logits/chosen": -1.8481197357177734, + "logits/rejected": -1.8423373699188232, + "logps/chosen": -4.5287184715271, + "logps/rejected": -173.89767456054688, + "loss": 0.51, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05028596147894859, + "rewards/margins": 0.819295346736908, + "rewards/rejected": -0.7690094113349915, + "step": 12610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7450986572191145e-08, + "logits/chosen": -1.7655872106552124, + "logits/rejected": -1.734541893005371, + "logps/chosen": -244.26181030273438, + "logps/rejected": -577.1113891601562, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9591797590255737, + "rewards/margins": 6.242901802062988, + "rewards/rejected": -4.283721923828125, + "step": 12611 + }, + { + "epoch": 0.73, + "learning_rate": 1.7443833424139996e-08, + "logits/chosen": -1.928315281867981, + "logits/rejected": -1.920804738998413, + "logps/chosen": -43.769622802734375, + "logps/rejected": -181.282958984375, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21618042886257172, + "rewards/margins": 2.4604172706604004, + "rewards/rejected": -2.6765975952148438, + "step": 12612 + }, + { + "epoch": 0.73, + "learning_rate": 1.743668143264315e-08, + "logits/chosen": -1.5993446111679077, + "logits/rejected": -1.5937641859054565, + "logps/chosen": -169.91009521484375, + "logps/rejected": -293.2746887207031, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7059967517852783, + "rewards/margins": 3.878558397293091, + "rewards/rejected": -1.1725616455078125, + "step": 12613 + }, + { + "epoch": 0.73, + "learning_rate": 1.742953059795465e-08, + "logits/chosen": -1.7021911144256592, + "logits/rejected": -1.701865315437317, + "logps/chosen": -15.8687105178833, + "logps/rejected": -123.43927001953125, + "loss": 0.3815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10102853924036026, + "rewards/margins": 1.6328843832015991, + "rewards/rejected": -1.531855821609497, + "step": 12614 + }, + { + "epoch": 0.73, + "learning_rate": 1.742238092032854e-08, + "logits/chosen": -1.8955531120300293, + "logits/rejected": -1.8994919061660767, + "logps/chosen": -303.36114501953125, + "logps/rejected": -439.95587158203125, + "loss": 0.2453, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.926950216293335, + "rewards/margins": 0.4707062244415283, + "rewards/rejected": 3.4562439918518066, + "step": 12615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7415232400018815e-08, + "logits/chosen": -2.066129446029663, + "logits/rejected": -2.0496177673339844, + "logps/chosen": -62.72771453857422, + "logps/rejected": -284.6368408203125, + "loss": 0.1667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3669578731060028, + "rewards/margins": 4.1143059730529785, + "rewards/rejected": -3.7473480701446533, + "step": 12616 + }, + { + "epoch": 0.73, + "learning_rate": 1.7408085037279442e-08, + "logits/chosen": -1.8070545196533203, + "logits/rejected": -1.8094316720962524, + "logps/chosen": -33.13323974609375, + "logps/rejected": -100.86286926269531, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1828891783952713, + "rewards/margins": 1.5047138929367065, + "rewards/rejected": -1.3218246698379517, + "step": 12617 + }, + { + "epoch": 0.73, + "learning_rate": 1.7400938832364294e-08, + "logits/chosen": -2.0242807865142822, + "logits/rejected": -2.0119574069976807, + "logps/chosen": -10.422290802001953, + "logps/rejected": -269.1366271972656, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09157915413379669, + "rewards/margins": 2.4722371101379395, + "rewards/rejected": -2.380657911300659, + "step": 12618 + }, + { + "epoch": 0.73, + "learning_rate": 1.7393793785527255e-08, + "logits/chosen": -1.8591145277023315, + "logits/rejected": -1.827527403831482, + "logps/chosen": -165.38436889648438, + "logps/rejected": -390.87652587890625, + "loss": 0.3394, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5657929182052612, + "rewards/margins": 0.1663665771484375, + "rewards/rejected": 1.3994263410568237, + "step": 12619 + }, + { + "epoch": 0.73, + "learning_rate": 1.738664989702216e-08, + "logits/chosen": -1.9745821952819824, + "logits/rejected": -1.9852091073989868, + "logps/chosen": -337.9658203125, + "logps/rejected": -374.95526123046875, + "loss": 0.1299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9914612174034119, + "rewards/margins": 2.690329074859619, + "rewards/rejected": -1.6988677978515625, + "step": 12620 + }, + { + "epoch": 0.73, + "learning_rate": 1.73795071671028e-08, + "logits/chosen": -2.039551019668579, + "logits/rejected": -2.0390400886535645, + "logps/chosen": -6.720578193664551, + "logps/rejected": -128.20802307128906, + "loss": 0.2437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.433292955160141, + "rewards/margins": 3.697211980819702, + "rewards/rejected": -3.2639191150665283, + "step": 12621 + }, + { + "epoch": 0.73, + "learning_rate": 1.7372365596022905e-08, + "logits/chosen": -1.8862507343292236, + "logits/rejected": -1.8864494562149048, + "logps/chosen": -0.0002072980860248208, + "logps/rejected": -69.78763580322266, + "loss": 0.5737, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.761598110140767e-06, + "rewards/margins": 0.5514271259307861, + "rewards/rejected": -0.5514358878135681, + "step": 12622 + }, + { + "epoch": 0.73, + "learning_rate": 1.7365225184036174e-08, + "logits/chosen": -1.7790664434432983, + "logits/rejected": -1.782047152519226, + "logps/chosen": -7.477059364318848, + "logps/rejected": -107.17638397216797, + "loss": 0.4617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03259458765387535, + "rewards/margins": 1.4326375722885132, + "rewards/rejected": -1.4652321338653564, + "step": 12623 + }, + { + "epoch": 0.73, + "learning_rate": 1.7358085931396277e-08, + "logits/chosen": -1.982273817062378, + "logits/rejected": -1.9815659523010254, + "logps/chosen": -77.72956085205078, + "logps/rejected": -254.02874755859375, + "loss": 0.5069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2365516722202301, + "rewards/margins": 0.42949143052101135, + "rewards/rejected": -0.19293975830078125, + "step": 12624 + }, + { + "epoch": 0.73, + "learning_rate": 1.7350947838356862e-08, + "logits/chosen": -1.945691704750061, + "logits/rejected": -1.9420963525772095, + "logps/chosen": -0.13509613275527954, + "logps/rejected": -158.06707763671875, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02022850327193737, + "rewards/margins": 2.018266201019287, + "rewards/rejected": -1.9980376958847046, + "step": 12625 + }, + { + "epoch": 0.73, + "learning_rate": 1.734381090517144e-08, + "logits/chosen": -1.9689562320709229, + "logits/rejected": -2.042961359024048, + "logps/chosen": -258.510986328125, + "logps/rejected": -387.047607421875, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.70420241355896, + "rewards/margins": 4.9807281494140625, + "rewards/rejected": -2.2765259742736816, + "step": 12626 + }, + { + "epoch": 0.73, + "learning_rate": 1.733667513209363e-08, + "logits/chosen": -2.0047662258148193, + "logits/rejected": -2.0202300548553467, + "logps/chosen": -191.5370635986328, + "logps/rejected": -265.8961486816406, + "loss": 0.0832, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.241442918777466, + "rewards/margins": 2.0303285121917725, + "rewards/rejected": 0.21111451089382172, + "step": 12627 + }, + { + "epoch": 0.73, + "learning_rate": 1.732954051937688e-08, + "logits/chosen": -1.9762637615203857, + "logits/rejected": -1.976846694946289, + "logps/chosen": -12.370485305786133, + "logps/rejected": -137.4833221435547, + "loss": 0.6034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04154033586382866, + "rewards/margins": 0.5433962345123291, + "rewards/rejected": -0.5849365592002869, + "step": 12628 + }, + { + "epoch": 0.73, + "learning_rate": 1.7322407067274674e-08, + "logits/chosen": -1.9869937896728516, + "logits/rejected": -1.9900619983673096, + "logps/chosen": -22.766820907592773, + "logps/rejected": -100.89899444580078, + "loss": 0.4093, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06613864749670029, + "rewards/margins": 1.5408403873443604, + "rewards/rejected": -1.4747017621994019, + "step": 12629 + }, + { + "epoch": 0.73, + "learning_rate": 1.7315274776040378e-08, + "logits/chosen": -1.689888596534729, + "logits/rejected": -1.6765977144241333, + "logps/chosen": -187.20068359375, + "logps/rejected": -329.0893859863281, + "loss": 0.2232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02903595007956028, + "rewards/margins": 1.5064194202423096, + "rewards/rejected": -1.4773834943771362, + "step": 12630 + }, + { + "epoch": 0.74, + "learning_rate": 1.730814364592742e-08, + "logits/chosen": -1.9956508874893188, + "logits/rejected": -2.0003774166107178, + "logps/chosen": -69.17854309082031, + "logps/rejected": -231.52149963378906, + "loss": 0.4856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3352615535259247, + "rewards/margins": 2.5495636463165283, + "rewards/rejected": -2.8848252296447754, + "step": 12631 + }, + { + "epoch": 0.74, + "learning_rate": 1.7301013677189125e-08, + "logits/chosen": -1.9625556468963623, + "logits/rejected": -1.9609739780426025, + "logps/chosen": -0.1008230522274971, + "logps/rejected": -214.13156127929688, + "loss": 0.321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005121495109051466, + "rewards/margins": 6.2434563636779785, + "rewards/rejected": -6.248578071594238, + "step": 12632 + }, + { + "epoch": 0.74, + "learning_rate": 1.7293884870078757e-08, + "logits/chosen": -1.8334128856658936, + "logits/rejected": -1.8346948623657227, + "logps/chosen": -9.783333778381348, + "logps/rejected": -85.07514190673828, + "loss": 0.3896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21495218575000763, + "rewards/margins": 1.3778070211410522, + "rewards/rejected": -1.162854790687561, + "step": 12633 + }, + { + "epoch": 0.74, + "learning_rate": 1.728675722484958e-08, + "logits/chosen": -1.7738804817199707, + "logits/rejected": -1.7730244398117065, + "logps/chosen": -36.00701141357422, + "logps/rejected": -65.58485412597656, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6371776461601257, + "rewards/margins": 1.0848468542099, + "rewards/rejected": -0.44766923785209656, + "step": 12634 + }, + { + "epoch": 0.74, + "learning_rate": 1.727963074175481e-08, + "logits/chosen": -1.9397587776184082, + "logits/rejected": -1.926743507385254, + "logps/chosen": -139.4308319091797, + "logps/rejected": -263.08343505859375, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5539779663085938, + "rewards/margins": 0.48599088191986084, + "rewards/rejected": 1.067987084388733, + "step": 12635 + }, + { + "epoch": 0.74, + "learning_rate": 1.7272505421047617e-08, + "logits/chosen": -1.7618577480316162, + "logits/rejected": -1.7596532106399536, + "logps/chosen": -19.881589889526367, + "logps/rejected": -231.59524536132812, + "loss": 0.1636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9973441958427429, + "rewards/margins": 3.789400577545166, + "rewards/rejected": -2.7920563220977783, + "step": 12636 + }, + { + "epoch": 0.74, + "learning_rate": 1.726538126298111e-08, + "logits/chosen": -1.8979668617248535, + "logits/rejected": -1.8802216053009033, + "logps/chosen": -0.00025652290787547827, + "logps/rejected": -280.11566162109375, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.146626113448292e-06, + "rewards/margins": 6.314599514007568, + "rewards/rejected": -6.314602851867676, + "step": 12637 + }, + { + "epoch": 0.74, + "learning_rate": 1.725825826780838e-08, + "logits/chosen": -2.0236916542053223, + "logits/rejected": -2.0201034545898438, + "logps/chosen": -0.6188905835151672, + "logps/rejected": -189.85227966308594, + "loss": 0.3895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0061068059876561165, + "rewards/margins": 2.447265386581421, + "rewards/rejected": -2.4533722400665283, + "step": 12638 + }, + { + "epoch": 0.74, + "learning_rate": 1.725113643578247e-08, + "logits/chosen": -1.8859636783599854, + "logits/rejected": -1.8779710531234741, + "logps/chosen": -32.21681594848633, + "logps/rejected": -166.1444549560547, + "loss": 0.358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03180847316980362, + "rewards/margins": 2.660191535949707, + "rewards/rejected": -2.691999912261963, + "step": 12639 + }, + { + "epoch": 0.74, + "learning_rate": 1.7244015767156407e-08, + "logits/chosen": -1.7811782360076904, + "logits/rejected": -1.7636127471923828, + "logps/chosen": -15.855125427246094, + "logps/rejected": -189.56655883789062, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1655748337507248, + "rewards/margins": 3.6233325004577637, + "rewards/rejected": -3.4577577114105225, + "step": 12640 + }, + { + "epoch": 0.74, + "learning_rate": 1.7236896262183108e-08, + "logits/chosen": -2.0431900024414062, + "logits/rejected": -2.02941632270813, + "logps/chosen": -82.62116241455078, + "logps/rejected": -264.984619140625, + "loss": 0.2836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35370713472366333, + "rewards/margins": 2.7052879333496094, + "rewards/rejected": -2.351580858230591, + "step": 12641 + }, + { + "epoch": 0.74, + "learning_rate": 1.7229777921115522e-08, + "logits/chosen": -1.9265385866165161, + "logits/rejected": -1.9336376190185547, + "logps/chosen": -20.89915657043457, + "logps/rejected": -148.30601501464844, + "loss": 0.4697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12040252983570099, + "rewards/margins": 0.782635509967804, + "rewards/rejected": -0.6622329950332642, + "step": 12642 + }, + { + "epoch": 0.74, + "learning_rate": 1.7222660744206518e-08, + "logits/chosen": -1.9732780456542969, + "logits/rejected": -1.9710241556167603, + "logps/chosen": -19.764368057250977, + "logps/rejected": -124.67874908447266, + "loss": 0.5418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3997032344341278, + "rewards/margins": 1.20111083984375, + "rewards/rejected": -1.6008141040802002, + "step": 12643 + }, + { + "epoch": 0.74, + "learning_rate": 1.7215544731708952e-08, + "logits/chosen": -1.9916483163833618, + "logits/rejected": -1.987919569015503, + "logps/chosen": -80.78650665283203, + "logps/rejected": -318.9317626953125, + "loss": 0.4798, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4789283871650696, + "rewards/margins": 2.677715301513672, + "rewards/rejected": -3.1566436290740967, + "step": 12644 + }, + { + "epoch": 0.74, + "learning_rate": 1.720842988387559e-08, + "logits/chosen": -1.7321405410766602, + "logits/rejected": -1.7282415628433228, + "logps/chosen": -28.51472282409668, + "logps/rejected": -204.05999755859375, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26026400923728943, + "rewards/margins": 2.2794580459594727, + "rewards/rejected": -2.0191941261291504, + "step": 12645 + }, + { + "epoch": 0.74, + "learning_rate": 1.72013162009592e-08, + "logits/chosen": -1.948006510734558, + "logits/rejected": -1.9365109205245972, + "logps/chosen": -71.716796875, + "logps/rejected": -238.37416076660156, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2411453276872635, + "rewards/margins": 4.622808933258057, + "rewards/rejected": -4.863954067230225, + "step": 12646 + }, + { + "epoch": 0.74, + "learning_rate": 1.7194203683212487e-08, + "logits/chosen": -1.9725401401519775, + "logits/rejected": -1.9734079837799072, + "logps/chosen": -49.471656799316406, + "logps/rejected": -284.74658203125, + "loss": 0.2236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5359367728233337, + "rewards/margins": 4.497950077056885, + "rewards/rejected": -3.9620132446289062, + "step": 12647 + }, + { + "epoch": 0.74, + "learning_rate": 1.7187092330888153e-08, + "logits/chosen": -1.9494234323501587, + "logits/rejected": -1.959992527961731, + "logps/chosen": -157.70616149902344, + "logps/rejected": -175.38323974609375, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5499435663223267, + "rewards/margins": 0.3183319568634033, + "rewards/rejected": 1.2316116094589233, + "step": 12648 + }, + { + "epoch": 0.74, + "learning_rate": 1.717998214423877e-08, + "logits/chosen": -1.664206862449646, + "logits/rejected": -1.6282422542572021, + "logps/chosen": -230.34974670410156, + "logps/rejected": -567.963623046875, + "loss": 0.0876, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1566574573516846, + "rewards/margins": 2.465837240219116, + "rewards/rejected": -0.3091796934604645, + "step": 12649 + }, + { + "epoch": 0.74, + "learning_rate": 1.717287312351699e-08, + "logits/chosen": -1.816690444946289, + "logits/rejected": -1.807745337486267, + "logps/chosen": -66.78351593017578, + "logps/rejected": -221.39584350585938, + "loss": 0.4718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13335494697093964, + "rewards/margins": 1.2395790815353394, + "rewards/rejected": -1.3729339838027954, + "step": 12650 + }, + { + "epoch": 0.74, + "learning_rate": 1.716576526897532e-08, + "logits/chosen": -1.9783835411071777, + "logits/rejected": -1.9740756750106812, + "logps/chosen": -16.722429275512695, + "logps/rejected": -180.73724365234375, + "loss": 0.0776, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.446488618850708, + "rewards/margins": 4.934560775756836, + "rewards/rejected": -3.488072156906128, + "step": 12651 + }, + { + "epoch": 0.74, + "learning_rate": 1.7158658580866275e-08, + "logits/chosen": -1.8912465572357178, + "logits/rejected": -1.8872061967849731, + "logps/chosen": -171.0956268310547, + "logps/rejected": -310.3623962402344, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0753250122070312, + "rewards/margins": 1.8447494506835938, + "rewards/rejected": -0.7694244384765625, + "step": 12652 + }, + { + "epoch": 0.74, + "learning_rate": 1.7151553059442324e-08, + "logits/chosen": -1.911865472793579, + "logits/rejected": -1.8999892473220825, + "logps/chosen": -2.0450382232666016, + "logps/rejected": -390.3475646972656, + "loss": 0.3574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02671816386282444, + "rewards/margins": 8.232287406921387, + "rewards/rejected": -8.259005546569824, + "step": 12653 + }, + { + "epoch": 0.74, + "learning_rate": 1.7144448704955884e-08, + "logits/chosen": -2.094831705093384, + "logits/rejected": -2.0868403911590576, + "logps/chosen": -18.762836456298828, + "logps/rejected": -219.38217163085938, + "loss": 0.1502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1219005584716797, + "rewards/margins": 3.628422260284424, + "rewards/rejected": -2.506521701812744, + "step": 12654 + }, + { + "epoch": 0.74, + "learning_rate": 1.713734551765936e-08, + "logits/chosen": -2.0896053314208984, + "logits/rejected": -2.0904343128204346, + "logps/chosen": -4.116299629211426, + "logps/rejected": -43.72285842895508, + "loss": 0.4969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08198290318250656, + "rewards/margins": 0.7449319362640381, + "rewards/rejected": -0.6629490256309509, + "step": 12655 + }, + { + "epoch": 0.74, + "learning_rate": 1.713024349780505e-08, + "logits/chosen": -1.977225422859192, + "logits/rejected": -1.969077229499817, + "logps/chosen": -68.53968048095703, + "logps/rejected": -315.4632263183594, + "loss": 0.2297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9049896597862244, + "rewards/margins": 1.7288178205490112, + "rewards/rejected": -0.8238281607627869, + "step": 12656 + }, + { + "epoch": 0.74, + "learning_rate": 1.712314264564528e-08, + "logits/chosen": -1.8822823762893677, + "logits/rejected": -1.8627958297729492, + "logps/chosen": -200.6346435546875, + "logps/rejected": -279.76605224609375, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8599365949630737, + "rewards/margins": 0.15231633186340332, + "rewards/rejected": 1.7076202630996704, + "step": 12657 + }, + { + "epoch": 0.74, + "learning_rate": 1.7116042961432297e-08, + "logits/chosen": -1.8187596797943115, + "logits/rejected": -1.822313904762268, + "logps/chosen": -2.3843913078308105, + "logps/rejected": -260.7090148925781, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2752291262149811, + "rewards/margins": 2.8978030681610107, + "rewards/rejected": -2.6225738525390625, + "step": 12658 + }, + { + "epoch": 0.74, + "learning_rate": 1.7108944445418343e-08, + "logits/chosen": -1.7986595630645752, + "logits/rejected": -1.8196512460708618, + "logps/chosen": -319.48260498046875, + "logps/rejected": -376.8149719238281, + "loss": 0.1187, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1378204822540283, + "rewards/margins": 1.8085389137268066, + "rewards/rejected": 0.32928162813186646, + "step": 12659 + }, + { + "epoch": 0.74, + "learning_rate": 1.710184709785555e-08, + "logits/chosen": -1.9218086004257202, + "logits/rejected": -1.912502646446228, + "logps/chosen": -20.483076095581055, + "logps/rejected": -147.55087280273438, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04821453243494034, + "rewards/margins": 2.0196409225463867, + "rewards/rejected": -1.9714263677597046, + "step": 12660 + }, + { + "epoch": 0.74, + "learning_rate": 1.709475091899607e-08, + "logits/chosen": -2.014842987060547, + "logits/rejected": -2.0138983726501465, + "logps/chosen": -23.984073638916016, + "logps/rejected": -285.77349853515625, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05510883405804634, + "rewards/margins": 6.361664295196533, + "rewards/rejected": -6.306555271148682, + "step": 12661 + }, + { + "epoch": 0.74, + "learning_rate": 1.708765590909199e-08, + "logits/chosen": -1.9027279615402222, + "logits/rejected": -1.8952573537826538, + "logps/chosen": -46.0108642578125, + "logps/rejected": -137.33047485351562, + "loss": 0.6562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.294903963804245, + "rewards/margins": 0.2648281157016754, + "rewards/rejected": -0.5597320795059204, + "step": 12662 + }, + { + "epoch": 0.74, + "learning_rate": 1.7080562068395388e-08, + "logits/chosen": -1.7840895652770996, + "logits/rejected": -1.7847234010696411, + "logps/chosen": -161.98733520507812, + "logps/rejected": -234.20767211914062, + "loss": 0.1918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.815710425376892, + "rewards/margins": 1.2369017601013184, + "rewards/rejected": 0.578808605670929, + "step": 12663 + }, + { + "epoch": 0.74, + "learning_rate": 1.707346939715823e-08, + "logits/chosen": -1.824833631515503, + "logits/rejected": -1.825319528579712, + "logps/chosen": -206.96389770507812, + "logps/rejected": -269.7443542480469, + "loss": 0.1125, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9062179327011108, + "rewards/margins": 2.3493728637695312, + "rewards/rejected": -0.443154901266098, + "step": 12664 + }, + { + "epoch": 0.74, + "learning_rate": 1.7066377895632495e-08, + "logits/chosen": -1.983011245727539, + "logits/rejected": -1.9675781726837158, + "logps/chosen": -109.96788787841797, + "logps/rejected": -287.4954833984375, + "loss": 0.1424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6999214291572571, + "rewards/margins": 3.058155059814453, + "rewards/rejected": -2.358233690261841, + "step": 12665 + }, + { + "epoch": 0.74, + "learning_rate": 1.705928756407012e-08, + "logits/chosen": -2.043973207473755, + "logits/rejected": -2.0438520908355713, + "logps/chosen": -5.388198042055592e-05, + "logps/rejected": -152.11288452148438, + "loss": 0.3616, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.841127972416871e-07, + "rewards/margins": 2.6501920223236084, + "rewards/rejected": -2.65019154548645, + "step": 12666 + }, + { + "epoch": 0.74, + "learning_rate": 1.705219840272299e-08, + "logits/chosen": -1.7634695768356323, + "logits/rejected": -1.6843894720077515, + "logps/chosen": -279.224609375, + "logps/rejected": -538.8441162109375, + "loss": 0.0833, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.407492160797119, + "rewards/margins": 2.402395725250244, + "rewards/rejected": 0.005096435546875, + "step": 12667 + }, + { + "epoch": 0.74, + "learning_rate": 1.7045110411842907e-08, + "logits/chosen": -2.0845870971679688, + "logits/rejected": -2.084904670715332, + "logps/chosen": -3.7414495944976807, + "logps/rejected": -107.36619567871094, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14015009999275208, + "rewards/margins": 1.7713909149169922, + "rewards/rejected": -1.6312408447265625, + "step": 12668 + }, + { + "epoch": 0.74, + "learning_rate": 1.7038023591681737e-08, + "logits/chosen": -1.7442022562026978, + "logits/rejected": -1.7639628648757935, + "logps/chosen": -10.537938117980957, + "logps/rejected": -177.92991638183594, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05214071273803711, + "rewards/margins": 2.222769021987915, + "rewards/rejected": -2.170628309249878, + "step": 12669 + }, + { + "epoch": 0.74, + "learning_rate": 1.7030937942491187e-08, + "logits/chosen": -1.777740240097046, + "logits/rejected": -1.7858234643936157, + "logps/chosen": -157.66555786132812, + "logps/rejected": -284.4922790527344, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.001556396484375, + "rewards/margins": 0.6624816656112671, + "rewards/rejected": 0.3390747010707855, + "step": 12670 + }, + { + "epoch": 0.74, + "learning_rate": 1.7023853464523006e-08, + "logits/chosen": -2.0333170890808105, + "logits/rejected": -2.0266332626342773, + "logps/chosen": -0.0010276944376528263, + "logps/rejected": -198.7847442626953, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.241025403141975e-05, + "rewards/margins": 5.320858955383301, + "rewards/rejected": -5.320921421051025, + "step": 12671 + }, + { + "epoch": 0.74, + "learning_rate": 1.701677015802882e-08, + "logits/chosen": -2.0353012084960938, + "logits/rejected": -2.0351550579071045, + "logps/chosen": -28.362295150756836, + "logps/rejected": -115.91951751708984, + "loss": 0.2669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8362604379653931, + "rewards/margins": 1.2325648069381714, + "rewards/rejected": -0.39630433917045593, + "step": 12672 + }, + { + "epoch": 0.74, + "learning_rate": 1.7009688023260317e-08, + "logits/chosen": -1.8639620542526245, + "logits/rejected": -1.8807544708251953, + "logps/chosen": -190.71279907226562, + "logps/rejected": -338.9584655761719, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.024707078933716, + "rewards/margins": 4.312228202819824, + "rewards/rejected": -2.2875213623046875, + "step": 12673 + }, + { + "epoch": 0.74, + "learning_rate": 1.700260706046908e-08, + "logits/chosen": -1.9216814041137695, + "logits/rejected": -1.9161297082901, + "logps/chosen": -4.8828206062316895, + "logps/rejected": -137.8116455078125, + "loss": 0.3558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16786570847034454, + "rewards/margins": 2.471930742263794, + "rewards/rejected": -2.304064989089966, + "step": 12674 + }, + { + "epoch": 0.74, + "learning_rate": 1.6995527269906635e-08, + "logits/chosen": -1.8465038537979126, + "logits/rejected": -1.7914613485336304, + "logps/chosen": -155.2593231201172, + "logps/rejected": -252.08233642578125, + "loss": 0.2418, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1623367071151733, + "rewards/margins": 1.2597824335098267, + "rewards/rejected": -0.09744568169116974, + "step": 12675 + }, + { + "epoch": 0.74, + "learning_rate": 1.69884486518245e-08, + "logits/chosen": -1.877205491065979, + "logits/rejected": -1.8882343769073486, + "logps/chosen": -181.19927978515625, + "logps/rejected": -267.1903991699219, + "loss": 0.0609, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0355743169784546, + "rewards/margins": 2.992239475250244, + "rewards/rejected": -1.9566650390625, + "step": 12676 + }, + { + "epoch": 0.74, + "learning_rate": 1.6981371206474148e-08, + "logits/chosen": -2.0783910751342773, + "logits/rejected": -2.0750479698181152, + "logps/chosen": -3.8014633655548096, + "logps/rejected": -282.59765625, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07257931679487228, + "rewards/margins": 7.721813678741455, + "rewards/rejected": -7.649234294891357, + "step": 12677 + }, + { + "epoch": 0.74, + "learning_rate": 1.697429493410702e-08, + "logits/chosen": -1.8095886707305908, + "logits/rejected": -1.802714228630066, + "logps/chosen": -8.077016830444336, + "logps/rejected": -219.3743896484375, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2289501279592514, + "rewards/margins": 1.7090115547180176, + "rewards/rejected": -1.480061411857605, + "step": 12678 + }, + { + "epoch": 0.74, + "learning_rate": 1.696721983497446e-08, + "logits/chosen": -1.9629592895507812, + "logits/rejected": -1.962357997894287, + "logps/chosen": -6.933353424072266, + "logps/rejected": -62.75154113769531, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9307929873466492, + "rewards/margins": 0.6231643557548523, + "rewards/rejected": 0.3076286315917969, + "step": 12679 + }, + { + "epoch": 0.74, + "learning_rate": 1.6960145909327837e-08, + "logits/chosen": -2.1101033687591553, + "logits/rejected": -2.0937368869781494, + "logps/chosen": -6.084514617919922, + "logps/rejected": -253.59820556640625, + "loss": 0.3088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15874119102954865, + "rewards/margins": 5.724924564361572, + "rewards/rejected": -5.566183567047119, + "step": 12680 + }, + { + "epoch": 0.74, + "learning_rate": 1.6953073157418446e-08, + "logits/chosen": -1.9549341201782227, + "logits/rejected": -1.956520438194275, + "logps/chosen": -4.840506076812744, + "logps/rejected": -88.34453582763672, + "loss": 0.568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11032624542713165, + "rewards/margins": 0.6214601397514343, + "rewards/rejected": -0.7317863702774048, + "step": 12681 + }, + { + "epoch": 0.74, + "learning_rate": 1.6946001579497566e-08, + "logits/chosen": -1.8891957998275757, + "logits/rejected": -1.9238818883895874, + "logps/chosen": -213.88931274414062, + "logps/rejected": -427.29498291015625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.533281087875366, + "rewards/margins": 6.469310283660889, + "rewards/rejected": -2.9360291957855225, + "step": 12682 + }, + { + "epoch": 0.74, + "learning_rate": 1.6938931175816374e-08, + "logits/chosen": -1.9048269987106323, + "logits/rejected": -1.906607985496521, + "logps/chosen": -158.0965576171875, + "logps/rejected": -227.447021484375, + "loss": 0.4264, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3286590576171875, + "rewards/margins": -0.21830594539642334, + "rewards/rejected": 1.5469650030136108, + "step": 12683 + }, + { + "epoch": 0.74, + "learning_rate": 1.6931861946626063e-08, + "logits/chosen": -1.9300620555877686, + "logits/rejected": -1.9260677099227905, + "logps/chosen": -0.0009448778000660241, + "logps/rejected": -229.17193603515625, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.632063766825013e-05, + "rewards/margins": 6.398530960083008, + "rewards/rejected": -6.398587226867676, + "step": 12684 + }, + { + "epoch": 0.74, + "learning_rate": 1.6924793892177773e-08, + "logits/chosen": -1.8232438564300537, + "logits/rejected": -1.8139837980270386, + "logps/chosen": -39.414772033691406, + "logps/rejected": -181.5286865234375, + "loss": 0.179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5597847104072571, + "rewards/margins": 2.9981162548065186, + "rewards/rejected": -2.4383316040039062, + "step": 12685 + }, + { + "epoch": 0.74, + "learning_rate": 1.691772701272261e-08, + "logits/chosen": -1.772985577583313, + "logits/rejected": -1.7482342720031738, + "logps/chosen": -160.8982391357422, + "logps/rejected": -305.9317626953125, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.524827718734741, + "rewards/margins": 2.9977617263793945, + "rewards/rejected": -0.47293397784233093, + "step": 12686 + }, + { + "epoch": 0.74, + "learning_rate": 1.6910661308511594e-08, + "logits/chosen": -2.01017427444458, + "logits/rejected": -2.003882884979248, + "logps/chosen": -5.662385592586361e-05, + "logps/rejected": -132.806396484375, + "loss": 0.5402, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.5217017183604185e-06, + "rewards/margins": 0.7454459071159363, + "rewards/rejected": -0.7454383969306946, + "step": 12687 + }, + { + "epoch": 0.74, + "learning_rate": 1.6903596779795737e-08, + "logits/chosen": -2.0495681762695312, + "logits/rejected": -2.0540130138397217, + "logps/chosen": -0.0014738162280991673, + "logps/rejected": -107.08880615234375, + "loss": 0.4323, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.562602821853943e-05, + "rewards/margins": 1.5974072217941284, + "rewards/rejected": -1.597381591796875, + "step": 12688 + }, + { + "epoch": 0.74, + "learning_rate": 1.6896533426826022e-08, + "logits/chosen": -1.8575496673583984, + "logits/rejected": -1.8625632524490356, + "logps/chosen": -132.69873046875, + "logps/rejected": -246.05885314941406, + "loss": 1.042, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3218521177768707, + "rewards/margins": -1.2703049182891846, + "rewards/rejected": 0.9484527707099915, + "step": 12689 + }, + { + "epoch": 0.74, + "learning_rate": 1.6889471249853376e-08, + "logits/chosen": -1.8022940158843994, + "logits/rejected": -1.79084050655365, + "logps/chosen": -26.92678451538086, + "logps/rejected": -323.9399108886719, + "loss": 0.1109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.119848608970642, + "rewards/margins": 6.34185791015625, + "rewards/rejected": -5.222009181976318, + "step": 12690 + }, + { + "epoch": 0.74, + "learning_rate": 1.6882410249128642e-08, + "logits/chosen": -1.816606044769287, + "logits/rejected": -1.8116596937179565, + "logps/chosen": -3.135176302748732e-05, + "logps/rejected": -228.52764892578125, + "loss": 0.3544, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.317988550108566e-07, + "rewards/margins": 3.382521867752075, + "rewards/rejected": -3.3825225830078125, + "step": 12691 + }, + { + "epoch": 0.74, + "learning_rate": 1.6875350424902706e-08, + "logits/chosen": -1.5709108114242554, + "logits/rejected": -1.5647244453430176, + "logps/chosen": -261.8402404785156, + "logps/rejected": -399.83685302734375, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.114462375640869, + "rewards/margins": 3.432422161102295, + "rewards/rejected": -1.3179596662521362, + "step": 12692 + }, + { + "epoch": 0.74, + "learning_rate": 1.6868291777426373e-08, + "logits/chosen": -1.7747708559036255, + "logits/rejected": -1.7796943187713623, + "logps/chosen": -10.237139701843262, + "logps/rejected": -45.30641555786133, + "loss": 0.7385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36619192361831665, + "rewards/margins": 0.19580340385437012, + "rewards/rejected": -0.5619953274726868, + "step": 12693 + }, + { + "epoch": 0.74, + "learning_rate": 1.686123430695036e-08, + "logits/chosen": -1.950048565864563, + "logits/rejected": -1.9735194444656372, + "logps/chosen": -86.52259063720703, + "logps/rejected": -288.3812561035156, + "loss": 0.6342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6086716055870056, + "rewards/margins": 0.5076766610145569, + "rewards/rejected": -1.1163482666015625, + "step": 12694 + }, + { + "epoch": 0.74, + "learning_rate": 1.685417801372541e-08, + "logits/chosen": -1.8955388069152832, + "logits/rejected": -1.9003018140792847, + "logps/chosen": -0.24431681632995605, + "logps/rejected": -160.1337890625, + "loss": 0.3695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024157708510756493, + "rewards/margins": 2.3199539184570312, + "rewards/rejected": -2.344111680984497, + "step": 12695 + }, + { + "epoch": 0.74, + "learning_rate": 1.6847122898002186e-08, + "logits/chosen": -1.6779571771621704, + "logits/rejected": -1.6796919107437134, + "logps/chosen": -229.18658447265625, + "logps/rejected": -268.10687255859375, + "loss": 0.386, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.2326111793518066, + "rewards/margins": -0.09728693962097168, + "rewards/rejected": 2.3298981189727783, + "step": 12696 + }, + { + "epoch": 0.74, + "learning_rate": 1.6840068960031344e-08, + "logits/chosen": -1.9989264011383057, + "logits/rejected": -1.9959876537322998, + "logps/chosen": -37.92583084106445, + "logps/rejected": -151.99554443359375, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7134395837783813, + "rewards/margins": 1.6119778156280518, + "rewards/rejected": -0.8985382318496704, + "step": 12697 + }, + { + "epoch": 0.74, + "learning_rate": 1.683301620006344e-08, + "logits/chosen": -2.0562961101531982, + "logits/rejected": -2.0598018169403076, + "logps/chosen": -19.786975860595703, + "logps/rejected": -93.63803100585938, + "loss": 0.4127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2819044291973114, + "rewards/margins": 1.2408536672592163, + "rewards/rejected": -0.9589492678642273, + "step": 12698 + }, + { + "epoch": 0.74, + "learning_rate": 1.6825964618349036e-08, + "logits/chosen": -1.7614550590515137, + "logits/rejected": -1.75257408618927, + "logps/chosen": -45.43245315551758, + "logps/rejected": -277.041015625, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7506794333457947, + "rewards/margins": 1.1892658472061157, + "rewards/rejected": -0.43858644366264343, + "step": 12699 + }, + { + "epoch": 0.74, + "learning_rate": 1.6818914215138635e-08, + "logits/chosen": -1.9207415580749512, + "logits/rejected": -1.905371069908142, + "logps/chosen": -190.1977996826172, + "logps/rejected": -364.426025390625, + "loss": 0.0446, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4102370738983154, + "rewards/margins": 3.2502822875976562, + "rewards/rejected": -0.840045154094696, + "step": 12700 + }, + { + "epoch": 0.74, + "learning_rate": 1.6811864990682732e-08, + "logits/chosen": -1.9234070777893066, + "logits/rejected": -1.925801396369934, + "logps/chosen": -0.1416873335838318, + "logps/rejected": -198.24790954589844, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008998766425065696, + "rewards/margins": 3.329900026321411, + "rewards/rejected": -3.3307998180389404, + "step": 12701 + }, + { + "epoch": 0.74, + "learning_rate": 1.6804816945231705e-08, + "logits/chosen": -1.7801060676574707, + "logits/rejected": -1.7699882984161377, + "logps/chosen": -82.23820495605469, + "logps/rejected": -358.0323486328125, + "loss": 0.1542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.449899286031723, + "rewards/margins": 3.143289089202881, + "rewards/rejected": -2.693389892578125, + "step": 12702 + }, + { + "epoch": 0.74, + "learning_rate": 1.6797770079035955e-08, + "logits/chosen": -1.9446362257003784, + "logits/rejected": -1.9386221170425415, + "logps/chosen": -148.2229461669922, + "logps/rejected": -545.87353515625, + "loss": 0.105, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1993225812911987, + "rewards/margins": 10.386303901672363, + "rewards/rejected": -9.186981201171875, + "step": 12703 + }, + { + "epoch": 0.74, + "learning_rate": 1.6790724392345823e-08, + "logits/chosen": -1.7967901229858398, + "logits/rejected": -1.8514260053634644, + "logps/chosen": -325.658935546875, + "logps/rejected": -521.6048583984375, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9789520502090454, + "rewards/margins": 6.259060859680176, + "rewards/rejected": -4.28010892868042, + "step": 12704 + }, + { + "epoch": 0.74, + "learning_rate": 1.6783679885411623e-08, + "logits/chosen": -2.049907684326172, + "logits/rejected": -2.039992094039917, + "logps/chosen": -35.894073486328125, + "logps/rejected": -310.8211669921875, + "loss": 0.206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47866517305374146, + "rewards/margins": 6.974423408508301, + "rewards/rejected": -6.495758056640625, + "step": 12705 + }, + { + "epoch": 0.74, + "learning_rate": 1.677663655848357e-08, + "logits/chosen": -1.9060522317886353, + "logits/rejected": -1.905349850654602, + "logps/chosen": -51.25968933105469, + "logps/rejected": -169.71080017089844, + "loss": 0.2056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7805008292198181, + "rewards/margins": 2.5678634643554688, + "rewards/rejected": -1.7873626947402954, + "step": 12706 + }, + { + "epoch": 0.74, + "learning_rate": 1.6769594411811905e-08, + "logits/chosen": -1.7363110780715942, + "logits/rejected": -1.745173692703247, + "logps/chosen": -0.00033209973480552435, + "logps/rejected": -282.82684326171875, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.351565828779712e-05, + "rewards/margins": 6.308292865753174, + "rewards/rejected": -6.3082594871521, + "step": 12707 + }, + { + "epoch": 0.74, + "learning_rate": 1.6762553445646787e-08, + "logits/chosen": -2.0143473148345947, + "logits/rejected": -2.013537645339966, + "logps/chosen": -18.207788467407227, + "logps/rejected": -208.8929443359375, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40299874544143677, + "rewards/margins": 3.466527223587036, + "rewards/rejected": -3.063528537750244, + "step": 12708 + }, + { + "epoch": 0.74, + "learning_rate": 1.6755513660238374e-08, + "logits/chosen": -2.0184903144836426, + "logits/rejected": -2.0111169815063477, + "logps/chosen": -163.99826049804688, + "logps/rejected": -269.01458740234375, + "loss": 0.0778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8030608892440796, + "rewards/margins": 2.5887084007263184, + "rewards/rejected": -0.7856475710868835, + "step": 12709 + }, + { + "epoch": 0.74, + "learning_rate": 1.67484750558367e-08, + "logits/chosen": -1.8832712173461914, + "logits/rejected": -1.8867406845092773, + "logps/chosen": -270.3026428222656, + "logps/rejected": -400.68292236328125, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.186669945716858, + "rewards/margins": 4.5440521240234375, + "rewards/rejected": -3.357382297515869, + "step": 12710 + }, + { + "epoch": 0.74, + "learning_rate": 1.6741437632691873e-08, + "logits/chosen": -1.9292867183685303, + "logits/rejected": -1.9273521900177002, + "logps/chosen": -118.33029174804688, + "logps/rejected": -238.18707275390625, + "loss": 0.1638, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.505340576171875, + "rewards/margins": 1.261407494544983, + "rewards/rejected": 0.24393311142921448, + "step": 12711 + }, + { + "epoch": 0.74, + "learning_rate": 1.6734401391053855e-08, + "logits/chosen": -2.037696599960327, + "logits/rejected": -2.0374233722686768, + "logps/chosen": -2.8312461376190186, + "logps/rejected": -212.9870147705078, + "loss": 0.4382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03373224660754204, + "rewards/margins": 1.5819339752197266, + "rewards/rejected": -1.6156662702560425, + "step": 12712 + }, + { + "epoch": 0.74, + "learning_rate": 1.6727366331172617e-08, + "logits/chosen": -1.8226064443588257, + "logits/rejected": -1.8488872051239014, + "logps/chosen": -170.58798217773438, + "logps/rejected": -165.6343231201172, + "loss": 0.1945, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3353195190429688, + "rewards/margins": 0.9554580450057983, + "rewards/rejected": 1.3798614740371704, + "step": 12713 + }, + { + "epoch": 0.74, + "learning_rate": 1.6720332453298076e-08, + "logits/chosen": -1.991894006729126, + "logits/rejected": -2.0035977363586426, + "logps/chosen": -2.3693439960479736, + "logps/rejected": -233.91683959960938, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00727005023509264, + "rewards/margins": 3.275057315826416, + "rewards/rejected": -3.267787218093872, + "step": 12714 + }, + { + "epoch": 0.74, + "learning_rate": 1.671329975768012e-08, + "logits/chosen": -1.940123438835144, + "logits/rejected": -1.9373455047607422, + "logps/chosen": -8.666346549987793, + "logps/rejected": -213.67762756347656, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07583742588758469, + "rewards/margins": 3.428870916366577, + "rewards/rejected": -3.3530335426330566, + "step": 12715 + }, + { + "epoch": 0.74, + "learning_rate": 1.6706268244568594e-08, + "logits/chosen": -2.1002442836761475, + "logits/rejected": -2.1007080078125, + "logps/chosen": -0.004063074477016926, + "logps/rejected": -183.47052001953125, + "loss": 0.3826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009755161590874195, + "rewards/margins": 2.4883506298065186, + "rewards/rejected": -2.487375020980835, + "step": 12716 + }, + { + "epoch": 0.74, + "learning_rate": 1.669923791421326e-08, + "logits/chosen": -1.8061530590057373, + "logits/rejected": -1.8090767860412598, + "logps/chosen": -36.901397705078125, + "logps/rejected": -87.77581787109375, + "loss": 0.7695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39965248107910156, + "rewards/margins": 0.15607112646102905, + "rewards/rejected": -0.5557236075401306, + "step": 12717 + }, + { + "epoch": 0.74, + "learning_rate": 1.669220876686389e-08, + "logits/chosen": -1.823079228401184, + "logits/rejected": -1.7953212261199951, + "logps/chosen": -202.22906494140625, + "logps/rejected": -330.4439697265625, + "loss": 0.056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8460159301757812, + "rewards/margins": 3.6246750354766846, + "rewards/rejected": -1.7786591053009033, + "step": 12718 + }, + { + "epoch": 0.74, + "learning_rate": 1.6685180802770186e-08, + "logits/chosen": -1.932202935218811, + "logits/rejected": -1.9307785034179688, + "logps/chosen": -19.082143783569336, + "logps/rejected": -80.82743835449219, + "loss": 0.2324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7623693346977234, + "rewards/margins": 1.8881018161773682, + "rewards/rejected": -1.125732421875, + "step": 12719 + }, + { + "epoch": 0.74, + "learning_rate": 1.667815402218184e-08, + "logits/chosen": -1.9896705150604248, + "logits/rejected": -1.9961049556732178, + "logps/chosen": -20.44903564453125, + "logps/rejected": -207.1572265625, + "loss": 0.1926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9032058715820312, + "rewards/margins": 2.5784897804260254, + "rewards/rejected": -1.6752837896347046, + "step": 12720 + }, + { + "epoch": 0.74, + "learning_rate": 1.6671128425348442e-08, + "logits/chosen": -1.862514853477478, + "logits/rejected": -1.851058006286621, + "logps/chosen": -246.99415588378906, + "logps/rejected": -327.26629638671875, + "loss": 0.2741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3625656068325043, + "rewards/margins": 1.0623031854629517, + "rewards/rejected": -0.699737548828125, + "step": 12721 + }, + { + "epoch": 0.74, + "learning_rate": 1.6664104012519593e-08, + "logits/chosen": -1.8214722871780396, + "logits/rejected": -1.813471794128418, + "logps/chosen": -58.52121353149414, + "logps/rejected": -98.39131927490234, + "loss": 0.5135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1027706190943718, + "rewards/margins": 0.8416271209716797, + "rewards/rejected": -0.9443977475166321, + "step": 12722 + }, + { + "epoch": 0.74, + "learning_rate": 1.6657080783944825e-08, + "logits/chosen": -1.9903358221054077, + "logits/rejected": -1.9774905443191528, + "logps/chosen": -18.90265655517578, + "logps/rejected": -253.32772827148438, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4975746273994446, + "rewards/margins": 2.70255970954895, + "rewards/rejected": -2.2049851417541504, + "step": 12723 + }, + { + "epoch": 0.74, + "learning_rate": 1.6650058739873666e-08, + "logits/chosen": -1.6971758604049683, + "logits/rejected": -1.6936333179473877, + "logps/chosen": -50.32557678222656, + "logps/rejected": -223.09573364257812, + "loss": 0.2483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33595773577690125, + "rewards/margins": 2.730653762817383, + "rewards/rejected": -2.394695997238159, + "step": 12724 + }, + { + "epoch": 0.74, + "learning_rate": 1.6643037880555538e-08, + "logits/chosen": -1.8751312494277954, + "logits/rejected": -1.8741230964660645, + "logps/chosen": -0.00131507171317935, + "logps/rejected": -194.77249145507812, + "loss": 0.3533, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.812423842144199e-05, + "rewards/margins": 4.298862934112549, + "rewards/rejected": -4.29892110824585, + "step": 12725 + }, + { + "epoch": 0.74, + "learning_rate": 1.663601820623987e-08, + "logits/chosen": -1.931278109550476, + "logits/rejected": -1.937012791633606, + "logps/chosen": -10.730195045471191, + "logps/rejected": -240.09750366210938, + "loss": 0.2484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7039099931716919, + "rewards/margins": 2.0995795726776123, + "rewards/rejected": -1.3956695795059204, + "step": 12726 + }, + { + "epoch": 0.74, + "learning_rate": 1.662899971717604e-08, + "logits/chosen": -1.9318373203277588, + "logits/rejected": -1.9284229278564453, + "logps/chosen": -233.57257080078125, + "logps/rejected": -323.571044921875, + "loss": 0.0275, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.126499891281128, + "rewards/margins": 3.5730299949645996, + "rewards/rejected": -1.4465302228927612, + "step": 12727 + }, + { + "epoch": 0.74, + "learning_rate": 1.662198241361339e-08, + "logits/chosen": -1.9984824657440186, + "logits/rejected": -1.967881202697754, + "logps/chosen": -188.95004272460938, + "logps/rejected": -306.0211181640625, + "loss": 0.094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7433379888534546, + "rewards/margins": 1.877838134765625, + "rewards/rejected": -0.13450013101100922, + "step": 12728 + }, + { + "epoch": 0.74, + "learning_rate": 1.661496629580116e-08, + "logits/chosen": -1.8511638641357422, + "logits/rejected": -1.8247828483581543, + "logps/chosen": -227.68077087402344, + "logps/rejected": -440.36260986328125, + "loss": 0.1679, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.114912509918213, + "rewards/margins": 1.126783847808838, + "rewards/rejected": 0.988128662109375, + "step": 12729 + }, + { + "epoch": 0.74, + "learning_rate": 1.660795136398866e-08, + "logits/chosen": -1.9011939764022827, + "logits/rejected": -1.9109233617782593, + "logps/chosen": -5.816188812255859, + "logps/rejected": -209.30142211914062, + "loss": 0.4294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05662098154425621, + "rewards/margins": 1.2439959049224854, + "rewards/rejected": -1.1873749494552612, + "step": 12730 + }, + { + "epoch": 0.74, + "learning_rate": 1.6600937618425055e-08, + "logits/chosen": -1.609412670135498, + "logits/rejected": -1.5948323011398315, + "logps/chosen": -177.95225524902344, + "logps/rejected": -281.6349182128906, + "loss": 0.1634, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0874465703964233, + "rewards/margins": 1.4602950811386108, + "rewards/rejected": -0.3728485107421875, + "step": 12731 + }, + { + "epoch": 0.74, + "learning_rate": 1.6593925059359532e-08, + "logits/chosen": -1.8687955141067505, + "logits/rejected": -1.840293526649475, + "logps/chosen": -197.87216186523438, + "logps/rejected": -233.82630920410156, + "loss": 0.1977, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.468501329421997, + "rewards/margins": 0.8192322254180908, + "rewards/rejected": 1.6492691040039062, + "step": 12732 + }, + { + "epoch": 0.74, + "learning_rate": 1.6586913687041166e-08, + "logits/chosen": -1.8448189496994019, + "logits/rejected": -1.8410202264785767, + "logps/chosen": -113.16282653808594, + "logps/rejected": -263.4214782714844, + "loss": 0.1218, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1511719226837158, + "rewards/margins": 2.786459445953369, + "rewards/rejected": -1.6352875232696533, + "step": 12733 + }, + { + "epoch": 0.74, + "learning_rate": 1.6579903501719077e-08, + "logits/chosen": -1.6630200147628784, + "logits/rejected": -1.6694954633712769, + "logps/chosen": -15.872146606445312, + "logps/rejected": -144.66439819335938, + "loss": 0.4284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5660993456840515, + "rewards/margins": 0.5427930951118469, + "rewards/rejected": 0.02330627478659153, + "step": 12734 + }, + { + "epoch": 0.74, + "learning_rate": 1.6572894503642314e-08, + "logits/chosen": -1.8594492673873901, + "logits/rejected": -1.859264612197876, + "logps/chosen": -178.23745727539062, + "logps/rejected": -249.914306640625, + "loss": 0.3815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36778566241264343, + "rewards/margins": 0.3754822015762329, + "rewards/rejected": -0.0076965331099927425, + "step": 12735 + }, + { + "epoch": 0.74, + "learning_rate": 1.6565886693059832e-08, + "logits/chosen": -1.7243740558624268, + "logits/rejected": -1.7281806468963623, + "logps/chosen": -21.97321319580078, + "logps/rejected": -131.73052978515625, + "loss": 0.7658, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.07640399783849716, + "rewards/margins": -0.29036885499954224, + "rewards/rejected": 0.21396484971046448, + "step": 12736 + }, + { + "epoch": 0.74, + "learning_rate": 1.6558880070220597e-08, + "logits/chosen": -1.8408416509628296, + "logits/rejected": -1.8392850160598755, + "logps/chosen": -192.39920043945312, + "logps/rejected": -276.6925048828125, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.679478406906128, + "rewards/margins": 1.3050001859664917, + "rewards/rejected": 1.3744782209396362, + "step": 12737 + }, + { + "epoch": 0.74, + "learning_rate": 1.655187463537352e-08, + "logits/chosen": -1.8285009860992432, + "logits/rejected": -1.8179824352264404, + "logps/chosen": -93.98844909667969, + "logps/rejected": -266.8937072753906, + "loss": 0.5928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5865745544433594, + "rewards/margins": 3.2517616748809814, + "rewards/rejected": -3.838336229324341, + "step": 12738 + }, + { + "epoch": 0.74, + "learning_rate": 1.6544870388767485e-08, + "logits/chosen": -2.014477491378784, + "logits/rejected": -2.0619966983795166, + "logps/chosen": -120.22530364990234, + "logps/rejected": -257.61614990234375, + "loss": 0.1562, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.653821587562561, + "rewards/margins": 1.193433403968811, + "rewards/rejected": 0.46038818359375, + "step": 12739 + }, + { + "epoch": 0.74, + "learning_rate": 1.6537867330651285e-08, + "logits/chosen": -1.9862972497940063, + "logits/rejected": -1.9928467273712158, + "logps/chosen": -13.708105087280273, + "logps/rejected": -203.81613159179688, + "loss": 0.3589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10824241489171982, + "rewards/margins": 3.043680191040039, + "rewards/rejected": -3.1519227027893066, + "step": 12740 + }, + { + "epoch": 0.74, + "learning_rate": 1.653086546127372e-08, + "logits/chosen": -1.8490750789642334, + "logits/rejected": -1.8519935607910156, + "logps/chosen": -25.412485122680664, + "logps/rejected": -237.75131225585938, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006644058506935835, + "rewards/margins": 4.461704730987549, + "rewards/rejected": -4.468348979949951, + "step": 12741 + }, + { + "epoch": 0.74, + "learning_rate": 1.652386478088352e-08, + "logits/chosen": -1.7766937017440796, + "logits/rejected": -1.7798882722854614, + "logps/chosen": -4.345909595489502, + "logps/rejected": -134.27708435058594, + "loss": 0.4264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1308915615081787, + "rewards/margins": 1.224534034729004, + "rewards/rejected": -1.0936424732208252, + "step": 12742 + }, + { + "epoch": 0.74, + "learning_rate": 1.651686528972942e-08, + "logits/chosen": -2.050522565841675, + "logits/rejected": -2.028311014175415, + "logps/chosen": -146.1611328125, + "logps/rejected": -366.2994079589844, + "loss": 0.2524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23674164712429047, + "rewards/margins": 1.560627818107605, + "rewards/rejected": -1.3238861560821533, + "step": 12743 + }, + { + "epoch": 0.74, + "learning_rate": 1.650986698806003e-08, + "logits/chosen": -1.834707498550415, + "logits/rejected": -1.8245924711227417, + "logps/chosen": -178.87258911132812, + "logps/rejected": -433.84564208984375, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5505356788635254, + "rewards/margins": 3.488389730453491, + "rewards/rejected": -0.937853991985321, + "step": 12744 + }, + { + "epoch": 0.74, + "learning_rate": 1.6502869876123976e-08, + "logits/chosen": -1.7436126470565796, + "logits/rejected": -1.741529107093811, + "logps/chosen": -0.2041715532541275, + "logps/rejected": -283.5027160644531, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010171820409595966, + "rewards/margins": 5.647766590118408, + "rewards/rejected": -5.637594699859619, + "step": 12745 + }, + { + "epoch": 0.74, + "learning_rate": 1.6495873954169844e-08, + "logits/chosen": -1.8562183380126953, + "logits/rejected": -1.8440271615982056, + "logps/chosen": -297.00634765625, + "logps/rejected": -531.5897827148438, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0546875, + "rewards/margins": 3.321850538253784, + "rewards/rejected": -0.26716309785842896, + "step": 12746 + }, + { + "epoch": 0.74, + "learning_rate": 1.6488879222446165e-08, + "logits/chosen": -1.8653390407562256, + "logits/rejected": -1.8402366638183594, + "logps/chosen": -199.99252319335938, + "logps/rejected": -480.89422607421875, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4436248540878296, + "rewards/margins": 1.7494659423828125, + "rewards/rejected": -0.3058410584926605, + "step": 12747 + }, + { + "epoch": 0.74, + "learning_rate": 1.64818856812014e-08, + "logits/chosen": -1.7827028036117554, + "logits/rejected": -1.8072949647903442, + "logps/chosen": -221.675537109375, + "logps/rejected": -294.36151123046875, + "loss": 0.0982, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7447052001953125, + "rewards/margins": 1.923974633216858, + "rewards/rejected": -0.17926941812038422, + "step": 12748 + }, + { + "epoch": 0.74, + "learning_rate": 1.647489333068402e-08, + "logits/chosen": -2.0783092975616455, + "logits/rejected": -2.072896718978882, + "logps/chosen": -200.97421264648438, + "logps/rejected": -299.90655517578125, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.06565260887146, + "rewards/margins": 0.13454902172088623, + "rewards/rejected": 1.9311035871505737, + "step": 12749 + }, + { + "epoch": 0.74, + "learning_rate": 1.6467902171142412e-08, + "logits/chosen": -1.8319340944290161, + "logits/rejected": -1.8326644897460938, + "logps/chosen": -29.803821563720703, + "logps/rejected": -144.3041534423828, + "loss": 0.4112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028799820691347122, + "rewards/margins": 1.8528850078582764, + "rewards/rejected": -1.8240852355957031, + "step": 12750 + }, + { + "epoch": 0.74, + "learning_rate": 1.6460912202824957e-08, + "logits/chosen": -1.925491452217102, + "logits/rejected": -1.9671686887741089, + "logps/chosen": -323.1185302734375, + "logps/rejected": -277.2548522949219, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2789764404296875, + "rewards/margins": 0.2504059076309204, + "rewards/rejected": 1.028570532798767, + "step": 12751 + }, + { + "epoch": 0.74, + "learning_rate": 1.6453923425979925e-08, + "logits/chosen": -1.8947086334228516, + "logits/rejected": -1.8968091011047363, + "logps/chosen": -35.265159606933594, + "logps/rejected": -252.12643432617188, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2644840180873871, + "rewards/margins": 3.284454584121704, + "rewards/rejected": -3.019970655441284, + "step": 12752 + }, + { + "epoch": 0.74, + "learning_rate": 1.6446935840855643e-08, + "logits/chosen": -1.9151389598846436, + "logits/rejected": -1.9145967960357666, + "logps/chosen": -0.0004239672562107444, + "logps/rejected": -248.7068634033203, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.153694706270471e-05, + "rewards/margins": 8.313013076782227, + "rewards/rejected": -8.313044548034668, + "step": 12753 + }, + { + "epoch": 0.74, + "learning_rate": 1.643994944770034e-08, + "logits/chosen": -1.8184610605239868, + "logits/rejected": -1.8099287748336792, + "logps/chosen": -200.39413452148438, + "logps/rejected": -340.4765625, + "loss": 0.1247, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2233123779296875, + "rewards/margins": 1.7793304920196533, + "rewards/rejected": 0.44398194551467896, + "step": 12754 + }, + { + "epoch": 0.74, + "learning_rate": 1.6432964246762182e-08, + "logits/chosen": -1.988545536994934, + "logits/rejected": -1.9830719232559204, + "logps/chosen": -109.90625762939453, + "logps/rejected": -335.6895751953125, + "loss": 0.4109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7575218081474304, + "rewards/margins": 0.4572959840297699, + "rewards/rejected": 0.3002258241176605, + "step": 12755 + }, + { + "epoch": 0.74, + "learning_rate": 1.642598023828932e-08, + "logits/chosen": -1.989536166191101, + "logits/rejected": -1.9832133054733276, + "logps/chosen": -2.288799805683084e-05, + "logps/rejected": -96.46449279785156, + "loss": 0.5977, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.152211622951654e-08, + "rewards/margins": 0.409263551235199, + "rewards/rejected": -0.40926361083984375, + "step": 12756 + }, + { + "epoch": 0.74, + "learning_rate": 1.6418997422529874e-08, + "logits/chosen": -2.0570595264434814, + "logits/rejected": -2.0535595417022705, + "logps/chosen": -0.00022064158110879362, + "logps/rejected": -218.69178771972656, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.19540288526332e-05, + "rewards/margins": 6.934664249420166, + "rewards/rejected": -6.934642314910889, + "step": 12757 + }, + { + "epoch": 0.74, + "learning_rate": 1.6412015799731914e-08, + "logits/chosen": -2.037769317626953, + "logits/rejected": -1.994737982749939, + "logps/chosen": -60.001060485839844, + "logps/rejected": -474.5645751953125, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.060266137123108, + "rewards/margins": 9.890976905822754, + "rewards/rejected": -8.830710411071777, + "step": 12758 + }, + { + "epoch": 0.74, + "learning_rate": 1.6405035370143437e-08, + "logits/chosen": -1.7835431098937988, + "logits/rejected": -1.7685459852218628, + "logps/chosen": -196.8663330078125, + "logps/rejected": -256.0361633300781, + "loss": 0.3934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4139450192451477, + "rewards/margins": 0.06582489609718323, + "rewards/rejected": 0.3481201231479645, + "step": 12759 + }, + { + "epoch": 0.74, + "learning_rate": 1.6398056134012435e-08, + "logits/chosen": -1.9218024015426636, + "logits/rejected": -1.9257935285568237, + "logps/chosen": -13.12541389465332, + "logps/rejected": -150.05845642089844, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24387454986572266, + "rewards/margins": 1.6690882444381714, + "rewards/rejected": -1.4252136945724487, + "step": 12760 + }, + { + "epoch": 0.74, + "learning_rate": 1.639107809158684e-08, + "logits/chosen": -2.0462021827697754, + "logits/rejected": -2.0494964122772217, + "logps/chosen": -0.43020129203796387, + "logps/rejected": -81.84033203125, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03510115295648575, + "rewards/margins": 3.0219011306762695, + "rewards/rejected": -3.057002305984497, + "step": 12761 + }, + { + "epoch": 0.74, + "learning_rate": 1.6384101243114568e-08, + "logits/chosen": -1.8298581838607788, + "logits/rejected": -1.7422763109207153, + "logps/chosen": -105.82693481445312, + "logps/rejected": -389.6371154785156, + "loss": 0.1427, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8409416675567627, + "rewards/margins": 1.851870059967041, + "rewards/rejected": -0.010928344912827015, + "step": 12762 + }, + { + "epoch": 0.74, + "learning_rate": 1.6377125588843437e-08, + "logits/chosen": -1.8230878114700317, + "logits/rejected": -1.8499574661254883, + "logps/chosen": -193.99996948242188, + "logps/rejected": -440.3597717285156, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1186431646347046, + "rewards/margins": 5.292883396148682, + "rewards/rejected": -4.1742401123046875, + "step": 12763 + }, + { + "epoch": 0.74, + "learning_rate": 1.6370151129021272e-08, + "logits/chosen": -1.9586584568023682, + "logits/rejected": -1.960113525390625, + "logps/chosen": -0.00010156385542359203, + "logps/rejected": -164.20953369140625, + "loss": 0.3852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.037176843965426e-06, + "rewards/margins": 2.4937245845794678, + "rewards/rejected": -2.493725538253784, + "step": 12764 + }, + { + "epoch": 0.74, + "learning_rate": 1.6363177863895837e-08, + "logits/chosen": -2.0335583686828613, + "logits/rejected": -2.018892288208008, + "logps/chosen": -56.81880187988281, + "logps/rejected": -297.50958251953125, + "loss": 0.6706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9013687372207642, + "rewards/margins": 2.4332685470581055, + "rewards/rejected": -3.334637403488159, + "step": 12765 + }, + { + "epoch": 0.74, + "learning_rate": 1.6356205793714884e-08, + "logits/chosen": -1.7272061109542847, + "logits/rejected": -1.726569414138794, + "logps/chosen": -208.12921142578125, + "logps/rejected": -253.21630859375, + "loss": 0.2786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9598969221115112, + "rewards/margins": 0.43099069595336914, + "rewards/rejected": 1.528906226158142, + "step": 12766 + }, + { + "epoch": 0.74, + "learning_rate": 1.6349234918726052e-08, + "logits/chosen": -1.8279281854629517, + "logits/rejected": -1.8213081359863281, + "logps/chosen": -33.49604797363281, + "logps/rejected": -316.6039123535156, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3326324224472046, + "rewards/margins": 4.808840751647949, + "rewards/rejected": -3.476208448410034, + "step": 12767 + }, + { + "epoch": 0.74, + "learning_rate": 1.6342265239176995e-08, + "logits/chosen": -1.8343318700790405, + "logits/rejected": -1.831339955329895, + "logps/chosen": -197.90435791015625, + "logps/rejected": -373.633544921875, + "loss": 0.0262, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.493396043777466, + "rewards/margins": 3.5620360374450684, + "rewards/rejected": -1.068640112876892, + "step": 12768 + }, + { + "epoch": 0.74, + "learning_rate": 1.6335296755315315e-08, + "logits/chosen": -2.0853939056396484, + "logits/rejected": -2.0691399574279785, + "logps/chosen": -0.0005625810008496046, + "logps/rejected": -318.04644775390625, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00025514623848721385, + "rewards/margins": 8.075560569763184, + "rewards/rejected": -8.075304985046387, + "step": 12769 + }, + { + "epoch": 0.74, + "learning_rate": 1.6328329467388586e-08, + "logits/chosen": -1.8001188039779663, + "logits/rejected": -1.7976151704788208, + "logps/chosen": -21.85804557800293, + "logps/rejected": -121.64878845214844, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18264122307300568, + "rewards/margins": 2.218999147415161, + "rewards/rejected": -2.036357879638672, + "step": 12770 + }, + { + "epoch": 0.74, + "learning_rate": 1.6321363375644263e-08, + "logits/chosen": -1.914656162261963, + "logits/rejected": -1.886593222618103, + "logps/chosen": -0.0013756162952631712, + "logps/rejected": -567.40869140625, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002072107046842575, + "rewards/margins": 15.97240924835205, + "rewards/rejected": -15.9703369140625, + "step": 12771 + }, + { + "epoch": 0.74, + "learning_rate": 1.6314398480329893e-08, + "logits/chosen": -2.0826995372772217, + "logits/rejected": -2.082857847213745, + "logps/chosen": -9.613056182861328, + "logps/rejected": -123.81407165527344, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1326940506696701, + "rewards/margins": 1.9751996994018555, + "rewards/rejected": -1.842505693435669, + "step": 12772 + }, + { + "epoch": 0.74, + "learning_rate": 1.630743478169284e-08, + "logits/chosen": -1.947009563446045, + "logits/rejected": -1.9423280954360962, + "logps/chosen": -9.408618927001953, + "logps/rejected": -268.95062255859375, + "loss": 0.2573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2013198882341385, + "rewards/margins": 5.812613010406494, + "rewards/rejected": -5.611293315887451, + "step": 12773 + }, + { + "epoch": 0.74, + "learning_rate": 1.6300472279980516e-08, + "logits/chosen": -1.8626664876937866, + "logits/rejected": -1.8679563999176025, + "logps/chosen": -1.5197818279266357, + "logps/rejected": -145.5796661376953, + "loss": 0.4911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1083885207772255, + "rewards/margins": 1.0494911670684814, + "rewards/rejected": -1.1578797101974487, + "step": 12774 + }, + { + "epoch": 0.74, + "learning_rate": 1.6293510975440256e-08, + "logits/chosen": -2.012115716934204, + "logits/rejected": -2.007520914077759, + "logps/chosen": -18.728038787841797, + "logps/rejected": -126.71904754638672, + "loss": 0.4045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2461681365966797, + "rewards/margins": 1.159200668334961, + "rewards/rejected": -0.9130325317382812, + "step": 12775 + }, + { + "epoch": 0.74, + "learning_rate": 1.6286550868319366e-08, + "logits/chosen": -1.9493640661239624, + "logits/rejected": -1.9586101770401, + "logps/chosen": -239.07272338867188, + "logps/rejected": -465.6384582519531, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3824708461761475, + "rewards/margins": 5.146109104156494, + "rewards/rejected": -2.7636382579803467, + "step": 12776 + }, + { + "epoch": 0.74, + "learning_rate": 1.627959195886511e-08, + "logits/chosen": -1.7073873281478882, + "logits/rejected": -1.7313404083251953, + "logps/chosen": -180.08099365234375, + "logps/rejected": -306.6179504394531, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7018096446990967, + "rewards/margins": 3.7185332775115967, + "rewards/rejected": -1.0167236328125, + "step": 12777 + }, + { + "epoch": 0.74, + "learning_rate": 1.627263424732468e-08, + "logits/chosen": -1.805628776550293, + "logits/rejected": -1.7887628078460693, + "logps/chosen": -78.0073013305664, + "logps/rejected": -415.03668212890625, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.058754730969667435, + "rewards/margins": 7.971573829650879, + "rewards/rejected": -8.030328750610352, + "step": 12778 + }, + { + "epoch": 0.74, + "learning_rate": 1.6265677733945255e-08, + "logits/chosen": -1.7976701259613037, + "logits/rejected": -1.7975575923919678, + "logps/chosen": -55.98664093017578, + "logps/rejected": -336.60968017578125, + "loss": 0.2267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8131484985351562, + "rewards/margins": 3.562455892562866, + "rewards/rejected": -2.74930739402771, + "step": 12779 + }, + { + "epoch": 0.74, + "learning_rate": 1.6258722418973976e-08, + "logits/chosen": -1.8278253078460693, + "logits/rejected": -1.8228756189346313, + "logps/chosen": -0.9028530120849609, + "logps/rejected": -182.907470703125, + "loss": 0.4148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08540471643209457, + "rewards/margins": 1.8602221012115479, + "rewards/rejected": -1.9456268548965454, + "step": 12780 + }, + { + "epoch": 0.74, + "learning_rate": 1.6251768302657936e-08, + "logits/chosen": -1.9623595476150513, + "logits/rejected": -1.9485843181610107, + "logps/chosen": -65.11022186279297, + "logps/rejected": -239.69497680664062, + "loss": 0.1257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9189994931221008, + "rewards/margins": 3.6385185718536377, + "rewards/rejected": -2.7195191383361816, + "step": 12781 + }, + { + "epoch": 0.74, + "learning_rate": 1.6244815385244148e-08, + "logits/chosen": -1.7935924530029297, + "logits/rejected": -1.7964494228363037, + "logps/chosen": -25.88207244873047, + "logps/rejected": -89.13369750976562, + "loss": 0.5695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3346370756626129, + "rewards/margins": 0.03138619661331177, + "rewards/rejected": 0.30325087904930115, + "step": 12782 + }, + { + "epoch": 0.74, + "learning_rate": 1.6237863666979636e-08, + "logits/chosen": -1.995765209197998, + "logits/rejected": -1.9893121719360352, + "logps/chosen": -21.20956802368164, + "logps/rejected": -173.05548095703125, + "loss": 0.4452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41369685530662537, + "rewards/margins": 2.230057954788208, + "rewards/rejected": -2.643754720687866, + "step": 12783 + }, + { + "epoch": 0.74, + "learning_rate": 1.623091314811135e-08, + "logits/chosen": -1.8038759231567383, + "logits/rejected": -1.7999542951583862, + "logps/chosen": -20.728885650634766, + "logps/rejected": -44.495704650878906, + "loss": 0.7982, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.30669745802879333, + "rewards/margins": -0.22665899991989136, + "rewards/rejected": -0.08003845065832138, + "step": 12784 + }, + { + "epoch": 0.74, + "learning_rate": 1.6223963828886234e-08, + "logits/chosen": -1.8740026950836182, + "logits/rejected": -1.8452179431915283, + "logps/chosen": -206.20497131347656, + "logps/rejected": -327.7561950683594, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2992234230041504, + "rewards/margins": 4.8969011306762695, + "rewards/rejected": -2.597677707672119, + "step": 12785 + }, + { + "epoch": 0.74, + "learning_rate": 1.621701570955112e-08, + "logits/chosen": -1.9721789360046387, + "logits/rejected": -1.9757661819458008, + "logps/chosen": -1.314112663269043, + "logps/rejected": -175.33880615234375, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1385437250137329, + "rewards/margins": 3.1661925315856934, + "rewards/rejected": -3.02764892578125, + "step": 12786 + }, + { + "epoch": 0.74, + "learning_rate": 1.621006879035286e-08, + "logits/chosen": -1.7847745418548584, + "logits/rejected": -1.7508902549743652, + "logps/chosen": -170.21026611328125, + "logps/rejected": -396.68695068359375, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4997451305389404, + "rewards/margins": 2.9463820457458496, + "rewards/rejected": -0.44663697481155396, + "step": 12787 + }, + { + "epoch": 0.74, + "learning_rate": 1.6203123071538238e-08, + "logits/chosen": -1.97535240650177, + "logits/rejected": -1.974225640296936, + "logps/chosen": -3.728222608566284, + "logps/rejected": -73.63626861572266, + "loss": 0.4639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04367396980524063, + "rewards/margins": 1.334195613861084, + "rewards/rejected": -1.2905216217041016, + "step": 12788 + }, + { + "epoch": 0.74, + "learning_rate": 1.6196178553354024e-08, + "logits/chosen": -1.9315463304519653, + "logits/rejected": -1.9336029291152954, + "logps/chosen": -37.08690643310547, + "logps/rejected": -163.7423858642578, + "loss": 0.4819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2502433955669403, + "rewards/margins": 0.7360801696777344, + "rewards/rejected": -0.48583680391311646, + "step": 12789 + }, + { + "epoch": 0.74, + "learning_rate": 1.618923523604688e-08, + "logits/chosen": -1.8289393186569214, + "logits/rejected": -1.8348710536956787, + "logps/chosen": -6.7903900146484375, + "logps/rejected": -445.0646057128906, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22450371086597443, + "rewards/margins": 11.588966369628906, + "rewards/rejected": -11.364462852478027, + "step": 12790 + }, + { + "epoch": 0.74, + "learning_rate": 1.6182293119863483e-08, + "logits/chosen": -1.7315407991409302, + "logits/rejected": -1.7369906902313232, + "logps/chosen": -14.508392333984375, + "logps/rejected": -157.6376495361328, + "loss": 0.5198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2562370300292969, + "rewards/margins": 0.5499763488769531, + "rewards/rejected": -0.29373931884765625, + "step": 12791 + }, + { + "epoch": 0.74, + "learning_rate": 1.6175352205050458e-08, + "logits/chosen": -1.9220370054244995, + "logits/rejected": -1.9154843091964722, + "logps/chosen": -106.15594482421875, + "logps/rejected": -190.97764587402344, + "loss": 0.1704, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8338470458984375, + "rewards/margins": 1.603092908859253, + "rewards/rejected": 0.230754092335701, + "step": 12792 + }, + { + "epoch": 0.74, + "learning_rate": 1.6168412491854396e-08, + "logits/chosen": -1.956210970878601, + "logits/rejected": -1.945921778678894, + "logps/chosen": -0.09168979525566101, + "logps/rejected": -336.03143310546875, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006432086229324341, + "rewards/margins": 4.207650184631348, + "rewards/rejected": -4.207006931304932, + "step": 12793 + }, + { + "epoch": 0.74, + "learning_rate": 1.616147398052178e-08, + "logits/chosen": -2.1035642623901367, + "logits/rejected": -2.0982165336608887, + "logps/chosen": -0.37961235642433167, + "logps/rejected": -229.51873779296875, + "loss": 0.3402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03254484012722969, + "rewards/margins": 4.88024377822876, + "rewards/rejected": -4.912788391113281, + "step": 12794 + }, + { + "epoch": 0.74, + "learning_rate": 1.615453667129914e-08, + "logits/chosen": -1.8434275388717651, + "logits/rejected": -1.8447345495224, + "logps/chosen": -52.80029296875, + "logps/rejected": -296.314697265625, + "loss": 0.1099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2286964654922485, + "rewards/margins": 3.7014780044555664, + "rewards/rejected": -2.4727814197540283, + "step": 12795 + }, + { + "epoch": 0.74, + "learning_rate": 1.6147600564432933e-08, + "logits/chosen": -1.9393815994262695, + "logits/rejected": -1.9390491247177124, + "logps/chosen": -0.0005072043859399855, + "logps/rejected": -11.125514030456543, + "loss": 0.7087, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.5246130462619476e-05, + "rewards/margins": -0.06249568238854408, + "rewards/rejected": 0.062470436096191406, + "step": 12796 + }, + { + "epoch": 0.74, + "learning_rate": 1.614066566016953e-08, + "logits/chosen": -1.9518792629241943, + "logits/rejected": -1.931803584098816, + "logps/chosen": -0.00012146926746936515, + "logps/rejected": -314.0380554199219, + "loss": 0.3402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6682243142440711e-07, + "rewards/margins": 4.9079742431640625, + "rewards/rejected": -4.9079742431640625, + "step": 12797 + }, + { + "epoch": 0.74, + "learning_rate": 1.6133731958755297e-08, + "logits/chosen": -1.7579165697097778, + "logits/rejected": -1.7352889776229858, + "logps/chosen": -128.39389038085938, + "logps/rejected": -315.26544189453125, + "loss": 0.1114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8614517450332642, + "rewards/margins": 2.550938367843628, + "rewards/rejected": -0.6894866824150085, + "step": 12798 + }, + { + "epoch": 0.74, + "learning_rate": 1.612679946043657e-08, + "logits/chosen": -1.933174967765808, + "logits/rejected": -1.9304521083831787, + "logps/chosen": -17.372737884521484, + "logps/rejected": -234.21728515625, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05746765062212944, + "rewards/margins": 4.591996669769287, + "rewards/rejected": -4.534529209136963, + "step": 12799 + }, + { + "epoch": 0.74, + "learning_rate": 1.6119868165459633e-08, + "logits/chosen": -1.8163763284683228, + "logits/rejected": -1.8267393112182617, + "logps/chosen": -200.09022521972656, + "logps/rejected": -444.3290710449219, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0646896362304688, + "rewards/margins": 6.370710849761963, + "rewards/rejected": -4.306021213531494, + "step": 12800 + }, + { + "epoch": 0.74, + "learning_rate": 1.6112938074070692e-08, + "logits/chosen": -1.9294896125793457, + "logits/rejected": -1.9291270971298218, + "logps/chosen": -1.5209697484970093, + "logps/rejected": -32.244659423828125, + "loss": 0.6089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0630500465631485, + "rewards/margins": 0.2313544899225235, + "rewards/rejected": -0.168304443359375, + "step": 12801 + }, + { + "epoch": 0.74, + "learning_rate": 1.6106009186515945e-08, + "logits/chosen": -1.9388498067855835, + "logits/rejected": -1.9391062259674072, + "logps/chosen": -29.687597274780273, + "logps/rejected": -171.00608825683594, + "loss": 0.2414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15893879532814026, + "rewards/margins": 2.7146427631378174, + "rewards/rejected": -2.55570387840271, + "step": 12802 + }, + { + "epoch": 0.75, + "learning_rate": 1.6099081503041545e-08, + "logits/chosen": -1.7702162265777588, + "logits/rejected": -1.7795908451080322, + "logps/chosen": -34.93365478515625, + "logps/rejected": -223.07058715820312, + "loss": 0.2799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2727653682231903, + "rewards/margins": 5.4796223640441895, + "rewards/rejected": -5.206857204437256, + "step": 12803 + }, + { + "epoch": 0.75, + "learning_rate": 1.6092155023893617e-08, + "logits/chosen": -1.9232499599456787, + "logits/rejected": -1.9255380630493164, + "logps/chosen": -4.057991981506348, + "logps/rejected": -75.52046203613281, + "loss": 0.4178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03228874132037163, + "rewards/margins": 1.2596333026885986, + "rewards/rejected": -1.2273445129394531, + "step": 12804 + }, + { + "epoch": 0.75, + "learning_rate": 1.6085229749318186e-08, + "logits/chosen": -1.815927505493164, + "logits/rejected": -1.8056515455245972, + "logps/chosen": -12.000081062316895, + "logps/rejected": -189.80419921875, + "loss": 0.3808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1374983787536621, + "rewards/margins": 1.7863844633102417, + "rewards/rejected": -1.6488860845565796, + "step": 12805 + }, + { + "epoch": 0.75, + "learning_rate": 1.6078305679561287e-08, + "logits/chosen": -1.9333244562149048, + "logits/rejected": -1.9321507215499878, + "logps/chosen": -213.25628662109375, + "logps/rejected": -338.0812072753906, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.670190334320068, + "rewards/margins": 3.874398708343506, + "rewards/rejected": 0.7957916259765625, + "step": 12806 + }, + { + "epoch": 0.75, + "learning_rate": 1.6071382814868904e-08, + "logits/chosen": -1.852552890777588, + "logits/rejected": -1.849853515625, + "logps/chosen": -20.659534454345703, + "logps/rejected": -107.41348266601562, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.145002007484436, + "rewards/margins": 0.8035224676132202, + "rewards/rejected": 0.34147951006889343, + "step": 12807 + }, + { + "epoch": 0.75, + "learning_rate": 1.606446115548698e-08, + "logits/chosen": -1.9530866146087646, + "logits/rejected": -1.945760726928711, + "logps/chosen": -0.002738351933658123, + "logps/rejected": -93.41000366210938, + "loss": 0.4355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0016698249382898211, + "rewards/margins": 1.6013853549957275, + "rewards/rejected": -1.5997154712677002, + "step": 12808 + }, + { + "epoch": 0.75, + "learning_rate": 1.6057540701661377e-08, + "logits/chosen": -1.8728312253952026, + "logits/rejected": -1.8712208271026611, + "logps/chosen": -45.2956657409668, + "logps/rejected": -302.9848937988281, + "loss": 0.2407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3194473385810852, + "rewards/margins": 4.257698059082031, + "rewards/rejected": -3.938250780105591, + "step": 12809 + }, + { + "epoch": 0.75, + "learning_rate": 1.605062145363796e-08, + "logits/chosen": -2.1303954124450684, + "logits/rejected": -2.127793788909912, + "logps/chosen": -0.00041326301288791, + "logps/rejected": -225.73329162597656, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7039274098351598e-06, + "rewards/margins": 2.6593172550201416, + "rewards/rejected": -2.659315586090088, + "step": 12810 + }, + { + "epoch": 0.75, + "learning_rate": 1.6043703411662528e-08, + "logits/chosen": -1.7920136451721191, + "logits/rejected": -1.792052149772644, + "logps/chosen": -0.6846948266029358, + "logps/rejected": -56.330718994140625, + "loss": 0.5273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002560573862865567, + "rewards/margins": 0.6513276696205139, + "rewards/rejected": -0.6487671136856079, + "step": 12811 + }, + { + "epoch": 0.75, + "learning_rate": 1.603678657598087e-08, + "logits/chosen": -1.9982503652572632, + "logits/rejected": -1.9874347448349, + "logps/chosen": -136.02528381347656, + "logps/rejected": -266.33673095703125, + "loss": 0.2009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0953384637832642, + "rewards/margins": 1.2455825805664062, + "rewards/rejected": -0.15024414658546448, + "step": 12812 + }, + { + "epoch": 0.75, + "learning_rate": 1.6029870946838654e-08, + "logits/chosen": -1.9893054962158203, + "logits/rejected": -1.973502278327942, + "logps/chosen": -153.71759033203125, + "logps/rejected": -352.4720764160156, + "loss": 0.0585, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.571758985519409, + "rewards/margins": 2.297677516937256, + "rewards/rejected": 0.27408143877983093, + "step": 12813 + }, + { + "epoch": 0.75, + "learning_rate": 1.6022956524481596e-08, + "logits/chosen": -2.002720594406128, + "logits/rejected": -2.0069167613983154, + "logps/chosen": -30.4339599609375, + "logps/rejected": -112.60880279541016, + "loss": 0.5121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09837837517261505, + "rewards/margins": 0.9609333872795105, + "rewards/rejected": -1.0593117475509644, + "step": 12814 + }, + { + "epoch": 0.75, + "learning_rate": 1.6016043309155346e-08, + "logits/chosen": -1.9507533311843872, + "logits/rejected": -1.9598320722579956, + "logps/chosen": -41.57038497924805, + "logps/rejected": -135.4065704345703, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9363681674003601, + "rewards/margins": 0.46989935636520386, + "rewards/rejected": 0.46646881103515625, + "step": 12815 + }, + { + "epoch": 0.75, + "learning_rate": 1.600913130110545e-08, + "logits/chosen": -1.7592623233795166, + "logits/rejected": -1.6962732076644897, + "logps/chosen": -278.0889587402344, + "logps/rejected": -361.67254638671875, + "loss": 0.0775, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9413909912109375, + "rewards/margins": 2.043417453765869, + "rewards/rejected": 0.8979736566543579, + "step": 12816 + }, + { + "epoch": 0.75, + "learning_rate": 1.600222050057748e-08, + "logits/chosen": -1.7423919439315796, + "logits/rejected": -1.7159700393676758, + "logps/chosen": -221.1675262451172, + "logps/rejected": -298.0162353515625, + "loss": 0.1933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7197235822677612, + "rewards/margins": 1.1010102033615112, + "rewards/rejected": 0.61871337890625, + "step": 12817 + }, + { + "epoch": 0.75, + "learning_rate": 1.599531090781694e-08, + "logits/chosen": -1.9196799993515015, + "logits/rejected": -1.9159669876098633, + "logps/chosen": -0.00010096787445945665, + "logps/rejected": -145.33523559570312, + "loss": 0.3735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4065728919376852e-06, + "rewards/margins": 2.886610269546509, + "rewards/rejected": -2.886608839035034, + "step": 12818 + }, + { + "epoch": 0.75, + "learning_rate": 1.5988402523069305e-08, + "logits/chosen": -1.842631220817566, + "logits/rejected": -1.8398009538650513, + "logps/chosen": -201.2735595703125, + "logps/rejected": -485.1506042480469, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3003509044647217, + "rewards/margins": 5.654214382171631, + "rewards/rejected": -2.353863477706909, + "step": 12819 + }, + { + "epoch": 0.75, + "learning_rate": 1.5981495346579966e-08, + "logits/chosen": -1.8698632717132568, + "logits/rejected": -1.8546022176742554, + "logps/chosen": -222.8585968017578, + "logps/rejected": -434.8672790527344, + "loss": 0.1111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3896392583847046, + "rewards/margins": 1.7545989751815796, + "rewards/rejected": -0.364959716796875, + "step": 12820 + }, + { + "epoch": 0.75, + "learning_rate": 1.5974589378594317e-08, + "logits/chosen": -1.8084666728973389, + "logits/rejected": -1.8113521337509155, + "logps/chosen": -80.41254425048828, + "logps/rejected": -298.6092834472656, + "loss": 0.1736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9450317621231079, + "rewards/margins": 3.1045870780944824, + "rewards/rejected": -2.159555196762085, + "step": 12821 + }, + { + "epoch": 0.75, + "learning_rate": 1.5967684619357684e-08, + "logits/chosen": -1.93626868724823, + "logits/rejected": -1.9209192991256714, + "logps/chosen": -110.48112487792969, + "logps/rejected": -323.5901184082031, + "loss": 0.4418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39416274428367615, + "rewards/margins": 4.623424530029297, + "rewards/rejected": -5.017587184906006, + "step": 12822 + }, + { + "epoch": 0.75, + "learning_rate": 1.5960781069115387e-08, + "logits/chosen": -2.006666898727417, + "logits/rejected": -2.006178617477417, + "logps/chosen": -3.5881574149243534e-05, + "logps/rejected": -149.77752685546875, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.850289886031533e-06, + "rewards/margins": 4.782689571380615, + "rewards/rejected": -4.78268575668335, + "step": 12823 + }, + { + "epoch": 0.75, + "learning_rate": 1.595387872811263e-08, + "logits/chosen": -1.8840129375457764, + "logits/rejected": -1.8873069286346436, + "logps/chosen": -60.025726318359375, + "logps/rejected": -248.54840087890625, + "loss": 0.111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2109787464141846, + "rewards/margins": 3.4398193359375, + "rewards/rejected": -2.2288405895233154, + "step": 12824 + }, + { + "epoch": 0.75, + "learning_rate": 1.5946977596594636e-08, + "logits/chosen": -1.8501167297363281, + "logits/rejected": -1.8320660591125488, + "logps/chosen": -393.05877685546875, + "logps/rejected": -557.7684936523438, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.9322755336761475, + "rewards/margins": 3.771576166152954, + "rewards/rejected": 0.16069947183132172, + "step": 12825 + }, + { + "epoch": 0.75, + "learning_rate": 1.5940077674806572e-08, + "logits/chosen": -2.009134292602539, + "logits/rejected": -1.9825397729873657, + "logps/chosen": -189.32135009765625, + "logps/rejected": -456.5567626953125, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0452682971954346, + "rewards/margins": 3.7016282081604004, + "rewards/rejected": -0.656359851360321, + "step": 12826 + }, + { + "epoch": 0.75, + "learning_rate": 1.5933178962993573e-08, + "logits/chosen": -1.844704508781433, + "logits/rejected": -1.8280162811279297, + "logps/chosen": -91.20085144042969, + "logps/rejected": -297.27935791015625, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8955947756767273, + "rewards/margins": 7.5097455978393555, + "rewards/rejected": -6.6141510009765625, + "step": 12827 + }, + { + "epoch": 0.75, + "learning_rate": 1.592628146140067e-08, + "logits/chosen": -1.787002682685852, + "logits/rejected": -1.8075616359710693, + "logps/chosen": -255.41384887695312, + "logps/rejected": -430.4013671875, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9168609380722046, + "rewards/margins": 7.817648410797119, + "rewards/rejected": -5.900787353515625, + "step": 12828 + }, + { + "epoch": 0.75, + "learning_rate": 1.591938517027293e-08, + "logits/chosen": -1.7585177421569824, + "logits/rejected": -1.7620410919189453, + "logps/chosen": -43.007293701171875, + "logps/rejected": -113.60624694824219, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10124168545007706, + "rewards/margins": 1.2355999946594238, + "rewards/rejected": -1.134358286857605, + "step": 12829 + }, + { + "epoch": 0.75, + "learning_rate": 1.5912490089855334e-08, + "logits/chosen": -1.6892863512039185, + "logits/rejected": -1.6601951122283936, + "logps/chosen": -260.3612976074219, + "logps/rejected": -452.8368835449219, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4227874279022217, + "rewards/margins": 2.9440276622772217, + "rewards/rejected": 0.478759765625, + "step": 12830 + }, + { + "epoch": 0.75, + "learning_rate": 1.5905596220392848e-08, + "logits/chosen": -1.9461500644683838, + "logits/rejected": -1.9451121091842651, + "logps/chosen": -25.406333923339844, + "logps/rejected": -145.16629028320312, + "loss": 0.2527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8985801935195923, + "rewards/margins": 1.9388084411621094, + "rewards/rejected": -1.040228247642517, + "step": 12831 + }, + { + "epoch": 0.75, + "learning_rate": 1.5898703562130322e-08, + "logits/chosen": -1.8607288599014282, + "logits/rejected": -1.8492127656936646, + "logps/chosen": -155.0046844482422, + "logps/rejected": -407.7529296875, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7746292352676392, + "rewards/margins": 4.593122959136963, + "rewards/rejected": -2.818493604660034, + "step": 12832 + }, + { + "epoch": 0.75, + "learning_rate": 1.5891812115312687e-08, + "logits/chosen": -2.026146650314331, + "logits/rejected": -2.0103795528411865, + "logps/chosen": -6.4386162757873535, + "logps/rejected": -171.14207458496094, + "loss": 0.3406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4265348017215729, + "rewards/margins": 1.4625760316848755, + "rewards/rejected": -1.036041259765625, + "step": 12833 + }, + { + "epoch": 0.75, + "learning_rate": 1.588492188018471e-08, + "logits/chosen": -1.8848356008529663, + "logits/rejected": -1.8592770099639893, + "logps/chosen": -263.0260314941406, + "logps/rejected": -587.1245727539062, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1342315673828125, + "rewards/margins": 6.635013103485107, + "rewards/rejected": -4.500781536102295, + "step": 12834 + }, + { + "epoch": 0.75, + "learning_rate": 1.587803285699118e-08, + "logits/chosen": -1.905691146850586, + "logits/rejected": -1.9097058773040771, + "logps/chosen": -153.41757202148438, + "logps/rejected": -441.0115966796875, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.748193383216858, + "rewards/margins": 4.6485595703125, + "rewards/rejected": -2.9003663063049316, + "step": 12835 + }, + { + "epoch": 0.75, + "learning_rate": 1.5871145045976836e-08, + "logits/chosen": -1.895961880683899, + "logits/rejected": -1.9078247547149658, + "logps/chosen": -211.37130737304688, + "logps/rejected": -206.15219116210938, + "loss": 0.5395, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.894909679889679, + "rewards/margins": -0.01957094669342041, + "rewards/rejected": 0.9144806265830994, + "step": 12836 + }, + { + "epoch": 0.75, + "learning_rate": 1.5864258447386364e-08, + "logits/chosen": -1.8248910903930664, + "logits/rejected": -1.8475565910339355, + "logps/chosen": -179.27520751953125, + "logps/rejected": -545.59814453125, + "loss": 0.0358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4718964099884033, + "rewards/margins": 3.103109836578369, + "rewards/rejected": -1.6312134265899658, + "step": 12837 + }, + { + "epoch": 0.75, + "learning_rate": 1.5857373061464417e-08, + "logits/chosen": -1.77179753780365, + "logits/rejected": -1.7784132957458496, + "logps/chosen": -10.8872709274292, + "logps/rejected": -86.23565673828125, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03550367429852486, + "rewards/margins": 1.7155405282974243, + "rewards/rejected": -1.6800369024276733, + "step": 12838 + }, + { + "epoch": 0.75, + "learning_rate": 1.5850488888455578e-08, + "logits/chosen": -1.935009479522705, + "logits/rejected": -1.9538419246673584, + "logps/chosen": -178.62091064453125, + "logps/rejected": -212.20985412597656, + "loss": 0.1046, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2772185802459717, + "rewards/margins": 1.930222988128662, + "rewards/rejected": 0.3469955623149872, + "step": 12839 + }, + { + "epoch": 0.75, + "learning_rate": 1.5843605928604413e-08, + "logits/chosen": -1.9829976558685303, + "logits/rejected": -1.9719549417495728, + "logps/chosen": -5.3166269935900345e-05, + "logps/rejected": -114.13617706298828, + "loss": 0.5219, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.860906533896923e-07, + "rewards/margins": 0.8453967571258545, + "rewards/rejected": -0.8453964591026306, + "step": 12840 + }, + { + "epoch": 0.75, + "learning_rate": 1.5836724182155447e-08, + "logits/chosen": -1.7654823064804077, + "logits/rejected": -1.7524545192718506, + "logps/chosen": -176.8060302734375, + "logps/rejected": -370.6053466796875, + "loss": 0.117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.449462890625, + "rewards/margins": 1.5885405540466309, + "rewards/rejected": 0.8609222769737244, + "step": 12841 + }, + { + "epoch": 0.75, + "learning_rate": 1.5829843649353165e-08, + "logits/chosen": -1.944334626197815, + "logits/rejected": -1.939708948135376, + "logps/chosen": -38.19224548339844, + "logps/rejected": -198.44284057617188, + "loss": 0.216, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0110981464385986, + "rewards/margins": 1.6624164581298828, + "rewards/rejected": -0.651318371295929, + "step": 12842 + }, + { + "epoch": 0.75, + "learning_rate": 1.5822964330441963e-08, + "logits/chosen": -1.9519387483596802, + "logits/rejected": -1.957046627998352, + "logps/chosen": -13.114479064941406, + "logps/rejected": -252.6874542236328, + "loss": 0.3994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029253482818603516, + "rewards/margins": 2.3022286891937256, + "rewards/rejected": -2.272975206375122, + "step": 12843 + }, + { + "epoch": 0.75, + "learning_rate": 1.581608622566624e-08, + "logits/chosen": -1.9760171175003052, + "logits/rejected": -1.9762707948684692, + "logps/chosen": -39.29509353637695, + "logps/rejected": -113.36739349365234, + "loss": 0.602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09970054775476456, + "rewards/margins": 0.5085476040840149, + "rewards/rejected": -0.6082481741905212, + "step": 12844 + }, + { + "epoch": 0.75, + "learning_rate": 1.580920933527035e-08, + "logits/chosen": -1.9663273096084595, + "logits/rejected": -1.9484840631484985, + "logps/chosen": -199.37619018554688, + "logps/rejected": -354.1649475097656, + "loss": 0.1912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9115723371505737, + "rewards/margins": 1.1079834699630737, + "rewards/rejected": 0.8035888671875, + "step": 12845 + }, + { + "epoch": 0.75, + "learning_rate": 1.5802333659498614e-08, + "logits/chosen": -1.7950780391693115, + "logits/rejected": -1.8191860914230347, + "logps/chosen": -363.9327392578125, + "logps/rejected": -421.0205383300781, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.607611060142517, + "rewards/margins": 3.014517307281494, + "rewards/rejected": -1.4069061279296875, + "step": 12846 + }, + { + "epoch": 0.75, + "learning_rate": 1.579545919859524e-08, + "logits/chosen": -1.9181760549545288, + "logits/rejected": -1.9115025997161865, + "logps/chosen": -210.10202026367188, + "logps/rejected": -370.88580322265625, + "loss": 0.1315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.742083728313446, + "rewards/margins": 2.2253355979919434, + "rewards/rejected": -1.483251929283142, + "step": 12847 + }, + { + "epoch": 0.75, + "learning_rate": 1.5788585952804466e-08, + "logits/chosen": -1.8578697443008423, + "logits/rejected": -1.8501167297363281, + "logps/chosen": -29.435009002685547, + "logps/rejected": -65.76158142089844, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43478432297706604, + "rewards/margins": 0.9130150079727173, + "rewards/rejected": -0.47823068499565125, + "step": 12848 + }, + { + "epoch": 0.75, + "learning_rate": 1.578171392237047e-08, + "logits/chosen": -1.7533845901489258, + "logits/rejected": -1.7486484050750732, + "logps/chosen": -0.0018561688484624028, + "logps/rejected": -64.9788818359375, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009675666806288064, + "rewards/margins": 1.743712067604065, + "rewards/rejected": -1.7427444458007812, + "step": 12849 + }, + { + "epoch": 0.75, + "learning_rate": 1.5774843107537387e-08, + "logits/chosen": -1.807321310043335, + "logits/rejected": -1.7904560565948486, + "logps/chosen": -57.530555725097656, + "logps/rejected": -326.8757019042969, + "loss": 0.1848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5432464480400085, + "rewards/margins": 3.1632447242736816, + "rewards/rejected": -2.6199982166290283, + "step": 12850 + }, + { + "epoch": 0.75, + "learning_rate": 1.576797350854928e-08, + "logits/chosen": -1.8232181072235107, + "logits/rejected": -1.8566789627075195, + "logps/chosen": -210.40797424316406, + "logps/rejected": -394.62152099609375, + "loss": 0.0473, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3209335803985596, + "rewards/margins": 5.997001647949219, + "rewards/rejected": -4.676068305969238, + "step": 12851 + }, + { + "epoch": 0.75, + "learning_rate": 1.57611051256502e-08, + "logits/chosen": -1.9192261695861816, + "logits/rejected": -1.916164755821228, + "logps/chosen": -0.022642821073532104, + "logps/rejected": -115.19186401367188, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015952986432239413, + "rewards/margins": 2.1607589721679688, + "rewards/rejected": -2.1623542308807373, + "step": 12852 + }, + { + "epoch": 0.75, + "learning_rate": 1.5754237959084143e-08, + "logits/chosen": -1.8965437412261963, + "logits/rejected": -1.899182677268982, + "logps/chosen": -54.519927978515625, + "logps/rejected": -137.07199096679688, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.236338809132576, + "rewards/margins": 1.1583389043807983, + "rewards/rejected": -0.9220001101493835, + "step": 12853 + }, + { + "epoch": 0.75, + "learning_rate": 1.574737200909509e-08, + "logits/chosen": -1.900707483291626, + "logits/rejected": -1.8998137712478638, + "logps/chosen": -37.11878967285156, + "logps/rejected": -160.03564453125, + "loss": 0.4278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15521851181983948, + "rewards/margins": 1.51873779296875, + "rewards/rejected": -1.673956274986267, + "step": 12854 + }, + { + "epoch": 0.75, + "learning_rate": 1.574050727592689e-08, + "logits/chosen": -1.9448968172073364, + "logits/rejected": -1.9551562070846558, + "logps/chosen": -121.83865356445312, + "logps/rejected": -280.09173583984375, + "loss": 0.0903, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.828901767730713, + "rewards/margins": 1.7881180047988892, + "rewards/rejected": 1.0407837629318237, + "step": 12855 + }, + { + "epoch": 0.75, + "learning_rate": 1.5733643759823473e-08, + "logits/chosen": -1.9524444341659546, + "logits/rejected": -1.9554626941680908, + "logps/chosen": -18.509658813476562, + "logps/rejected": -171.687255859375, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18382835388183594, + "rewards/margins": 3.2632014751434326, + "rewards/rejected": -3.0793731212615967, + "step": 12856 + }, + { + "epoch": 0.75, + "learning_rate": 1.572678146102866e-08, + "logits/chosen": -1.7406294345855713, + "logits/rejected": -1.7357523441314697, + "logps/chosen": -233.04705810546875, + "logps/rejected": -303.13262939453125, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.181810140609741, + "rewards/margins": 2.865654230117798, + "rewards/rejected": -0.6838440299034119, + "step": 12857 + }, + { + "epoch": 0.75, + "learning_rate": 1.5719920379786206e-08, + "logits/chosen": -1.8685213327407837, + "logits/rejected": -1.8722351789474487, + "logps/chosen": -269.5922546386719, + "logps/rejected": -485.68328857421875, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.934439182281494, + "rewards/margins": 0.8829987049102783, + "rewards/rejected": 3.051440477371216, + "step": 12858 + }, + { + "epoch": 0.75, + "learning_rate": 1.571306051633986e-08, + "logits/chosen": -1.726444959640503, + "logits/rejected": -1.726862907409668, + "logps/chosen": -21.92354965209961, + "logps/rejected": -74.08014678955078, + "loss": 0.9599, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.42464563250541687, + "rewards/margins": -0.610019326210022, + "rewards/rejected": 0.1853736937046051, + "step": 12859 + }, + { + "epoch": 0.75, + "learning_rate": 1.5706201870933315e-08, + "logits/chosen": -1.9110242128372192, + "logits/rejected": -1.9165688753128052, + "logps/chosen": -19.9989070892334, + "logps/rejected": -69.77728271484375, + "loss": 0.4678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1829771101474762, + "rewards/margins": 0.8781031370162964, + "rewards/rejected": -0.6951259970664978, + "step": 12860 + }, + { + "epoch": 0.75, + "learning_rate": 1.5699344443810246e-08, + "logits/chosen": -1.9066177606582642, + "logits/rejected": -1.882901906967163, + "logps/chosen": -193.13339233398438, + "logps/rejected": -246.28823852539062, + "loss": 0.2162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0255173444747925, + "rewards/margins": 1.658442735671997, + "rewards/rejected": -0.6329254508018494, + "step": 12861 + }, + { + "epoch": 0.75, + "learning_rate": 1.5692488235214236e-08, + "logits/chosen": -1.9011229276657104, + "logits/rejected": -1.8192428350448608, + "logps/chosen": -183.8231201171875, + "logps/rejected": -404.3505859375, + "loss": 0.1748, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5883820056915283, + "rewards/margins": 1.7648162841796875, + "rewards/rejected": -0.17643432319164276, + "step": 12862 + }, + { + "epoch": 0.75, + "learning_rate": 1.568563324538885e-08, + "logits/chosen": -1.8982701301574707, + "logits/rejected": -1.904559850692749, + "logps/chosen": -1.9126582145690918, + "logps/rejected": -88.43821716308594, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10182154178619385, + "rewards/margins": 2.0754504203796387, + "rewards/rejected": -1.9736289978027344, + "step": 12863 + }, + { + "epoch": 0.75, + "learning_rate": 1.567877947457763e-08, + "logits/chosen": -1.7187880277633667, + "logits/rejected": -1.7759993076324463, + "logps/chosen": -276.432373046875, + "logps/rejected": -519.6297607421875, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.089959740638733, + "rewards/margins": 5.499029636383057, + "rewards/rejected": -4.409070014953613, + "step": 12864 + }, + { + "epoch": 0.75, + "learning_rate": 1.567192692302406e-08, + "logits/chosen": -2.003671407699585, + "logits/rejected": -2.016432046890259, + "logps/chosen": -181.1034698486328, + "logps/rejected": -291.7159729003906, + "loss": 0.1779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.862995982170105, + "rewards/margins": 1.3665757179260254, + "rewards/rejected": 0.496420294046402, + "step": 12865 + }, + { + "epoch": 0.75, + "learning_rate": 1.5665075590971545e-08, + "logits/chosen": -1.8639247417449951, + "logits/rejected": -1.8305299282073975, + "logps/chosen": -259.8671569824219, + "logps/rejected": -340.10504150390625, + "loss": 0.0577, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3091461658477783, + "rewards/margins": 2.895742893218994, + "rewards/rejected": -0.586596667766571, + "step": 12866 + }, + { + "epoch": 0.75, + "learning_rate": 1.5658225478663496e-08, + "logits/chosen": -2.0551867485046387, + "logits/rejected": -2.0547945499420166, + "logps/chosen": -29.812236785888672, + "logps/rejected": -160.89852905273438, + "loss": 0.2472, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4395672082901, + "rewards/margins": 1.189646601676941, + "rewards/rejected": 0.24992065131664276, + "step": 12867 + }, + { + "epoch": 0.75, + "learning_rate": 1.565137658634326e-08, + "logits/chosen": -1.9565987586975098, + "logits/rejected": -1.9527982473373413, + "logps/chosen": -208.13201904296875, + "logps/rejected": -273.48681640625, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.672076404094696, + "rewards/margins": 0.44185787439346313, + "rewards/rejected": 0.23021851480007172, + "step": 12868 + }, + { + "epoch": 0.75, + "learning_rate": 1.5644528914254164e-08, + "logits/chosen": -1.9135876893997192, + "logits/rejected": -1.9072179794311523, + "logps/chosen": -12.345475196838379, + "logps/rejected": -39.53438186645508, + "loss": 0.7981, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.14850874245166779, + "rewards/margins": -0.28951674699783325, + "rewards/rejected": 0.14100800454616547, + "step": 12869 + }, + { + "epoch": 0.75, + "learning_rate": 1.5637682462639435e-08, + "logits/chosen": -2.0072245597839355, + "logits/rejected": -1.995694875717163, + "logps/chosen": -1.223656415939331, + "logps/rejected": -200.4117889404297, + "loss": 0.3279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1126733347773552, + "rewards/margins": 3.2939035892486572, + "rewards/rejected": -3.181230306625366, + "step": 12870 + }, + { + "epoch": 0.75, + "learning_rate": 1.5630837231742305e-08, + "logits/chosen": -1.7933818101882935, + "logits/rejected": -1.7918510437011719, + "logps/chosen": -0.07997310906648636, + "logps/rejected": -50.2110595703125, + "loss": 0.6808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007100833114236593, + "rewards/margins": 0.015472637489438057, + "rewards/rejected": -0.022573471069335938, + "step": 12871 + }, + { + "epoch": 0.75, + "learning_rate": 1.562399322180596e-08, + "logits/chosen": -1.9503512382507324, + "logits/rejected": -1.9794113636016846, + "logps/chosen": -185.6256866455078, + "logps/rejected": -303.6590576171875, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.587406873703003, + "rewards/margins": 5.734574794769287, + "rewards/rejected": -3.147167921066284, + "step": 12872 + }, + { + "epoch": 0.75, + "learning_rate": 1.5617150433073538e-08, + "logits/chosen": -1.6977473497390747, + "logits/rejected": -1.7277346849441528, + "logps/chosen": -318.1657409667969, + "logps/rejected": -440.200439453125, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6207001209259033, + "rewards/margins": 3.2992188930511475, + "rewards/rejected": 0.321481317281723, + "step": 12873 + }, + { + "epoch": 0.75, + "learning_rate": 1.5610308865788084e-08, + "logits/chosen": -2.029914140701294, + "logits/rejected": -2.0335938930511475, + "logps/chosen": -28.5830078125, + "logps/rejected": -283.8021240234375, + "loss": 0.15, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8521255850791931, + "rewards/margins": 5.550371170043945, + "rewards/rejected": -4.698245525360107, + "step": 12874 + }, + { + "epoch": 0.75, + "learning_rate": 1.5603468520192696e-08, + "logits/chosen": -1.9998095035552979, + "logits/rejected": -1.9888070821762085, + "logps/chosen": -105.2205581665039, + "logps/rejected": -179.1165771484375, + "loss": 0.2518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6925773620605469, + "rewards/margins": 1.6128075122833252, + "rewards/rejected": -0.9202300906181335, + "step": 12875 + }, + { + "epoch": 0.75, + "learning_rate": 1.559662939653038e-08, + "logits/chosen": -1.963026762008667, + "logits/rejected": -1.9542073011398315, + "logps/chosen": -54.73358917236328, + "logps/rejected": -268.83807373046875, + "loss": 0.4075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01225357037037611, + "rewards/margins": 1.405489444732666, + "rewards/rejected": -1.4177429676055908, + "step": 12876 + }, + { + "epoch": 0.75, + "learning_rate": 1.5589791495044054e-08, + "logits/chosen": -1.8761793375015259, + "logits/rejected": -1.8841137886047363, + "logps/chosen": -252.58740234375, + "logps/rejected": -336.7290954589844, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0454652309417725, + "rewards/margins": 3.394345283508301, + "rewards/rejected": -0.34888002276420593, + "step": 12877 + }, + { + "epoch": 0.75, + "learning_rate": 1.5582954815976646e-08, + "logits/chosen": -1.7201831340789795, + "logits/rejected": -1.7641421556472778, + "logps/chosen": -228.12252807617188, + "logps/rejected": -335.6103515625, + "loss": 0.194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.76776123046875, + "rewards/margins": 0.9023528695106506, + "rewards/rejected": 0.8654083609580994, + "step": 12878 + }, + { + "epoch": 0.75, + "learning_rate": 1.557611935957104e-08, + "logits/chosen": -2.0439510345458984, + "logits/rejected": -2.0272982120513916, + "logps/chosen": -291.8507080078125, + "logps/rejected": -404.26708984375, + "loss": 0.1689, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8417786359786987, + "rewards/margins": 1.0439270734786987, + "rewards/rejected": 0.7978515625, + "step": 12879 + }, + { + "epoch": 0.75, + "learning_rate": 1.5569285126070075e-08, + "logits/chosen": -1.9716291427612305, + "logits/rejected": -1.944542646408081, + "logps/chosen": -73.53826904296875, + "logps/rejected": -357.3826904296875, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.06333327293396, + "rewards/margins": 6.822598457336426, + "rewards/rejected": -4.759265422821045, + "step": 12880 + }, + { + "epoch": 0.75, + "learning_rate": 1.5562452115716502e-08, + "logits/chosen": -1.855007290840149, + "logits/rejected": -1.7733473777770996, + "logps/chosen": -235.25808715820312, + "logps/rejected": -540.5341186523438, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2677154541015625, + "rewards/margins": 5.3202362060546875, + "rewards/rejected": -2.052520751953125, + "step": 12881 + }, + { + "epoch": 0.75, + "learning_rate": 1.555562032875309e-08, + "logits/chosen": -2.0585057735443115, + "logits/rejected": -2.0541343688964844, + "logps/chosen": -8.56936264038086, + "logps/rejected": -215.54273986816406, + "loss": 0.3136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18001165986061096, + "rewards/margins": 3.7116386890411377, + "rewards/rejected": -3.5316269397735596, + "step": 12882 + }, + { + "epoch": 0.75, + "learning_rate": 1.5548789765422522e-08, + "logits/chosen": -1.8332360982894897, + "logits/rejected": -1.856552243232727, + "logps/chosen": -267.9096984863281, + "logps/rejected": -513.1276245117188, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4075714349746704, + "rewards/margins": 6.814691066741943, + "rewards/rejected": -5.4071197509765625, + "step": 12883 + }, + { + "epoch": 0.75, + "learning_rate": 1.5541960425967478e-08, + "logits/chosen": -2.0283188819885254, + "logits/rejected": -2.0293352603912354, + "logps/chosen": -8.623161315917969, + "logps/rejected": -184.62283325195312, + "loss": 0.2709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3390520215034485, + "rewards/margins": 3.513577461242676, + "rewards/rejected": -3.174525499343872, + "step": 12884 + }, + { + "epoch": 0.75, + "learning_rate": 1.553513231063054e-08, + "logits/chosen": -1.917317271232605, + "logits/rejected": -1.9151870012283325, + "logps/chosen": -39.08363342285156, + "logps/rejected": -96.02827453613281, + "loss": 0.4168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08966904133558273, + "rewards/margins": 2.5991783142089844, + "rewards/rejected": -2.688847303390503, + "step": 12885 + }, + { + "epoch": 0.75, + "learning_rate": 1.5528305419654288e-08, + "logits/chosen": -1.9090368747711182, + "logits/rejected": -1.9003069400787354, + "logps/chosen": -10.192048072814941, + "logps/rejected": -191.6651611328125, + "loss": 0.3838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032923221588134766, + "rewards/margins": 2.4711296558380127, + "rewards/rejected": -2.5040528774261475, + "step": 12886 + }, + { + "epoch": 0.75, + "learning_rate": 1.552147975328124e-08, + "logits/chosen": -2.0084118843078613, + "logits/rejected": -1.999569058418274, + "logps/chosen": -69.08708190917969, + "logps/rejected": -202.96051025390625, + "loss": 0.5998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5165008902549744, + "rewards/margins": 1.085780382156372, + "rewards/rejected": -1.6022812128067017, + "step": 12887 + }, + { + "epoch": 0.75, + "learning_rate": 1.5514655311753904e-08, + "logits/chosen": -1.890052080154419, + "logits/rejected": -1.8880406618118286, + "logps/chosen": -0.028649255633354187, + "logps/rejected": -106.22454071044922, + "loss": 0.4895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027910792268812656, + "rewards/margins": 1.0920385122299194, + "rewards/rejected": -1.0948295593261719, + "step": 12888 + }, + { + "epoch": 0.75, + "learning_rate": 1.5507832095314684e-08, + "logits/chosen": -1.8694565296173096, + "logits/rejected": -1.8639662265777588, + "logps/chosen": -63.37083435058594, + "logps/rejected": -191.91683959960938, + "loss": 0.4003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49434661865234375, + "rewards/margins": 1.089257836341858, + "rewards/rejected": -0.5949112176895142, + "step": 12889 + }, + { + "epoch": 0.75, + "learning_rate": 1.5501010104205984e-08, + "logits/chosen": -1.864566683769226, + "logits/rejected": -1.8479582071304321, + "logps/chosen": -343.24652099609375, + "logps/rejected": -508.85369873046875, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9317108392715454, + "rewards/margins": 4.535244941711426, + "rewards/rejected": -3.603533983230591, + "step": 12890 + }, + { + "epoch": 0.75, + "learning_rate": 1.549418933867016e-08, + "logits/chosen": -1.8977761268615723, + "logits/rejected": -1.891302227973938, + "logps/chosen": -211.02069091796875, + "logps/rejected": -326.44647216796875, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5144410133361816, + "rewards/margins": 2.322528123855591, + "rewards/rejected": 0.19191284477710724, + "step": 12891 + }, + { + "epoch": 0.75, + "learning_rate": 1.5487369798949527e-08, + "logits/chosen": -2.0707297325134277, + "logits/rejected": -2.067437171936035, + "logps/chosen": -5.927152156829834, + "logps/rejected": -212.3105926513672, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3309665322303772, + "rewards/margins": 3.863975763320923, + "rewards/rejected": -3.5330092906951904, + "step": 12892 + }, + { + "epoch": 0.75, + "learning_rate": 1.5480551485286332e-08, + "logits/chosen": -1.745051622390747, + "logits/rejected": -1.741889476776123, + "logps/chosen": -22.290115356445312, + "logps/rejected": -204.47259521484375, + "loss": 0.2418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48484498262405396, + "rewards/margins": 4.941168308258057, + "rewards/rejected": -4.456323146820068, + "step": 12893 + }, + { + "epoch": 0.75, + "learning_rate": 1.5473734397922795e-08, + "logits/chosen": -1.8102883100509644, + "logits/rejected": -1.8075393438339233, + "logps/chosen": -39.156494140625, + "logps/rejected": -160.50006103515625, + "loss": 0.242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.578800618648529, + "rewards/margins": 2.924769401550293, + "rewards/rejected": -2.345968723297119, + "step": 12894 + }, + { + "epoch": 0.75, + "learning_rate": 1.54669185371011e-08, + "logits/chosen": -1.8567686080932617, + "logits/rejected": -1.860839605331421, + "logps/chosen": -27.460582733154297, + "logps/rejected": -100.58563995361328, + "loss": 0.532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035833168774843216, + "rewards/margins": 0.5511720776557922, + "rewards/rejected": -0.5153388977050781, + "step": 12895 + }, + { + "epoch": 0.75, + "learning_rate": 1.5460103903063377e-08, + "logits/chosen": -1.9123259782791138, + "logits/rejected": -1.9076303243637085, + "logps/chosen": -0.02201284095644951, + "logps/rejected": -121.88236999511719, + "loss": 0.4225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018390556797385216, + "rewards/margins": 1.7309958934783936, + "rewards/rejected": -1.712605357170105, + "step": 12896 + }, + { + "epoch": 0.75, + "learning_rate": 1.545329049605172e-08, + "logits/chosen": -2.1364998817443848, + "logits/rejected": -2.1405937671661377, + "logps/chosen": -15.695455551147461, + "logps/rejected": -295.7484130859375, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.387712299823761, + "rewards/margins": 2.783818244934082, + "rewards/rejected": -2.396106004714966, + "step": 12897 + }, + { + "epoch": 0.75, + "learning_rate": 1.5446478316308164e-08, + "logits/chosen": -1.7497050762176514, + "logits/rejected": -1.7682009935379028, + "logps/chosen": -194.4098358154297, + "logps/rejected": -478.40032958984375, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8109970092773438, + "rewards/margins": 5.402952671051025, + "rewards/rejected": -2.5919556617736816, + "step": 12898 + }, + { + "epoch": 0.75, + "learning_rate": 1.5439667364074736e-08, + "logits/chosen": -1.9021155834197998, + "logits/rejected": -1.8844751119613647, + "logps/chosen": -194.01260375976562, + "logps/rejected": -242.18946838378906, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.50982666015625, + "rewards/margins": 0.3348296880722046, + "rewards/rejected": 1.1749969720840454, + "step": 12899 + }, + { + "epoch": 0.75, + "learning_rate": 1.543285763959336e-08, + "logits/chosen": -1.415785789489746, + "logits/rejected": -1.419110894203186, + "logps/chosen": -9.416969299316406, + "logps/rejected": -191.0389404296875, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22964945435523987, + "rewards/margins": 3.589268684387207, + "rewards/rejected": -3.359619140625, + "step": 12900 + }, + { + "epoch": 0.75, + "learning_rate": 1.5426049143105973e-08, + "logits/chosen": -1.8954150676727295, + "logits/rejected": -1.8847835063934326, + "logps/chosen": -14.627923965454102, + "logps/rejected": -135.46104431152344, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4123006761074066, + "rewards/margins": 1.6105047464370728, + "rewards/rejected": -1.1982040405273438, + "step": 12901 + }, + { + "epoch": 0.75, + "learning_rate": 1.5419241874854434e-08, + "logits/chosen": -1.825191617012024, + "logits/rejected": -1.8108913898468018, + "logps/chosen": -155.03964233398438, + "logps/rejected": -268.1162109375, + "loss": 0.4108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8139846920967102, + "rewards/margins": 0.3418533504009247, + "rewards/rejected": 0.4721313416957855, + "step": 12902 + }, + { + "epoch": 0.75, + "learning_rate": 1.5412435835080593e-08, + "logits/chosen": -1.9904390573501587, + "logits/rejected": -1.9793859720230103, + "logps/chosen": -39.89509963989258, + "logps/rejected": -249.34658813476562, + "loss": 0.2083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6056507229804993, + "rewards/margins": 2.728128433227539, + "rewards/rejected": -2.1224777698516846, + "step": 12903 + }, + { + "epoch": 0.75, + "learning_rate": 1.54056310240262e-08, + "logits/chosen": -2.0473668575286865, + "logits/rejected": -2.0451016426086426, + "logps/chosen": -79.87257385253906, + "logps/rejected": -234.53384399414062, + "loss": 0.1584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8779884576797485, + "rewards/margins": 4.548480987548828, + "rewards/rejected": -3.670492649078369, + "step": 12904 + }, + { + "epoch": 0.75, + "learning_rate": 1.5398827441933017e-08, + "logits/chosen": -1.8767294883728027, + "logits/rejected": -1.8774853944778442, + "logps/chosen": -0.02980501390993595, + "logps/rejected": -167.50155639648438, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011608919594436884, + "rewards/margins": 3.306654691696167, + "rewards/rejected": -3.3078155517578125, + "step": 12905 + }, + { + "epoch": 0.75, + "learning_rate": 1.5392025089042732e-08, + "logits/chosen": -2.173251152038574, + "logits/rejected": -2.174685001373291, + "logps/chosen": -7.152443868108094e-05, + "logps/rejected": -63.6827392578125, + "loss": 0.3873, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6462855657882756e-06, + "rewards/margins": 2.232377529144287, + "rewards/rejected": -2.232374906539917, + "step": 12906 + }, + { + "epoch": 0.75, + "learning_rate": 1.5385223965597023e-08, + "logits/chosen": -2.0427277088165283, + "logits/rejected": -2.035435914993286, + "logps/chosen": -0.7295610904693604, + "logps/rejected": -208.86117553710938, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023755615577101707, + "rewards/margins": 3.9462928771972656, + "rewards/rejected": -3.922537326812744, + "step": 12907 + }, + { + "epoch": 0.75, + "learning_rate": 1.5378424071837453e-08, + "logits/chosen": -1.8330512046813965, + "logits/rejected": -1.828891634941101, + "logps/chosen": -233.02923583984375, + "logps/rejected": -285.265869140625, + "loss": 0.0855, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7045533657073975, + "rewards/margins": 1.8055908679962158, + "rewards/rejected": 0.8989624381065369, + "step": 12908 + }, + { + "epoch": 0.75, + "learning_rate": 1.5371625408005616e-08, + "logits/chosen": -2.0375072956085205, + "logits/rejected": -2.026315450668335, + "logps/chosen": -54.20887756347656, + "logps/rejected": -120.16358184814453, + "loss": 0.2069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4345314502716064, + "rewards/margins": 1.503157138824463, + "rewards/rejected": -0.06862564384937286, + "step": 12909 + }, + { + "epoch": 0.75, + "learning_rate": 1.5364827974343035e-08, + "logits/chosen": -1.8860282897949219, + "logits/rejected": -1.9218891859054565, + "logps/chosen": -191.84075927734375, + "logps/rejected": -389.85296630859375, + "loss": 0.1171, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6211609840393066, + "rewards/margins": 1.4057434797286987, + "rewards/rejected": 1.215417504310608, + "step": 12910 + }, + { + "epoch": 0.75, + "learning_rate": 1.5358031771091197e-08, + "logits/chosen": -1.8408520221710205, + "logits/rejected": -1.8475457429885864, + "logps/chosen": -174.1135711669922, + "logps/rejected": -370.5146179199219, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2671493291854858, + "rewards/margins": 4.9505109786987305, + "rewards/rejected": -3.683361768722534, + "step": 12911 + }, + { + "epoch": 0.75, + "learning_rate": 1.5351236798491503e-08, + "logits/chosen": -1.9676094055175781, + "logits/rejected": -1.9669722318649292, + "logps/chosen": -255.13522338867188, + "logps/rejected": -435.81005859375, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6544647216796875, + "rewards/margins": 3.910885810852051, + "rewards/rejected": -1.2564209699630737, + "step": 12912 + }, + { + "epoch": 0.75, + "learning_rate": 1.5344443056785367e-08, + "logits/chosen": -1.8189691305160522, + "logits/rejected": -1.8124973773956299, + "logps/chosen": -90.31803131103516, + "logps/rejected": -340.60693359375, + "loss": 0.1215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2939270734786987, + "rewards/margins": 2.901293992996216, + "rewards/rejected": -1.607366919517517, + "step": 12913 + }, + { + "epoch": 0.75, + "learning_rate": 1.5337650546214132e-08, + "logits/chosen": -1.96468985080719, + "logits/rejected": -1.9657987356185913, + "logps/chosen": -56.5041618347168, + "logps/rejected": -387.63287353515625, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0694580078125, + "rewards/margins": 5.249108791351318, + "rewards/rejected": -5.179650783538818, + "step": 12914 + }, + { + "epoch": 0.75, + "learning_rate": 1.5330859267019114e-08, + "logits/chosen": -1.8410552740097046, + "logits/rejected": -1.835024118423462, + "logps/chosen": -92.15502166748047, + "logps/rejected": -245.02886962890625, + "loss": 0.0992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3075828552246094, + "rewards/margins": 5.616750240325928, + "rewards/rejected": -4.309167385101318, + "step": 12915 + }, + { + "epoch": 0.75, + "learning_rate": 1.532406921944153e-08, + "logits/chosen": -1.9865080118179321, + "logits/rejected": -1.9807261228561401, + "logps/chosen": -211.43695068359375, + "logps/rejected": -292.36016845703125, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8029847145080566, + "rewards/margins": 3.281970262527466, + "rewards/rejected": -0.47898560762405396, + "step": 12916 + }, + { + "epoch": 0.75, + "learning_rate": 1.5317280403722644e-08, + "logits/chosen": -1.9567158222198486, + "logits/rejected": -1.9944700002670288, + "logps/chosen": -106.02899932861328, + "logps/rejected": -154.25021362304688, + "loss": 0.372, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.8962386846542358, + "rewards/margins": -0.0021317005157470703, + "rewards/rejected": 1.898370385169983, + "step": 12917 + }, + { + "epoch": 0.75, + "learning_rate": 1.531049282010362e-08, + "logits/chosen": -1.8218575716018677, + "logits/rejected": -1.773545742034912, + "logps/chosen": -256.27386474609375, + "logps/rejected": -482.61590576171875, + "loss": 0.0836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0590332746505737, + "rewards/margins": 3.140194892883301, + "rewards/rejected": -2.0811614990234375, + "step": 12918 + }, + { + "epoch": 0.75, + "learning_rate": 1.5303706468825568e-08, + "logits/chosen": -2.057788848876953, + "logits/rejected": -2.0201053619384766, + "logps/chosen": -158.42391967773438, + "logps/rejected": -707.4554443359375, + "loss": 0.135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5797454714775085, + "rewards/margins": 11.58911418914795, + "rewards/rejected": -11.009368896484375, + "step": 12919 + }, + { + "epoch": 0.75, + "learning_rate": 1.529692135012957e-08, + "logits/chosen": -1.8514349460601807, + "logits/rejected": -1.8705371618270874, + "logps/chosen": -180.533203125, + "logps/rejected": -294.30242919921875, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.746220350265503, + "rewards/margins": 5.21291971206665, + "rewards/rejected": -1.466699242591858, + "step": 12920 + }, + { + "epoch": 0.75, + "learning_rate": 1.5290137464256684e-08, + "logits/chosen": -2.0277931690216064, + "logits/rejected": -2.019583225250244, + "logps/chosen": -0.0001047824916895479, + "logps/rejected": -256.9700927734375, + "loss": 0.3424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00014281031326390803, + "rewards/margins": 6.595479488372803, + "rewards/rejected": -6.5953369140625, + "step": 12921 + }, + { + "epoch": 0.75, + "learning_rate": 1.5283354811447918e-08, + "logits/chosen": -1.9749091863632202, + "logits/rejected": -1.9643168449401855, + "logps/chosen": -42.996559143066406, + "logps/rejected": -281.1275939941406, + "loss": 0.5133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4633617401123047, + "rewards/margins": 2.4119114875793457, + "rewards/rejected": -2.8752732276916504, + "step": 12922 + }, + { + "epoch": 0.75, + "learning_rate": 1.5276573391944185e-08, + "logits/chosen": -1.9815000295639038, + "logits/rejected": -2.0216622352600098, + "logps/chosen": -353.3074951171875, + "logps/rejected": -461.100341796875, + "loss": 0.0614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7670959830284119, + "rewards/margins": 4.457000732421875, + "rewards/rejected": -3.6899049282073975, + "step": 12923 + }, + { + "epoch": 0.75, + "learning_rate": 1.5269793205986416e-08, + "logits/chosen": -1.9426183700561523, + "logits/rejected": -1.9389477968215942, + "logps/chosen": -258.0628662109375, + "logps/rejected": -382.87371826171875, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.317150831222534, + "rewards/margins": 3.8371033668518066, + "rewards/rejected": -0.5199524164199829, + "step": 12924 + }, + { + "epoch": 0.75, + "learning_rate": 1.5263014253815478e-08, + "logits/chosen": -1.8521320819854736, + "logits/rejected": -1.834100365638733, + "logps/chosen": -23.507675170898438, + "logps/rejected": -335.89300537109375, + "loss": 0.0887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1800777912139893, + "rewards/margins": 7.253375053405762, + "rewards/rejected": -6.073297023773193, + "step": 12925 + }, + { + "epoch": 0.75, + "learning_rate": 1.5256236535672206e-08, + "logits/chosen": -1.8038033246994019, + "logits/rejected": -1.767025351524353, + "logps/chosen": -392.93206787109375, + "logps/rejected": -619.8211669921875, + "loss": 0.1569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0111817121505737, + "rewards/margins": 1.3117066621780396, + "rewards/rejected": -0.30052492022514343, + "step": 12926 + }, + { + "epoch": 0.75, + "learning_rate": 1.5249460051797346e-08, + "logits/chosen": -1.9208420515060425, + "logits/rejected": -1.9610364437103271, + "logps/chosen": -236.31396484375, + "logps/rejected": -398.5704345703125, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.301260471343994, + "rewards/margins": 5.135873794555664, + "rewards/rejected": -2.834613084793091, + "step": 12927 + }, + { + "epoch": 0.75, + "learning_rate": 1.5242684802431648e-08, + "logits/chosen": -1.9254921674728394, + "logits/rejected": -1.9131100177764893, + "logps/chosen": -17.694934844970703, + "logps/rejected": -180.29525756835938, + "loss": 0.258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41147157549858093, + "rewards/margins": 3.7002854347229004, + "rewards/rejected": -3.288813829421997, + "step": 12928 + }, + { + "epoch": 0.75, + "learning_rate": 1.5235910787815805e-08, + "logits/chosen": -1.8675296306610107, + "logits/rejected": -1.8774034976959229, + "logps/chosen": -202.18016052246094, + "logps/rejected": -422.075927734375, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8861862421035767, + "rewards/margins": 3.712153673171997, + "rewards/rejected": -1.8259674310684204, + "step": 12929 + }, + { + "epoch": 0.75, + "learning_rate": 1.5229138008190472e-08, + "logits/chosen": -1.9328110218048096, + "logits/rejected": -1.929046630859375, + "logps/chosen": -105.00183868408203, + "logps/rejected": -281.03570556640625, + "loss": 0.2689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7813751101493835, + "rewards/margins": 0.8257537484169006, + "rewards/rejected": -0.04437866434454918, + "step": 12930 + }, + { + "epoch": 0.75, + "learning_rate": 1.5222366463796225e-08, + "logits/chosen": -1.8046822547912598, + "logits/rejected": -1.8078641891479492, + "logps/chosen": -15.210526466369629, + "logps/rejected": -95.3564682006836, + "loss": 0.4349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09573163837194443, + "rewards/margins": 1.030890941619873, + "rewards/rejected": -0.9351593255996704, + "step": 12931 + }, + { + "epoch": 0.75, + "learning_rate": 1.521559615487364e-08, + "logits/chosen": -1.7890660762786865, + "logits/rejected": -1.7830750942230225, + "logps/chosen": -20.803590774536133, + "logps/rejected": -348.2231750488281, + "loss": 0.1828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7525390982627869, + "rewards/margins": 7.051947116851807, + "rewards/rejected": -6.299407958984375, + "step": 12932 + }, + { + "epoch": 0.75, + "learning_rate": 1.5208827081663223e-08, + "logits/chosen": -1.705376386642456, + "logits/rejected": -1.751470923423767, + "logps/chosen": -197.21458435058594, + "logps/rejected": -452.5760192871094, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1362838745117188, + "rewards/margins": 7.33542013168335, + "rewards/rejected": -5.199136257171631, + "step": 12933 + }, + { + "epoch": 0.75, + "learning_rate": 1.5202059244405476e-08, + "logits/chosen": -1.8622392416000366, + "logits/rejected": -1.8473020792007446, + "logps/chosen": -265.511962890625, + "logps/rejected": -475.3601379394531, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.292748928070068, + "rewards/margins": 4.960146903991699, + "rewards/rejected": -0.6673980951309204, + "step": 12934 + }, + { + "epoch": 0.75, + "learning_rate": 1.5195292643340756e-08, + "logits/chosen": -1.859946370124817, + "logits/rejected": -1.9054367542266846, + "logps/chosen": -144.50119018554688, + "logps/rejected": -535.4806518554688, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.488745093345642, + "rewards/margins": 8.453845024108887, + "rewards/rejected": -6.965100288391113, + "step": 12935 + }, + { + "epoch": 0.75, + "learning_rate": 1.5188527278709514e-08, + "logits/chosen": -2.0346179008483887, + "logits/rejected": -2.0350327491760254, + "logps/chosen": -0.2396351397037506, + "logps/rejected": -159.7568359375, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010629666037857533, + "rewards/margins": 3.2457568645477295, + "rewards/rejected": -3.2351272106170654, + "step": 12936 + }, + { + "epoch": 0.75, + "learning_rate": 1.5181763150752076e-08, + "logits/chosen": -1.8399112224578857, + "logits/rejected": -1.8472718000411987, + "logps/chosen": -178.3703155517578, + "logps/rejected": -340.08697509765625, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4375076293945312, + "rewards/margins": 4.461604118347168, + "rewards/rejected": -2.024096727371216, + "step": 12937 + }, + { + "epoch": 0.75, + "learning_rate": 1.5175000259708708e-08, + "logits/chosen": -1.863381028175354, + "logits/rejected": -1.8612254858016968, + "logps/chosen": -143.85214233398438, + "logps/rejected": -206.57643127441406, + "loss": 0.1363, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6442229747772217, + "rewards/margins": 1.4033735990524292, + "rewards/rejected": 1.2408493757247925, + "step": 12938 + }, + { + "epoch": 0.75, + "learning_rate": 1.5168238605819683e-08, + "logits/chosen": -2.004687786102295, + "logits/rejected": -1.9987069368362427, + "logps/chosen": -6.866291369078681e-05, + "logps/rejected": -75.01871490478516, + "loss": 0.4841, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.610639057820663e-06, + "rewards/margins": 1.1495425701141357, + "rewards/rejected": -1.1495399475097656, + "step": 12939 + }, + { + "epoch": 0.75, + "learning_rate": 1.51614781893252e-08, + "logits/chosen": -1.9450255632400513, + "logits/rejected": -1.9482983350753784, + "logps/chosen": -4.959068610332906e-05, + "logps/rejected": -103.1119384765625, + "loss": 0.4828, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884832383642788e-06, + "rewards/margins": 1.0443209409713745, + "rewards/rejected": -1.0443238019943237, + "step": 12940 + }, + { + "epoch": 0.75, + "learning_rate": 1.5154719010465438e-08, + "logits/chosen": -1.905990481376648, + "logits/rejected": -2.0136964321136475, + "logps/chosen": -233.59512329101562, + "logps/rejected": -350.13140869140625, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.287927269935608, + "rewards/margins": 4.7716522216796875, + "rewards/rejected": -3.483725070953369, + "step": 12941 + }, + { + "epoch": 0.75, + "learning_rate": 1.5147961069480497e-08, + "logits/chosen": -1.9152165651321411, + "logits/rejected": -1.9347656965255737, + "logps/chosen": -384.12042236328125, + "logps/rejected": -368.5999755859375, + "loss": 0.4579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19176025688648224, + "rewards/margins": 0.7377411127090454, + "rewards/rejected": -0.5459808707237244, + "step": 12942 + }, + { + "epoch": 0.75, + "learning_rate": 1.514120436661045e-08, + "logits/chosen": -1.895945429801941, + "logits/rejected": -1.881396770477295, + "logps/chosen": -43.87965774536133, + "logps/rejected": -347.4473571777344, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1465320587158203, + "rewards/margins": 3.8477704524993896, + "rewards/rejected": -3.99430251121521, + "step": 12943 + }, + { + "epoch": 0.75, + "learning_rate": 1.5134448902095342e-08, + "logits/chosen": -1.746991515159607, + "logits/rejected": -1.7526856660842896, + "logps/chosen": -4.339165025157854e-05, + "logps/rejected": -377.3515625, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.622586237066571e-07, + "rewards/margins": 10.820337295532227, + "rewards/rejected": -10.820337295532227, + "step": 12944 + }, + { + "epoch": 0.75, + "learning_rate": 1.512769467617517e-08, + "logits/chosen": -2.0279712677001953, + "logits/rejected": -2.02838134765625, + "logps/chosen": -112.675048828125, + "logps/rejected": -373.4225158691406, + "loss": 0.1085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.63615882396698, + "rewards/margins": 2.718122959136963, + "rewards/rejected": -1.081964135169983, + "step": 12945 + }, + { + "epoch": 0.75, + "learning_rate": 1.5120941689089845e-08, + "logits/chosen": -1.9183619022369385, + "logits/rejected": -1.9118224382400513, + "logps/chosen": -0.0001982320100069046, + "logps/rejected": -180.33383178710938, + "loss": 0.3469, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7260221284232102e-05, + "rewards/margins": 5.559628009796143, + "rewards/rejected": -5.559645175933838, + "step": 12946 + }, + { + "epoch": 0.75, + "learning_rate": 1.511418994107928e-08, + "logits/chosen": -1.879940390586853, + "logits/rejected": -1.8962739706039429, + "logps/chosen": -204.98785400390625, + "logps/rejected": -417.53997802734375, + "loss": 0.0453, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5234971046447754, + "rewards/margins": 2.576890707015991, + "rewards/rejected": -0.05339355394244194, + "step": 12947 + }, + { + "epoch": 0.75, + "learning_rate": 1.5107439432383335e-08, + "logits/chosen": -1.985053300857544, + "logits/rejected": -1.9727205038070679, + "logps/chosen": -66.62821197509766, + "logps/rejected": -216.90855407714844, + "loss": 0.5064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09376221150159836, + "rewards/margins": 0.6542526483535767, + "rewards/rejected": -0.7480148673057556, + "step": 12948 + }, + { + "epoch": 0.75, + "learning_rate": 1.5100690163241835e-08, + "logits/chosen": -1.9746900796890259, + "logits/rejected": -1.961445689201355, + "logps/chosen": -195.74365234375, + "logps/rejected": -759.3078002929688, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.971588134765625, + "rewards/margins": 12.413812637329102, + "rewards/rejected": -10.442224502563477, + "step": 12949 + }, + { + "epoch": 0.75, + "learning_rate": 1.5093942133894522e-08, + "logits/chosen": -1.8608797788619995, + "logits/rejected": -1.8639336824417114, + "logps/chosen": -274.52325439453125, + "logps/rejected": -389.2918701171875, + "loss": 0.0402, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4561309814453125, + "rewards/margins": 2.8088653087615967, + "rewards/rejected": -0.35273438692092896, + "step": 12950 + }, + { + "epoch": 0.75, + "learning_rate": 1.5087195344581123e-08, + "logits/chosen": -1.8886934518814087, + "logits/rejected": -1.8733105659484863, + "logps/chosen": -11.17959213256836, + "logps/rejected": -318.19329833984375, + "loss": 0.3818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06257801502943039, + "rewards/margins": 4.228175163269043, + "rewards/rejected": -4.290753364562988, + "step": 12951 + }, + { + "epoch": 0.75, + "learning_rate": 1.5080449795541322e-08, + "logits/chosen": -2.0118134021759033, + "logits/rejected": -2.002974271774292, + "logps/chosen": -0.2812601625919342, + "logps/rejected": -266.462158203125, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061697401106357574, + "rewards/margins": 4.998435974121094, + "rewards/rejected": -4.93673849105835, + "step": 12952 + }, + { + "epoch": 0.75, + "learning_rate": 1.507370548701477e-08, + "logits/chosen": -2.0118839740753174, + "logits/rejected": -2.001091480255127, + "logps/chosen": -0.0026338123716413975, + "logps/rejected": -95.73902893066406, + "loss": 0.4724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012182992941234261, + "rewards/margins": 1.2817140817642212, + "rewards/rejected": -1.281835913658142, + "step": 12953 + }, + { + "epoch": 0.75, + "learning_rate": 1.5066962419241025e-08, + "logits/chosen": -1.79452383518219, + "logits/rejected": -1.8036351203918457, + "logps/chosen": -225.16534423828125, + "logps/rejected": -290.2652587890625, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.049612522125244, + "rewards/margins": 3.4156036376953125, + "rewards/rejected": -0.3659912049770355, + "step": 12954 + }, + { + "epoch": 0.75, + "learning_rate": 1.5060220592459655e-08, + "logits/chosen": -2.057333469390869, + "logits/rejected": -2.0522897243499756, + "logps/chosen": -5.7100354752037674e-05, + "logps/rejected": -172.1889190673828, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1933894913672702e-06, + "rewards/margins": 5.4436845779418945, + "rewards/rejected": -5.4436869621276855, + "step": 12955 + }, + { + "epoch": 0.75, + "learning_rate": 1.505348000691015e-08, + "logits/chosen": -1.9675776958465576, + "logits/rejected": -1.960119366645813, + "logps/chosen": -86.0084228515625, + "logps/rejected": -259.39825439453125, + "loss": 0.1162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3140640258789062, + "rewards/margins": 3.210139513015747, + "rewards/rejected": -1.8960754871368408, + "step": 12956 + }, + { + "epoch": 0.75, + "learning_rate": 1.5046740662831987e-08, + "logits/chosen": -1.8251667022705078, + "logits/rejected": -1.8765413761138916, + "logps/chosen": -204.26193237304688, + "logps/rejected": -429.6813049316406, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1893829107284546, + "rewards/margins": 4.470583915710449, + "rewards/rejected": -3.281201124191284, + "step": 12957 + }, + { + "epoch": 0.75, + "learning_rate": 1.5040002560464564e-08, + "logits/chosen": -1.994667410850525, + "logits/rejected": -2.0449981689453125, + "logps/chosen": -234.31851196289062, + "logps/rejected": -669.0780639648438, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.286144971847534, + "rewards/margins": 10.934515953063965, + "rewards/rejected": -8.648370742797852, + "step": 12958 + }, + { + "epoch": 0.75, + "learning_rate": 1.5033265700047265e-08, + "logits/chosen": -2.0447654724121094, + "logits/rejected": -2.041950225830078, + "logps/chosen": -138.65542602539062, + "logps/rejected": -366.1041259765625, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.527108907699585, + "rewards/margins": 3.182565450668335, + "rewards/rejected": -0.65545654296875, + "step": 12959 + }, + { + "epoch": 0.75, + "learning_rate": 1.502653008181942e-08, + "logits/chosen": -1.9687669277191162, + "logits/rejected": -1.9672499895095825, + "logps/chosen": -38.54273986816406, + "logps/rejected": -107.55101013183594, + "loss": 0.477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.336691290140152, + "rewards/margins": 0.5849136114120483, + "rewards/rejected": -0.24822235107421875, + "step": 12960 + }, + { + "epoch": 0.75, + "learning_rate": 1.5019795706020284e-08, + "logits/chosen": -1.6090694665908813, + "logits/rejected": -1.611532211303711, + "logps/chosen": -256.40460205078125, + "logps/rejected": -429.14129638671875, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.105181932449341, + "rewards/margins": 4.851788520812988, + "rewards/rejected": -2.7466065883636475, + "step": 12961 + }, + { + "epoch": 0.75, + "learning_rate": 1.5013062572889113e-08, + "logits/chosen": -1.813937783241272, + "logits/rejected": -1.8281116485595703, + "logps/chosen": -104.28152465820312, + "logps/rejected": -435.712646484375, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.972051978111267, + "rewards/margins": 3.6958892345428467, + "rewards/rejected": -1.7238372564315796, + "step": 12962 + }, + { + "epoch": 0.75, + "learning_rate": 1.5006330682665097e-08, + "logits/chosen": -1.8617932796478271, + "logits/rejected": -1.8534685373306274, + "logps/chosen": -81.2525405883789, + "logps/rejected": -423.6272277832031, + "loss": 0.065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6937538385391235, + "rewards/margins": 6.775562286376953, + "rewards/rejected": -5.081808567047119, + "step": 12963 + }, + { + "epoch": 0.75, + "learning_rate": 1.4999600035587408e-08, + "logits/chosen": -1.8833361864089966, + "logits/rejected": -1.8685821294784546, + "logps/chosen": -10.962508201599121, + "logps/rejected": -444.2421569824219, + "loss": 0.2522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34538039565086365, + "rewards/margins": 8.596928596496582, + "rewards/rejected": -8.251547813415527, + "step": 12964 + }, + { + "epoch": 0.75, + "learning_rate": 1.499287063189511e-08, + "logits/chosen": -1.8834153413772583, + "logits/rejected": -1.8631656169891357, + "logps/chosen": -283.69744873046875, + "logps/rejected": -449.96331787109375, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.838842749595642, + "rewards/margins": 6.282232761383057, + "rewards/rejected": -4.443389892578125, + "step": 12965 + }, + { + "epoch": 0.75, + "learning_rate": 1.4986142471827284e-08, + "logits/chosen": -1.8825095891952515, + "logits/rejected": -1.978973627090454, + "logps/chosen": -185.900146484375, + "logps/rejected": -388.6566162109375, + "loss": 0.0557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6448822021484375, + "rewards/margins": 2.5089690685272217, + "rewards/rejected": -0.864086925983429, + "step": 12966 + }, + { + "epoch": 0.75, + "learning_rate": 1.4979415555622953e-08, + "logits/chosen": -1.7116166353225708, + "logits/rejected": -1.7139291763305664, + "logps/chosen": -2.599358320236206, + "logps/rejected": -317.2384948730469, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02675175666809082, + "rewards/margins": 5.739035606384277, + "rewards/rejected": -5.712283611297607, + "step": 12967 + }, + { + "epoch": 0.75, + "learning_rate": 1.4972689883521106e-08, + "logits/chosen": -1.8392494916915894, + "logits/rejected": -1.792718529701233, + "logps/chosen": -301.8837890625, + "logps/rejected": -437.87017822265625, + "loss": 0.1145, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8701965808868408, + "rewards/margins": 2.2216796875, + "rewards/rejected": -0.35148316621780396, + "step": 12968 + }, + { + "epoch": 0.75, + "learning_rate": 1.4965965455760628e-08, + "logits/chosen": -2.02136492729187, + "logits/rejected": -1.9842866659164429, + "logps/chosen": -68.44133758544922, + "logps/rejected": -357.3576354980469, + "loss": 0.1954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.58101886510849, + "rewards/margins": 7.927515506744385, + "rewards/rejected": -7.34649658203125, + "step": 12969 + }, + { + "epoch": 0.75, + "learning_rate": 1.495924227258043e-08, + "logits/chosen": -1.9074736833572388, + "logits/rejected": -1.9019960165023804, + "logps/chosen": -163.73379516601562, + "logps/rejected": -223.50802612304688, + "loss": 0.2305, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.663311719894409, + "rewards/margins": 0.5901076793670654, + "rewards/rejected": 2.0732040405273438, + "step": 12970 + }, + { + "epoch": 0.75, + "learning_rate": 1.495252033421935e-08, + "logits/chosen": -1.8500645160675049, + "logits/rejected": -1.8769279718399048, + "logps/chosen": -193.93917846679688, + "logps/rejected": -456.8763427734375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.08500075340271, + "rewards/margins": 9.052804946899414, + "rewards/rejected": -5.967803955078125, + "step": 12971 + }, + { + "epoch": 0.75, + "learning_rate": 1.4945799640916197e-08, + "logits/chosen": -1.618496060371399, + "logits/rejected": -1.6190263032913208, + "logps/chosen": -202.13323974609375, + "logps/rejected": -524.5574951171875, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.068316698074341, + "rewards/margins": 4.19903564453125, + "rewards/rejected": -1.1307190656661987, + "step": 12972 + }, + { + "epoch": 0.75, + "learning_rate": 1.4939080192909694e-08, + "logits/chosen": -1.8890526294708252, + "logits/rejected": -1.916368842124939, + "logps/chosen": -284.5853576660156, + "logps/rejected": -406.61297607421875, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.194967746734619, + "rewards/margins": 3.7537384033203125, + "rewards/rejected": -1.558770775794983, + "step": 12973 + }, + { + "epoch": 0.76, + "learning_rate": 1.4932361990438564e-08, + "logits/chosen": -2.0080151557922363, + "logits/rejected": -2.013723850250244, + "logps/chosen": -35.793052673339844, + "logps/rejected": -257.76812744140625, + "loss": 0.2253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4748184382915497, + "rewards/margins": 5.106034755706787, + "rewards/rejected": -4.631216526031494, + "step": 12974 + }, + { + "epoch": 0.76, + "learning_rate": 1.4925645033741475e-08, + "logits/chosen": -1.8624988794326782, + "logits/rejected": -1.8726601600646973, + "logps/chosen": -23.739913940429688, + "logps/rejected": -263.298095703125, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5134706497192383, + "rewards/margins": 1.7505358457565308, + "rewards/rejected": -1.2370651960372925, + "step": 12975 + }, + { + "epoch": 0.76, + "learning_rate": 1.491892932305705e-08, + "logits/chosen": -1.86752450466156, + "logits/rejected": -1.8712772130966187, + "logps/chosen": -3.7393503189086914, + "logps/rejected": -60.584815979003906, + "loss": 0.5364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1251436471939087, + "rewards/margins": 0.6401513814926147, + "rewards/rejected": -0.7652950286865234, + "step": 12976 + }, + { + "epoch": 0.76, + "learning_rate": 1.491221485862383e-08, + "logits/chosen": -1.739343523979187, + "logits/rejected": -1.732663869857788, + "logps/chosen": -44.52898406982422, + "logps/rejected": -370.45416259765625, + "loss": 0.2293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44819948077201843, + "rewards/margins": 3.3371734619140625, + "rewards/rejected": -2.8889739513397217, + "step": 12977 + }, + { + "epoch": 0.76, + "learning_rate": 1.490550164068038e-08, + "logits/chosen": -1.6948744058609009, + "logits/rejected": -1.6865614652633667, + "logps/chosen": -58.80134963989258, + "logps/rejected": -208.0908203125, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6352825164794922, + "rewards/margins": 1.2363125085830688, + "rewards/rejected": -0.6010299921035767, + "step": 12978 + }, + { + "epoch": 0.76, + "learning_rate": 1.48987896694652e-08, + "logits/chosen": -1.9032087326049805, + "logits/rejected": -1.9007790088653564, + "logps/chosen": -50.17341613769531, + "logps/rejected": -283.56292724609375, + "loss": 0.1722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8092907071113586, + "rewards/margins": 8.876543045043945, + "rewards/rejected": -8.067252159118652, + "step": 12979 + }, + { + "epoch": 0.76, + "learning_rate": 1.489207894521669e-08, + "logits/chosen": -1.9806557893753052, + "logits/rejected": -1.9581801891326904, + "logps/chosen": -184.7789306640625, + "logps/rejected": -338.4510498046875, + "loss": 0.2349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8081817626953125, + "rewards/margins": 0.6408110857009888, + "rewards/rejected": 1.1673706769943237, + "step": 12980 + }, + { + "epoch": 0.76, + "learning_rate": 1.488536946817327e-08, + "logits/chosen": -1.6899791955947876, + "logits/rejected": -1.6946895122528076, + "logps/chosen": -155.4981689453125, + "logps/rejected": -227.78399658203125, + "loss": 0.2271, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0841827392578125, + "rewards/margins": 0.6550475358963013, + "rewards/rejected": 1.4291352033615112, + "step": 12981 + }, + { + "epoch": 0.76, + "learning_rate": 1.4878661238573286e-08, + "logits/chosen": -1.700100064277649, + "logits/rejected": -1.704824686050415, + "logps/chosen": -0.0032249977812170982, + "logps/rejected": -189.19134521484375, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011694708518916741, + "rewards/margins": 3.7350759506225586, + "rewards/rejected": -3.7351930141448975, + "step": 12982 + }, + { + "epoch": 0.76, + "learning_rate": 1.4871954256655072e-08, + "logits/chosen": -1.932678461074829, + "logits/rejected": -1.9291661977767944, + "logps/chosen": -123.18162536621094, + "logps/rejected": -357.5849609375, + "loss": 0.0415, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8713363409042358, + "rewards/margins": 3.7076187133789062, + "rewards/rejected": -1.8362823724746704, + "step": 12983 + }, + { + "epoch": 0.76, + "learning_rate": 1.4865248522656858e-08, + "logits/chosen": -1.978425145149231, + "logits/rejected": -1.9835028648376465, + "logps/chosen": -8.217696189880371, + "logps/rejected": -133.39816284179688, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09465370327234268, + "rewards/margins": 1.4191852807998657, + "rewards/rejected": -1.3245315551757812, + "step": 12984 + }, + { + "epoch": 0.76, + "learning_rate": 1.4858544036816877e-08, + "logits/chosen": -2.0346078872680664, + "logits/rejected": -2.029799222946167, + "logps/chosen": -21.441585540771484, + "logps/rejected": -254.02671813964844, + "loss": 0.315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06585083156824112, + "rewards/margins": 3.5378220081329346, + "rewards/rejected": -3.471971273422241, + "step": 12985 + }, + { + "epoch": 0.76, + "learning_rate": 1.4851840799373305e-08, + "logits/chosen": -1.7084661722183228, + "logits/rejected": -1.7379882335662842, + "logps/chosen": -250.04478454589844, + "logps/rejected": -427.3467102050781, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5535600185394287, + "rewards/margins": 5.080476760864258, + "rewards/rejected": -2.52691650390625, + "step": 12986 + }, + { + "epoch": 0.76, + "learning_rate": 1.4845138810564295e-08, + "logits/chosen": -1.8949912786483765, + "logits/rejected": -1.8850895166397095, + "logps/chosen": -16.869855880737305, + "logps/rejected": -212.55384826660156, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3211914002895355, + "rewards/margins": 2.708667039871216, + "rewards/rejected": -2.3874757289886475, + "step": 12987 + }, + { + "epoch": 0.76, + "learning_rate": 1.4838438070627901e-08, + "logits/chosen": -1.844103455543518, + "logits/rejected": -1.8497576713562012, + "logps/chosen": -37.25066375732422, + "logps/rejected": -191.78408813476562, + "loss": 0.2197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6124984622001648, + "rewards/margins": 5.032356262207031, + "rewards/rejected": -4.419857978820801, + "step": 12988 + }, + { + "epoch": 0.76, + "learning_rate": 1.4831738579802178e-08, + "logits/chosen": -1.9663705825805664, + "logits/rejected": -1.9651920795440674, + "logps/chosen": -12.546506881713867, + "logps/rejected": -56.3347282409668, + "loss": 0.6163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10019741207361221, + "rewards/margins": 0.1858324110507965, + "rewards/rejected": -0.0856349989771843, + "step": 12989 + }, + { + "epoch": 0.76, + "learning_rate": 1.4825040338325129e-08, + "logits/chosen": -1.8866022825241089, + "logits/rejected": -1.8905880451202393, + "logps/chosen": -271.237060546875, + "logps/rejected": -467.5276794433594, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.036492824554443, + "rewards/margins": 4.8208770751953125, + "rewards/rejected": -0.7843841910362244, + "step": 12990 + }, + { + "epoch": 0.76, + "learning_rate": 1.481834334643472e-08, + "logits/chosen": -1.891326665878296, + "logits/rejected": -1.878637671470642, + "logps/chosen": -21.74725341796875, + "logps/rejected": -492.2593994140625, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004081916995346546, + "rewards/margins": 8.008498191833496, + "rewards/rejected": -8.012579917907715, + "step": 12991 + }, + { + "epoch": 0.76, + "learning_rate": 1.4811647604368833e-08, + "logits/chosen": -1.8942564725875854, + "logits/rejected": -1.8881134986877441, + "logps/chosen": -0.005016076844185591, + "logps/rejected": -257.96728515625, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0004175992216914892, + "rewards/margins": 6.3109965324401855, + "rewards/rejected": -6.3105788230896, + "step": 12992 + }, + { + "epoch": 0.76, + "learning_rate": 1.4804953112365342e-08, + "logits/chosen": -1.8769311904907227, + "logits/rejected": -1.877331256866455, + "logps/chosen": -193.19281005859375, + "logps/rejected": -374.8978576660156, + "loss": 0.0803, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6679779291152954, + "rewards/margins": 3.2948639392852783, + "rewards/rejected": -1.626886010169983, + "step": 12993 + }, + { + "epoch": 0.76, + "learning_rate": 1.4798259870662084e-08, + "logits/chosen": -1.6340025663375854, + "logits/rejected": -1.639397144317627, + "logps/chosen": -18.57445526123047, + "logps/rejected": -101.59690856933594, + "loss": 0.3432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7513044476509094, + "rewards/margins": 0.8366560339927673, + "rewards/rejected": -0.08535156399011612, + "step": 12994 + }, + { + "epoch": 0.76, + "learning_rate": 1.4791567879496835e-08, + "logits/chosen": -1.926831603050232, + "logits/rejected": -1.9217097759246826, + "logps/chosen": -14.617220878601074, + "logps/rejected": -214.1297149658203, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30429449677467346, + "rewards/margins": 1.7356222867965698, + "rewards/rejected": -1.4313278198242188, + "step": 12995 + }, + { + "epoch": 0.76, + "learning_rate": 1.4784877139107305e-08, + "logits/chosen": -1.8620802164077759, + "logits/rejected": -1.8597662448883057, + "logps/chosen": -20.518949508666992, + "logps/rejected": -118.54071807861328, + "loss": 0.4282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.153407484292984, + "rewards/margins": 0.9982508420944214, + "rewards/rejected": -0.844843327999115, + "step": 12996 + }, + { + "epoch": 0.76, + "learning_rate": 1.4778187649731177e-08, + "logits/chosen": -2.0367696285247803, + "logits/rejected": -2.0415291786193848, + "logps/chosen": -62.32424545288086, + "logps/rejected": -240.3475341796875, + "loss": 0.1959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8495640158653259, + "rewards/margins": 2.0203800201416016, + "rewards/rejected": -1.1708160638809204, + "step": 12997 + }, + { + "epoch": 0.76, + "learning_rate": 1.4771499411606147e-08, + "logits/chosen": -1.8528327941894531, + "logits/rejected": -1.8642401695251465, + "logps/chosen": -0.22523383796215057, + "logps/rejected": -171.202392578125, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013870318420231342, + "rewards/margins": 3.1373672485351562, + "rewards/rejected": -3.1234970092773438, + "step": 12998 + }, + { + "epoch": 0.76, + "learning_rate": 1.4764812424969759e-08, + "logits/chosen": -1.9092388153076172, + "logits/rejected": -1.8794983625411987, + "logps/chosen": -29.60031509399414, + "logps/rejected": -226.16702270507812, + "loss": 0.492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05636138841509819, + "rewards/margins": 0.6704086065292358, + "rewards/rejected": -0.6140472292900085, + "step": 12999 + }, + { + "epoch": 0.76, + "learning_rate": 1.4758126690059591e-08, + "logits/chosen": -1.7827680110931396, + "logits/rejected": -1.769351601600647, + "logps/chosen": -35.7608642578125, + "logps/rejected": -259.86376953125, + "loss": 0.2043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43383750319480896, + "rewards/margins": 4.542942523956299, + "rewards/rejected": -4.109105110168457, + "step": 13000 + }, + { + "epoch": 0.76, + "learning_rate": 1.4751442207113146e-08, + "logits/chosen": -1.9561210870742798, + "logits/rejected": -1.9443624019622803, + "logps/chosen": -147.68331909179688, + "logps/rejected": -235.93832397460938, + "loss": 0.185, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.187762498855591, + "rewards/margins": 1.258294701576233, + "rewards/rejected": 0.9294677972793579, + "step": 13001 + }, + { + "epoch": 0.76, + "learning_rate": 1.474475897636791e-08, + "logits/chosen": -1.924984335899353, + "logits/rejected": -1.922141671180725, + "logps/chosen": -46.44179916381836, + "logps/rejected": -313.501708984375, + "loss": 0.1978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5284428000450134, + "rewards/margins": 4.443366050720215, + "rewards/rejected": -3.9149231910705566, + "step": 13002 + }, + { + "epoch": 0.76, + "learning_rate": 1.4738076998061278e-08, + "logits/chosen": -1.8619539737701416, + "logits/rejected": -1.8542263507843018, + "logps/chosen": -0.07964145392179489, + "logps/rejected": -299.07244873046875, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0072326320223510265, + "rewards/margins": 5.577206611633301, + "rewards/rejected": -5.584439277648926, + "step": 13003 + }, + { + "epoch": 0.76, + "learning_rate": 1.473139627243063e-08, + "logits/chosen": -1.959816813468933, + "logits/rejected": -1.9649019241333008, + "logps/chosen": -149.736572265625, + "logps/rejected": -261.6605224609375, + "loss": 0.1184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4498398303985596, + "rewards/margins": 1.6224228143692017, + "rewards/rejected": -0.17258301377296448, + "step": 13004 + }, + { + "epoch": 0.76, + "learning_rate": 1.4724716799713304e-08, + "logits/chosen": -1.9249229431152344, + "logits/rejected": -1.9421601295471191, + "logps/chosen": -25.651716232299805, + "logps/rejected": -186.99920654296875, + "loss": 0.2278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4868484437465668, + "rewards/margins": 5.142640590667725, + "rewards/rejected": -4.655792236328125, + "step": 13005 + }, + { + "epoch": 0.76, + "learning_rate": 1.4718038580146602e-08, + "logits/chosen": -1.9049534797668457, + "logits/rejected": -1.8965928554534912, + "logps/chosen": -2.1269571781158447, + "logps/rejected": -119.80906677246094, + "loss": 0.6132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1524374932050705, + "rewards/margins": 0.120195671916008, + "rewards/rejected": 0.0322418212890625, + "step": 13006 + }, + { + "epoch": 0.76, + "learning_rate": 1.4711361613967738e-08, + "logits/chosen": -1.984429955482483, + "logits/rejected": -1.9829126596450806, + "logps/chosen": -84.52595520019531, + "logps/rejected": -228.33518981933594, + "loss": 0.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34639817476272583, + "rewards/margins": 3.1832664012908936, + "rewards/rejected": -2.8368682861328125, + "step": 13007 + }, + { + "epoch": 0.76, + "learning_rate": 1.4704685901413927e-08, + "logits/chosen": -1.8274375200271606, + "logits/rejected": -1.8308345079421997, + "logps/chosen": -0.00019907410023733974, + "logps/rejected": -135.88856506347656, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1828603823669255e-06, + "rewards/margins": 2.8322219848632812, + "rewards/rejected": -2.8322250843048096, + "step": 13008 + }, + { + "epoch": 0.76, + "learning_rate": 1.4698011442722313e-08, + "logits/chosen": -1.8515931367874146, + "logits/rejected": -1.8430308103561401, + "logps/chosen": -144.81024169921875, + "logps/rejected": -369.1505432128906, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5762665271759033, + "rewards/margins": 3.140286445617676, + "rewards/rejected": -0.5640197992324829, + "step": 13009 + }, + { + "epoch": 0.76, + "learning_rate": 1.4691338238130036e-08, + "logits/chosen": -1.8942965269088745, + "logits/rejected": -1.8940131664276123, + "logps/chosen": -118.98271179199219, + "logps/rejected": -295.0594787597656, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8006027936935425, + "rewards/margins": 5.4311842918396, + "rewards/rejected": -3.6305816173553467, + "step": 13010 + }, + { + "epoch": 0.76, + "learning_rate": 1.468466628787412e-08, + "logits/chosen": -2.0150928497314453, + "logits/rejected": -2.041517972946167, + "logps/chosen": -178.27993774414062, + "logps/rejected": -341.3531799316406, + "loss": 0.0591, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6756577491760254, + "rewards/margins": 2.2490310668945312, + "rewards/rejected": 0.426626592874527, + "step": 13011 + }, + { + "epoch": 0.76, + "learning_rate": 1.4677995592191605e-08, + "logits/chosen": -1.9650377035140991, + "logits/rejected": -1.957056999206543, + "logps/chosen": -17.087644577026367, + "logps/rejected": -144.8770294189453, + "loss": 0.2331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7764126062393188, + "rewards/margins": 2.2230710983276367, + "rewards/rejected": -1.4466583728790283, + "step": 13012 + }, + { + "epoch": 0.76, + "learning_rate": 1.4671326151319469e-08, + "logits/chosen": -1.9871782064437866, + "logits/rejected": -1.9860786199569702, + "logps/chosen": -24.508403778076172, + "logps/rejected": -120.21551513671875, + "loss": 0.1483, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1061012744903564, + "rewards/margins": 3.1735329627990723, + "rewards/rejected": -2.067431688308716, + "step": 13013 + }, + { + "epoch": 0.76, + "learning_rate": 1.4664657965494648e-08, + "logits/chosen": -1.9558825492858887, + "logits/rejected": -1.957387089729309, + "logps/chosen": -47.709529876708984, + "logps/rejected": -145.83642578125, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4998241364955902, + "rewards/margins": 1.2022167444229126, + "rewards/rejected": -0.702392578125, + "step": 13014 + }, + { + "epoch": 0.76, + "learning_rate": 1.4657991034954009e-08, + "logits/chosen": -2.0004119873046875, + "logits/rejected": -1.9504942893981934, + "logps/chosen": -161.79324340820312, + "logps/rejected": -288.2781066894531, + "loss": 0.2641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5852600336074829, + "rewards/margins": 1.4485199451446533, + "rewards/rejected": -0.8632599115371704, + "step": 13015 + }, + { + "epoch": 0.76, + "learning_rate": 1.4651325359934403e-08, + "logits/chosen": -2.013638973236084, + "logits/rejected": -2.002210855484009, + "logps/chosen": -45.280792236328125, + "logps/rejected": -158.51544189453125, + "loss": 0.4355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4316467344760895, + "rewards/margins": 4.218885898590088, + "rewards/rejected": -4.6505327224731445, + "step": 13016 + }, + { + "epoch": 0.76, + "learning_rate": 1.4644660940672625e-08, + "logits/chosen": -1.9803355932235718, + "logits/rejected": -1.9471982717514038, + "logps/chosen": -130.86912536621094, + "logps/rejected": -277.9289855957031, + "loss": 0.1877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8850143551826477, + "rewards/margins": 1.2242233753204346, + "rewards/rejected": -0.3392089903354645, + "step": 13017 + }, + { + "epoch": 0.76, + "learning_rate": 1.4637997777405437e-08, + "logits/chosen": -1.63336980342865, + "logits/rejected": -1.6399554014205933, + "logps/chosen": -233.62298583984375, + "logps/rejected": -328.1572265625, + "loss": 0.4449, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.0560624599456787, + "rewards/margins": -0.2573835849761963, + "rewards/rejected": 2.313446044921875, + "step": 13018 + }, + { + "epoch": 0.76, + "learning_rate": 1.4631335870369543e-08, + "logits/chosen": -1.933386206626892, + "logits/rejected": -1.9332399368286133, + "logps/chosen": -198.6038818359375, + "logps/rejected": -474.2950439453125, + "loss": 0.1339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37313538789749146, + "rewards/margins": 8.328204154968262, + "rewards/rejected": -7.955069065093994, + "step": 13019 + }, + { + "epoch": 0.76, + "learning_rate": 1.4624675219801601e-08, + "logits/chosen": -2.007490873336792, + "logits/rejected": -2.0032215118408203, + "logps/chosen": -0.08361783623695374, + "logps/rejected": -352.44940185546875, + "loss": 0.3541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019914710894227028, + "rewards/margins": 7.714461326599121, + "rewards/rejected": -7.694546699523926, + "step": 13020 + }, + { + "epoch": 0.76, + "learning_rate": 1.4618015825938257e-08, + "logits/chosen": -1.8736333847045898, + "logits/rejected": -1.858571171760559, + "logps/chosen": -341.18682861328125, + "logps/rejected": -524.488037109375, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3594483137130737, + "rewards/margins": 5.070868015289307, + "rewards/rejected": -3.7114198207855225, + "step": 13021 + }, + { + "epoch": 0.76, + "learning_rate": 1.461135768901604e-08, + "logits/chosen": -1.884201169013977, + "logits/rejected": -1.8786845207214355, + "logps/chosen": -189.98422241210938, + "logps/rejected": -260.59674072265625, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.051470994949341, + "rewards/margins": 0.13330388069152832, + "rewards/rejected": 1.9181671142578125, + "step": 13022 + }, + { + "epoch": 0.76, + "learning_rate": 1.4604700809271508e-08, + "logits/chosen": -2.009495258331299, + "logits/rejected": -2.0387794971466064, + "logps/chosen": -145.40570068359375, + "logps/rejected": -183.92942810058594, + "loss": 0.2631, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3168060779571533, + "rewards/margins": 0.6076095700263977, + "rewards/rejected": 0.7091965079307556, + "step": 13023 + }, + { + "epoch": 0.76, + "learning_rate": 1.4598045186941138e-08, + "logits/chosen": -2.009234666824341, + "logits/rejected": -2.029174327850342, + "logps/chosen": -164.96731567382812, + "logps/rejected": -248.26953125, + "loss": 0.319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0673569440841675, + "rewards/margins": 0.323341429233551, + "rewards/rejected": 0.7440155148506165, + "step": 13024 + }, + { + "epoch": 0.76, + "learning_rate": 1.4591390822261391e-08, + "logits/chosen": -1.9978843927383423, + "logits/rejected": -1.9963231086730957, + "logps/chosen": -20.270780563354492, + "logps/rejected": -185.65371704101562, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21282939612865448, + "rewards/margins": 1.6215649843215942, + "rewards/rejected": -1.4087356328964233, + "step": 13025 + }, + { + "epoch": 0.76, + "learning_rate": 1.458473771546862e-08, + "logits/chosen": -2.0668070316314697, + "logits/rejected": -2.0703272819519043, + "logps/chosen": -7.5847554206848145, + "logps/rejected": -58.174530029296875, + "loss": 0.5664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16115747392177582, + "rewards/margins": 0.41741204261779785, + "rewards/rejected": -0.2562545835971832, + "step": 13026 + }, + { + "epoch": 0.76, + "learning_rate": 1.4578085866799211e-08, + "logits/chosen": -1.8969597816467285, + "logits/rejected": -1.8080812692642212, + "logps/chosen": -215.51136779785156, + "logps/rejected": -514.6607055664062, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.398851156234741, + "rewards/margins": 4.747541904449463, + "rewards/rejected": -1.3486908674240112, + "step": 13027 + }, + { + "epoch": 0.76, + "learning_rate": 1.4571435276489452e-08, + "logits/chosen": -2.1054511070251465, + "logits/rejected": -2.099365472793579, + "logps/chosen": -10.539133071899414, + "logps/rejected": -207.77606201171875, + "loss": 0.388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4645340144634247, + "rewards/margins": 2.8011200428009033, + "rewards/rejected": -3.2656540870666504, + "step": 13028 + }, + { + "epoch": 0.76, + "learning_rate": 1.4564785944775632e-08, + "logits/chosen": -1.85965895652771, + "logits/rejected": -1.85746431350708, + "logps/chosen": -6.4265265464782715, + "logps/rejected": -125.47053527832031, + "loss": 0.6145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08773064613342285, + "rewards/margins": 0.4751569628715515, + "rewards/rejected": -0.5628876090049744, + "step": 13029 + }, + { + "epoch": 0.76, + "learning_rate": 1.4558137871893928e-08, + "logits/chosen": -1.8109936714172363, + "logits/rejected": -1.8140032291412354, + "logps/chosen": -124.98109436035156, + "logps/rejected": -375.43798828125, + "loss": 0.0704, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.40966796875, + "rewards/margins": 4.658953666687012, + "rewards/rejected": -3.249285936355591, + "step": 13030 + }, + { + "epoch": 0.76, + "learning_rate": 1.4551491058080533e-08, + "logits/chosen": -1.9804083108901978, + "logits/rejected": -1.9802392721176147, + "logps/chosen": -0.0003735370410140604, + "logps/rejected": -109.64556884765625, + "loss": 0.4529, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.399115874140989e-05, + "rewards/margins": 1.2078983783721924, + "rewards/rejected": -1.207922339439392, + "step": 13031 + }, + { + "epoch": 0.76, + "learning_rate": 1.4544845503571573e-08, + "logits/chosen": -1.8109493255615234, + "logits/rejected": -1.8002798557281494, + "logps/chosen": -1.2236926555633545, + "logps/rejected": -278.8036193847656, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20644353330135345, + "rewards/margins": 6.029308795928955, + "rewards/rejected": -5.8228654861450195, + "step": 13032 + }, + { + "epoch": 0.76, + "learning_rate": 1.4538201208603152e-08, + "logits/chosen": -2.0297062397003174, + "logits/rejected": -2.0202646255493164, + "logps/chosen": -3.1559932231903076, + "logps/rejected": -48.971649169921875, + "loss": 0.8247, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0064130546525120735, + "rewards/margins": -0.44130805134773254, + "rewards/rejected": 0.44772109389305115, + "step": 13033 + }, + { + "epoch": 0.76, + "learning_rate": 1.4531558173411262e-08, + "logits/chosen": -1.9263970851898193, + "logits/rejected": -1.9153680801391602, + "logps/chosen": -72.73965454101562, + "logps/rejected": -277.03021240234375, + "loss": 0.1707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8849464654922485, + "rewards/margins": 4.258319854736328, + "rewards/rejected": -3.373373508453369, + "step": 13034 + }, + { + "epoch": 0.76, + "learning_rate": 1.4524916398231924e-08, + "logits/chosen": -2.039670705795288, + "logits/rejected": -2.0618834495544434, + "logps/chosen": -173.72323608398438, + "logps/rejected": -326.9849548339844, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.654240369796753, + "rewards/margins": 2.894608974456787, + "rewards/rejected": -0.24036864936351776, + "step": 13035 + }, + { + "epoch": 0.76, + "learning_rate": 1.4518275883301085e-08, + "logits/chosen": -1.8411253690719604, + "logits/rejected": -1.8354015350341797, + "logps/chosen": -36.146305084228516, + "logps/rejected": -183.6471405029297, + "loss": 0.1552, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5237785577774048, + "rewards/margins": 2.1447060108184814, + "rewards/rejected": -0.6209274530410767, + "step": 13036 + }, + { + "epoch": 0.76, + "learning_rate": 1.4511636628854662e-08, + "logits/chosen": -1.8120328187942505, + "logits/rejected": -1.8490551710128784, + "logps/chosen": -307.942626953125, + "logps/rejected": -390.18707275390625, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1402618885040283, + "rewards/margins": 3.406015157699585, + "rewards/rejected": -0.2657531797885895, + "step": 13037 + }, + { + "epoch": 0.76, + "learning_rate": 1.4504998635128462e-08, + "logits/chosen": -1.9256876707077026, + "logits/rejected": -1.9134210348129272, + "logps/chosen": -48.25634002685547, + "logps/rejected": -336.904296875, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1254676878452301, + "rewards/margins": 6.106302738189697, + "rewards/rejected": -5.9808349609375, + "step": 13038 + }, + { + "epoch": 0.76, + "learning_rate": 1.4498361902358358e-08, + "logits/chosen": -1.790259838104248, + "logits/rejected": -1.8321627378463745, + "logps/chosen": -233.7029571533203, + "logps/rejected": -428.13677978515625, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.376600742340088, + "rewards/margins": 5.8805341720581055, + "rewards/rejected": -3.5039336681365967, + "step": 13039 + }, + { + "epoch": 0.76, + "learning_rate": 1.4491726430780104e-08, + "logits/chosen": -1.5334810018539429, + "logits/rejected": -1.5378084182739258, + "logps/chosen": -144.79718017578125, + "logps/rejected": -305.38018798828125, + "loss": 0.0774, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0797746181488037, + "rewards/margins": 2.1079301834106445, + "rewards/rejected": -0.02815551869571209, + "step": 13040 + }, + { + "epoch": 0.76, + "learning_rate": 1.4485092220629408e-08, + "logits/chosen": -1.8860650062561035, + "logits/rejected": -1.9015499353408813, + "logps/chosen": -275.15826416015625, + "logps/rejected": -497.169921875, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8056548833847046, + "rewards/margins": 5.312588691711426, + "rewards/rejected": -3.5069336891174316, + "step": 13041 + }, + { + "epoch": 0.76, + "learning_rate": 1.4478459272141958e-08, + "logits/chosen": -1.945678949356079, + "logits/rejected": -1.944279432296753, + "logps/chosen": -250.16461181640625, + "logps/rejected": -380.68231201171875, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.418377637863159, + "rewards/margins": 3.0471129417419434, + "rewards/rejected": -0.628735363483429, + "step": 13042 + }, + { + "epoch": 0.76, + "learning_rate": 1.4471827585553387e-08, + "logits/chosen": -1.6890661716461182, + "logits/rejected": -1.7006126642227173, + "logps/chosen": -225.86447143554688, + "logps/rejected": -337.71923828125, + "loss": 0.1487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9524017572402954, + "rewards/margins": 1.5958435535430908, + "rewards/rejected": 0.356558233499527, + "step": 13043 + }, + { + "epoch": 0.76, + "learning_rate": 1.4465197161099301e-08, + "logits/chosen": -1.8807803392410278, + "logits/rejected": -1.9464472532272339, + "logps/chosen": -234.77951049804688, + "logps/rejected": -364.57611083984375, + "loss": 0.1334, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.140472412109375, + "rewards/margins": 1.785638451576233, + "rewards/rejected": -0.6451660394668579, + "step": 13044 + }, + { + "epoch": 0.76, + "learning_rate": 1.445856799901522e-08, + "logits/chosen": -1.8262395858764648, + "logits/rejected": -1.826365351676941, + "logps/chosen": -13.278444290161133, + "logps/rejected": -126.73892211914062, + "loss": 0.1321, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0613759756088257, + "rewards/margins": 3.0268499851226807, + "rewards/rejected": -1.965474009513855, + "step": 13045 + }, + { + "epoch": 0.76, + "learning_rate": 1.4451940099536653e-08, + "logits/chosen": -1.8772554397583008, + "logits/rejected": -1.8475885391235352, + "logps/chosen": -247.46498107910156, + "logps/rejected": -387.31341552734375, + "loss": 0.1508, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7674789428710938, + "rewards/margins": 1.722651720046997, + "rewards/rejected": 0.04482727125287056, + "step": 13046 + }, + { + "epoch": 0.76, + "learning_rate": 1.444531346289906e-08, + "logits/chosen": -1.9295238256454468, + "logits/rejected": -1.9218028783798218, + "logps/chosen": -5.966627597808838, + "logps/rejected": -224.24118041992188, + "loss": 0.365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013378381729125977, + "rewards/margins": 5.715061187744141, + "rewards/rejected": -5.7284393310546875, + "step": 13047 + }, + { + "epoch": 0.76, + "learning_rate": 1.4438688089337865e-08, + "logits/chosen": -1.9033616781234741, + "logits/rejected": -1.8977477550506592, + "logps/chosen": -4.747076988220215, + "logps/rejected": -68.9011001586914, + "loss": 0.561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12717299163341522, + "rewards/margins": 0.7268259525299072, + "rewards/rejected": -0.8539989590644836, + "step": 13048 + }, + { + "epoch": 0.76, + "learning_rate": 1.44320639790884e-08, + "logits/chosen": -1.8483200073242188, + "logits/rejected": -1.8303828239440918, + "logps/chosen": -137.5806121826172, + "logps/rejected": -228.7515869140625, + "loss": 0.1764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2992172241210938, + "rewards/margins": 1.797856092453003, + "rewards/rejected": -0.49863892793655396, + "step": 13049 + }, + { + "epoch": 0.76, + "learning_rate": 1.4425441132386007e-08, + "logits/chosen": -2.074954032897949, + "logits/rejected": -2.074277639389038, + "logps/chosen": -42.208404541015625, + "logps/rejected": -104.05451965332031, + "loss": 0.5077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2434925138950348, + "rewards/margins": 1.35453462600708, + "rewards/rejected": -1.5980271100997925, + "step": 13050 + }, + { + "epoch": 0.76, + "learning_rate": 1.4418819549465955e-08, + "logits/chosen": -1.696913719177246, + "logits/rejected": -1.674793004989624, + "logps/chosen": -192.35256958007812, + "logps/rejected": -319.8859558105469, + "loss": 0.0579, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0518829822540283, + "rewards/margins": 2.558276414871216, + "rewards/rejected": 0.4936065673828125, + "step": 13051 + }, + { + "epoch": 0.76, + "learning_rate": 1.4412199230563499e-08, + "logits/chosen": -1.8924798965454102, + "logits/rejected": -1.893524408340454, + "logps/chosen": -11.146934509277344, + "logps/rejected": -288.934814453125, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0787806510925293, + "rewards/margins": 4.104546546936035, + "rewards/rejected": -4.025765895843506, + "step": 13052 + }, + { + "epoch": 0.76, + "learning_rate": 1.4405580175913785e-08, + "logits/chosen": -1.9821029901504517, + "logits/rejected": -1.9659600257873535, + "logps/chosen": -235.67205810546875, + "logps/rejected": -434.7451477050781, + "loss": 0.0758, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.616969347000122, + "rewards/margins": 5.306550979614258, + "rewards/rejected": -3.6895813941955566, + "step": 13053 + }, + { + "epoch": 0.76, + "learning_rate": 1.4398962385751978e-08, + "logits/chosen": -1.790724277496338, + "logits/rejected": -1.8033825159072876, + "logps/chosen": -209.3357696533203, + "logps/rejected": -498.46282958984375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.039759874343872, + "rewards/margins": 9.363371849060059, + "rewards/rejected": -6.323611736297607, + "step": 13054 + }, + { + "epoch": 0.76, + "learning_rate": 1.4392345860313171e-08, + "logits/chosen": -1.8895403146743774, + "logits/rejected": -1.895487904548645, + "logps/chosen": -0.0003059741575270891, + "logps/rejected": -210.97979736328125, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.887413863092661e-06, + "rewards/margins": 5.152137279510498, + "rewards/rejected": -5.1521315574646, + "step": 13055 + }, + { + "epoch": 0.76, + "learning_rate": 1.438573059983243e-08, + "logits/chosen": -1.9716616868972778, + "logits/rejected": -1.967000961303711, + "logps/chosen": -10.251744270324707, + "logps/rejected": -201.88796997070312, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07491817325353622, + "rewards/margins": 3.8471291065216064, + "rewards/rejected": -3.7722108364105225, + "step": 13056 + }, + { + "epoch": 0.76, + "learning_rate": 1.4379116604544727e-08, + "logits/chosen": -1.7750744819641113, + "logits/rejected": -1.830635905265808, + "logps/chosen": -202.42236328125, + "logps/rejected": -357.035400390625, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9522796869277954, + "rewards/margins": 1.8583893775939941, + "rewards/rejected": -0.906109631061554, + "step": 13057 + }, + { + "epoch": 0.76, + "learning_rate": 1.4372503874685032e-08, + "logits/chosen": -1.8051714897155762, + "logits/rejected": -1.8306418657302856, + "logps/chosen": -208.0889892578125, + "logps/rejected": -287.7829895019531, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.116302490234375, + "rewards/margins": 3.8901214599609375, + "rewards/rejected": -1.7738189697265625, + "step": 13058 + }, + { + "epoch": 0.76, + "learning_rate": 1.4365892410488301e-08, + "logits/chosen": -1.9106054306030273, + "logits/rejected": -1.931418776512146, + "logps/chosen": -243.51641845703125, + "logps/rejected": -430.1259765625, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.537335157394409, + "rewards/margins": 2.4361205101013184, + "rewards/rejected": 0.10121460258960724, + "step": 13059 + }, + { + "epoch": 0.76, + "learning_rate": 1.435928221218936e-08, + "logits/chosen": -1.7963906526565552, + "logits/rejected": -1.8655024766921997, + "logps/chosen": -243.96609497070312, + "logps/rejected": -363.85040283203125, + "loss": 0.1139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6174179315567017, + "rewards/margins": 2.508317470550537, + "rewards/rejected": -1.890899658203125, + "step": 13060 + }, + { + "epoch": 0.76, + "learning_rate": 1.4352673280023053e-08, + "logits/chosen": -1.824569821357727, + "logits/rejected": -1.786720871925354, + "logps/chosen": -281.55767822265625, + "logps/rejected": -535.1357421875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.7391204833984375, + "rewards/margins": 4.793142795562744, + "rewards/rejected": -0.05402221903204918, + "step": 13061 + }, + { + "epoch": 0.76, + "learning_rate": 1.4346065614224162e-08, + "logits/chosen": -1.852250576019287, + "logits/rejected": -1.8049818277359009, + "logps/chosen": -191.32669067382812, + "logps/rejected": -348.9034729003906, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.852003574371338, + "rewards/margins": 5.3363356590271, + "rewards/rejected": -0.48433229327201843, + "step": 13062 + }, + { + "epoch": 0.76, + "learning_rate": 1.433945921502744e-08, + "logits/chosen": -2.0027241706848145, + "logits/rejected": -1.9991658926010132, + "logps/chosen": -0.0001156280268332921, + "logps/rejected": -252.73500061035156, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.193262616856373e-06, + "rewards/margins": 8.600433349609375, + "rewards/rejected": -8.600435256958008, + "step": 13063 + }, + { + "epoch": 0.76, + "learning_rate": 1.4332854082667545e-08, + "logits/chosen": -1.875972032546997, + "logits/rejected": -1.8709841966629028, + "logps/chosen": -60.07768630981445, + "logps/rejected": -188.38311767578125, + "loss": 0.2549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9094967246055603, + "rewards/margins": 1.5101559162139893, + "rewards/rejected": -0.600659191608429, + "step": 13064 + }, + { + "epoch": 0.76, + "learning_rate": 1.4326250217379138e-08, + "logits/chosen": -2.0620367527008057, + "logits/rejected": -2.017873525619507, + "logps/chosen": -176.4427947998047, + "logps/rejected": -382.2952880859375, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4241654872894287, + "rewards/margins": 4.215934753417969, + "rewards/rejected": -1.7917693853378296, + "step": 13065 + }, + { + "epoch": 0.76, + "learning_rate": 1.4319647619396825e-08, + "logits/chosen": -2.023993968963623, + "logits/rejected": -2.0013368129730225, + "logps/chosen": -278.6112060546875, + "logps/rejected": -620.1473388671875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3057708740234375, + "rewards/margins": 5.490488052368164, + "rewards/rejected": -3.1847169399261475, + "step": 13066 + }, + { + "epoch": 0.76, + "learning_rate": 1.4313046288955172e-08, + "logits/chosen": -1.795383334159851, + "logits/rejected": -1.814820408821106, + "logps/chosen": -273.9823913574219, + "logps/rejected": -449.123046875, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3727569580078125, + "rewards/margins": 4.799557685852051, + "rewards/rejected": -2.426800489425659, + "step": 13067 + }, + { + "epoch": 0.76, + "learning_rate": 1.4306446226288666e-08, + "logits/chosen": -2.0787999629974365, + "logits/rejected": -2.0470328330993652, + "logps/chosen": -0.003364445408806205, + "logps/rejected": -317.1126708984375, + "loss": 0.3691, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017096508236136287, + "rewards/margins": 6.442319869995117, + "rewards/rejected": -6.442491054534912, + "step": 13068 + }, + { + "epoch": 0.76, + "learning_rate": 1.4299847431631784e-08, + "logits/chosen": -2.002302408218384, + "logits/rejected": -1.982668399810791, + "logps/chosen": -257.748291015625, + "logps/rejected": -434.1480407714844, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6017425060272217, + "rewards/margins": 3.7109861373901367, + "rewards/rejected": -1.1092437505722046, + "step": 13069 + }, + { + "epoch": 0.76, + "learning_rate": 1.4293249905218952e-08, + "logits/chosen": -1.8086239099502563, + "logits/rejected": -1.8392506837844849, + "logps/chosen": -157.596435546875, + "logps/rejected": -579.96533203125, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7960633039474487, + "rewards/margins": 10.281699180603027, + "rewards/rejected": -8.485635757446289, + "step": 13070 + }, + { + "epoch": 0.76, + "learning_rate": 1.4286653647284558e-08, + "logits/chosen": -2.1123948097229004, + "logits/rejected": -2.104952573776245, + "logps/chosen": -7.856020450592041, + "logps/rejected": -139.62869262695312, + "loss": 0.6838, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05739598348736763, + "rewards/margins": -0.02233729138970375, + "rewards/rejected": 0.07973327487707138, + "step": 13071 + }, + { + "epoch": 0.76, + "learning_rate": 1.4280058658062904e-08, + "logits/chosen": -1.8052400350570679, + "logits/rejected": -1.8665764331817627, + "logps/chosen": -279.28759765625, + "logps/rejected": -511.41363525390625, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4029266834259033, + "rewards/margins": 7.5847625732421875, + "rewards/rejected": -6.181836128234863, + "step": 13072 + }, + { + "epoch": 0.76, + "learning_rate": 1.4273464937788288e-08, + "logits/chosen": -2.0596046447753906, + "logits/rejected": -2.0538277626037598, + "logps/chosen": -2.9563612770289183e-05, + "logps/rejected": -203.02645874023438, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.390892164949037e-07, + "rewards/margins": 5.188924789428711, + "rewards/rejected": -5.1889238357543945, + "step": 13073 + }, + { + "epoch": 0.76, + "learning_rate": 1.4266872486694959e-08, + "logits/chosen": -1.7762993574142456, + "logits/rejected": -1.7836140394210815, + "logps/chosen": -12.170076370239258, + "logps/rejected": -238.75840759277344, + "loss": 0.2447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6364316940307617, + "rewards/margins": 2.8103086948394775, + "rewards/rejected": -2.173877000808716, + "step": 13074 + }, + { + "epoch": 0.76, + "learning_rate": 1.4260281305017124e-08, + "logits/chosen": -1.9039124250411987, + "logits/rejected": -1.8983391523361206, + "logps/chosen": -31.875078201293945, + "logps/rejected": -136.95455932617188, + "loss": 0.3964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07496394962072372, + "rewards/margins": 2.2622759342193604, + "rewards/rejected": -2.337239980697632, + "step": 13075 + }, + { + "epoch": 0.76, + "learning_rate": 1.4253691392988904e-08, + "logits/chosen": -1.8993984460830688, + "logits/rejected": -1.95236074924469, + "logps/chosen": -118.81241607666016, + "logps/rejected": -253.704833984375, + "loss": 0.1317, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2348320484161377, + "rewards/margins": 1.3537681102752686, + "rewards/rejected": 0.8810638785362244, + "step": 13076 + }, + { + "epoch": 0.76, + "learning_rate": 1.424710275084442e-08, + "logits/chosen": -1.7294822931289673, + "logits/rejected": -1.7323576211929321, + "logps/chosen": -252.93643188476562, + "logps/rejected": -439.38665771484375, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4657745361328125, + "rewards/margins": 4.3718414306640625, + "rewards/rejected": -1.90606689453125, + "step": 13077 + }, + { + "epoch": 0.76, + "learning_rate": 1.424051537881773e-08, + "logits/chosen": -1.894639015197754, + "logits/rejected": -1.8750718832015991, + "logps/chosen": -0.10379116982221603, + "logps/rejected": -220.9261016845703, + "loss": 0.3542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0011067762970924377, + "rewards/margins": 3.976752281188965, + "rewards/rejected": -3.9756455421447754, + "step": 13078 + }, + { + "epoch": 0.76, + "learning_rate": 1.4233929277142853e-08, + "logits/chosen": -2.0854947566986084, + "logits/rejected": -2.0957274436950684, + "logps/chosen": -1.1559927463531494, + "logps/rejected": -182.29209899902344, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008769476786255836, + "rewards/margins": 2.1262028217315674, + "rewards/rejected": -2.1174333095550537, + "step": 13079 + }, + { + "epoch": 0.76, + "learning_rate": 1.4227344446053757e-08, + "logits/chosen": -1.9699351787567139, + "logits/rejected": -1.968701958656311, + "logps/chosen": -3.9100315916584805e-05, + "logps/rejected": -243.54443359375, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.510076329708681e-07, + "rewards/margins": 7.6243977546691895, + "rewards/rejected": -7.624398708343506, + "step": 13080 + }, + { + "epoch": 0.76, + "learning_rate": 1.4220760885784372e-08, + "logits/chosen": -2.045724391937256, + "logits/rejected": -2.0370736122131348, + "logps/chosen": -44.978370666503906, + "logps/rejected": -166.41078186035156, + "loss": 0.1654, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0394779443740845, + "rewards/margins": 2.74554181098938, + "rewards/rejected": -1.7060638666152954, + "step": 13081 + }, + { + "epoch": 0.76, + "learning_rate": 1.4214178596568593e-08, + "logits/chosen": -1.7688629627227783, + "logits/rejected": -1.7576631307601929, + "logps/chosen": -232.36328125, + "logps/rejected": -402.3785400390625, + "loss": 0.1393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.275390625, + "rewards/margins": 1.233740210533142, + "rewards/rejected": 1.041650414466858, + "step": 13082 + }, + { + "epoch": 0.76, + "learning_rate": 1.4207597578640218e-08, + "logits/chosen": -1.781266689300537, + "logits/rejected": -1.7853820323944092, + "logps/chosen": -2.4649760723114014, + "logps/rejected": -205.43035888671875, + "loss": 0.3348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011439419351518154, + "rewards/margins": 3.923853874206543, + "rewards/rejected": -3.91241455078125, + "step": 13083 + }, + { + "epoch": 0.76, + "learning_rate": 1.4201017832233058e-08, + "logits/chosen": -1.910180687904358, + "logits/rejected": -1.9091724157333374, + "logps/chosen": -12.27855396270752, + "logps/rejected": -225.00286865234375, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06412458419799805, + "rewards/margins": 5.249466896057129, + "rewards/rejected": -5.185342311859131, + "step": 13084 + }, + { + "epoch": 0.76, + "learning_rate": 1.4194439357580862e-08, + "logits/chosen": -2.120975971221924, + "logits/rejected": -2.113509178161621, + "logps/chosen": -27.0293025970459, + "logps/rejected": -191.05300903320312, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1270851194858551, + "rewards/margins": 4.017917633056641, + "rewards/rejected": -3.8908326625823975, + "step": 13085 + }, + { + "epoch": 0.76, + "learning_rate": 1.418786215491733e-08, + "logits/chosen": -1.8872151374816895, + "logits/rejected": -1.8907917737960815, + "logps/chosen": -9.035909897647798e-05, + "logps/rejected": -200.08590698242188, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2887754766998114e-06, + "rewards/margins": 3.392984628677368, + "rewards/rejected": -3.392987012863159, + "step": 13086 + }, + { + "epoch": 0.76, + "learning_rate": 1.4181286224476102e-08, + "logits/chosen": -1.8231197595596313, + "logits/rejected": -1.7995986938476562, + "logps/chosen": -189.02294921875, + "logps/rejected": -362.46820068359375, + "loss": 0.0459, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9999862909317017, + "rewards/margins": 2.7563705444335938, + "rewards/rejected": -0.7563843131065369, + "step": 13087 + }, + { + "epoch": 0.76, + "learning_rate": 1.4174711566490793e-08, + "logits/chosen": -1.8895853757858276, + "logits/rejected": -1.8740930557250977, + "logps/chosen": -179.62106323242188, + "logps/rejected": -307.23187255859375, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5701309442520142, + "rewards/margins": 0.7948196530342102, + "rewards/rejected": -0.22468872368335724, + "step": 13088 + }, + { + "epoch": 0.76, + "learning_rate": 1.4168138181194966e-08, + "logits/chosen": -1.5676085948944092, + "logits/rejected": -1.5821017026901245, + "logps/chosen": -17.87233543395996, + "logps/rejected": -256.1903076171875, + "loss": 0.5628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3457040786743164, + "rewards/margins": 0.030158430337905884, + "rewards/rejected": 0.3155456483364105, + "step": 13089 + }, + { + "epoch": 0.76, + "learning_rate": 1.4161566068822162e-08, + "logits/chosen": -1.6603460311889648, + "logits/rejected": -1.6597518920898438, + "logps/chosen": -78.52571105957031, + "logps/rejected": -171.9880828857422, + "loss": 1.1579, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8026527762413025, + "rewards/margins": -0.7306236624717712, + "rewards/rejected": -0.07202911376953125, + "step": 13090 + }, + { + "epoch": 0.76, + "learning_rate": 1.415499522960582e-08, + "logits/chosen": -1.5662761926651, + "logits/rejected": -1.5674011707305908, + "logps/chosen": -52.85283660888672, + "logps/rejected": -285.70794677734375, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39471206068992615, + "rewards/margins": 1.4699928760528564, + "rewards/rejected": -1.075280785560608, + "step": 13091 + }, + { + "epoch": 0.76, + "learning_rate": 1.414842566377939e-08, + "logits/chosen": -1.8396719694137573, + "logits/rejected": -1.8651659488677979, + "logps/chosen": -145.43368530273438, + "logps/rejected": -405.33935546875, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.253361463546753, + "rewards/margins": 4.1624555587768555, + "rewards/rejected": -1.909094214439392, + "step": 13092 + }, + { + "epoch": 0.76, + "learning_rate": 1.4141857371576244e-08, + "logits/chosen": -1.803330898284912, + "logits/rejected": -1.7948857545852661, + "logps/chosen": -0.7073763608932495, + "logps/rejected": -173.73313903808594, + "loss": 0.4963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03729521110653877, + "rewards/margins": 0.8916485905647278, + "rewards/rejected": -0.8543533682823181, + "step": 13093 + }, + { + "epoch": 0.76, + "learning_rate": 1.413529035322974e-08, + "logits/chosen": -1.9536449909210205, + "logits/rejected": -1.9439815282821655, + "logps/chosen": -207.05873107910156, + "logps/rejected": -430.37933349609375, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.024655342102051, + "rewards/margins": 4.725027561187744, + "rewards/rejected": -0.7003723382949829, + "step": 13094 + }, + { + "epoch": 0.76, + "learning_rate": 1.4128724608973141e-08, + "logits/chosen": -2.0749552249908447, + "logits/rejected": -2.070197820663452, + "logps/chosen": -35.6252555847168, + "logps/rejected": -157.16348266601562, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5068401098251343, + "rewards/margins": 3.64351749420166, + "rewards/rejected": -2.1366775035858154, + "step": 13095 + }, + { + "epoch": 0.76, + "learning_rate": 1.4122160139039708e-08, + "logits/chosen": -1.7877020835876465, + "logits/rejected": -1.798457384109497, + "logps/chosen": -124.49217224121094, + "logps/rejected": -250.43692016601562, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9787536859512329, + "rewards/margins": 0.6044830083847046, + "rewards/rejected": 0.37427064776420593, + "step": 13096 + }, + { + "epoch": 0.76, + "learning_rate": 1.411559694366265e-08, + "logits/chosen": -2.001823663711548, + "logits/rejected": -2.0304996967315674, + "logps/chosen": -151.75987243652344, + "logps/rejected": -197.28546142578125, + "loss": 0.4304, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.395503282546997, + "rewards/margins": -0.17634117603302002, + "rewards/rejected": 1.571844458580017, + "step": 13097 + }, + { + "epoch": 0.76, + "learning_rate": 1.4109035023075128e-08, + "logits/chosen": -1.9747363328933716, + "logits/rejected": -1.9152199029922485, + "logps/chosen": -290.5011901855469, + "logps/rejected": -647.4908447265625, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.067038059234619, + "rewards/margins": 3.221310615539551, + "rewards/rejected": -1.154272437095642, + "step": 13098 + }, + { + "epoch": 0.76, + "learning_rate": 1.4102474377510232e-08, + "logits/chosen": -2.083221435546875, + "logits/rejected": -2.079807996749878, + "logps/chosen": -7.131661415100098, + "logps/rejected": -202.24041748046875, + "loss": 0.3806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05023803934454918, + "rewards/margins": 4.542269706726074, + "rewards/rejected": -4.592507839202881, + "step": 13099 + }, + { + "epoch": 0.76, + "learning_rate": 1.409591500720102e-08, + "logits/chosen": -1.755385160446167, + "logits/rejected": -1.7387723922729492, + "logps/chosen": -310.7005615234375, + "logps/rejected": -425.01763916015625, + "loss": 0.3071, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.637927293777466, + "rewards/margins": 0.17569589614868164, + "rewards/rejected": 3.462231397628784, + "step": 13100 + }, + { + "epoch": 0.76, + "learning_rate": 1.4089356912380567e-08, + "logits/chosen": -1.8640092611312866, + "logits/rejected": -1.8650249242782593, + "logps/chosen": -39.93356704711914, + "logps/rejected": -177.66050720214844, + "loss": 0.117, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3055256605148315, + "rewards/margins": 4.783835411071777, + "rewards/rejected": -3.4783096313476562, + "step": 13101 + }, + { + "epoch": 0.76, + "learning_rate": 1.40828000932818e-08, + "logits/chosen": -1.9660985469818115, + "logits/rejected": -1.9517461061477661, + "logps/chosen": -155.68679809570312, + "logps/rejected": -250.604248046875, + "loss": 0.1053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.871612548828125, + "rewards/margins": 1.9765441417694092, + "rewards/rejected": -0.10493164509534836, + "step": 13102 + }, + { + "epoch": 0.76, + "learning_rate": 1.4076244550137662e-08, + "logits/chosen": -1.8086998462677002, + "logits/rejected": -1.8070803880691528, + "logps/chosen": -167.96585083007812, + "logps/rejected": -395.0677490234375, + "loss": 0.044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3845245838165283, + "rewards/margins": 3.7940704822540283, + "rewards/rejected": -2.4095458984375, + "step": 13103 + }, + { + "epoch": 0.76, + "learning_rate": 1.4069690283181034e-08, + "logits/chosen": -1.9263317584991455, + "logits/rejected": -1.939777135848999, + "logps/chosen": -31.207500457763672, + "logps/rejected": -379.3180847167969, + "loss": 0.3506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036904335021972656, + "rewards/margins": 2.9210240840911865, + "rewards/rejected": -2.957928419113159, + "step": 13104 + }, + { + "epoch": 0.76, + "learning_rate": 1.4063137292644783e-08, + "logits/chosen": -1.779870867729187, + "logits/rejected": -1.7708933353424072, + "logps/chosen": -65.7601547241211, + "logps/rejected": -192.3338623046875, + "loss": 0.1216, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5227211713790894, + "rewards/margins": 2.960369110107422, + "rewards/rejected": -1.437648057937622, + "step": 13105 + }, + { + "epoch": 0.76, + "learning_rate": 1.4056585578761665e-08, + "logits/chosen": -1.9841651916503906, + "logits/rejected": -1.974360466003418, + "logps/chosen": -120.97296142578125, + "logps/rejected": -334.73114013671875, + "loss": 0.1892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8751495480537415, + "rewards/margins": 6.578067302703857, + "rewards/rejected": -5.702917575836182, + "step": 13106 + }, + { + "epoch": 0.76, + "learning_rate": 1.405003514176445e-08, + "logits/chosen": -1.8787848949432373, + "logits/rejected": -1.8430259227752686, + "logps/chosen": -210.9761962890625, + "logps/rejected": -427.2877502441406, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9832336902618408, + "rewards/margins": 5.008829116821289, + "rewards/rejected": -3.025595188140869, + "step": 13107 + }, + { + "epoch": 0.76, + "learning_rate": 1.4043485981885833e-08, + "logits/chosen": -2.0134060382843018, + "logits/rejected": -2.0153634548187256, + "logps/chosen": -71.08443450927734, + "logps/rejected": -218.06427001953125, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19605790078639984, + "rewards/margins": 3.3632776737213135, + "rewards/rejected": -3.559335470199585, + "step": 13108 + }, + { + "epoch": 0.76, + "learning_rate": 1.4036938099358492e-08, + "logits/chosen": -1.9803940057754517, + "logits/rejected": -1.9644052982330322, + "logps/chosen": -6.61599769955501e-05, + "logps/rejected": -179.82711791992188, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6688617279214668e-06, + "rewards/margins": 3.6851518154144287, + "rewards/rejected": -3.685150146484375, + "step": 13109 + }, + { + "epoch": 0.76, + "learning_rate": 1.4030391494415005e-08, + "logits/chosen": -1.8009116649627686, + "logits/rejected": -1.8034303188323975, + "logps/chosen": -128.80136108398438, + "logps/rejected": -271.33343505859375, + "loss": 0.1563, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3610504865646362, + "rewards/margins": 1.2619293928146362, + "rewards/rejected": 0.09912109375, + "step": 13110 + }, + { + "epoch": 0.76, + "learning_rate": 1.402384616728796e-08, + "logits/chosen": -1.761345386505127, + "logits/rejected": -1.7647563219070435, + "logps/chosen": -212.53575134277344, + "logps/rejected": -310.55078125, + "loss": 0.1464, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.094407796859741, + "rewards/margins": 1.2535783052444458, + "rewards/rejected": 0.8408294916152954, + "step": 13111 + }, + { + "epoch": 0.76, + "learning_rate": 1.4017302118209884e-08, + "logits/chosen": -2.116718292236328, + "logits/rejected": -2.1156301498413086, + "logps/chosen": -13.50003719329834, + "logps/rejected": -253.79519653320312, + "loss": 0.3876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08750028908252716, + "rewards/margins": 7.137704372406006, + "rewards/rejected": -7.2252044677734375, + "step": 13112 + }, + { + "epoch": 0.76, + "learning_rate": 1.4010759347413253e-08, + "logits/chosen": -1.7773146629333496, + "logits/rejected": -1.774979829788208, + "logps/chosen": -27.34873390197754, + "logps/rejected": -407.848388671875, + "loss": 0.2642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2656061351299286, + "rewards/margins": 2.42989444732666, + "rewards/rejected": -2.164288282394409, + "step": 13113 + }, + { + "epoch": 0.76, + "learning_rate": 1.4004217855130485e-08, + "logits/chosen": -1.812529444694519, + "logits/rejected": -1.804442048072815, + "logps/chosen": -205.47157287597656, + "logps/rejected": -541.4471435546875, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.54278564453125, + "rewards/margins": 5.862131118774414, + "rewards/rejected": -3.319345235824585, + "step": 13114 + }, + { + "epoch": 0.76, + "learning_rate": 1.3997677641593969e-08, + "logits/chosen": -1.9387849569320679, + "logits/rejected": -1.930443286895752, + "logps/chosen": -12.143847465515137, + "logps/rejected": -279.38177490234375, + "loss": 0.2122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17794199287891388, + "rewards/margins": 5.3279008865356445, + "rewards/rejected": -5.149959087371826, + "step": 13115 + }, + { + "epoch": 0.76, + "learning_rate": 1.3991138707036048e-08, + "logits/chosen": -1.5803937911987305, + "logits/rejected": -1.582880973815918, + "logps/chosen": -245.62916564941406, + "logps/rejected": -338.88433837890625, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8112716674804688, + "rewards/margins": 2.6802902221679688, + "rewards/rejected": -0.8690185546875, + "step": 13116 + }, + { + "epoch": 0.76, + "learning_rate": 1.3984601051689038e-08, + "logits/chosen": -1.7034807205200195, + "logits/rejected": -1.719159722328186, + "logps/chosen": -238.38772583007812, + "logps/rejected": -279.69219970703125, + "loss": 0.0481, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7820708751678467, + "rewards/margins": 2.85577392578125, + "rewards/rejected": -0.07370300590991974, + "step": 13117 + }, + { + "epoch": 0.76, + "learning_rate": 1.397806467578515e-08, + "logits/chosen": -1.9426995515823364, + "logits/rejected": -1.9623442888259888, + "logps/chosen": -232.54107666015625, + "logps/rejected": -370.7306823730469, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8977386951446533, + "rewards/margins": 4.544610500335693, + "rewards/rejected": -1.6468719244003296, + "step": 13118 + }, + { + "epoch": 0.76, + "learning_rate": 1.3971529579556595e-08, + "logits/chosen": -2.017085313796997, + "logits/rejected": -2.010786294937134, + "logps/chosen": -1.9171539545059204, + "logps/rejected": -218.02786254882812, + "loss": 0.4147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09399918466806412, + "rewards/margins": 2.3551554679870605, + "rewards/rejected": -2.4491546154022217, + "step": 13119 + }, + { + "epoch": 0.76, + "learning_rate": 1.3964995763235577e-08, + "logits/chosen": -1.8858011960983276, + "logits/rejected": -1.8846815824508667, + "logps/chosen": -0.022096974775195122, + "logps/rejected": -221.09043884277344, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008448099833913147, + "rewards/margins": 5.648879051208496, + "rewards/rejected": -5.649724006652832, + "step": 13120 + }, + { + "epoch": 0.76, + "learning_rate": 1.3958463227054157e-08, + "logits/chosen": -1.6623845100402832, + "logits/rejected": -1.6447237730026245, + "logps/chosen": -171.99307250976562, + "logps/rejected": -537.1508178710938, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.61387038230896, + "rewards/margins": 6.089688301086426, + "rewards/rejected": -3.475817918777466, + "step": 13121 + }, + { + "epoch": 0.76, + "learning_rate": 1.3951931971244419e-08, + "logits/chosen": -1.8079192638397217, + "logits/rejected": -1.79120671749115, + "logps/chosen": -59.76756286621094, + "logps/rejected": -170.68223571777344, + "loss": 0.2548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6099273562431335, + "rewards/margins": 2.5211212635040283, + "rewards/rejected": -1.91119384765625, + "step": 13122 + }, + { + "epoch": 0.76, + "learning_rate": 1.3945401996038386e-08, + "logits/chosen": -1.8927024602890015, + "logits/rejected": -1.9056895971298218, + "logps/chosen": -234.35775756835938, + "logps/rejected": -411.4822082519531, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.490741014480591, + "rewards/margins": 7.273672580718994, + "rewards/rejected": -3.7829315662384033, + "step": 13123 + }, + { + "epoch": 0.76, + "learning_rate": 1.3938873301668059e-08, + "logits/chosen": -1.920678973197937, + "logits/rejected": -1.9229483604431152, + "logps/chosen": -0.1803455501794815, + "logps/rejected": -21.30686378479004, + "loss": 0.8108, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.005855494178831577, + "rewards/margins": -0.4617684781551361, + "rewards/rejected": 0.4559129774570465, + "step": 13124 + }, + { + "epoch": 0.76, + "learning_rate": 1.3932345888365327e-08, + "logits/chosen": -1.8459621667861938, + "logits/rejected": -1.825974941253662, + "logps/chosen": -86.0254135131836, + "logps/rejected": -415.7309875488281, + "loss": 0.5484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7169456481933594, + "rewards/margins": 7.2553629875183105, + "rewards/rejected": -7.97230863571167, + "step": 13125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3925819756362096e-08, + "logits/chosen": -2.089461088180542, + "logits/rejected": -2.0915939807891846, + "logps/chosen": -176.6448974609375, + "logps/rejected": -331.39556884765625, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.611261010169983, + "rewards/margins": 3.4818878173828125, + "rewards/rejected": -1.8706268072128296, + "step": 13126 + }, + { + "epoch": 0.76, + "learning_rate": 1.39192949058902e-08, + "logits/chosen": -1.833196759223938, + "logits/rejected": -1.8382203578948975, + "logps/chosen": -10.001534461975098, + "logps/rejected": -214.04429626464844, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2173762321472168, + "rewards/margins": 2.662501096725464, + "rewards/rejected": -2.445124864578247, + "step": 13127 + }, + { + "epoch": 0.76, + "learning_rate": 1.3912771337181461e-08, + "logits/chosen": -1.9591504335403442, + "logits/rejected": -1.957284688949585, + "logps/chosen": -144.59405517578125, + "logps/rejected": -288.5898132324219, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6228805780410767, + "rewards/margins": 1.75523841381073, + "rewards/rejected": -0.13235779106616974, + "step": 13128 + }, + { + "epoch": 0.76, + "learning_rate": 1.3906249050467588e-08, + "logits/chosen": -1.9155361652374268, + "logits/rejected": -1.9156020879745483, + "logps/chosen": -26.465627670288086, + "logps/rejected": -113.43183135986328, + "loss": 0.694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5441234707832336, + "rewards/margins": 0.6794810891151428, + "rewards/rejected": -1.2236045598983765, + "step": 13129 + }, + { + "epoch": 0.76, + "learning_rate": 1.3899728045980303e-08, + "logits/chosen": -2.0420799255371094, + "logits/rejected": -2.029413938522339, + "logps/chosen": -61.729305267333984, + "logps/rejected": -206.55873107910156, + "loss": 0.136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.366665244102478, + "rewards/margins": 2.629706859588623, + "rewards/rejected": -1.2630417346954346, + "step": 13130 + }, + { + "epoch": 0.76, + "learning_rate": 1.389320832395126e-08, + "logits/chosen": -1.919137716293335, + "logits/rejected": -1.8969067335128784, + "logps/chosen": -5.3089399337768555, + "logps/rejected": -310.79449462890625, + "loss": 0.3021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09438391029834747, + "rewards/margins": 6.509154319763184, + "rewards/rejected": -6.414770603179932, + "step": 13131 + }, + { + "epoch": 0.76, + "learning_rate": 1.388668988461209e-08, + "logits/chosen": -1.9929287433624268, + "logits/rejected": -1.9909852743148804, + "logps/chosen": -15.702465057373047, + "logps/rejected": -120.8181381225586, + "loss": 0.2172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6727697253227234, + "rewards/margins": 3.2386245727539062, + "rewards/rejected": -2.565854787826538, + "step": 13132 + }, + { + "epoch": 0.76, + "learning_rate": 1.3880172728194333e-08, + "logits/chosen": -1.9664560556411743, + "logits/rejected": -1.967052936553955, + "logps/chosen": -89.28865051269531, + "logps/rejected": -268.10577392578125, + "loss": 0.2248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24962539970874786, + "rewards/margins": 2.2899606227874756, + "rewards/rejected": -2.040335178375244, + "step": 13133 + }, + { + "epoch": 0.76, + "learning_rate": 1.3873656854929521e-08, + "logits/chosen": -1.7127652168273926, + "logits/rejected": -1.700868844985962, + "logps/chosen": -61.7210693359375, + "logps/rejected": -195.3133544921875, + "loss": 0.1848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.403601884841919, + "rewards/margins": 1.4482582807540894, + "rewards/rejected": -0.04465637356042862, + "step": 13134 + }, + { + "epoch": 0.76, + "learning_rate": 1.3867142265049131e-08, + "logits/chosen": -1.8393093347549438, + "logits/rejected": -1.8407728672027588, + "logps/chosen": -253.83908081054688, + "logps/rejected": -306.05126953125, + "loss": 0.2452, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6936920881271362, + "rewards/margins": 0.5675537586212158, + "rewards/rejected": 1.1261383295059204, + "step": 13135 + }, + { + "epoch": 0.76, + "learning_rate": 1.386062895878461e-08, + "logits/chosen": -1.8388628959655762, + "logits/rejected": -1.842983365058899, + "logps/chosen": -46.24799346923828, + "logps/rejected": -105.18855285644531, + "loss": 1.1533, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.399069607257843, + "rewards/margins": -1.1913013458251953, + "rewards/rejected": 0.7922317385673523, + "step": 13136 + }, + { + "epoch": 0.76, + "learning_rate": 1.3854116936367305e-08, + "logits/chosen": -1.6862587928771973, + "logits/rejected": -1.6668362617492676, + "logps/chosen": -218.41171264648438, + "logps/rejected": -402.85321044921875, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7093642950057983, + "rewards/margins": 2.8340041637420654, + "rewards/rejected": -1.124639868736267, + "step": 13137 + }, + { + "epoch": 0.76, + "learning_rate": 1.3847606198028577e-08, + "logits/chosen": -1.8881866931915283, + "logits/rejected": -1.8802732229232788, + "logps/chosen": -33.49517822265625, + "logps/rejected": -317.4559631347656, + "loss": 0.2593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21149292588233948, + "rewards/margins": 4.123831272125244, + "rewards/rejected": -3.9123382568359375, + "step": 13138 + }, + { + "epoch": 0.76, + "learning_rate": 1.3841096743999714e-08, + "logits/chosen": -1.940110683441162, + "logits/rejected": -1.9286807775497437, + "logps/chosen": -18.63045883178711, + "logps/rejected": -244.43746948242188, + "loss": 0.1987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6406307220458984, + "rewards/margins": 3.1931087970733643, + "rewards/rejected": -2.552478075027466, + "step": 13139 + }, + { + "epoch": 0.76, + "learning_rate": 1.3834588574511974e-08, + "logits/chosen": -2.100855588912964, + "logits/rejected": -2.058974266052246, + "logps/chosen": -144.81591796875, + "logps/rejected": -457.0133056640625, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9724915027618408, + "rewards/margins": 3.572833299636841, + "rewards/rejected": -1.600341796875, + "step": 13140 + }, + { + "epoch": 0.76, + "learning_rate": 1.3828081689796545e-08, + "logits/chosen": -1.52577805519104, + "logits/rejected": -1.5349602699279785, + "logps/chosen": -300.40155029296875, + "logps/rejected": -559.6223754882812, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.35312819480896, + "rewards/margins": 5.242489814758301, + "rewards/rejected": -2.889361619949341, + "step": 13141 + }, + { + "epoch": 0.76, + "learning_rate": 1.3821576090084596e-08, + "logits/chosen": -1.8633780479431152, + "logits/rejected": -1.861567497253418, + "logps/chosen": -151.69850158691406, + "logps/rejected": -167.3293914794922, + "loss": 0.1344, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8575042486190796, + "rewards/margins": 1.6473556756973267, + "rewards/rejected": 0.2101486176252365, + "step": 13142 + }, + { + "epoch": 0.76, + "learning_rate": 1.3815071775607246e-08, + "logits/chosen": -1.8258569240570068, + "logits/rejected": -1.8148021697998047, + "logps/chosen": -38.16636657714844, + "logps/rejected": -181.109619140625, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36986351013183594, + "rewards/margins": 1.896018624305725, + "rewards/rejected": -1.5261551141738892, + "step": 13143 + }, + { + "epoch": 0.76, + "learning_rate": 1.3808568746595529e-08, + "logits/chosen": -1.8105357885360718, + "logits/rejected": -1.9479668140411377, + "logps/chosen": -364.84808349609375, + "logps/rejected": -382.64227294921875, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7576569318771362, + "rewards/margins": 5.4809417724609375, + "rewards/rejected": -3.723284959793091, + "step": 13144 + }, + { + "epoch": 0.76, + "learning_rate": 1.3802067003280482e-08, + "logits/chosen": -1.9514888525009155, + "logits/rejected": -1.8902931213378906, + "logps/chosen": -175.4873046875, + "logps/rejected": -295.20269775390625, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8378937244415283, + "rewards/margins": 1.9150421619415283, + "rewards/rejected": 0.9228515625, + "step": 13145 + }, + { + "epoch": 0.77, + "learning_rate": 1.3795566545893078e-08, + "logits/chosen": -1.7379014492034912, + "logits/rejected": -1.7343981266021729, + "logps/chosen": -86.09371948242188, + "logps/rejected": -198.19088745117188, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4097541868686676, + "rewards/margins": 1.6690925359725952, + "rewards/rejected": -1.25933837890625, + "step": 13146 + }, + { + "epoch": 0.77, + "learning_rate": 1.3789067374664259e-08, + "logits/chosen": -1.8683050870895386, + "logits/rejected": -1.8662354946136475, + "logps/chosen": -303.5790710449219, + "logps/rejected": -388.3565368652344, + "loss": 0.072, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6803619861602783, + "rewards/margins": 2.3887481689453125, + "rewards/rejected": 0.29161378741264343, + "step": 13147 + }, + { + "epoch": 0.77, + "learning_rate": 1.3782569489824886e-08, + "logits/chosen": -1.9530351161956787, + "logits/rejected": -1.952717900276184, + "logps/chosen": -0.0025083087384700775, + "logps/rejected": -203.03099060058594, + "loss": 0.3633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019375697593204677, + "rewards/margins": 4.511804580688477, + "rewards/rejected": -4.511998176574707, + "step": 13148 + }, + { + "epoch": 0.77, + "learning_rate": 1.3776072891605795e-08, + "logits/chosen": -1.9453271627426147, + "logits/rejected": -1.9441865682601929, + "logps/chosen": -41.11216735839844, + "logps/rejected": -49.435096740722656, + "loss": 2.0662, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.1483309268951416, + "rewards/margins": -1.7950682640075684, + "rewards/rejected": -0.353262722492218, + "step": 13149 + }, + { + "epoch": 0.77, + "learning_rate": 1.376957758023779e-08, + "logits/chosen": -1.855430245399475, + "logits/rejected": -1.8240618705749512, + "logps/chosen": -287.22314453125, + "logps/rejected": -664.2802734375, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.944732666015625, + "rewards/margins": 7.868420600891113, + "rewards/rejected": -4.923687934875488, + "step": 13150 + }, + { + "epoch": 0.77, + "learning_rate": 1.376308355595162e-08, + "logits/chosen": -1.8748971223831177, + "logits/rejected": -1.876737117767334, + "logps/chosen": -45.898948669433594, + "logps/rejected": -205.1239776611328, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7383896112442017, + "rewards/margins": 1.0080413818359375, + "rewards/rejected": -0.2696518003940582, + "step": 13151 + }, + { + "epoch": 0.77, + "learning_rate": 1.375659081897797e-08, + "logits/chosen": -1.9296427965164185, + "logits/rejected": -1.9195468425750732, + "logps/chosen": -35.037864685058594, + "logps/rejected": -174.17697143554688, + "loss": 0.2698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5002113580703735, + "rewards/margins": 2.189014434814453, + "rewards/rejected": -1.6888030767440796, + "step": 13152 + }, + { + "epoch": 0.77, + "learning_rate": 1.3750099369547497e-08, + "logits/chosen": -1.7530044317245483, + "logits/rejected": -1.7143076658248901, + "logps/chosen": -280.7915954589844, + "logps/rejected": -510.00396728515625, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.331677198410034, + "rewards/margins": 2.549569606781006, + "rewards/rejected": -0.21789245307445526, + "step": 13153 + }, + { + "epoch": 0.77, + "learning_rate": 1.374360920789081e-08, + "logits/chosen": -2.058594226837158, + "logits/rejected": -2.0237209796905518, + "logps/chosen": -229.37184143066406, + "logps/rejected": -375.33685302734375, + "loss": 0.1414, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.114741563796997, + "rewards/margins": 1.663661241531372, + "rewards/rejected": 0.451080322265625, + "step": 13154 + }, + { + "epoch": 0.77, + "learning_rate": 1.3737120334238489e-08, + "logits/chosen": -1.974185585975647, + "logits/rejected": -1.9731099605560303, + "logps/chosen": -38.60198974609375, + "logps/rejected": -146.91632080078125, + "loss": 0.2692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4243522584438324, + "rewards/margins": 2.7683327198028564, + "rewards/rejected": -2.343980550765991, + "step": 13155 + }, + { + "epoch": 0.77, + "learning_rate": 1.373063274882102e-08, + "logits/chosen": -1.707321047782898, + "logits/rejected": -1.6765079498291016, + "logps/chosen": -257.801513671875, + "logps/rejected": -511.2741394042969, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.534838914871216, + "rewards/margins": 3.652535915374756, + "rewards/rejected": -1.1176971197128296, + "step": 13156 + }, + { + "epoch": 0.77, + "learning_rate": 1.3724146451868884e-08, + "logits/chosen": -1.8129218816757202, + "logits/rejected": -1.8486560583114624, + "logps/chosen": -270.4098205566406, + "logps/rejected": -444.1233215332031, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.242489814758301, + "rewards/margins": 4.073047161102295, + "rewards/rejected": 0.16944275796413422, + "step": 13157 + }, + { + "epoch": 0.77, + "learning_rate": 1.3717661443612516e-08, + "logits/chosen": -1.9487383365631104, + "logits/rejected": -1.9302279949188232, + "logps/chosen": -227.66065979003906, + "logps/rejected": -347.9223327636719, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.399984836578369, + "rewards/margins": 2.5107851028442383, + "rewards/rejected": -0.11080016940832138, + "step": 13158 + }, + { + "epoch": 0.77, + "learning_rate": 1.3711177724282302e-08, + "logits/chosen": -1.7671053409576416, + "logits/rejected": -1.7818894386291504, + "logps/chosen": -62.29559326171875, + "logps/rejected": -261.711669921875, + "loss": 0.1147, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3662292957305908, + "rewards/margins": 3.0615036487579346, + "rewards/rejected": -1.6952743530273438, + "step": 13159 + }, + { + "epoch": 0.77, + "learning_rate": 1.3704695294108548e-08, + "logits/chosen": -1.9140887260437012, + "logits/rejected": -1.9193782806396484, + "logps/chosen": -1.80504310131073, + "logps/rejected": -182.5616455078125, + "loss": 0.4108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17259831726551056, + "rewards/margins": 2.6969757080078125, + "rewards/rejected": -2.8695740699768066, + "step": 13160 + }, + { + "epoch": 0.77, + "learning_rate": 1.3698214153321541e-08, + "logits/chosen": -1.9461705684661865, + "logits/rejected": -1.9447933435440063, + "logps/chosen": -241.5986328125, + "logps/rejected": -426.42718505859375, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9147522449493408, + "rewards/margins": 3.702441453933716, + "rewards/rejected": -1.787689208984375, + "step": 13161 + }, + { + "epoch": 0.77, + "learning_rate": 1.369173430215157e-08, + "logits/chosen": -1.9736077785491943, + "logits/rejected": -1.9774333238601685, + "logps/chosen": -9.446653366088867, + "logps/rejected": -114.11468505859375, + "loss": 0.5286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08086128532886505, + "rewards/margins": 0.5767200589179993, + "rewards/rejected": -0.495858758687973, + "step": 13162 + }, + { + "epoch": 0.77, + "learning_rate": 1.3685255740828783e-08, + "logits/chosen": -1.7483667135238647, + "logits/rejected": -1.760545253753662, + "logps/chosen": -6.310353755950928, + "logps/rejected": -260.6185302734375, + "loss": 0.3452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15065526962280273, + "rewards/margins": 3.5269875526428223, + "rewards/rejected": -3.677642822265625, + "step": 13163 + }, + { + "epoch": 0.77, + "learning_rate": 1.3678778469583351e-08, + "logits/chosen": -1.8209383487701416, + "logits/rejected": -1.7249351739883423, + "logps/chosen": -184.56224060058594, + "logps/rejected": -532.2315673828125, + "loss": 0.0512, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.402796983718872, + "rewards/margins": 3.1135575771331787, + "rewards/rejected": -0.7107605338096619, + "step": 13164 + }, + { + "epoch": 0.77, + "learning_rate": 1.3672302488645372e-08, + "logits/chosen": -1.9286960363388062, + "logits/rejected": -1.9331132173538208, + "logps/chosen": -0.00022195550263859332, + "logps/rejected": -201.126708984375, + "loss": 0.3478, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5889873945270665e-06, + "rewards/margins": 4.093691825866699, + "rewards/rejected": -4.093696594238281, + "step": 13165 + }, + { + "epoch": 0.77, + "learning_rate": 1.3665827798244922e-08, + "logits/chosen": -1.9450782537460327, + "logits/rejected": -1.9401607513427734, + "logps/chosen": -0.04353686049580574, + "logps/rejected": -153.49325561523438, + "loss": 0.4655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014523535035550594, + "rewards/margins": 1.189610481262207, + "rewards/rejected": -1.1750869750976562, + "step": 13166 + }, + { + "epoch": 0.77, + "learning_rate": 1.3659354398611978e-08, + "logits/chosen": -1.9724527597427368, + "logits/rejected": -1.9664262533187866, + "logps/chosen": -76.49779510498047, + "logps/rejected": -150.53085327148438, + "loss": 0.3817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43525391817092896, + "rewards/margins": 0.609814465045929, + "rewards/rejected": -0.174560546875, + "step": 13167 + }, + { + "epoch": 0.77, + "learning_rate": 1.365288228997653e-08, + "logits/chosen": -1.861944556236267, + "logits/rejected": -1.8593653440475464, + "logps/chosen": -245.93966674804688, + "logps/rejected": -378.9631652832031, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.014373779296875, + "rewards/margins": 2.897082567214966, + "rewards/rejected": -0.882708728313446, + "step": 13168 + }, + { + "epoch": 0.77, + "learning_rate": 1.3646411472568498e-08, + "logits/chosen": -2.0258216857910156, + "logits/rejected": -2.0197367668151855, + "logps/chosen": -231.0780792236328, + "logps/rejected": -434.09686279296875, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2230850458145142, + "rewards/margins": 3.7929582595825195, + "rewards/rejected": -2.569873094558716, + "step": 13169 + }, + { + "epoch": 0.77, + "learning_rate": 1.3639941946617772e-08, + "logits/chosen": -1.8393516540527344, + "logits/rejected": -1.9227427244186401, + "logps/chosen": -160.56427001953125, + "logps/rejected": -366.36474609375, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5976455211639404, + "rewards/margins": 3.606928825378418, + "rewards/rejected": -1.009283423423767, + "step": 13170 + }, + { + "epoch": 0.77, + "learning_rate": 1.3633473712354143e-08, + "logits/chosen": -1.9468324184417725, + "logits/rejected": -1.9468570947647095, + "logps/chosen": -126.57549285888672, + "logps/rejected": -309.481201171875, + "loss": 0.0809, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6950584650039673, + "rewards/margins": 2.184062957763672, + "rewards/rejected": -0.489004522562027, + "step": 13171 + }, + { + "epoch": 0.77, + "learning_rate": 1.3627006770007421e-08, + "logits/chosen": -1.805314302444458, + "logits/rejected": -1.8178777694702148, + "logps/chosen": -125.95677947998047, + "logps/rejected": -266.5908508300781, + "loss": 0.206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.651227593421936, + "rewards/margins": 1.829261064529419, + "rewards/rejected": -1.178033471107483, + "step": 13172 + }, + { + "epoch": 0.77, + "learning_rate": 1.3620541119807339e-08, + "logits/chosen": -1.8928958177566528, + "logits/rejected": -1.9012370109558105, + "logps/chosen": -3.676525354385376, + "logps/rejected": -104.11380767822266, + "loss": 0.3417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28447431325912476, + "rewards/margins": 1.2988016605377197, + "rewards/rejected": -1.0143272876739502, + "step": 13173 + }, + { + "epoch": 0.77, + "learning_rate": 1.3614076761983595e-08, + "logits/chosen": -2.0600361824035645, + "logits/rejected": -2.0536437034606934, + "logps/chosen": -39.032684326171875, + "logps/rejected": -305.2126159667969, + "loss": 0.2161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23913346230983734, + "rewards/margins": 5.613326072692871, + "rewards/rejected": -5.374192714691162, + "step": 13174 + }, + { + "epoch": 0.77, + "learning_rate": 1.360761369676582e-08, + "logits/chosen": -1.7650740146636963, + "logits/rejected": -1.8020522594451904, + "logps/chosen": -200.08502197265625, + "logps/rejected": -350.3778076171875, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.022268772125244, + "rewards/margins": 4.677386283874512, + "rewards/rejected": -2.6551177501678467, + "step": 13175 + }, + { + "epoch": 0.77, + "learning_rate": 1.360115192438362e-08, + "logits/chosen": -1.9310499429702759, + "logits/rejected": -1.916610598564148, + "logps/chosen": -34.623199462890625, + "logps/rejected": -230.690185546875, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11417847126722336, + "rewards/margins": 2.7048370838165283, + "rewards/rejected": -2.8190155029296875, + "step": 13176 + }, + { + "epoch": 0.77, + "learning_rate": 1.3594691445066548e-08, + "logits/chosen": -1.8116421699523926, + "logits/rejected": -1.8048150539398193, + "logps/chosen": -0.006721246521919966, + "logps/rejected": -225.19796752929688, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0032223693560808897, + "rewards/margins": 6.185937404632568, + "rewards/rejected": -6.182714939117432, + "step": 13177 + }, + { + "epoch": 0.77, + "learning_rate": 1.3588232259044124e-08, + "logits/chosen": -1.9630283117294312, + "logits/rejected": -1.9627728462219238, + "logps/chosen": -0.006865406408905983, + "logps/rejected": -161.54843139648438, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005434391787275672, + "rewards/margins": 3.3924484252929688, + "rewards/rejected": -3.392991781234741, + "step": 13178 + }, + { + "epoch": 0.77, + "learning_rate": 1.358177436654579e-08, + "logits/chosen": -1.9413093328475952, + "logits/rejected": -1.951425313949585, + "logps/chosen": -242.0917205810547, + "logps/rejected": -339.2832946777344, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.463413953781128, + "rewards/margins": 2.3437118530273438, + "rewards/rejected": 0.11970215290784836, + "step": 13179 + }, + { + "epoch": 0.77, + "learning_rate": 1.3575317767800954e-08, + "logits/chosen": -1.872910499572754, + "logits/rejected": -1.8662956953048706, + "logps/chosen": -263.13348388671875, + "logps/rejected": -317.5325927734375, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27131348848342896, + "rewards/margins": 0.6179717779159546, + "rewards/rejected": -0.346658319234848, + "step": 13180 + }, + { + "epoch": 0.77, + "learning_rate": 1.3568862463039032e-08, + "logits/chosen": -1.947006106376648, + "logits/rejected": -1.9342974424362183, + "logps/chosen": -339.729736328125, + "logps/rejected": -550.8636474609375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.190661907196045, + "rewards/margins": 7.535327434539795, + "rewards/rejected": -3.34466552734375, + "step": 13181 + }, + { + "epoch": 0.77, + "learning_rate": 1.356240845248931e-08, + "logits/chosen": -1.8046770095825195, + "logits/rejected": -1.806326985359192, + "logps/chosen": -154.03924560546875, + "logps/rejected": -268.99688720703125, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0860612392425537, + "rewards/margins": 0.04491734504699707, + "rewards/rejected": 2.0411438941955566, + "step": 13182 + }, + { + "epoch": 0.77, + "learning_rate": 1.3555955736381069e-08, + "logits/chosen": -1.9205924272537231, + "logits/rejected": -1.9119571447372437, + "logps/chosen": -158.99130249023438, + "logps/rejected": -264.8844299316406, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4248443841934204, + "rewards/margins": 3.2060577869415283, + "rewards/rejected": -1.781213402748108, + "step": 13183 + }, + { + "epoch": 0.77, + "learning_rate": 1.354950431494355e-08, + "logits/chosen": -1.757684588432312, + "logits/rejected": -1.8076977729797363, + "logps/chosen": -170.1659393310547, + "logps/rejected": -155.27999877929688, + "loss": 0.3887, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.584370493888855, + "rewards/margins": -0.06104278564453125, + "rewards/rejected": 1.6454132795333862, + "step": 13184 + }, + { + "epoch": 0.77, + "learning_rate": 1.354305418840595e-08, + "logits/chosen": -2.0481889247894287, + "logits/rejected": -2.0411267280578613, + "logps/chosen": -86.09619140625, + "logps/rejected": -366.2056884765625, + "loss": 0.1213, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.060845971107483, + "rewards/margins": 2.4214110374450684, + "rewards/rejected": -1.360565185546875, + "step": 13185 + }, + { + "epoch": 0.77, + "learning_rate": 1.3536605356997382e-08, + "logits/chosen": -1.930774211883545, + "logits/rejected": -1.8461395502090454, + "logps/chosen": -229.9440460205078, + "logps/rejected": -598.5693359375, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.461137533187866, + "rewards/margins": 3.6575608253479004, + "rewards/rejected": -1.1964234113693237, + "step": 13186 + }, + { + "epoch": 0.77, + "learning_rate": 1.3530157820946953e-08, + "logits/chosen": -1.8166491985321045, + "logits/rejected": -1.7902112007141113, + "logps/chosen": -85.7248306274414, + "logps/rejected": -401.8768310546875, + "loss": 0.2072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6027435660362244, + "rewards/margins": 8.79114055633545, + "rewards/rejected": -8.188397407531738, + "step": 13187 + }, + { + "epoch": 0.77, + "learning_rate": 1.3523711580483716e-08, + "logits/chosen": -1.8994982242584229, + "logits/rejected": -1.8962795734405518, + "logps/chosen": -0.00010609465243760496, + "logps/rejected": -75.41118621826172, + "loss": 0.6246, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.084769678527664e-06, + "rewards/margins": 0.26346176862716675, + "rewards/rejected": -0.2634628415107727, + "step": 13188 + }, + { + "epoch": 0.77, + "learning_rate": 1.3517266635836677e-08, + "logits/chosen": -2.062715768814087, + "logits/rejected": -2.058392286300659, + "logps/chosen": -6.881389141082764, + "logps/rejected": -203.10983276367188, + "loss": 0.6702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20551252365112305, + "rewards/margins": 0.16570940613746643, + "rewards/rejected": -0.3712219297885895, + "step": 13189 + }, + { + "epoch": 0.77, + "learning_rate": 1.3510822987234771e-08, + "logits/chosen": -1.787418246269226, + "logits/rejected": -1.7886165380477905, + "logps/chosen": -0.000530226097907871, + "logps/rejected": -107.07097625732422, + "loss": 0.5033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5514209735556506e-05, + "rewards/margins": 0.8240758776664734, + "rewards/rejected": -0.824091374874115, + "step": 13190 + }, + { + "epoch": 0.77, + "learning_rate": 1.350438063490692e-08, + "logits/chosen": -1.9281153678894043, + "logits/rejected": -2.009430170059204, + "logps/chosen": -229.4224853515625, + "logps/rejected": -302.85467529296875, + "loss": 0.1721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.565698266029358, + "rewards/margins": 1.0500702857971191, + "rewards/rejected": 0.5156280398368835, + "step": 13191 + }, + { + "epoch": 0.77, + "learning_rate": 1.3497939579081985e-08, + "logits/chosen": -2.0090866088867188, + "logits/rejected": -2.002427577972412, + "logps/chosen": -46.15144348144531, + "logps/rejected": -313.1708984375, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11335144191980362, + "rewards/margins": 5.543252944946289, + "rewards/rejected": -5.656604290008545, + "step": 13192 + }, + { + "epoch": 0.77, + "learning_rate": 1.3491499819988805e-08, + "logits/chosen": -1.872320532798767, + "logits/rejected": -1.8720872402191162, + "logps/chosen": -68.84217071533203, + "logps/rejected": -311.8310546875, + "loss": 0.0973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1777671575546265, + "rewards/margins": 4.479045867919922, + "rewards/rejected": -3.301278829574585, + "step": 13193 + }, + { + "epoch": 0.77, + "learning_rate": 1.348506135785612e-08, + "logits/chosen": -1.9588031768798828, + "logits/rejected": -1.942844271659851, + "logps/chosen": -13.166360855102539, + "logps/rejected": -169.76632690429688, + "loss": 0.3599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1408197432756424, + "rewards/margins": 1.4591059684753418, + "rewards/rejected": -1.3182861804962158, + "step": 13194 + }, + { + "epoch": 0.77, + "learning_rate": 1.3478624192912669e-08, + "logits/chosen": -1.9476759433746338, + "logits/rejected": -1.9410526752471924, + "logps/chosen": -61.55525588989258, + "logps/rejected": -185.82623291015625, + "loss": 0.2537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7645137906074524, + "rewards/margins": 1.9076488018035889, + "rewards/rejected": -1.1431350708007812, + "step": 13195 + }, + { + "epoch": 0.77, + "learning_rate": 1.3472188325387134e-08, + "logits/chosen": -1.9040275812149048, + "logits/rejected": -1.835572361946106, + "logps/chosen": -363.67010498046875, + "logps/rejected": -609.3177490234375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.342740058898926, + "rewards/margins": 4.014212131500244, + "rewards/rejected": 0.3285278379917145, + "step": 13196 + }, + { + "epoch": 0.77, + "learning_rate": 1.3465753755508157e-08, + "logits/chosen": -1.9588195085525513, + "logits/rejected": -1.9623593091964722, + "logps/chosen": -2.999445915222168, + "logps/rejected": -68.12342834472656, + "loss": 0.4782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12563161551952362, + "rewards/margins": 0.9041056632995605, + "rewards/rejected": -0.7784740328788757, + "step": 13197 + }, + { + "epoch": 0.77, + "learning_rate": 1.34593204835043e-08, + "logits/chosen": -1.7651609182357788, + "logits/rejected": -1.7636538743972778, + "logps/chosen": -3.681915760040283, + "logps/rejected": -59.81227111816406, + "loss": 0.6577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029755854979157448, + "rewards/margins": 0.03279120847582817, + "rewards/rejected": -0.0030353546608239412, + "step": 13198 + }, + { + "epoch": 0.77, + "learning_rate": 1.3452888509604122e-08, + "logits/chosen": -1.8702267408370972, + "logits/rejected": -1.8700447082519531, + "logps/chosen": -81.23943328857422, + "logps/rejected": -320.7997131347656, + "loss": 0.2579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22130127251148224, + "rewards/margins": 3.5494508743286133, + "rewards/rejected": -3.3281495571136475, + "step": 13199 + }, + { + "epoch": 0.77, + "learning_rate": 1.3446457834036119e-08, + "logits/chosen": -1.9307339191436768, + "logits/rejected": -1.9109073877334595, + "logps/chosen": -34.66078186035156, + "logps/rejected": -174.7125244140625, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.075707197189331, + "rewards/margins": 3.3920416831970215, + "rewards/rejected": -1.31633460521698, + "step": 13200 + }, + { + "epoch": 0.77, + "learning_rate": 1.3440028457028729e-08, + "logits/chosen": -1.977235198020935, + "logits/rejected": -1.9776498079299927, + "logps/chosen": -111.18252563476562, + "logps/rejected": -348.7684326171875, + "loss": 0.1114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5947707891464233, + "rewards/margins": 2.4492812156677246, + "rewards/rejected": -0.854510486125946, + "step": 13201 + }, + { + "epoch": 0.77, + "learning_rate": 1.3433600378810379e-08, + "logits/chosen": -1.9836714267730713, + "logits/rejected": -1.9796974658966064, + "logps/chosen": -117.8275146484375, + "logps/rejected": -321.43408203125, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5141937732696533, + "rewards/margins": 0.3217315673828125, + "rewards/rejected": 1.1924622058868408, + "step": 13202 + }, + { + "epoch": 0.77, + "learning_rate": 1.3427173599609381e-08, + "logits/chosen": -2.02256178855896, + "logits/rejected": -2.023848056793213, + "logps/chosen": -42.808387756347656, + "logps/rejected": -102.98335266113281, + "loss": 0.9583, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.01019287109375, + "rewards/margins": -0.0031592845916748047, + "rewards/rejected": -1.0070335865020752, + "step": 13203 + }, + { + "epoch": 0.77, + "learning_rate": 1.3420748119654102e-08, + "logits/chosen": -1.7424262762069702, + "logits/rejected": -1.7422327995300293, + "logps/chosen": -13.956977844238281, + "logps/rejected": -232.99838256835938, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.038503553718328476, + "rewards/margins": 3.133312702178955, + "rewards/rejected": -3.094809055328369, + "step": 13204 + }, + { + "epoch": 0.77, + "learning_rate": 1.3414323939172767e-08, + "logits/chosen": -1.9427953958511353, + "logits/rejected": -1.9606969356536865, + "logps/chosen": -75.2849349975586, + "logps/rejected": -210.9636688232422, + "loss": 0.2954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4328514039516449, + "rewards/margins": 2.0958991050720215, + "rewards/rejected": -1.6630477905273438, + "step": 13205 + }, + { + "epoch": 0.77, + "learning_rate": 1.3407901058393601e-08, + "logits/chosen": -1.908189058303833, + "logits/rejected": -1.903272271156311, + "logps/chosen": -38.34141159057617, + "logps/rejected": -132.33750915527344, + "loss": 0.4798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5008953213691711, + "rewards/margins": 0.18977013230323792, + "rewards/rejected": 0.3111251890659332, + "step": 13206 + }, + { + "epoch": 0.77, + "learning_rate": 1.340147947754478e-08, + "logits/chosen": -1.9014192819595337, + "logits/rejected": -1.8943347930908203, + "logps/chosen": -25.93946075439453, + "logps/rejected": -248.0222930908203, + "loss": 0.3265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11153831332921982, + "rewards/margins": 3.1434004306793213, + "rewards/rejected": -3.0318620204925537, + "step": 13207 + }, + { + "epoch": 0.77, + "learning_rate": 1.3395059196854453e-08, + "logits/chosen": -1.9128268957138062, + "logits/rejected": -1.9203827381134033, + "logps/chosen": -162.0071258544922, + "logps/rejected": -356.5162353515625, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.28424072265625, + "rewards/margins": 2.8994414806365967, + "rewards/rejected": 0.38479921221733093, + "step": 13208 + }, + { + "epoch": 0.77, + "learning_rate": 1.3388640216550656e-08, + "logits/chosen": -1.875217080116272, + "logits/rejected": -1.8726520538330078, + "logps/chosen": -0.09941087663173676, + "logps/rejected": -115.88275146484375, + "loss": 0.3747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029065698385238647, + "rewards/margins": 2.5560972690582275, + "rewards/rejected": -2.527031660079956, + "step": 13209 + }, + { + "epoch": 0.77, + "learning_rate": 1.3382222536861448e-08, + "logits/chosen": -1.6312958002090454, + "logits/rejected": -1.631500244140625, + "logps/chosen": -0.00023529808095190674, + "logps/rejected": -83.99788665771484, + "loss": 0.6022, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.982686616334831e-06, + "rewards/margins": 0.24175097048282623, + "rewards/rejected": -0.2417449951171875, + "step": 13210 + }, + { + "epoch": 0.77, + "learning_rate": 1.3375806158014808e-08, + "logits/chosen": -1.8355162143707275, + "logits/rejected": -1.8348172903060913, + "logps/chosen": -227.32473754882812, + "logps/rejected": -559.4259033203125, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6132233142852783, + "rewards/margins": 4.780728340148926, + "rewards/rejected": -3.1675050258636475, + "step": 13211 + }, + { + "epoch": 0.77, + "learning_rate": 1.3369391080238701e-08, + "logits/chosen": -1.8938535451889038, + "logits/rejected": -1.8886382579803467, + "logps/chosen": -22.36124038696289, + "logps/rejected": -134.02719116210938, + "loss": 0.4392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10108719021081924, + "rewards/margins": 1.4465065002441406, + "rewards/rejected": -1.3454192876815796, + "step": 13212 + }, + { + "epoch": 0.77, + "learning_rate": 1.336297730376098e-08, + "logits/chosen": -1.9837641716003418, + "logits/rejected": -1.9814951419830322, + "logps/chosen": -18.37600326538086, + "logps/rejected": -176.03533935546875, + "loss": 0.3547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08607292175292969, + "rewards/margins": 3.9543232917785645, + "rewards/rejected": -4.040396213531494, + "step": 13213 + }, + { + "epoch": 0.77, + "learning_rate": 1.3356564828809525e-08, + "logits/chosen": -1.7915773391723633, + "logits/rejected": -1.7807977199554443, + "logps/chosen": -171.4025421142578, + "logps/rejected": -247.27874755859375, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.562211751937866, + "rewards/margins": 1.4501909017562866, + "rewards/rejected": 1.1120208501815796, + "step": 13214 + }, + { + "epoch": 0.77, + "learning_rate": 1.3350153655612123e-08, + "logits/chosen": -1.7459558248519897, + "logits/rejected": -1.7471994161605835, + "logps/chosen": -227.387451171875, + "logps/rejected": -345.751708984375, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7914764881134033, + "rewards/margins": 2.805508613586426, + "rewards/rejected": -1.014032006263733, + "step": 13215 + }, + { + "epoch": 0.77, + "learning_rate": 1.3343743784396561e-08, + "logits/chosen": -1.6920804977416992, + "logits/rejected": -1.7071806192398071, + "logps/chosen": -82.03544616699219, + "logps/rejected": -436.4834289550781, + "loss": 0.1276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1433228254318237, + "rewards/margins": 3.5526490211486816, + "rewards/rejected": -2.4093263149261475, + "step": 13216 + }, + { + "epoch": 0.77, + "learning_rate": 1.33373352153905e-08, + "logits/chosen": -1.7486613988876343, + "logits/rejected": -1.6429625749588013, + "logps/chosen": -214.29843139648438, + "logps/rejected": -462.0600891113281, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8475799560546875, + "rewards/margins": 2.8168272972106934, + "rewards/rejected": 0.03075256384909153, + "step": 13217 + }, + { + "epoch": 0.77, + "learning_rate": 1.3330927948821635e-08, + "logits/chosen": -1.7279828786849976, + "logits/rejected": -1.757857084274292, + "logps/chosen": -182.86351013183594, + "logps/rejected": -253.681396484375, + "loss": 0.1248, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9438766241073608, + "rewards/margins": 1.666520595550537, + "rewards/rejected": 0.27735596895217896, + "step": 13218 + }, + { + "epoch": 0.77, + "learning_rate": 1.3324521984917574e-08, + "logits/chosen": -1.7270225286483765, + "logits/rejected": -1.7296518087387085, + "logps/chosen": -37.019554138183594, + "logps/rejected": -90.23483276367188, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4878082275390625, + "rewards/margins": 1.042576551437378, + "rewards/rejected": -0.5547683835029602, + "step": 13219 + }, + { + "epoch": 0.77, + "learning_rate": 1.3318117323905908e-08, + "logits/chosen": -1.8579026460647583, + "logits/rejected": -1.8516441583633423, + "logps/chosen": -219.61895751953125, + "logps/rejected": -457.1483154296875, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0657241344451904, + "rewards/margins": 4.8419904708862305, + "rewards/rejected": -1.7762664556503296, + "step": 13220 + }, + { + "epoch": 0.77, + "learning_rate": 1.3311713966014127e-08, + "logits/chosen": -1.726938247680664, + "logits/rejected": -1.734811782836914, + "logps/chosen": -135.250732421875, + "logps/rejected": -218.25119018554688, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.891064465045929, + "rewards/margins": 0.5031188726425171, + "rewards/rejected": 0.3879455626010895, + "step": 13221 + }, + { + "epoch": 0.77, + "learning_rate": 1.3305311911469714e-08, + "logits/chosen": -1.8397083282470703, + "logits/rejected": -1.8247687816619873, + "logps/chosen": -52.53175354003906, + "logps/rejected": -205.3271484375, + "loss": 0.2689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4393600523471832, + "rewards/margins": 2.1100616455078125, + "rewards/rejected": -1.6707016229629517, + "step": 13222 + }, + { + "epoch": 0.77, + "learning_rate": 1.3298911160500147e-08, + "logits/chosen": -1.9045445919036865, + "logits/rejected": -1.8825169801712036, + "logps/chosen": -193.50149536132812, + "logps/rejected": -387.85589599609375, + "loss": 0.1283, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.740248203277588, + "rewards/margins": 1.308790683746338, + "rewards/rejected": 1.43145751953125, + "step": 13223 + }, + { + "epoch": 0.77, + "learning_rate": 1.3292511713332765e-08, + "logits/chosen": -1.8331773281097412, + "logits/rejected": -1.8306132555007935, + "logps/chosen": -41.546112060546875, + "logps/rejected": -145.352294921875, + "loss": 0.4482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2803657650947571, + "rewards/margins": 0.7535484433174133, + "rewards/rejected": -0.47318267822265625, + "step": 13224 + }, + { + "epoch": 0.77, + "learning_rate": 1.3286113570194918e-08, + "logits/chosen": -1.9742796421051025, + "logits/rejected": -1.9744330644607544, + "logps/chosen": -27.3725643157959, + "logps/rejected": -196.98214721679688, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8670759201049805, + "rewards/margins": 1.0883725881576538, + "rewards/rejected": -0.22129669785499573, + "step": 13225 + }, + { + "epoch": 0.77, + "learning_rate": 1.3279716731313911e-08, + "logits/chosen": -1.8758727312088013, + "logits/rejected": -1.9202324151992798, + "logps/chosen": -215.5375213623047, + "logps/rejected": -375.95233154296875, + "loss": 0.1135, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.367034912109375, + "rewards/margins": 1.973602294921875, + "rewards/rejected": -0.6065673828125, + "step": 13226 + }, + { + "epoch": 0.77, + "learning_rate": 1.3273321196916993e-08, + "logits/chosen": -1.9129763841629028, + "logits/rejected": -1.9186575412750244, + "logps/chosen": -15.433451652526855, + "logps/rejected": -253.8610076904297, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10527868568897247, + "rewards/margins": 2.5774528980255127, + "rewards/rejected": -2.4721741676330566, + "step": 13227 + }, + { + "epoch": 0.77, + "learning_rate": 1.3266926967231345e-08, + "logits/chosen": -1.9069918394088745, + "logits/rejected": -1.9301925897598267, + "logps/chosen": -195.18392944335938, + "logps/rejected": -462.3857421875, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3023102283477783, + "rewards/margins": 6.202633857727051, + "rewards/rejected": -3.9003236293792725, + "step": 13228 + }, + { + "epoch": 0.77, + "learning_rate": 1.3260534042484128e-08, + "logits/chosen": -1.8093305826187134, + "logits/rejected": -1.784723162651062, + "logps/chosen": -202.47760009765625, + "logps/rejected": -457.1161804199219, + "loss": 0.0589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9816207885742188, + "rewards/margins": 4.670845031738281, + "rewards/rejected": -3.6892242431640625, + "step": 13229 + }, + { + "epoch": 0.77, + "learning_rate": 1.3254142422902459e-08, + "logits/chosen": -1.9425206184387207, + "logits/rejected": -1.9325679540634155, + "logps/chosen": -167.75372314453125, + "logps/rejected": -404.79095458984375, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9656647443771362, + "rewards/margins": 3.834378242492676, + "rewards/rejected": -1.86871337890625, + "step": 13230 + }, + { + "epoch": 0.77, + "learning_rate": 1.3247752108713406e-08, + "logits/chosen": -2.0340213775634766, + "logits/rejected": -2.036050319671631, + "logps/chosen": -104.71996307373047, + "logps/rejected": -220.34765625, + "loss": 0.2548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17068099975585938, + "rewards/margins": 2.4055793285369873, + "rewards/rejected": -2.234898328781128, + "step": 13231 + }, + { + "epoch": 0.77, + "learning_rate": 1.324136310014396e-08, + "logits/chosen": -2.009451150894165, + "logits/rejected": -2.0176916122436523, + "logps/chosen": -0.0024090507067739964, + "logps/rejected": -169.91888427734375, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001975044870050624, + "rewards/margins": 3.711087942123413, + "rewards/rejected": -3.711285352706909, + "step": 13232 + }, + { + "epoch": 0.77, + "learning_rate": 1.32349753974211e-08, + "logits/chosen": -1.9402995109558105, + "logits/rejected": -1.9414609670639038, + "logps/chosen": -0.6440605521202087, + "logps/rejected": -112.1340103149414, + "loss": 0.5739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050638001412153244, + "rewards/margins": 0.4621858596801758, + "rewards/rejected": -0.41154786944389343, + "step": 13233 + }, + { + "epoch": 0.77, + "learning_rate": 1.322858900077175e-08, + "logits/chosen": -1.883054494857788, + "logits/rejected": -1.8330135345458984, + "logps/chosen": -136.8587188720703, + "logps/rejected": -366.3592224121094, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.187678575515747, + "rewards/margins": 1.6488327980041504, + "rewards/rejected": 0.5388458371162415, + "step": 13234 + }, + { + "epoch": 0.77, + "learning_rate": 1.3222203910422808e-08, + "logits/chosen": -1.9132845401763916, + "logits/rejected": -1.9132676124572754, + "logps/chosen": -10.117995262145996, + "logps/rejected": -96.36897277832031, + "loss": 0.4435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4750106930732727, + "rewards/margins": 0.42285311222076416, + "rewards/rejected": 0.05215759202837944, + "step": 13235 + }, + { + "epoch": 0.77, + "learning_rate": 1.3215820126601063e-08, + "logits/chosen": -1.8976095914840698, + "logits/rejected": -1.896559238433838, + "logps/chosen": -5.699685096740723, + "logps/rejected": -203.55845642089844, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050549935549497604, + "rewards/margins": 2.8013267517089844, + "rewards/rejected": -2.750776767730713, + "step": 13236 + }, + { + "epoch": 0.77, + "learning_rate": 1.3209437649533317e-08, + "logits/chosen": -1.7005884647369385, + "logits/rejected": -1.6973274946212769, + "logps/chosen": -32.274391174316406, + "logps/rejected": -77.30455017089844, + "loss": 0.4254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9166271090507507, + "rewards/margins": 0.329403281211853, + "rewards/rejected": 0.5872238278388977, + "step": 13237 + }, + { + "epoch": 0.77, + "learning_rate": 1.3203056479446311e-08, + "logits/chosen": -1.9961897134780884, + "logits/rejected": -1.9978364706039429, + "logps/chosen": -17.013534545898438, + "logps/rejected": -196.83087158203125, + "loss": 0.3754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24968986213207245, + "rewards/margins": 1.6342189311981201, + "rewards/rejected": -1.3845291137695312, + "step": 13238 + }, + { + "epoch": 0.77, + "learning_rate": 1.3196676616566749e-08, + "logits/chosen": -2.133239984512329, + "logits/rejected": -2.1282594203948975, + "logps/chosen": -6.925924390088767e-05, + "logps/rejected": -202.25538635253906, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.539064098527888e-06, + "rewards/margins": 5.381435394287109, + "rewards/rejected": -5.3814377784729, + "step": 13239 + }, + { + "epoch": 0.77, + "learning_rate": 1.3190298061121236e-08, + "logits/chosen": -1.8677500486373901, + "logits/rejected": -1.8284680843353271, + "logps/chosen": -204.53025817871094, + "logps/rejected": -469.1226806640625, + "loss": 0.0756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.250447154045105, + "rewards/margins": 2.421858310699463, + "rewards/rejected": -1.171411156654358, + "step": 13240 + }, + { + "epoch": 0.77, + "learning_rate": 1.3183920813336385e-08, + "logits/chosen": -2.0682644844055176, + "logits/rejected": -2.0584218502044678, + "logps/chosen": -15.341197967529297, + "logps/rejected": -370.4486999511719, + "loss": 0.3407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001612854073755443, + "rewards/margins": 9.453731536865234, + "rewards/rejected": -9.455344200134277, + "step": 13241 + }, + { + "epoch": 0.77, + "learning_rate": 1.3177544873438778e-08, + "logits/chosen": -1.758249282836914, + "logits/rejected": -1.7598450183868408, + "logps/chosen": -207.57188415527344, + "logps/rejected": -354.6123046875, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.562860131263733, + "rewards/margins": 0.9081665277481079, + "rewards/rejected": 0.654693603515625, + "step": 13242 + }, + { + "epoch": 0.77, + "learning_rate": 1.3171170241654882e-08, + "logits/chosen": -1.705792784690857, + "logits/rejected": -1.7310681343078613, + "logps/chosen": -260.5335693359375, + "logps/rejected": -475.44842529296875, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.956091284751892, + "rewards/margins": 4.835608005523682, + "rewards/rejected": -2.8795166015625, + "step": 13243 + }, + { + "epoch": 0.77, + "learning_rate": 1.316479691821117e-08, + "logits/chosen": -1.8191686868667603, + "logits/rejected": -1.7756454944610596, + "logps/chosen": -228.24310302734375, + "logps/rejected": -390.176513671875, + "loss": 0.1479, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9079681634902954, + "rewards/margins": 1.6042144298553467, + "rewards/rejected": 0.30375367403030396, + "step": 13244 + }, + { + "epoch": 0.77, + "learning_rate": 1.3158424903334048e-08, + "logits/chosen": -2.071014165878296, + "logits/rejected": -2.064629077911377, + "logps/chosen": -0.03050372563302517, + "logps/rejected": -336.91522216796875, + "loss": 0.3396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021555160637944937, + "rewards/margins": 6.381511688232422, + "rewards/rejected": -6.3836669921875, + "step": 13245 + }, + { + "epoch": 0.77, + "learning_rate": 1.31520541972499e-08, + "logits/chosen": -1.9471006393432617, + "logits/rejected": -1.9447579383850098, + "logps/chosen": -3.8027479604352266e-05, + "logps/rejected": -150.49420166015625, + "loss": 0.361, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.13381576941174e-06, + "rewards/margins": 3.3704092502593994, + "rewards/rejected": -3.3704071044921875, + "step": 13246 + }, + { + "epoch": 0.77, + "learning_rate": 1.3145684800185013e-08, + "logits/chosen": -1.6545517444610596, + "logits/rejected": -1.6476657390594482, + "logps/chosen": -11.268310546875, + "logps/rejected": -193.76028442382812, + "loss": 0.2259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22061243653297424, + "rewards/margins": 3.7095210552215576, + "rewards/rejected": -3.488908529281616, + "step": 13247 + }, + { + "epoch": 0.77, + "learning_rate": 1.3139316712365683e-08, + "logits/chosen": -1.722359299659729, + "logits/rejected": -1.7148003578186035, + "logps/chosen": -87.50502014160156, + "logps/rejected": -264.8660888671875, + "loss": 0.1995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.54012531042099, + "rewards/margins": 3.7581567764282227, + "rewards/rejected": -3.218031406402588, + "step": 13248 + }, + { + "epoch": 0.77, + "learning_rate": 1.3132949934018123e-08, + "logits/chosen": -1.7665951251983643, + "logits/rejected": -1.7596815824508667, + "logps/chosen": -160.19786071777344, + "logps/rejected": -547.2120361328125, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3228485584259033, + "rewards/margins": 7.422492027282715, + "rewards/rejected": -5.099643230438232, + "step": 13249 + }, + { + "epoch": 0.77, + "learning_rate": 1.3126584465368534e-08, + "logits/chosen": -2.0944931507110596, + "logits/rejected": -2.0613934993743896, + "logps/chosen": -225.84255981445312, + "logps/rejected": -464.50604248046875, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7727813720703125, + "rewards/margins": 5.618524551391602, + "rewards/rejected": -2.84574294090271, + "step": 13250 + }, + { + "epoch": 0.77, + "learning_rate": 1.312022030664302e-08, + "logits/chosen": -1.7581026554107666, + "logits/rejected": -1.7551370859146118, + "logps/chosen": -7.56835412979126, + "logps/rejected": -27.99945831298828, + "loss": 0.6412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023477792739868164, + "rewards/margins": 0.0968710407614708, + "rewards/rejected": -0.07339324802160263, + "step": 13251 + }, + { + "epoch": 0.77, + "learning_rate": 1.3113857458067679e-08, + "logits/chosen": -1.9859367609024048, + "logits/rejected": -1.9890941381454468, + "logps/chosen": -0.000434810237493366, + "logps/rejected": -149.38702392578125, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0844629034399986e-05, + "rewards/margins": 1.9163304567337036, + "rewards/rejected": -1.916351318359375, + "step": 13252 + }, + { + "epoch": 0.77, + "learning_rate": 1.3107495919868545e-08, + "logits/chosen": -1.8803905248641968, + "logits/rejected": -1.8713085651397705, + "logps/chosen": -152.88211059570312, + "logps/rejected": -318.20208740234375, + "loss": 0.2343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9201629757881165, + "rewards/margins": 0.762835681438446, + "rewards/rejected": 0.15732727944850922, + "step": 13253 + }, + { + "epoch": 0.77, + "learning_rate": 1.3101135692271636e-08, + "logits/chosen": -1.7267447710037231, + "logits/rejected": -1.7241356372833252, + "logps/chosen": -293.552734375, + "logps/rejected": -322.9250793457031, + "loss": 0.6871, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1801483631134033, + "rewards/margins": 0.3529571294784546, + "rewards/rejected": -1.533105492591858, + "step": 13254 + }, + { + "epoch": 0.77, + "learning_rate": 1.3094776775502863e-08, + "logits/chosen": -2.131053924560547, + "logits/rejected": -2.1248373985290527, + "logps/chosen": -47.01116180419922, + "logps/rejected": -146.6161346435547, + "loss": 0.2169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8056492209434509, + "rewards/margins": 2.2602131366729736, + "rewards/rejected": -1.4545639753341675, + "step": 13255 + }, + { + "epoch": 0.77, + "learning_rate": 1.308841916978814e-08, + "logits/chosen": -2.0501832962036133, + "logits/rejected": -2.0532989501953125, + "logps/chosen": -0.8625626564025879, + "logps/rejected": -120.58026123046875, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048922277987003326, + "rewards/margins": 1.6693370342254639, + "rewards/rejected": -1.6204147338867188, + "step": 13256 + }, + { + "epoch": 0.77, + "learning_rate": 1.3082062875353328e-08, + "logits/chosen": -2.053483486175537, + "logits/rejected": -2.050626039505005, + "logps/chosen": -1.3909428119659424, + "logps/rejected": -44.41782760620117, + "loss": 0.7036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031593095511198044, + "rewards/margins": 0.03891967609524727, + "rewards/rejected": -0.07051277160644531, + "step": 13257 + }, + { + "epoch": 0.77, + "learning_rate": 1.3075707892424237e-08, + "logits/chosen": -1.96748948097229, + "logits/rejected": -1.9666827917099, + "logps/chosen": -8.555306434631348, + "logps/rejected": -87.35020446777344, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30819129943847656, + "rewards/margins": 1.53913152217865, + "rewards/rejected": -1.2309402227401733, + "step": 13258 + }, + { + "epoch": 0.77, + "learning_rate": 1.3069354221226609e-08, + "logits/chosen": -1.8143248558044434, + "logits/rejected": -1.821688175201416, + "logps/chosen": -123.71097564697266, + "logps/rejected": -355.5247802734375, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.250501275062561, + "rewards/margins": 3.3924689292907715, + "rewards/rejected": -2.1419677734375, + "step": 13259 + }, + { + "epoch": 0.77, + "learning_rate": 1.3063001861986162e-08, + "logits/chosen": -2.1347484588623047, + "logits/rejected": -2.129246950149536, + "logps/chosen": -31.308826446533203, + "logps/rejected": -191.90008544921875, + "loss": 0.2329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6336410641670227, + "rewards/margins": 2.789180040359497, + "rewards/rejected": -2.155539035797119, + "step": 13260 + }, + { + "epoch": 0.77, + "learning_rate": 1.3056650814928566e-08, + "logits/chosen": -1.7501060962677002, + "logits/rejected": -1.748246669769287, + "logps/chosen": -6.679023742675781, + "logps/rejected": -102.84549713134766, + "loss": 0.4052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08666854351758957, + "rewards/margins": 0.95078444480896, + "rewards/rejected": -0.8641158938407898, + "step": 13261 + }, + { + "epoch": 0.77, + "learning_rate": 1.3050301080279441e-08, + "logits/chosen": -1.917131781578064, + "logits/rejected": -1.9211759567260742, + "logps/chosen": -139.29498291015625, + "logps/rejected": -248.40609741210938, + "loss": 0.0777, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0074996948242188, + "rewards/margins": 2.743145704269409, + "rewards/rejected": -0.7356460690498352, + "step": 13262 + }, + { + "epoch": 0.77, + "learning_rate": 1.3043952658264384e-08, + "logits/chosen": -1.8735394477844238, + "logits/rejected": -1.835277795791626, + "logps/chosen": -307.23162841796875, + "logps/rejected": -520.463134765625, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.670828342437744, + "rewards/margins": 3.026113986968994, + "rewards/rejected": 0.64471435546875, + "step": 13263 + }, + { + "epoch": 0.77, + "learning_rate": 1.3037605549108865e-08, + "logits/chosen": -1.6128629446029663, + "logits/rejected": -1.601465106010437, + "logps/chosen": -201.68423461914062, + "logps/rejected": -248.07119750976562, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.760650634765625, + "rewards/margins": 0.6069244146347046, + "rewards/rejected": 0.15372620522975922, + "step": 13264 + }, + { + "epoch": 0.77, + "learning_rate": 1.3031259753038432e-08, + "logits/chosen": -2.0012948513031006, + "logits/rejected": -1.9983164072036743, + "logps/chosen": -3.8360252380371094, + "logps/rejected": -185.63092041015625, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19999073445796967, + "rewards/margins": 2.8683977127075195, + "rewards/rejected": -3.0683884620666504, + "step": 13265 + }, + { + "epoch": 0.77, + "learning_rate": 1.302491527027847e-08, + "logits/chosen": -1.9429574012756348, + "logits/rejected": -1.9431390762329102, + "logps/chosen": -5.6027653045021e-05, + "logps/rejected": -173.82894897460938, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.65574145084247e-07, + "rewards/margins": 3.6567130088806152, + "rewards/rejected": -3.6567139625549316, + "step": 13266 + }, + { + "epoch": 0.77, + "learning_rate": 1.3018572101054382e-08, + "logits/chosen": -1.975369930267334, + "logits/rejected": -1.9652355909347534, + "logps/chosen": -0.00036604516208171844, + "logps/rejected": -357.6802673339844, + "loss": 0.3277, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.327578492142493e-06, + "rewards/margins": 6.8134684562683105, + "rewards/rejected": -6.813473701477051, + "step": 13267 + }, + { + "epoch": 0.77, + "learning_rate": 1.301223024559151e-08, + "logits/chosen": -1.9405856132507324, + "logits/rejected": -1.9340933561325073, + "logps/chosen": -5.928608417510986, + "logps/rejected": -158.56813049316406, + "loss": 0.3922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11875996738672256, + "rewards/margins": 1.760332465171814, + "rewards/rejected": -1.8790924549102783, + "step": 13268 + }, + { + "epoch": 0.77, + "learning_rate": 1.3005889704115158e-08, + "logits/chosen": -1.8543514013290405, + "logits/rejected": -1.8616642951965332, + "logps/chosen": -0.0010155936470255256, + "logps/rejected": -88.89845275878906, + "loss": 0.4154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00043224167893640697, + "rewards/margins": 1.9069744348526, + "rewards/rejected": -1.906542181968689, + "step": 13269 + }, + { + "epoch": 0.77, + "learning_rate": 1.2999550476850551e-08, + "logits/chosen": -2.0702908039093018, + "logits/rejected": -2.070171594619751, + "logps/chosen": -0.7077030539512634, + "logps/rejected": -209.34356689453125, + "loss": 0.3519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14351998269557953, + "rewards/margins": 2.4137561321258545, + "rewards/rejected": -2.2702362537384033, + "step": 13270 + }, + { + "epoch": 0.77, + "learning_rate": 1.2993212564022898e-08, + "logits/chosen": -1.9681949615478516, + "logits/rejected": -1.9717339277267456, + "logps/chosen": -1.6785492897033691, + "logps/rejected": -62.41658401489258, + "loss": 0.6697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014550602994859219, + "rewards/margins": 0.10125444829463959, + "rewards/rejected": -0.11580505222082138, + "step": 13271 + }, + { + "epoch": 0.77, + "learning_rate": 1.2986875965857352e-08, + "logits/chosen": -1.893121361732483, + "logits/rejected": -1.904755711555481, + "logps/chosen": -232.04681396484375, + "logps/rejected": -373.22198486328125, + "loss": 0.2106, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6040284633636475, + "rewards/margins": 0.6994813680648804, + "rewards/rejected": 1.904547095298767, + "step": 13272 + }, + { + "epoch": 0.77, + "learning_rate": 1.2980540682579038e-08, + "logits/chosen": -2.018001079559326, + "logits/rejected": -1.9893715381622314, + "logps/chosen": -128.78245544433594, + "logps/rejected": -279.1791076660156, + "loss": 0.1243, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.590890645980835, + "rewards/margins": 1.360748291015625, + "rewards/rejected": 2.23014235496521, + "step": 13273 + }, + { + "epoch": 0.77, + "learning_rate": 1.2974206714412989e-08, + "logits/chosen": -1.8595232963562012, + "logits/rejected": -1.9310215711593628, + "logps/chosen": -375.1123046875, + "logps/rejected": -480.2628173828125, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5085084438323975, + "rewards/margins": 3.270691156387329, + "rewards/rejected": 0.23781739175319672, + "step": 13274 + }, + { + "epoch": 0.77, + "learning_rate": 1.2967874061584227e-08, + "logits/chosen": -1.9396151304244995, + "logits/rejected": -1.938420295715332, + "logps/chosen": -29.54237937927246, + "logps/rejected": -71.42144775390625, + "loss": 0.6436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10577621310949326, + "rewards/margins": 0.07316818088293076, + "rewards/rejected": -0.178944393992424, + "step": 13275 + }, + { + "epoch": 0.77, + "learning_rate": 1.2961542724317726e-08, + "logits/chosen": -1.9440011978149414, + "logits/rejected": -1.9404687881469727, + "logps/chosen": -8.042021751403809, + "logps/rejected": -111.5625, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4149596393108368, + "rewards/margins": 1.7285773754119873, + "rewards/rejected": -1.3136177062988281, + "step": 13276 + }, + { + "epoch": 0.77, + "learning_rate": 1.2955212702838413e-08, + "logits/chosen": -1.960537075996399, + "logits/rejected": -1.9496153593063354, + "logps/chosen": -186.34841918945312, + "logps/rejected": -441.0072021484375, + "loss": 0.0498, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6755249500274658, + "rewards/margins": 4.757568359375, + "rewards/rejected": -3.082043409347534, + "step": 13277 + }, + { + "epoch": 0.77, + "learning_rate": 1.294888399737114e-08, + "logits/chosen": -1.6085487604141235, + "logits/rejected": -1.6035548448562622, + "logps/chosen": -178.2574462890625, + "logps/rejected": -436.5198059082031, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.038098096847534, + "rewards/margins": 4.1433563232421875, + "rewards/rejected": -2.1052582263946533, + "step": 13278 + }, + { + "epoch": 0.77, + "learning_rate": 1.2942556608140742e-08, + "logits/chosen": -2.0655009746551514, + "logits/rejected": -2.062349557876587, + "logps/chosen": -0.008160115219652653, + "logps/rejected": -134.48171997070312, + "loss": 0.5119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010081739164888859, + "rewards/margins": 0.9632396101951599, + "rewards/rejected": -0.962231457233429, + "step": 13279 + }, + { + "epoch": 0.77, + "learning_rate": 1.2936230535372e-08, + "logits/chosen": -1.7865301370620728, + "logits/rejected": -1.802217960357666, + "logps/chosen": -155.2956085205078, + "logps/rejected": -329.86029052734375, + "loss": 0.1627, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3979904651641846, + "rewards/margins": 1.2248337268829346, + "rewards/rejected": 0.17315673828125, + "step": 13280 + }, + { + "epoch": 0.77, + "learning_rate": 1.2929905779289663e-08, + "logits/chosen": -1.9114811420440674, + "logits/rejected": -1.9004253149032593, + "logps/chosen": -265.71234130859375, + "logps/rejected": -311.2680358886719, + "loss": 0.1421, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1167969703674316, + "rewards/margins": 1.1609528064727783, + "rewards/rejected": 1.9558441638946533, + "step": 13281 + }, + { + "epoch": 0.77, + "learning_rate": 1.2923582340118383e-08, + "logits/chosen": -1.8625233173370361, + "logits/rejected": -1.8541407585144043, + "logps/chosen": -198.49124145507812, + "logps/rejected": -264.608154296875, + "loss": 0.3697, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.4936081171035767, + "rewards/margins": -0.0023391246795654297, + "rewards/rejected": 1.495947241783142, + "step": 13282 + }, + { + "epoch": 0.77, + "learning_rate": 1.291726021808281e-08, + "logits/chosen": -2.0560057163238525, + "logits/rejected": -2.0531814098358154, + "logps/chosen": -17.778772354125977, + "logps/rejected": -167.4175262451172, + "loss": 0.1901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7229175567626953, + "rewards/margins": 2.7324345111846924, + "rewards/rejected": -2.009516954421997, + "step": 13283 + }, + { + "epoch": 0.77, + "learning_rate": 1.2910939413407573e-08, + "logits/chosen": -1.8391315937042236, + "logits/rejected": -1.8747122287750244, + "logps/chosen": -209.63833618164062, + "logps/rejected": -540.2224731445312, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6728423833847046, + "rewards/margins": 6.175470352172852, + "rewards/rejected": -4.502627849578857, + "step": 13284 + }, + { + "epoch": 0.77, + "learning_rate": 1.2904619926317178e-08, + "logits/chosen": -1.8085254430770874, + "logits/rejected": -1.7916241884231567, + "logps/chosen": -81.68655395507812, + "logps/rejected": -234.44607543945312, + "loss": 0.1276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.08488929271698, + "rewards/margins": 3.620845317840576, + "rewards/rejected": -2.5359559059143066, + "step": 13285 + }, + { + "epoch": 0.77, + "learning_rate": 1.2898301757036134e-08, + "logits/chosen": -1.7543319463729858, + "logits/rejected": -1.722395420074463, + "logps/chosen": -220.35296630859375, + "logps/rejected": -571.438232421875, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9837067127227783, + "rewards/margins": 4.000967502593994, + "rewards/rejected": -1.0172607898712158, + "step": 13286 + }, + { + "epoch": 0.77, + "learning_rate": 1.2891984905788894e-08, + "logits/chosen": -1.827736735343933, + "logits/rejected": -1.8184432983398438, + "logps/chosen": -5.9007950767409056e-05, + "logps/rejected": -175.54367065429688, + "loss": 0.487, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6226408067486773e-07, + "rewards/margins": 1.1221739053726196, + "rewards/rejected": -1.1221741437911987, + "step": 13287 + }, + { + "epoch": 0.77, + "learning_rate": 1.2885669372799879e-08, + "logits/chosen": -2.051466464996338, + "logits/rejected": -2.0441744327545166, + "logps/chosen": -18.672346115112305, + "logps/rejected": -162.97857666015625, + "loss": 0.2161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46956273913383484, + "rewards/margins": 2.338193655014038, + "rewards/rejected": -1.8686310052871704, + "step": 13288 + }, + { + "epoch": 0.77, + "learning_rate": 1.287935515829342e-08, + "logits/chosen": -1.851996660232544, + "logits/rejected": -1.8669543266296387, + "logps/chosen": -328.7089538574219, + "logps/rejected": -430.66973876953125, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5975677967071533, + "rewards/margins": 3.536489963531494, + "rewards/rejected": -0.938922107219696, + "step": 13289 + }, + { + "epoch": 0.77, + "learning_rate": 1.2873042262493843e-08, + "logits/chosen": -1.8539661169052124, + "logits/rejected": -1.9022642374038696, + "logps/chosen": -185.3037109375, + "logps/rejected": -394.1349792480469, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.617864966392517, + "rewards/margins": 3.036181688308716, + "rewards/rejected": -1.4183167219161987, + "step": 13290 + }, + { + "epoch": 0.77, + "learning_rate": 1.2866730685625415e-08, + "logits/chosen": -1.9975190162658691, + "logits/rejected": -1.995822787284851, + "logps/chosen": -0.00704673957079649, + "logps/rejected": -207.60479736328125, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.102698272210546e-05, + "rewards/margins": 5.003129482269287, + "rewards/rejected": -5.003108501434326, + "step": 13291 + }, + { + "epoch": 0.77, + "learning_rate": 1.2860420427912367e-08, + "logits/chosen": -2.06282377243042, + "logits/rejected": -2.059095859527588, + "logps/chosen": -0.004621520638465881, + "logps/rejected": -235.4779510498047, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0029397455509752035, + "rewards/margins": 4.981634140014648, + "rewards/rejected": -4.978694438934326, + "step": 13292 + }, + { + "epoch": 0.77, + "learning_rate": 1.2854111489578834e-08, + "logits/chosen": -1.7837222814559937, + "logits/rejected": -1.797838568687439, + "logps/chosen": -110.89601135253906, + "logps/rejected": -356.7841491699219, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6883705854415894, + "rewards/margins": 3.305112600326538, + "rewards/rejected": -1.6167420148849487, + "step": 13293 + }, + { + "epoch": 0.77, + "learning_rate": 1.2847803870848966e-08, + "logits/chosen": -1.9021642208099365, + "logits/rejected": -1.8989412784576416, + "logps/chosen": -19.824216842651367, + "logps/rejected": -207.68357849121094, + "loss": 0.2197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3548690974712372, + "rewards/margins": 2.938913106918335, + "rewards/rejected": -2.5840439796447754, + "step": 13294 + }, + { + "epoch": 0.77, + "learning_rate": 1.2841497571946835e-08, + "logits/chosen": -1.9157497882843018, + "logits/rejected": -1.921891450881958, + "logps/chosen": -289.0103759765625, + "logps/rejected": -607.5022583007812, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6686553955078125, + "rewards/margins": 11.943942070007324, + "rewards/rejected": -10.275286674499512, + "step": 13295 + }, + { + "epoch": 0.77, + "learning_rate": 1.2835192593096484e-08, + "logits/chosen": -1.8201922178268433, + "logits/rejected": -1.8164002895355225, + "logps/chosen": -11.861200332641602, + "logps/rejected": -180.95101928710938, + "loss": 0.4107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05444974824786186, + "rewards/margins": 1.7890697717666626, + "rewards/rejected": -1.8435195684432983, + "step": 13296 + }, + { + "epoch": 0.77, + "learning_rate": 1.282888893452187e-08, + "logits/chosen": -1.8708577156066895, + "logits/rejected": -1.8649001121520996, + "logps/chosen": -0.06073901057243347, + "logps/rejected": -178.51593017578125, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019449122482910752, + "rewards/margins": 2.7493510246276855, + "rewards/rejected": -2.747406005859375, + "step": 13297 + }, + { + "epoch": 0.77, + "learning_rate": 1.2822586596446949e-08, + "logits/chosen": -2.1283576488494873, + "logits/rejected": -2.0528547763824463, + "logps/chosen": -222.4149627685547, + "logps/rejected": -430.79510498046875, + "loss": 0.197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6600769758224487, + "rewards/margins": 0.8499634265899658, + "rewards/rejected": 0.8101135492324829, + "step": 13298 + }, + { + "epoch": 0.77, + "learning_rate": 1.2816285579095605e-08, + "logits/chosen": -1.7217442989349365, + "logits/rejected": -1.7251195907592773, + "logps/chosen": -166.80067443847656, + "logps/rejected": -338.8165283203125, + "loss": 0.1513, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.028207540512085, + "rewards/margins": 1.1280274391174316, + "rewards/rejected": 1.9001801013946533, + "step": 13299 + }, + { + "epoch": 0.77, + "learning_rate": 1.2809985882691693e-08, + "logits/chosen": -2.031984806060791, + "logits/rejected": -2.0175180435180664, + "logps/chosen": -10.20832633972168, + "logps/rejected": -121.02192687988281, + "loss": 0.4394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2710041105747223, + "rewards/margins": 0.9323101043701172, + "rewards/rejected": -0.6613060235977173, + "step": 13300 + }, + { + "epoch": 0.77, + "learning_rate": 1.2803687507458988e-08, + "logits/chosen": -2.084704637527466, + "logits/rejected": -2.0821709632873535, + "logps/chosen": -4.444011211395264, + "logps/rejected": -178.50006103515625, + "loss": 0.4012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06994809955358505, + "rewards/margins": 1.514973759651184, + "rewards/rejected": -1.4450256824493408, + "step": 13301 + }, + { + "epoch": 0.77, + "learning_rate": 1.2797390453621231e-08, + "logits/chosen": -2.0103695392608643, + "logits/rejected": -2.060553789138794, + "logps/chosen": -180.83062744140625, + "logps/rejected": -335.80218505859375, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2121522426605225, + "rewards/margins": 3.5440707206726074, + "rewards/rejected": -1.3319183588027954, + "step": 13302 + }, + { + "epoch": 0.77, + "learning_rate": 1.2791094721402179e-08, + "logits/chosen": -1.8872349262237549, + "logits/rejected": -1.8748583793640137, + "logps/chosen": -180.78477478027344, + "logps/rejected": -392.14947509765625, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.259033203125, + "rewards/margins": 3.6729979515075684, + "rewards/rejected": -1.413964867591858, + "step": 13303 + }, + { + "epoch": 0.77, + "learning_rate": 1.2784800311025428e-08, + "logits/chosen": -1.9226386547088623, + "logits/rejected": -1.923306941986084, + "logps/chosen": -26.732376098632812, + "logps/rejected": -108.32963562011719, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4852888286113739, + "rewards/margins": 1.565848469734192, + "rewards/rejected": -1.0805596113204956, + "step": 13304 + }, + { + "epoch": 0.77, + "learning_rate": 1.277850722271463e-08, + "logits/chosen": -1.955530047416687, + "logits/rejected": -1.945683240890503, + "logps/chosen": -6.969608306884766, + "logps/rejected": -181.90541076660156, + "loss": 0.4072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23950181901454926, + "rewards/margins": 3.263594388961792, + "rewards/rejected": -3.503096103668213, + "step": 13305 + }, + { + "epoch": 0.77, + "learning_rate": 1.2772215456693298e-08, + "logits/chosen": -1.8748151063919067, + "logits/rejected": -1.87721586227417, + "logps/chosen": -63.40892791748047, + "logps/rejected": -268.8147277832031, + "loss": 0.0811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3562554121017456, + "rewards/margins": 4.3420586585998535, + "rewards/rejected": -2.9858033657073975, + "step": 13306 + }, + { + "epoch": 0.77, + "learning_rate": 1.2765925013184997e-08, + "logits/chosen": -1.9684927463531494, + "logits/rejected": -1.9761784076690674, + "logps/chosen": -58.07444381713867, + "logps/rejected": -173.52374267578125, + "loss": 0.5401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3171466886997223, + "rewards/margins": 1.3618007898330688, + "rewards/rejected": -1.6789474487304688, + "step": 13307 + }, + { + "epoch": 0.77, + "learning_rate": 1.2759635892413157e-08, + "logits/chosen": -1.8540093898773193, + "logits/rejected": -1.8428014516830444, + "logps/chosen": -3.0517227060045116e-05, + "logps/rejected": -260.9467468261719, + "loss": 0.345, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1457017851389537e-07, + "rewards/margins": 6.899816989898682, + "rewards/rejected": -6.899816989898682, + "step": 13308 + }, + { + "epoch": 0.77, + "learning_rate": 1.2753348094601212e-08, + "logits/chosen": -1.9180831909179688, + "logits/rejected": -1.932241439819336, + "logps/chosen": -127.98462677001953, + "logps/rejected": -316.87005615234375, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.360563039779663, + "rewards/margins": 2.9004952907562256, + "rewards/rejected": 0.4600677490234375, + "step": 13309 + }, + { + "epoch": 0.77, + "learning_rate": 1.2747061619972539e-08, + "logits/chosen": -1.9325714111328125, + "logits/rejected": -1.9108836650848389, + "logps/chosen": -203.10711669921875, + "logps/rejected": -460.7601318359375, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9816253781318665, + "rewards/margins": 4.49267578125, + "rewards/rejected": -3.5110504627227783, + "step": 13310 + }, + { + "epoch": 0.77, + "learning_rate": 1.2740776468750474e-08, + "logits/chosen": -2.1027376651763916, + "logits/rejected": -2.095484495162964, + "logps/chosen": -50.36132049560547, + "logps/rejected": -257.1621398925781, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06960830837488174, + "rewards/margins": 3.5016930103302, + "rewards/rejected": -3.571301221847534, + "step": 13311 + }, + { + "epoch": 0.77, + "learning_rate": 1.2734492641158267e-08, + "logits/chosen": -1.746225357055664, + "logits/rejected": -1.7278714179992676, + "logps/chosen": -185.34808349609375, + "logps/rejected": -307.4677734375, + "loss": 0.0974, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5306808948516846, + "rewards/margins": 1.898280382156372, + "rewards/rejected": 0.6324005126953125, + "step": 13312 + }, + { + "epoch": 0.77, + "learning_rate": 1.272821013741917e-08, + "logits/chosen": -2.2004735469818115, + "logits/rejected": -2.202716827392578, + "logps/chosen": -14.015138626098633, + "logps/rejected": -136.208984375, + "loss": 0.473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09982176125049591, + "rewards/margins": 1.1526042222976685, + "rewards/rejected": -1.052782416343689, + "step": 13313 + }, + { + "epoch": 0.77, + "learning_rate": 1.2721928957756368e-08, + "logits/chosen": -1.7353699207305908, + "logits/rejected": -1.7252442836761475, + "logps/chosen": -8.99890422821045, + "logps/rejected": -110.53266906738281, + "loss": 0.455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02425088919699192, + "rewards/margins": 1.1804215908050537, + "rewards/rejected": -1.1561707258224487, + "step": 13314 + }, + { + "epoch": 0.77, + "learning_rate": 1.2715649102393006e-08, + "logits/chosen": -1.8312686681747437, + "logits/rejected": -1.8302215337753296, + "logps/chosen": -35.79718017578125, + "logps/rejected": -169.61788940429688, + "loss": 0.5374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3916641175746918, + "rewards/margins": 0.2637802064418793, + "rewards/rejected": 0.1278839111328125, + "step": 13315 + }, + { + "epoch": 0.77, + "learning_rate": 1.2709370571552148e-08, + "logits/chosen": -1.7213319540023804, + "logits/rejected": -1.700878381729126, + "logps/chosen": -185.39279174804688, + "logps/rejected": -348.488037109375, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.98602294921875, + "rewards/margins": 3.86474609375, + "rewards/rejected": -1.87872314453125, + "step": 13316 + }, + { + "epoch": 0.77, + "learning_rate": 1.2703093365456863e-08, + "logits/chosen": -1.9204963445663452, + "logits/rejected": -1.902582049369812, + "logps/chosen": -146.1970977783203, + "logps/rejected": -383.889892578125, + "loss": 0.2311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2943588197231293, + "rewards/margins": 2.661015272140503, + "rewards/rejected": -2.366656541824341, + "step": 13317 + }, + { + "epoch": 0.78, + "learning_rate": 1.2696817484330136e-08, + "logits/chosen": -2.1706323623657227, + "logits/rejected": -2.16723370552063, + "logps/chosen": -33.989707946777344, + "logps/rejected": -251.9271240234375, + "loss": 0.4531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37008363008499146, + "rewards/margins": 2.2687714099884033, + "rewards/rejected": -2.63885498046875, + "step": 13318 + }, + { + "epoch": 0.78, + "learning_rate": 1.2690542928394932e-08, + "logits/chosen": -1.824662685394287, + "logits/rejected": -1.8307603597640991, + "logps/chosen": -83.20289611816406, + "logps/rejected": -382.8729553222656, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7845535278320312, + "rewards/margins": 4.73906135559082, + "rewards/rejected": -2.95450758934021, + "step": 13319 + }, + { + "epoch": 0.78, + "learning_rate": 1.2684269697874134e-08, + "logits/chosen": -2.045503616333008, + "logits/rejected": -2.0725693702697754, + "logps/chosen": -271.29217529296875, + "logps/rejected": -553.1109619140625, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.098065137863159, + "rewards/margins": 7.218908309936523, + "rewards/rejected": -5.120843410491943, + "step": 13320 + }, + { + "epoch": 0.78, + "learning_rate": 1.2677997792990608e-08, + "logits/chosen": -1.8757174015045166, + "logits/rejected": -1.8740452527999878, + "logps/chosen": -48.65983581542969, + "logps/rejected": -251.74102783203125, + "loss": 0.2172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5829952359199524, + "rewards/margins": 2.9823970794677734, + "rewards/rejected": -2.399401903152466, + "step": 13321 + }, + { + "epoch": 0.78, + "learning_rate": 1.267172721396716e-08, + "logits/chosen": -1.9183818101882935, + "logits/rejected": -1.9002901315689087, + "logps/chosen": -100.86911010742188, + "logps/rejected": -275.8768615722656, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.664134979248047, + "rewards/margins": 2.6632347106933594, + "rewards/rejected": 0.0009002685546875, + "step": 13322 + }, + { + "epoch": 0.78, + "learning_rate": 1.2665457961026555e-08, + "logits/chosen": -2.0522987842559814, + "logits/rejected": -2.051500082015991, + "logps/chosen": -5.453217029571533, + "logps/rejected": -184.42677307128906, + "loss": 0.3765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05301318317651749, + "rewards/margins": 2.318540573120117, + "rewards/rejected": -2.2655274868011475, + "step": 13323 + }, + { + "epoch": 0.78, + "learning_rate": 1.265919003439152e-08, + "logits/chosen": -1.7869517803192139, + "logits/rejected": -1.818613886833191, + "logps/chosen": -378.6048889160156, + "logps/rejected": -610.9497680664062, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5936615467071533, + "rewards/margins": 10.254837036132812, + "rewards/rejected": -7.661175727844238, + "step": 13324 + }, + { + "epoch": 0.78, + "learning_rate": 1.2652923434284679e-08, + "logits/chosen": -1.9132431745529175, + "logits/rejected": -1.9001319408416748, + "logps/chosen": -198.3968963623047, + "logps/rejected": -299.05535888671875, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5284042358398438, + "rewards/margins": 3.508741855621338, + "rewards/rejected": -0.9803375601768494, + "step": 13325 + }, + { + "epoch": 0.78, + "learning_rate": 1.2646658160928714e-08, + "logits/chosen": -1.8297640085220337, + "logits/rejected": -1.8297438621520996, + "logps/chosen": -142.747802734375, + "logps/rejected": -244.05203247070312, + "loss": 0.1576, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0623931884765625, + "rewards/margins": 2.289320468902588, + "rewards/rejected": -1.2269271612167358, + "step": 13326 + }, + { + "epoch": 0.78, + "learning_rate": 1.2640394214546146e-08, + "logits/chosen": -1.943372130393982, + "logits/rejected": -1.941156268119812, + "logps/chosen": -51.08988571166992, + "logps/rejected": -238.66636657714844, + "loss": 0.1944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7673808932304382, + "rewards/margins": 3.242283344268799, + "rewards/rejected": -2.474902391433716, + "step": 13327 + }, + { + "epoch": 0.78, + "learning_rate": 1.2634131595359526e-08, + "logits/chosen": -1.7959656715393066, + "logits/rejected": -1.8070942163467407, + "logps/chosen": -3.5694544315338135, + "logps/rejected": -175.53993225097656, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36996757984161377, + "rewards/margins": 1.3707351684570312, + "rewards/rejected": -1.0007675886154175, + "step": 13328 + }, + { + "epoch": 0.78, + "learning_rate": 1.2627870303591326e-08, + "logits/chosen": -1.9081146717071533, + "logits/rejected": -1.9115097522735596, + "logps/chosen": -116.79461669921875, + "logps/rejected": -194.48416137695312, + "loss": 0.6692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6954178214073181, + "rewards/margins": 0.3785843253135681, + "rewards/rejected": -1.0740021467208862, + "step": 13329 + }, + { + "epoch": 0.78, + "learning_rate": 1.2621610339463995e-08, + "logits/chosen": -2.0165579319000244, + "logits/rejected": -2.0311806201934814, + "logps/chosen": -236.4761962890625, + "logps/rejected": -335.1793212890625, + "loss": 0.2528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14443360269069672, + "rewards/margins": 2.2753357887268066, + "rewards/rejected": -2.419769287109375, + "step": 13330 + }, + { + "epoch": 0.78, + "learning_rate": 1.2615351703199889e-08, + "logits/chosen": -1.774440050125122, + "logits/rejected": -1.777892827987671, + "logps/chosen": -1.4109982252120972, + "logps/rejected": -209.47935485839844, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00960307102650404, + "rewards/margins": 3.3936805725097656, + "rewards/rejected": -3.384077548980713, + "step": 13331 + }, + { + "epoch": 0.78, + "learning_rate": 1.2609094395021353e-08, + "logits/chosen": -1.996125340461731, + "logits/rejected": -1.993192195892334, + "logps/chosen": -0.010222035460174084, + "logps/rejected": -84.5217056274414, + "loss": 0.3801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011472273763502017, + "rewards/margins": 2.1191492080688477, + "rewards/rejected": -2.1192638874053955, + "step": 13332 + }, + { + "epoch": 0.78, + "learning_rate": 1.2602838415150685e-08, + "logits/chosen": -1.9344271421432495, + "logits/rejected": -1.8585597276687622, + "logps/chosen": -249.8460693359375, + "logps/rejected": -391.11187744140625, + "loss": 0.1398, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.39847731590271, + "rewards/margins": 1.5300233364105225, + "rewards/rejected": 0.8684539794921875, + "step": 13333 + }, + { + "epoch": 0.78, + "learning_rate": 1.2596583763810142e-08, + "logits/chosen": -1.6937263011932373, + "logits/rejected": -1.6995428800582886, + "logps/chosen": -8.164935111999512, + "logps/rejected": -182.431884765625, + "loss": 0.2568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4691827893257141, + "rewards/margins": 2.548891544342041, + "rewards/rejected": -2.0797088146209717, + "step": 13334 + }, + { + "epoch": 0.78, + "learning_rate": 1.259033044122189e-08, + "logits/chosen": -2.010573387145996, + "logits/rejected": -1.9990510940551758, + "logps/chosen": -5.4358453780878335e-05, + "logps/rejected": -181.25628662109375, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.887319278168434e-07, + "rewards/margins": 4.395071983337402, + "rewards/rejected": -4.395071506500244, + "step": 13335 + }, + { + "epoch": 0.78, + "learning_rate": 1.2584078447608093e-08, + "logits/chosen": -1.9342790842056274, + "logits/rejected": -1.932713270187378, + "logps/chosen": -0.0061414241790771484, + "logps/rejected": -175.66665649414062, + "loss": 0.3353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002632847463246435, + "rewards/margins": 4.149270057678223, + "rewards/rejected": -4.149533271789551, + "step": 13336 + }, + { + "epoch": 0.78, + "learning_rate": 1.2577827783190848e-08, + "logits/chosen": -2.051600456237793, + "logits/rejected": -2.0375232696533203, + "logps/chosen": -72.4469223022461, + "logps/rejected": -167.1434326171875, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2875007688999176, + "rewards/margins": 2.640866994857788, + "rewards/rejected": -2.3533661365509033, + "step": 13337 + }, + { + "epoch": 0.78, + "learning_rate": 1.2571578448192222e-08, + "logits/chosen": -1.6667907238006592, + "logits/rejected": -1.6800048351287842, + "logps/chosen": -217.059326171875, + "logps/rejected": -428.85235595703125, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6628968715667725, + "rewards/margins": 4.023306369781494, + "rewards/rejected": -1.3604096174240112, + "step": 13338 + }, + { + "epoch": 0.78, + "learning_rate": 1.2565330442834204e-08, + "logits/chosen": -1.982086420059204, + "logits/rejected": -1.9699203968048096, + "logps/chosen": -113.91537475585938, + "logps/rejected": -341.5393371582031, + "loss": 0.2421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4837242066860199, + "rewards/margins": 1.7613792419433594, + "rewards/rejected": -1.277655005455017, + "step": 13339 + }, + { + "epoch": 0.78, + "learning_rate": 1.2559083767338757e-08, + "logits/chosen": -1.8247982263565063, + "logits/rejected": -1.8415710926055908, + "logps/chosen": -146.76284790039062, + "logps/rejected": -458.9331359863281, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6290924549102783, + "rewards/margins": 9.840002059936523, + "rewards/rejected": -8.210909843444824, + "step": 13340 + }, + { + "epoch": 0.78, + "learning_rate": 1.2552838421927797e-08, + "logits/chosen": -1.6674160957336426, + "logits/rejected": -1.6722906827926636, + "logps/chosen": -4.230072021484375, + "logps/rejected": -129.89280700683594, + "loss": 0.4563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024187421426177025, + "rewards/margins": 1.3529754877090454, + "rewards/rejected": -1.3771629333496094, + "step": 13341 + }, + { + "epoch": 0.78, + "learning_rate": 1.2546594406823207e-08, + "logits/chosen": -1.7934513092041016, + "logits/rejected": -1.7550549507141113, + "logps/chosen": -251.44342041015625, + "logps/rejected": -490.65081787109375, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9444763660430908, + "rewards/margins": 2.430621385574341, + "rewards/rejected": -0.48614501953125, + "step": 13342 + }, + { + "epoch": 0.78, + "learning_rate": 1.2540351722246777e-08, + "logits/chosen": -2.0039899349212646, + "logits/rejected": -2.0017237663269043, + "logps/chosen": -0.4994658827781677, + "logps/rejected": -74.2350082397461, + "loss": 0.5639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015767930075526237, + "rewards/margins": 0.29850515723228455, + "rewards/rejected": -0.31427308917045593, + "step": 13343 + }, + { + "epoch": 0.78, + "learning_rate": 1.253411036842027e-08, + "logits/chosen": -1.9232394695281982, + "logits/rejected": -1.9238582849502563, + "logps/chosen": -41.96731185913086, + "logps/rejected": -200.98291015625, + "loss": 0.5841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.493011474609375, + "rewards/margins": 2.7578704357147217, + "rewards/rejected": -3.2508819103240967, + "step": 13344 + }, + { + "epoch": 0.78, + "learning_rate": 1.252787034556546e-08, + "logits/chosen": -1.859595775604248, + "logits/rejected": -1.8627771139144897, + "logps/chosen": -215.358642578125, + "logps/rejected": -303.7292785644531, + "loss": 0.4228, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.892193555831909, + "rewards/margins": -0.24734807014465332, + "rewards/rejected": 3.1395416259765625, + "step": 13345 + }, + { + "epoch": 0.78, + "learning_rate": 1.2521631653903975e-08, + "logits/chosen": -1.6885654926300049, + "logits/rejected": -1.7386460304260254, + "logps/chosen": -314.54486083984375, + "logps/rejected": -328.2641906738281, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8325653076171875, + "rewards/margins": 2.7716917991638184, + "rewards/rejected": -0.9391266107559204, + "step": 13346 + }, + { + "epoch": 0.78, + "learning_rate": 1.251539429365746e-08, + "logits/chosen": -1.9962480068206787, + "logits/rejected": -1.9958510398864746, + "logps/chosen": -5.571549415588379, + "logps/rejected": -203.7754669189453, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1660441905260086, + "rewards/margins": 3.1290111541748047, + "rewards/rejected": -2.9629669189453125, + "step": 13347 + }, + { + "epoch": 0.78, + "learning_rate": 1.2509158265047499e-08, + "logits/chosen": -1.7423460483551025, + "logits/rejected": -1.7453618049621582, + "logps/chosen": -17.395566940307617, + "logps/rejected": -129.26315307617188, + "loss": 0.1837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9637998938560486, + "rewards/margins": 2.490333318710327, + "rewards/rejected": -1.5265334844589233, + "step": 13348 + }, + { + "epoch": 0.78, + "learning_rate": 1.2502923568295631e-08, + "logits/chosen": -1.7626596689224243, + "logits/rejected": -1.7663702964782715, + "logps/chosen": -259.64849853515625, + "logps/rejected": -418.4400634765625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.111215353012085, + "rewards/margins": 5.251407146453857, + "rewards/rejected": -2.1401917934417725, + "step": 13349 + }, + { + "epoch": 0.78, + "learning_rate": 1.2496690203623327e-08, + "logits/chosen": -1.910874605178833, + "logits/rejected": -1.9050920009613037, + "logps/chosen": -0.13185900449752808, + "logps/rejected": -282.0192565917969, + "loss": 0.329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0047802091576159, + "rewards/margins": 5.725824356079102, + "rewards/rejected": -5.730604648590088, + "step": 13350 + }, + { + "epoch": 0.78, + "learning_rate": 1.2490458171252038e-08, + "logits/chosen": -2.00787091255188, + "logits/rejected": -1.994479775428772, + "logps/chosen": -25.065155029296875, + "logps/rejected": -158.0124969482422, + "loss": 0.4415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2760152816772461, + "rewards/margins": 0.43346309661865234, + "rewards/rejected": -0.15744781494140625, + "step": 13351 + }, + { + "epoch": 0.78, + "learning_rate": 1.2484227471403148e-08, + "logits/chosen": -1.8088897466659546, + "logits/rejected": -1.7916511297225952, + "logps/chosen": -211.22286987304688, + "logps/rejected": -299.50506591796875, + "loss": 0.0884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.962243676185608, + "rewards/margins": 2.3332152366638184, + "rewards/rejected": -0.3709716796875, + "step": 13352 + }, + { + "epoch": 0.78, + "learning_rate": 1.2477998104298026e-08, + "logits/chosen": -1.8107962608337402, + "logits/rejected": -1.7939517498016357, + "logps/chosen": -227.79074096679688, + "logps/rejected": -275.36932373046875, + "loss": 0.2376, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6548004150390625, + "rewards/margins": 0.8826263546943665, + "rewards/rejected": 0.772174060344696, + "step": 13353 + }, + { + "epoch": 0.78, + "learning_rate": 1.247177007015794e-08, + "logits/chosen": -1.7976899147033691, + "logits/rejected": -1.8166946172714233, + "logps/chosen": -211.84957885742188, + "logps/rejected": -377.27899169921875, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.789912462234497, + "rewards/margins": 6.530153274536133, + "rewards/rejected": -4.740240573883057, + "step": 13354 + }, + { + "epoch": 0.78, + "learning_rate": 1.2465543369204146e-08, + "logits/chosen": -1.8065332174301147, + "logits/rejected": -1.7947590351104736, + "logps/chosen": -48.01290512084961, + "logps/rejected": -187.75094604492188, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38944587111473083, + "rewards/margins": 2.1399402618408203, + "rewards/rejected": -1.750494360923767, + "step": 13355 + }, + { + "epoch": 0.78, + "learning_rate": 1.2459318001657853e-08, + "logits/chosen": -1.8894925117492676, + "logits/rejected": -1.8780300617218018, + "logps/chosen": -0.015114694833755493, + "logps/rejected": -152.70169067382812, + "loss": 0.4441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001395428436808288, + "rewards/margins": 1.4480369091033936, + "rewards/rejected": -1.449432373046875, + "step": 13356 + }, + { + "epoch": 0.78, + "learning_rate": 1.245309396774023e-08, + "logits/chosen": -1.918265700340271, + "logits/rejected": -1.917885184288025, + "logps/chosen": -41.54108810424805, + "logps/rejected": -124.91173553466797, + "loss": 0.1295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4925835132598877, + "rewards/margins": 1.9541969299316406, + "rewards/rejected": -0.4616134762763977, + "step": 13357 + }, + { + "epoch": 0.78, + "learning_rate": 1.244687126767236e-08, + "logits/chosen": -1.884279727935791, + "logits/rejected": -1.8847129344940186, + "logps/chosen": -46.55527114868164, + "logps/rejected": -329.3447265625, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.646150588989258, + "rewards/margins": 8.409563064575195, + "rewards/rejected": -5.7634124755859375, + "step": 13358 + }, + { + "epoch": 0.78, + "learning_rate": 1.244064990167531e-08, + "logits/chosen": -2.0559136867523193, + "logits/rejected": -2.0513758659362793, + "logps/chosen": -33.688209533691406, + "logps/rejected": -278.26910400390625, + "loss": 0.4422, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39627495408058167, + "rewards/margins": 5.527322769165039, + "rewards/rejected": -5.923597812652588, + "step": 13359 + }, + { + "epoch": 0.78, + "learning_rate": 1.2434429869970103e-08, + "logits/chosen": -1.9444063901901245, + "logits/rejected": -1.9387474060058594, + "logps/chosen": -23.438493728637695, + "logps/rejected": -142.58192443847656, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6549766659736633, + "rewards/margins": 0.877149224281311, + "rewards/rejected": -0.2221725434064865, + "step": 13360 + }, + { + "epoch": 0.78, + "learning_rate": 1.2428211172777708e-08, + "logits/chosen": -1.864843487739563, + "logits/rejected": -1.8676798343658447, + "logps/chosen": -16.864913940429688, + "logps/rejected": -287.38226318359375, + "loss": 0.253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4112991392612457, + "rewards/margins": 4.692659378051758, + "rewards/rejected": -4.281360149383545, + "step": 13361 + }, + { + "epoch": 0.78, + "learning_rate": 1.2421993810319025e-08, + "logits/chosen": -1.61054527759552, + "logits/rejected": -1.6094133853912354, + "logps/chosen": -218.81613159179688, + "logps/rejected": -346.6458740234375, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8701752424240112, + "rewards/margins": 3.316854953765869, + "rewards/rejected": -1.446679711341858, + "step": 13362 + }, + { + "epoch": 0.78, + "learning_rate": 1.2415777782814923e-08, + "logits/chosen": -1.883626103401184, + "logits/rejected": -1.8992908000946045, + "logps/chosen": -0.36574047803878784, + "logps/rejected": -213.31529235839844, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1070699691772461, + "rewards/margins": 6.065567493438721, + "rewards/rejected": -5.958497524261475, + "step": 13363 + }, + { + "epoch": 0.78, + "learning_rate": 1.240956309048627e-08, + "logits/chosen": -1.9225791692733765, + "logits/rejected": -1.9081568717956543, + "logps/chosen": -158.24652099609375, + "logps/rejected": -323.2332763671875, + "loss": 0.103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7690826654434204, + "rewards/margins": 2.457263231277466, + "rewards/rejected": -0.6881805658340454, + "step": 13364 + }, + { + "epoch": 0.78, + "learning_rate": 1.2403349733553792e-08, + "logits/chosen": -1.6865689754486084, + "logits/rejected": -1.631142258644104, + "logps/chosen": -209.86856079101562, + "logps/rejected": -326.8162841796875, + "loss": 0.1063, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.170030355453491, + "rewards/margins": 2.265934944152832, + "rewards/rejected": -0.09590454399585724, + "step": 13365 + }, + { + "epoch": 0.78, + "learning_rate": 1.2397137712238247e-08, + "logits/chosen": -2.061389684677124, + "logits/rejected": -2.0505802631378174, + "logps/chosen": -215.3381805419922, + "logps/rejected": -346.0098876953125, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42075347900390625, + "rewards/margins": 0.6908676624298096, + "rewards/rejected": -1.1116211414337158, + "step": 13366 + }, + { + "epoch": 0.78, + "learning_rate": 1.2390927026760284e-08, + "logits/chosen": -1.7506825923919678, + "logits/rejected": -1.755103588104248, + "logps/chosen": -347.9797668457031, + "logps/rejected": -373.8315124511719, + "loss": 0.3669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8680694699287415, + "rewards/margins": 0.03198850154876709, + "rewards/rejected": 0.8360809683799744, + "step": 13367 + }, + { + "epoch": 0.78, + "learning_rate": 1.2384717677340584e-08, + "logits/chosen": -1.8255157470703125, + "logits/rejected": -1.8347657918930054, + "logps/chosen": -170.78961181640625, + "logps/rejected": -373.0840759277344, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.819793701171875, + "rewards/margins": 5.002710342407227, + "rewards/rejected": -3.1829164028167725, + "step": 13368 + }, + { + "epoch": 0.78, + "learning_rate": 1.2378509664199693e-08, + "logits/chosen": -1.9415433406829834, + "logits/rejected": -1.9403538703918457, + "logps/chosen": -203.2943572998047, + "logps/rejected": -644.0946044921875, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0221999883651733, + "rewards/margins": 8.175776481628418, + "rewards/rejected": -7.153576850891113, + "step": 13369 + }, + { + "epoch": 0.78, + "learning_rate": 1.237230298755817e-08, + "logits/chosen": -1.91165292263031, + "logits/rejected": -1.8947261571884155, + "logps/chosen": -263.0921630859375, + "logps/rejected": -408.0330505371094, + "loss": 0.2285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5518432855606079, + "rewards/margins": 0.8573059439659119, + "rewards/rejected": -0.30546265840530396, + "step": 13370 + }, + { + "epoch": 0.78, + "learning_rate": 1.2366097647636497e-08, + "logits/chosen": -1.8396790027618408, + "logits/rejected": -1.8297902345657349, + "logps/chosen": -72.46968078613281, + "logps/rejected": -295.2789611816406, + "loss": 0.1171, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1424270868301392, + "rewards/margins": 3.7887282371520996, + "rewards/rejected": -2.64630126953125, + "step": 13371 + }, + { + "epoch": 0.78, + "learning_rate": 1.2359893644655139e-08, + "logits/chosen": -1.9621492624282837, + "logits/rejected": -1.955753207206726, + "logps/chosen": -39.54370880126953, + "logps/rejected": -185.89910888671875, + "loss": 0.2849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9287860989570618, + "rewards/margins": 1.022865653038025, + "rewards/rejected": -0.09407959133386612, + "step": 13372 + }, + { + "epoch": 0.78, + "learning_rate": 1.2353690978834463e-08, + "logits/chosen": -1.989643931388855, + "logits/rejected": -1.9783129692077637, + "logps/chosen": -5.9365134802646935e-05, + "logps/rejected": -298.4684753417969, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9907297428289894e-06, + "rewards/margins": 7.126096725463867, + "rewards/rejected": -7.1260986328125, + "step": 13373 + }, + { + "epoch": 0.78, + "learning_rate": 1.2347489650394833e-08, + "logits/chosen": -1.9603980779647827, + "logits/rejected": -1.9540936946868896, + "logps/chosen": -0.1434517502784729, + "logps/rejected": -154.76138305664062, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03810092434287071, + "rewards/margins": 3.159682273864746, + "rewards/rejected": -3.1215813159942627, + "step": 13374 + }, + { + "epoch": 0.78, + "learning_rate": 1.2341289659556547e-08, + "logits/chosen": -1.9750815629959106, + "logits/rejected": -1.9807461500167847, + "logps/chosen": -3.862342418869957e-05, + "logps/rejected": -222.58351135253906, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.311309461016208e-07, + "rewards/margins": 6.163571357727051, + "rewards/rejected": -6.163571357727051, + "step": 13375 + }, + { + "epoch": 0.78, + "learning_rate": 1.2335091006539877e-08, + "logits/chosen": -1.8953863382339478, + "logits/rejected": -1.91206955909729, + "logps/chosen": -41.23121643066406, + "logps/rejected": -269.675048828125, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.126001000404358, + "rewards/margins": 5.363141059875488, + "rewards/rejected": -4.23714017868042, + "step": 13376 + }, + { + "epoch": 0.78, + "learning_rate": 1.2328893691564996e-08, + "logits/chosen": -1.858452320098877, + "logits/rejected": -1.8298858404159546, + "logps/chosen": -171.90155029296875, + "logps/rejected": -411.0458679199219, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1646499633789062, + "rewards/margins": 3.6184067726135254, + "rewards/rejected": -0.453756719827652, + "step": 13377 + }, + { + "epoch": 0.78, + "learning_rate": 1.2322697714852081e-08, + "logits/chosen": -1.8286726474761963, + "logits/rejected": -1.8339370489120483, + "logps/chosen": -68.63742065429688, + "logps/rejected": -185.46701049804688, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24570007622241974, + "rewards/margins": 2.0733261108398438, + "rewards/rejected": -1.82762610912323, + "step": 13378 + }, + { + "epoch": 0.78, + "learning_rate": 1.2316503076621243e-08, + "logits/chosen": -1.678513765335083, + "logits/rejected": -1.6880804300308228, + "logps/chosen": -190.29025268554688, + "logps/rejected": -291.6524658203125, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6987946033477783, + "rewards/margins": 2.347158908843994, + "rewards/rejected": 0.35163575410842896, + "step": 13379 + }, + { + "epoch": 0.78, + "learning_rate": 1.2310309777092558e-08, + "logits/chosen": -1.8959107398986816, + "logits/rejected": -1.8944884538650513, + "logps/chosen": -9.012818336486816, + "logps/rejected": -52.08375930786133, + "loss": 0.6401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041903115808963776, + "rewards/margins": 0.12712803483009338, + "rewards/rejected": -0.08522491902112961, + "step": 13380 + }, + { + "epoch": 0.78, + "learning_rate": 1.230411781648602e-08, + "logits/chosen": -1.6855229139328003, + "logits/rejected": -1.7248378992080688, + "logps/chosen": -156.40899658203125, + "logps/rejected": -200.57827758789062, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6888535022735596, + "rewards/margins": 0.46831822395324707, + "rewards/rejected": 1.2205352783203125, + "step": 13381 + }, + { + "epoch": 0.78, + "learning_rate": 1.2297927195021607e-08, + "logits/chosen": -1.646530032157898, + "logits/rejected": -1.6367120742797852, + "logps/chosen": -138.18508911132812, + "logps/rejected": -272.80908203125, + "loss": 0.1917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.388616919517517, + "rewards/margins": 1.698028564453125, + "rewards/rejected": -0.3094116151332855, + "step": 13382 + }, + { + "epoch": 0.78, + "learning_rate": 1.2291737912919237e-08, + "logits/chosen": -1.7597016096115112, + "logits/rejected": -1.759109616279602, + "logps/chosen": -0.7704217433929443, + "logps/rejected": -103.60913848876953, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011264837346971035, + "rewards/margins": 0.49617764353752136, + "rewards/rejected": -0.5074424743652344, + "step": 13383 + }, + { + "epoch": 0.78, + "learning_rate": 1.228554997039879e-08, + "logits/chosen": -1.993478536605835, + "logits/rejected": -1.9892778396606445, + "logps/chosen": -48.000022888183594, + "logps/rejected": -187.55137634277344, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3743923306465149, + "rewards/margins": 1.3745250701904297, + "rewards/rejected": -1.0001327991485596, + "step": 13384 + }, + { + "epoch": 0.78, + "learning_rate": 1.2279363367680107e-08, + "logits/chosen": -1.9220763444900513, + "logits/rejected": -1.9193593263626099, + "logps/chosen": -100.67063903808594, + "logps/rejected": -250.08944702148438, + "loss": 0.1332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3463150262832642, + "rewards/margins": 1.7640671730041504, + "rewards/rejected": -0.41775208711624146, + "step": 13385 + }, + { + "epoch": 0.78, + "learning_rate": 1.2273178104982912e-08, + "logits/chosen": -1.7628693580627441, + "logits/rejected": -1.7680336236953735, + "logps/chosen": -23.77379035949707, + "logps/rejected": -181.98654174804688, + "loss": 0.2094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9741018414497375, + "rewards/margins": 2.376152753829956, + "rewards/rejected": -1.4020508527755737, + "step": 13386 + }, + { + "epoch": 0.78, + "learning_rate": 1.2266994182527007e-08, + "logits/chosen": -1.9509207010269165, + "logits/rejected": -1.9502711296081543, + "logps/chosen": -25.84904670715332, + "logps/rejected": -142.173828125, + "loss": 0.9896, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9393903613090515, + "rewards/margins": -0.32558727264404297, + "rewards/rejected": -0.6138030886650085, + "step": 13387 + }, + { + "epoch": 0.78, + "learning_rate": 1.2260811600532029e-08, + "logits/chosen": -1.7024106979370117, + "logits/rejected": -1.7412060499191284, + "logps/chosen": -174.01974487304688, + "logps/rejected": -276.5981140136719, + "loss": 0.2508, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5384979248046875, + "rewards/margins": 0.6109985113143921, + "rewards/rejected": 0.9274994134902954, + "step": 13388 + }, + { + "epoch": 0.78, + "learning_rate": 1.2254630359217621e-08, + "logits/chosen": -1.7867435216903687, + "logits/rejected": -1.7855640649795532, + "logps/chosen": -185.4976348876953, + "logps/rejected": -331.01556396484375, + "loss": 0.0605, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.524653673171997, + "rewards/margins": 3.0354933738708496, + "rewards/rejected": -1.510839819908142, + "step": 13389 + }, + { + "epoch": 0.78, + "learning_rate": 1.2248450458803373e-08, + "logits/chosen": -2.000256299972534, + "logits/rejected": -2.001711845397949, + "logps/chosen": -20.248641967773438, + "logps/rejected": -141.53941345214844, + "loss": 0.4722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5676762461662292, + "rewards/margins": 2.6161983013153076, + "rewards/rejected": -3.1838746070861816, + "step": 13390 + }, + { + "epoch": 0.78, + "learning_rate": 1.2242271899508844e-08, + "logits/chosen": -1.9597318172454834, + "logits/rejected": -1.947123408317566, + "logps/chosen": -209.85787963867188, + "logps/rejected": -433.0929870605469, + "loss": 0.2049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.011511206626892, + "rewards/margins": 1.5490020513534546, + "rewards/rejected": -0.5374908447265625, + "step": 13391 + }, + { + "epoch": 0.78, + "learning_rate": 1.22360946815535e-08, + "logits/chosen": -1.960557460784912, + "logits/rejected": -1.9308385848999023, + "logps/chosen": -197.10147094726562, + "logps/rejected": -343.96014404296875, + "loss": 0.1267, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9240295886993408, + "rewards/margins": 1.6662384271621704, + "rewards/rejected": 0.257791131734848, + "step": 13392 + }, + { + "epoch": 0.78, + "learning_rate": 1.2229918805156791e-08, + "logits/chosen": -1.9675055742263794, + "logits/rejected": -1.9654871225357056, + "logps/chosen": -66.54928588867188, + "logps/rejected": -237.0880126953125, + "loss": 0.2265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2147781401872635, + "rewards/margins": 5.5987701416015625, + "rewards/rejected": -5.3839921951293945, + "step": 13393 + }, + { + "epoch": 0.78, + "learning_rate": 1.2223744270538122e-08, + "logits/chosen": -1.8507484197616577, + "logits/rejected": -1.8605258464813232, + "logps/chosen": -251.53099060058594, + "logps/rejected": -401.8990173339844, + "loss": 0.0777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7845230102539062, + "rewards/margins": 2.837397813796997, + "rewards/rejected": -2.052874803543091, + "step": 13394 + }, + { + "epoch": 0.78, + "learning_rate": 1.221757107791685e-08, + "logits/chosen": -1.8479429483413696, + "logits/rejected": -1.842872142791748, + "logps/chosen": -40.57207107543945, + "logps/rejected": -131.3585968017578, + "loss": 0.4619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056522369384765625, + "rewards/margins": 0.7653358578681946, + "rewards/rejected": -0.708813488483429, + "step": 13395 + }, + { + "epoch": 0.78, + "learning_rate": 1.221139922751226e-08, + "logits/chosen": -1.8242226839065552, + "logits/rejected": -1.8336808681488037, + "logps/chosen": -277.2163391113281, + "logps/rejected": -423.6671142578125, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.810021996498108, + "rewards/margins": 3.552886962890625, + "rewards/rejected": -1.742864966392517, + "step": 13396 + }, + { + "epoch": 0.78, + "learning_rate": 1.2205228719543609e-08, + "logits/chosen": -1.9357141256332397, + "logits/rejected": -1.8707263469696045, + "logps/chosen": -257.6477966308594, + "logps/rejected": -456.97454833984375, + "loss": 0.0476, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6678009033203125, + "rewards/margins": 2.8969788551330566, + "rewards/rejected": -0.22917786240577698, + "step": 13397 + }, + { + "epoch": 0.78, + "learning_rate": 1.2199059554230106e-08, + "logits/chosen": -1.949629306793213, + "logits/rejected": -1.9548405408859253, + "logps/chosen": -0.3555534780025482, + "logps/rejected": -85.90924072265625, + "loss": 0.4201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03157460317015648, + "rewards/margins": 1.6484050750732422, + "rewards/rejected": -1.679979681968689, + "step": 13398 + }, + { + "epoch": 0.78, + "learning_rate": 1.2192891731790927e-08, + "logits/chosen": -1.839667797088623, + "logits/rejected": -1.82700514793396, + "logps/chosen": -23.027788162231445, + "logps/rejected": -120.55451202392578, + "loss": 0.6495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014951134100556374, + "rewards/margins": 0.12448597699403763, + "rewards/rejected": -0.13943710923194885, + "step": 13399 + }, + { + "epoch": 0.78, + "learning_rate": 1.2186725252445157e-08, + "logits/chosen": -1.9795715808868408, + "logits/rejected": -1.9841551780700684, + "logps/chosen": -57.81654357910156, + "logps/rejected": -390.09332275390625, + "loss": 0.1666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8795547485351562, + "rewards/margins": 9.141911506652832, + "rewards/rejected": -8.262356758117676, + "step": 13400 + }, + { + "epoch": 0.78, + "learning_rate": 1.218056011641187e-08, + "logits/chosen": -1.9614535570144653, + "logits/rejected": -1.9619656801223755, + "logps/chosen": -182.34619140625, + "logps/rejected": -434.417236328125, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8795746564865112, + "rewards/margins": 3.731527805328369, + "rewards/rejected": -1.851953148841858, + "step": 13401 + }, + { + "epoch": 0.78, + "learning_rate": 1.2174396323910085e-08, + "logits/chosen": -1.860473871231079, + "logits/rejected": -1.8444255590438843, + "logps/chosen": -146.91273498535156, + "logps/rejected": -206.95782470703125, + "loss": 0.2141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.687678575515747, + "rewards/margins": 1.198114037513733, + "rewards/rejected": 0.4895645081996918, + "step": 13402 + }, + { + "epoch": 0.78, + "learning_rate": 1.2168233875158779e-08, + "logits/chosen": -2.007981777191162, + "logits/rejected": -2.010279893875122, + "logps/chosen": -3.8099863529205322, + "logps/rejected": -61.77861022949219, + "loss": 0.5008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1539245843887329, + "rewards/margins": 0.6565353870391846, + "rewards/rejected": -0.5026108026504517, + "step": 13403 + }, + { + "epoch": 0.78, + "learning_rate": 1.2162072770376847e-08, + "logits/chosen": -2.1046128273010254, + "logits/rejected": -2.0970945358276367, + "logps/chosen": -7.485623359680176, + "logps/rejected": -214.04031372070312, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16913262009620667, + "rewards/margins": 1.7849575281143188, + "rewards/rejected": -1.6158249378204346, + "step": 13404 + }, + { + "epoch": 0.78, + "learning_rate": 1.2155913009783158e-08, + "logits/chosen": -2.1286544799804688, + "logits/rejected": -2.115863800048828, + "logps/chosen": -22.757701873779297, + "logps/rejected": -321.03759765625, + "loss": 0.2246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46603068709373474, + "rewards/margins": 5.950906276702881, + "rewards/rejected": -5.484875679016113, + "step": 13405 + }, + { + "epoch": 0.78, + "learning_rate": 1.2149754593596589e-08, + "logits/chosen": -1.8677719831466675, + "logits/rejected": -1.8661975860595703, + "logps/chosen": -0.014963041059672832, + "logps/rejected": -76.769775390625, + "loss": 0.5498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014013090403750539, + "rewards/margins": 0.6642862558364868, + "rewards/rejected": -0.6656875610351562, + "step": 13406 + }, + { + "epoch": 0.78, + "learning_rate": 1.2143597522035865e-08, + "logits/chosen": -1.89400053024292, + "logits/rejected": -1.8929632902145386, + "logps/chosen": -0.880770742893219, + "logps/rejected": -79.03971099853516, + "loss": 0.4508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.051135893911123276, + "rewards/margins": 1.2480056285858154, + "rewards/rejected": -1.1968697309494019, + "step": 13407 + }, + { + "epoch": 0.78, + "learning_rate": 1.2137441795319748e-08, + "logits/chosen": -1.8315393924713135, + "logits/rejected": -1.8564419746398926, + "logps/chosen": -219.6480712890625, + "logps/rejected": -387.7154846191406, + "loss": 0.0901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8683639764785767, + "rewards/margins": 3.433271884918213, + "rewards/rejected": -2.5649077892303467, + "step": 13408 + }, + { + "epoch": 0.78, + "learning_rate": 1.2131287413666873e-08, + "logits/chosen": -1.9766789674758911, + "logits/rejected": -1.9674443006515503, + "logps/chosen": -5.5348801612854, + "logps/rejected": -119.52621459960938, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12772655487060547, + "rewards/margins": 1.298219919204712, + "rewards/rejected": -1.1704933643341064, + "step": 13409 + }, + { + "epoch": 0.78, + "learning_rate": 1.2125134377295936e-08, + "logits/chosen": -1.8470288515090942, + "logits/rejected": -1.90427827835083, + "logps/chosen": -161.17916870117188, + "logps/rejected": -314.8529968261719, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4143218994140625, + "rewards/margins": 5.159912109375, + "rewards/rejected": -2.7455902099609375, + "step": 13410 + }, + { + "epoch": 0.78, + "learning_rate": 1.2118982686425477e-08, + "logits/chosen": -1.9597454071044922, + "logits/rejected": -2.004838466644287, + "logps/chosen": -203.10818481445312, + "logps/rejected": -393.18511962890625, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.558973789215088, + "rewards/margins": 3.255201816558838, + "rewards/rejected": -0.69622802734375, + "step": 13411 + }, + { + "epoch": 0.78, + "learning_rate": 1.2112832341274048e-08, + "logits/chosen": -1.7665210962295532, + "logits/rejected": -1.7767049074172974, + "logps/chosen": -0.0007134221959859133, + "logps/rejected": -207.6702117919922, + "loss": 0.3778, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.671998330100905e-06, + "rewards/margins": 2.7406504154205322, + "rewards/rejected": -2.7406418323516846, + "step": 13412 + }, + { + "epoch": 0.78, + "learning_rate": 1.2106683342060143e-08, + "logits/chosen": -1.7043910026550293, + "logits/rejected": -1.7017595767974854, + "logps/chosen": -30.678150177001953, + "logps/rejected": -85.99771881103516, + "loss": 0.5146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3640190064907074, + "rewards/margins": 0.27633437514305115, + "rewards/rejected": 0.08768463134765625, + "step": 13413 + }, + { + "epoch": 0.78, + "learning_rate": 1.2100535689002211e-08, + "logits/chosen": -1.8694051504135132, + "logits/rejected": -1.8556088209152222, + "logps/chosen": -155.119140625, + "logps/rejected": -200.58419799804688, + "loss": 0.3434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6753937005996704, + "rewards/margins": 0.0967971682548523, + "rewards/rejected": 0.5785965323448181, + "step": 13414 + }, + { + "epoch": 0.78, + "learning_rate": 1.2094389382318631e-08, + "logits/chosen": -2.027949333190918, + "logits/rejected": -2.0305845737457275, + "logps/chosen": -123.0753402709961, + "logps/rejected": -282.4580383300781, + "loss": 0.1002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1303199529647827, + "rewards/margins": 2.360990047454834, + "rewards/rejected": -1.2306702136993408, + "step": 13415 + }, + { + "epoch": 0.78, + "learning_rate": 1.208824442222775e-08, + "logits/chosen": -1.998634934425354, + "logits/rejected": -1.9920437335968018, + "logps/chosen": -0.0031913258135318756, + "logps/rejected": -230.74215698242188, + "loss": 0.3774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018481277220416814, + "rewards/margins": 2.3553175926208496, + "rewards/rejected": -2.3555023670196533, + "step": 13416 + }, + { + "epoch": 0.78, + "learning_rate": 1.2082100808947877e-08, + "logits/chosen": -1.8204665184020996, + "logits/rejected": -1.8151586055755615, + "logps/chosen": -79.12403106689453, + "logps/rejected": -359.8838195800781, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.395862579345703, + "rewards/margins": 4.2565436363220215, + "rewards/rejected": -1.860681176185608, + "step": 13417 + }, + { + "epoch": 0.78, + "learning_rate": 1.2075958542697273e-08, + "logits/chosen": -1.8424190282821655, + "logits/rejected": -1.8423657417297363, + "logps/chosen": -40.59870529174805, + "logps/rejected": -92.88993072509766, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.405997097492218, + "rewards/margins": 1.2921948432922363, + "rewards/rejected": -0.8861976861953735, + "step": 13418 + }, + { + "epoch": 0.78, + "learning_rate": 1.2069817623694112e-08, + "logits/chosen": -1.9897269010543823, + "logits/rejected": -1.984654426574707, + "logps/chosen": -1.6927686374401674e-05, + "logps/rejected": -209.1064453125, + "loss": 0.3385, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7684988402352246e-08, + "rewards/margins": 6.070962429046631, + "rewards/rejected": -6.070962429046631, + "step": 13419 + }, + { + "epoch": 0.78, + "learning_rate": 1.2063678052156568e-08, + "logits/chosen": -1.8370308876037598, + "logits/rejected": -1.8486487865447998, + "logps/chosen": -269.7525939941406, + "logps/rejected": -372.51519775390625, + "loss": 0.0812, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9801392555236816, + "rewards/margins": 2.1126649379730225, + "rewards/rejected": 0.867474377155304, + "step": 13420 + }, + { + "epoch": 0.78, + "learning_rate": 1.2057539828302749e-08, + "logits/chosen": -1.9587510824203491, + "logits/rejected": -1.9541374444961548, + "logps/chosen": -116.11227416992188, + "logps/rejected": -261.341064453125, + "loss": 0.3763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8466094732284546, + "rewards/margins": 0.07014763355255127, + "rewards/rejected": 1.7764618396759033, + "step": 13421 + }, + { + "epoch": 0.78, + "learning_rate": 1.205140295235072e-08, + "logits/chosen": -1.6932538747787476, + "logits/rejected": -1.6874027252197266, + "logps/chosen": -13.032654762268066, + "logps/rejected": -17.728530883789062, + "loss": 0.6641, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048853110522031784, + "rewards/margins": 0.0010890960693359375, + "rewards/rejected": -0.04994220659136772, + "step": 13422 + }, + { + "epoch": 0.78, + "learning_rate": 1.2045267424518474e-08, + "logits/chosen": -1.9809218645095825, + "logits/rejected": -1.9768275022506714, + "logps/chosen": -24.450424194335938, + "logps/rejected": -158.6793212890625, + "loss": 0.2397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40701714158058167, + "rewards/margins": 2.441932439804077, + "rewards/rejected": -2.0349152088165283, + "step": 13423 + }, + { + "epoch": 0.78, + "learning_rate": 1.2039133245023969e-08, + "logits/chosen": -1.8711473941802979, + "logits/rejected": -1.8780624866485596, + "logps/chosen": -32.788734436035156, + "logps/rejected": -255.48692321777344, + "loss": 0.731, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.809177815914154, + "rewards/margins": -0.8237338662147522, + "rewards/rejected": 1.6329116821289062, + "step": 13424 + }, + { + "epoch": 0.78, + "learning_rate": 1.203300041408517e-08, + "logits/chosen": -1.8528118133544922, + "logits/rejected": -1.8435968160629272, + "logps/chosen": -91.54508972167969, + "logps/rejected": -238.50022888183594, + "loss": 0.7389, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0642074346542358, + "rewards/margins": 1.9671021699905396, + "rewards/rejected": -3.0313096046447754, + "step": 13425 + }, + { + "epoch": 0.78, + "learning_rate": 1.2026868931919898e-08, + "logits/chosen": -2.1072211265563965, + "logits/rejected": -2.1085667610168457, + "logps/chosen": -4.263837814331055, + "logps/rejected": -182.7877197265625, + "loss": 0.4372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1815413534641266, + "rewards/margins": 2.5331192016601562, + "rewards/rejected": -2.71466064453125, + "step": 13426 + }, + { + "epoch": 0.78, + "learning_rate": 1.2020738798746e-08, + "logits/chosen": -1.7851656675338745, + "logits/rejected": -1.79509437084198, + "logps/chosen": -52.6468620300293, + "logps/rejected": -157.33970642089844, + "loss": 0.1324, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4601658582687378, + "rewards/margins": 2.7802600860595703, + "rewards/rejected": -1.320094347000122, + "step": 13427 + }, + { + "epoch": 0.78, + "learning_rate": 1.2014610014781213e-08, + "logits/chosen": -1.9764059782028198, + "logits/rejected": -1.9685324430465698, + "logps/chosen": -15.994497299194336, + "logps/rejected": -162.76693725585938, + "loss": 0.1663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8731226325035095, + "rewards/margins": 4.135375499725342, + "rewards/rejected": -3.2622528076171875, + "step": 13428 + }, + { + "epoch": 0.78, + "learning_rate": 1.2008482580243306e-08, + "logits/chosen": -1.9372588396072388, + "logits/rejected": -1.9258133172988892, + "logps/chosen": -29.731342315673828, + "logps/rejected": -129.89743041992188, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32101479172706604, + "rewards/margins": 1.2536289691925049, + "rewards/rejected": -0.9326141476631165, + "step": 13429 + }, + { + "epoch": 0.78, + "learning_rate": 1.2002356495349925e-08, + "logits/chosen": -2.0830471515655518, + "logits/rejected": -2.079629898071289, + "logps/chosen": -14.516545295715332, + "logps/rejected": -167.50570678710938, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19335012137889862, + "rewards/margins": 3.882845878601074, + "rewards/rejected": -3.689495801925659, + "step": 13430 + }, + { + "epoch": 0.78, + "learning_rate": 1.199623176031871e-08, + "logits/chosen": -1.7705700397491455, + "logits/rejected": -1.7727503776550293, + "logps/chosen": -159.58717346191406, + "logps/rejected": -358.94476318359375, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8914612531661987, + "rewards/margins": 3.991219997406006, + "rewards/rejected": -2.0997588634490967, + "step": 13431 + }, + { + "epoch": 0.78, + "learning_rate": 1.199010837536723e-08, + "logits/chosen": -1.9747209548950195, + "logits/rejected": -1.96736478805542, + "logps/chosen": -10.835190773010254, + "logps/rejected": -113.629638671875, + "loss": 0.7393, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.029955005273222923, + "rewards/margins": -0.25735121965408325, + "rewards/rejected": 0.2873062193393707, + "step": 13432 + }, + { + "epoch": 0.78, + "learning_rate": 1.1983986340713048e-08, + "logits/chosen": -1.998490333557129, + "logits/rejected": -1.981114149093628, + "logps/chosen": -1.6197298765182495, + "logps/rejected": -334.50079345703125, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12060636281967163, + "rewards/margins": 5.927412033081055, + "rewards/rejected": -5.806805610656738, + "step": 13433 + }, + { + "epoch": 0.78, + "learning_rate": 1.1977865656573615e-08, + "logits/chosen": -1.8005521297454834, + "logits/rejected": -1.7368748188018799, + "logps/chosen": -406.35858154296875, + "logps/rejected": -408.10693359375, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.565020799636841, + "rewards/margins": 3.519946336746216, + "rewards/rejected": 0.045074462890625, + "step": 13434 + }, + { + "epoch": 0.78, + "learning_rate": 1.1971746323166376e-08, + "logits/chosen": -2.036806106567383, + "logits/rejected": -2.03792405128479, + "logps/chosen": -1.667833924293518, + "logps/rejected": -45.75482940673828, + "loss": 0.5588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05532175302505493, + "rewards/margins": 0.47303682565689087, + "rewards/rejected": -0.41771507263183594, + "step": 13435 + }, + { + "epoch": 0.78, + "learning_rate": 1.1965628340708728e-08, + "logits/chosen": -2.0576539039611816, + "logits/rejected": -2.0513532161712646, + "logps/chosen": -40.529457092285156, + "logps/rejected": -161.0756378173828, + "loss": 0.4196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0049953460693359375, + "rewards/margins": 1.5890324115753174, + "rewards/rejected": -1.5940277576446533, + "step": 13436 + }, + { + "epoch": 0.78, + "learning_rate": 1.195951170941802e-08, + "logits/chosen": -1.9418340921401978, + "logits/rejected": -1.9409914016723633, + "logps/chosen": -15.020965576171875, + "logps/rejected": -100.81327819824219, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01746063306927681, + "rewards/margins": 1.2656487226486206, + "rewards/rejected": -1.2831093072891235, + "step": 13437 + }, + { + "epoch": 0.78, + "learning_rate": 1.195339642951152e-08, + "logits/chosen": -2.1475601196289062, + "logits/rejected": -2.1407933235168457, + "logps/chosen": -11.012171745300293, + "logps/rejected": -183.57815551757812, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4984290301799774, + "rewards/margins": 2.28098201751709, + "rewards/rejected": -1.7825530767440796, + "step": 13438 + }, + { + "epoch": 0.78, + "learning_rate": 1.194728250120648e-08, + "logits/chosen": -1.7579927444458008, + "logits/rejected": -1.7577292919158936, + "logps/chosen": -5.340494681149721e-05, + "logps/rejected": -252.90235900878906, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.788107397260319e-06, + "rewards/margins": 7.679507255554199, + "rewards/rejected": -7.679509162902832, + "step": 13439 + }, + { + "epoch": 0.78, + "learning_rate": 1.1941169924720101e-08, + "logits/chosen": -1.756714105606079, + "logits/rejected": -1.7479808330535889, + "logps/chosen": -32.741554260253906, + "logps/rejected": -187.66128540039062, + "loss": 0.4621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2818721830844879, + "rewards/margins": 2.5238940715789795, + "rewards/rejected": -2.8057663440704346, + "step": 13440 + }, + { + "epoch": 0.78, + "learning_rate": 1.1935058700269551e-08, + "logits/chosen": -2.0098493099212646, + "logits/rejected": -1.9926005601882935, + "logps/chosen": -226.15805053710938, + "logps/rejected": -565.21630859375, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3877503871917725, + "rewards/margins": 7.920557022094727, + "rewards/rejected": -5.532806396484375, + "step": 13441 + }, + { + "epoch": 0.78, + "learning_rate": 1.1928948828071893e-08, + "logits/chosen": -1.786547064781189, + "logits/rejected": -1.7897422313690186, + "logps/chosen": -7.46234945836477e-05, + "logps/rejected": -212.4423065185547, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.766892177736736e-06, + "rewards/margins": 5.415877819061279, + "rewards/rejected": -5.415881633758545, + "step": 13442 + }, + { + "epoch": 0.78, + "learning_rate": 1.1922840308344207e-08, + "logits/chosen": -2.00626277923584, + "logits/rejected": -2.012310028076172, + "logps/chosen": -24.210895538330078, + "logps/rejected": -127.6881103515625, + "loss": 0.5039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06037483364343643, + "rewards/margins": 0.8868793845176697, + "rewards/rejected": -0.8265045285224915, + "step": 13443 + }, + { + "epoch": 0.78, + "learning_rate": 1.1916733141303482e-08, + "logits/chosen": -1.9644685983657837, + "logits/rejected": -1.9650429487228394, + "logps/chosen": -28.777767181396484, + "logps/rejected": -232.9674072265625, + "loss": 0.1194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1538749933242798, + "rewards/margins": 6.007503509521484, + "rewards/rejected": -4.853628635406494, + "step": 13444 + }, + { + "epoch": 0.78, + "learning_rate": 1.1910627327166683e-08, + "logits/chosen": -1.9296149015426636, + "logits/rejected": -1.9302889108657837, + "logps/chosen": -8.264429092407227, + "logps/rejected": -65.4304428100586, + "loss": 0.4656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12678098678588867, + "rewards/margins": 0.8408953547477722, + "rewards/rejected": -0.7141143679618835, + "step": 13445 + }, + { + "epoch": 0.78, + "learning_rate": 1.1904522866150729e-08, + "logits/chosen": -1.723219633102417, + "logits/rejected": -1.716847538948059, + "logps/chosen": -151.65078735351562, + "logps/rejected": -264.0906066894531, + "loss": 0.1045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8088729977607727, + "rewards/margins": 2.26712965965271, + "rewards/rejected": -1.4582566022872925, + "step": 13446 + }, + { + "epoch": 0.78, + "learning_rate": 1.189841975847244e-08, + "logits/chosen": -1.904486894607544, + "logits/rejected": -1.887582540512085, + "logps/chosen": -293.18798828125, + "logps/rejected": -504.267578125, + "loss": 0.0579, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.036297559738159, + "rewards/margins": 2.2454283237457275, + "rewards/rejected": 0.7908691763877869, + "step": 13447 + }, + { + "epoch": 0.78, + "learning_rate": 1.1892318004348684e-08, + "logits/chosen": -2.0188369750976562, + "logits/rejected": -2.0184836387634277, + "logps/chosen": -0.16664239764213562, + "logps/rejected": -15.66781234741211, + "loss": 0.6853, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.004997182171791792, + "rewards/margins": -0.024979902431368828, + "rewards/rejected": 0.01998271979391575, + "step": 13448 + }, + { + "epoch": 0.78, + "learning_rate": 1.1886217603996185e-08, + "logits/chosen": -2.112553358078003, + "logits/rejected": -2.1431045532226562, + "logps/chosen": -173.0692596435547, + "logps/rejected": -246.4626007080078, + "loss": 0.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.414254903793335, + "rewards/margins": 1.4192612171173096, + "rewards/rejected": 0.9949936270713806, + "step": 13449 + }, + { + "epoch": 0.78, + "learning_rate": 1.188011855763167e-08, + "logits/chosen": -1.9630604982376099, + "logits/rejected": -1.9643681049346924, + "logps/chosen": -0.5203561782836914, + "logps/rejected": -70.37271881103516, + "loss": 0.4361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027722680941224098, + "rewards/margins": 1.112912893295288, + "rewards/rejected": -1.0851901769638062, + "step": 13450 + }, + { + "epoch": 0.78, + "learning_rate": 1.1874020865471806e-08, + "logits/chosen": -1.8788012266159058, + "logits/rejected": -1.8559598922729492, + "logps/chosen": -241.21023559570312, + "logps/rejected": -382.03045654296875, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.117724657058716, + "rewards/margins": 3.5634796619415283, + "rewards/rejected": -1.4457550048828125, + "step": 13451 + }, + { + "epoch": 0.78, + "learning_rate": 1.1867924527733237e-08, + "logits/chosen": -1.7712401151657104, + "logits/rejected": -1.7877397537231445, + "logps/chosen": -220.9081268310547, + "logps/rejected": -333.15924072265625, + "loss": 0.113, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8210785388946533, + "rewards/margins": 1.9992951154708862, + "rewards/rejected": -0.17821656167507172, + "step": 13452 + }, + { + "epoch": 0.78, + "learning_rate": 1.186182954463249e-08, + "logits/chosen": -2.05183482170105, + "logits/rejected": -2.041146755218506, + "logps/chosen": -22.693960189819336, + "logps/rejected": -193.3920440673828, + "loss": 0.4173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01608295552432537, + "rewards/margins": 0.8333509564399719, + "rewards/rejected": -0.8494338989257812, + "step": 13453 + }, + { + "epoch": 0.78, + "learning_rate": 1.1855735916386118e-08, + "logits/chosen": -1.8450778722763062, + "logits/rejected": -1.8325815200805664, + "logps/chosen": -59.720008850097656, + "logps/rejected": -262.02081298828125, + "loss": 0.4584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6507980227470398, + "rewards/margins": 0.3711807131767273, + "rewards/rejected": 0.2796173095703125, + "step": 13454 + }, + { + "epoch": 0.78, + "learning_rate": 1.184964364321059e-08, + "logits/chosen": -1.9966907501220703, + "logits/rejected": -2.004633665084839, + "logps/chosen": -70.07781982421875, + "logps/rejected": -174.99444580078125, + "loss": 0.4214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2981162965297699, + "rewards/margins": 0.9599357843399048, + "rewards/rejected": -0.6618194580078125, + "step": 13455 + }, + { + "epoch": 0.78, + "learning_rate": 1.1843552725322343e-08, + "logits/chosen": -1.8052898645401, + "logits/rejected": -1.8046269416809082, + "logps/chosen": -7.7514448165893555, + "logps/rejected": -106.45123291015625, + "loss": 0.3374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12519703805446625, + "rewards/margins": 2.558248281478882, + "rewards/rejected": -2.433051347732544, + "step": 13456 + }, + { + "epoch": 0.78, + "learning_rate": 1.1837463162937733e-08, + "logits/chosen": -1.9070394039154053, + "logits/rejected": -1.9033164978027344, + "logps/chosen": -21.71603775024414, + "logps/rejected": -130.31246948242188, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.038184165954589844, + "rewards/margins": 2.940812587738037, + "rewards/rejected": -2.9026284217834473, + "step": 13457 + }, + { + "epoch": 0.78, + "learning_rate": 1.1831374956273104e-08, + "logits/chosen": -1.886914610862732, + "logits/rejected": -1.8682081699371338, + "logps/chosen": -9.348526954650879, + "logps/rejected": -253.6962890625, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048597048968076706, + "rewards/margins": 6.35503625869751, + "rewards/rejected": -6.306439399719238, + "step": 13458 + }, + { + "epoch": 0.78, + "learning_rate": 1.1825288105544745e-08, + "logits/chosen": -1.891635537147522, + "logits/rejected": -1.8921600580215454, + "logps/chosen": -15.540239334106445, + "logps/rejected": -270.2093200683594, + "loss": 0.2757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17395038902759552, + "rewards/margins": 2.8290958404541016, + "rewards/rejected": -2.6551454067230225, + "step": 13459 + }, + { + "epoch": 0.78, + "learning_rate": 1.1819202610968893e-08, + "logits/chosen": -2.012683391571045, + "logits/rejected": -2.012272596359253, + "logps/chosen": -54.838504791259766, + "logps/rejected": -243.1293182373047, + "loss": 0.2579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31193122267723083, + "rewards/margins": 2.766763687133789, + "rewards/rejected": -2.4548325538635254, + "step": 13460 + }, + { + "epoch": 0.78, + "learning_rate": 1.1813118472761718e-08, + "logits/chosen": -1.7957158088684082, + "logits/rejected": -1.7717316150665283, + "logps/chosen": -181.27200317382812, + "logps/rejected": -387.6850280761719, + "loss": 0.1483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7370697259902954, + "rewards/margins": 1.942651391029358, + "rewards/rejected": -1.2055816650390625, + "step": 13461 + }, + { + "epoch": 0.78, + "learning_rate": 1.1807035691139361e-08, + "logits/chosen": -1.882784366607666, + "logits/rejected": -1.8440133333206177, + "logps/chosen": -214.14410400390625, + "logps/rejected": -377.833984375, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.454071044921875, + "rewards/margins": 3.903656005859375, + "rewards/rejected": -1.4495849609375, + "step": 13462 + }, + { + "epoch": 0.78, + "learning_rate": 1.1800954266317925e-08, + "logits/chosen": -1.7424739599227905, + "logits/rejected": -1.7423678636550903, + "logps/chosen": -238.95794677734375, + "logps/rejected": -393.721435546875, + "loss": 0.044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.991491675376892, + "rewards/margins": 3.353808641433716, + "rewards/rejected": -1.3623169660568237, + "step": 13463 + }, + { + "epoch": 0.78, + "learning_rate": 1.1794874198513454e-08, + "logits/chosen": -2.105928897857666, + "logits/rejected": -2.0968570709228516, + "logps/chosen": -23.080873489379883, + "logps/rejected": -225.0057373046875, + "loss": 0.1982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7392080426216125, + "rewards/margins": 3.500559091567993, + "rewards/rejected": -2.7613511085510254, + "step": 13464 + }, + { + "epoch": 0.78, + "learning_rate": 1.1788795487941922e-08, + "logits/chosen": -1.9110193252563477, + "logits/rejected": -1.8442094326019287, + "logps/chosen": -326.91900634765625, + "logps/rejected": -479.25665283203125, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.953662157058716, + "rewards/margins": 3.1473388671875, + "rewards/rejected": -0.19367675483226776, + "step": 13465 + }, + { + "epoch": 0.78, + "learning_rate": 1.178271813481927e-08, + "logits/chosen": -1.9945307970046997, + "logits/rejected": -1.978916049003601, + "logps/chosen": -206.6331787109375, + "logps/rejected": -435.35614013671875, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7087677717208862, + "rewards/margins": 4.149905681610107, + "rewards/rejected": -2.4411377906799316, + "step": 13466 + }, + { + "epoch": 0.78, + "learning_rate": 1.1776642139361436e-08, + "logits/chosen": -1.9133286476135254, + "logits/rejected": -1.9123705625534058, + "logps/chosen": -21.96434783935547, + "logps/rejected": -76.7462158203125, + "loss": 0.5174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3087257444858551, + "rewards/margins": 0.2624168395996094, + "rewards/rejected": 0.04630890116095543, + "step": 13467 + }, + { + "epoch": 0.78, + "learning_rate": 1.1770567501784235e-08, + "logits/chosen": -1.7924193143844604, + "logits/rejected": -1.7715590000152588, + "logps/chosen": -91.83558654785156, + "logps/rejected": -321.38775634765625, + "loss": 0.0552, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.012141466140747, + "rewards/margins": 3.667189121246338, + "rewards/rejected": -1.6550476551055908, + "step": 13468 + }, + { + "epoch": 0.78, + "learning_rate": 1.176449422230349e-08, + "logits/chosen": -1.8021224737167358, + "logits/rejected": -1.8993948698043823, + "logps/chosen": -159.03189086914062, + "logps/rejected": -247.06802368164062, + "loss": 0.0863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7931121587753296, + "rewards/margins": 1.884881615638733, + "rewards/rejected": -0.09176941215991974, + "step": 13469 + }, + { + "epoch": 0.78, + "learning_rate": 1.1758422301134912e-08, + "logits/chosen": -2.0500552654266357, + "logits/rejected": -2.047361135482788, + "logps/chosen": -187.38458251953125, + "logps/rejected": -228.02809143066406, + "loss": 0.2273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.889634847640991, + "rewards/margins": 0.6858644485473633, + "rewards/rejected": 2.203770399093628, + "step": 13470 + }, + { + "epoch": 0.78, + "learning_rate": 1.1752351738494259e-08, + "logits/chosen": -1.8807505369186401, + "logits/rejected": -1.8844044208526611, + "logps/chosen": -10.534308433532715, + "logps/rejected": -72.55194091796875, + "loss": 0.5648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3512623906135559, + "rewards/margins": 0.24744769930839539, + "rewards/rejected": 0.10381469875574112, + "step": 13471 + }, + { + "epoch": 0.78, + "learning_rate": 1.1746282534597152e-08, + "logits/chosen": -1.8158267736434937, + "logits/rejected": -1.8136669397354126, + "logps/chosen": -0.000158302704221569, + "logps/rejected": -137.67465209960938, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.897876013070345e-06, + "rewards/margins": 2.6017327308654785, + "rewards/rejected": -2.601736545562744, + "step": 13472 + }, + { + "epoch": 0.78, + "learning_rate": 1.1740214689659205e-08, + "logits/chosen": -1.8074036836624146, + "logits/rejected": -1.826737880706787, + "logps/chosen": -328.4664611816406, + "logps/rejected": -504.72161865234375, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.985482931137085, + "rewards/margins": 7.8253631591796875, + "rewards/rejected": -4.839880466461182, + "step": 13473 + }, + { + "epoch": 0.78, + "learning_rate": 1.1734148203895983e-08, + "logits/chosen": -1.692624568939209, + "logits/rejected": -1.679610013961792, + "logps/chosen": -0.00010943197412416339, + "logps/rejected": -96.52111053466797, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0692204341467004e-05, + "rewards/margins": 1.2428085803985596, + "rewards/rejected": -1.2427978515625, + "step": 13474 + }, + { + "epoch": 0.78, + "learning_rate": 1.1728083077523004e-08, + "logits/chosen": -1.9446849822998047, + "logits/rejected": -1.9398632049560547, + "logps/chosen": -23.9997615814209, + "logps/rejected": -187.29598999023438, + "loss": 0.5, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5054935812950134, + "rewards/margins": 2.394033432006836, + "rewards/rejected": -2.899527072906494, + "step": 13475 + }, + { + "epoch": 0.78, + "learning_rate": 1.1722019310755716e-08, + "logits/chosen": -1.7901531457901, + "logits/rejected": -1.7677452564239502, + "logps/chosen": -66.09424591064453, + "logps/rejected": -316.2914123535156, + "loss": 0.0715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7063316106796265, + "rewards/margins": 5.180150508880615, + "rewards/rejected": -3.4738190174102783, + "step": 13476 + }, + { + "epoch": 0.78, + "learning_rate": 1.1715956903809537e-08, + "logits/chosen": -1.9003584384918213, + "logits/rejected": -1.9042847156524658, + "logps/chosen": -8.547122706659138e-05, + "logps/rejected": -69.33196258544922, + "loss": 0.3775, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.841138772666454e-07, + "rewards/margins": 2.4736406803131104, + "rewards/rejected": -2.4736411571502686, + "step": 13477 + }, + { + "epoch": 0.78, + "learning_rate": 1.1709895856899832e-08, + "logits/chosen": -1.9720619916915894, + "logits/rejected": -1.926901936531067, + "logps/chosen": -165.3092803955078, + "logps/rejected": -281.631591796875, + "loss": 0.3519, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5935410261154175, + "rewards/margins": 0.31548309326171875, + "rewards/rejected": 1.2780579328536987, + "step": 13478 + }, + { + "epoch": 0.78, + "learning_rate": 1.1703836170241943e-08, + "logits/chosen": -1.882663607597351, + "logits/rejected": -1.8839865922927856, + "logps/chosen": -31.7510929107666, + "logps/rejected": -84.64522552490234, + "loss": 0.4085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7104452252388, + "rewards/margins": 0.36420804262161255, + "rewards/rejected": 0.3462371826171875, + "step": 13479 + }, + { + "epoch": 0.78, + "learning_rate": 1.1697777844051105e-08, + "logits/chosen": -1.8410229682922363, + "logits/rejected": -1.8320552110671997, + "logps/chosen": -103.9473876953125, + "logps/rejected": -253.51219177246094, + "loss": 0.0667, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3539750576019287, + "rewards/margins": 2.719548225402832, + "rewards/rejected": -0.36557313799858093, + "step": 13480 + }, + { + "epoch": 0.78, + "learning_rate": 1.1691720878542555e-08, + "logits/chosen": -1.9202607870101929, + "logits/rejected": -1.914325475692749, + "logps/chosen": -6.9671807289123535, + "logps/rejected": -62.9407844543457, + "loss": 0.537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06471047550439835, + "rewards/margins": 0.6611459255218506, + "rewards/rejected": -0.7258564233779907, + "step": 13481 + }, + { + "epoch": 0.78, + "learning_rate": 1.1685665273931461e-08, + "logits/chosen": -1.9414688348770142, + "logits/rejected": -1.943650484085083, + "logps/chosen": -6.531470775604248, + "logps/rejected": -128.18194580078125, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12123999744653702, + "rewards/margins": 2.2374303340911865, + "rewards/rejected": -2.1161904335021973, + "step": 13482 + }, + { + "epoch": 0.78, + "learning_rate": 1.1679611030432967e-08, + "logits/chosen": -2.0489792823791504, + "logits/rejected": -2.0458316802978516, + "logps/chosen": -67.94551086425781, + "logps/rejected": -167.991455078125, + "loss": 0.2194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6955536007881165, + "rewards/margins": 1.9030532836914062, + "rewards/rejected": -1.2074997425079346, + "step": 13483 + }, + { + "epoch": 0.78, + "learning_rate": 1.1673558148262125e-08, + "logits/chosen": -1.7056877613067627, + "logits/rejected": -1.6924265623092651, + "logps/chosen": -202.1441192626953, + "logps/rejected": -279.96942138671875, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8691589832305908, + "rewards/margins": 2.589993476867676, + "rewards/rejected": -0.7208343744277954, + "step": 13484 + }, + { + "epoch": 0.78, + "learning_rate": 1.1667506627633956e-08, + "logits/chosen": -2.0004069805145264, + "logits/rejected": -1.9922702312469482, + "logps/chosen": -76.97257232666016, + "logps/rejected": -326.2832946777344, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45284196734428406, + "rewards/margins": 3.5372824668884277, + "rewards/rejected": -3.990124464035034, + "step": 13485 + }, + { + "epoch": 0.78, + "learning_rate": 1.1661456468763487e-08, + "logits/chosen": -1.9160122871398926, + "logits/rejected": -1.910578727722168, + "logps/chosen": -67.06968688964844, + "logps/rejected": -137.09518432617188, + "loss": 0.4073, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.2598931789398193, + "rewards/margins": -0.06297516822814941, + "rewards/rejected": 2.3228683471679688, + "step": 13486 + }, + { + "epoch": 0.78, + "learning_rate": 1.1655407671865597e-08, + "logits/chosen": -2.0903708934783936, + "logits/rejected": -2.090196371078491, + "logps/chosen": -47.27116775512695, + "logps/rejected": -340.381103515625, + "loss": 0.1196, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1198055744171143, + "rewards/margins": 7.4660797119140625, + "rewards/rejected": -6.346273899078369, + "step": 13487 + }, + { + "epoch": 0.78, + "learning_rate": 1.1649360237155209e-08, + "logits/chosen": -1.670684814453125, + "logits/rejected": -1.6796917915344238, + "logps/chosen": -254.98509216308594, + "logps/rejected": -422.9835205078125, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4109513759613037, + "rewards/margins": 5.555070877075195, + "rewards/rejected": -3.1441192626953125, + "step": 13488 + }, + { + "epoch": 0.78, + "learning_rate": 1.1643314164847112e-08, + "logits/chosen": -1.7815839052200317, + "logits/rejected": -1.7810571193695068, + "logps/chosen": -65.96408081054688, + "logps/rejected": -213.56912231445312, + "loss": 0.4406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14445267617702484, + "rewards/margins": 1.1546028852462769, + "rewards/rejected": -1.0101501941680908, + "step": 13489 + }, + { + "epoch": 0.79, + "learning_rate": 1.1637269455156145e-08, + "logits/chosen": -1.9255285263061523, + "logits/rejected": -1.9789906740188599, + "logps/chosen": -154.94076538085938, + "logps/rejected": -251.3009490966797, + "loss": 0.2134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0337203741073608, + "rewards/margins": 0.8075011968612671, + "rewards/rejected": 0.22621917724609375, + "step": 13490 + }, + { + "epoch": 0.79, + "learning_rate": 1.1631226108297005e-08, + "logits/chosen": -1.9432710409164429, + "logits/rejected": -1.9421184062957764, + "logps/chosen": -95.82376861572266, + "logps/rejected": -264.25823974609375, + "loss": 0.1909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2167991399765015, + "rewards/margins": 1.4250541925430298, + "rewards/rejected": -0.20825500786304474, + "step": 13491 + }, + { + "epoch": 0.79, + "learning_rate": 1.1625184124484395e-08, + "logits/chosen": -1.7840473651885986, + "logits/rejected": -1.7778970003128052, + "logps/chosen": -6.918704986572266, + "logps/rejected": -202.61758422851562, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12711267173290253, + "rewards/margins": 3.9640769958496094, + "rewards/rejected": -3.8369643688201904, + "step": 13492 + }, + { + "epoch": 0.79, + "learning_rate": 1.1619143503932955e-08, + "logits/chosen": -1.7580236196517944, + "logits/rejected": -1.7639673948287964, + "logps/chosen": -38.884483337402344, + "logps/rejected": -442.63836669921875, + "loss": 0.1635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6193996667861938, + "rewards/margins": 9.32080364227295, + "rewards/rejected": -8.701403617858887, + "step": 13493 + }, + { + "epoch": 0.79, + "learning_rate": 1.1613104246857296e-08, + "logits/chosen": -1.872602105140686, + "logits/rejected": -1.871217966079712, + "logps/chosen": -193.3837890625, + "logps/rejected": -329.1481018066406, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0045944452285767, + "rewards/margins": 1.6978347301483154, + "rewards/rejected": -0.6932403445243835, + "step": 13494 + }, + { + "epoch": 0.79, + "learning_rate": 1.1607066353471928e-08, + "logits/chosen": -1.9122915267944336, + "logits/rejected": -1.9022068977355957, + "logps/chosen": -187.31809997558594, + "logps/rejected": -410.20703125, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8850433826446533, + "rewards/margins": 6.962435722351074, + "rewards/rejected": -4.077392578125, + "step": 13495 + }, + { + "epoch": 0.79, + "learning_rate": 1.1601029823991365e-08, + "logits/chosen": -1.9463789463043213, + "logits/rejected": -1.9509211778640747, + "logps/chosen": -43.35309982299805, + "logps/rejected": -191.51171875, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7496574521064758, + "rewards/margins": 1.6949882507324219, + "rewards/rejected": -0.945330798625946, + "step": 13496 + }, + { + "epoch": 0.79, + "learning_rate": 1.1594994658630053e-08, + "logits/chosen": -1.7635935544967651, + "logits/rejected": -1.7581169605255127, + "logps/chosen": -232.15914916992188, + "logps/rejected": -440.48187255859375, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2223267555236816, + "rewards/margins": 3.9841065406799316, + "rewards/rejected": -1.76177978515625, + "step": 13497 + }, + { + "epoch": 0.79, + "learning_rate": 1.1588960857602404e-08, + "logits/chosen": -1.8574374914169312, + "logits/rejected": -1.8534389734268188, + "logps/chosen": -21.19019889831543, + "logps/rejected": -161.3033447265625, + "loss": 0.3157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18555907905101776, + "rewards/margins": 2.6867690086364746, + "rewards/rejected": -2.5012099742889404, + "step": 13498 + }, + { + "epoch": 0.79, + "learning_rate": 1.1582928421122739e-08, + "logits/chosen": -1.8036093711853027, + "logits/rejected": -1.8132350444793701, + "logps/chosen": -59.552764892578125, + "logps/rejected": -238.24749755859375, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1794731169939041, + "rewards/margins": 3.776998281478882, + "rewards/rejected": -3.597525119781494, + "step": 13499 + }, + { + "epoch": 0.79, + "learning_rate": 1.1576897349405379e-08, + "logits/chosen": -1.752214789390564, + "logits/rejected": -1.7543494701385498, + "logps/chosen": -81.86611938476562, + "logps/rejected": -407.0323486328125, + "loss": 0.1009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1041748523712158, + "rewards/margins": 7.025439262390137, + "rewards/rejected": -5.9212646484375, + "step": 13500 + }, + { + "epoch": 0.79, + "learning_rate": 1.1570867642664567e-08, + "logits/chosen": -1.7605931758880615, + "logits/rejected": -1.7590453624725342, + "logps/chosen": -202.63900756835938, + "logps/rejected": -282.6109619140625, + "loss": 0.2606, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.240915060043335, + "rewards/margins": 0.45823681354522705, + "rewards/rejected": 1.782678246498108, + "step": 13501 + }, + { + "epoch": 0.79, + "learning_rate": 1.1564839301114526e-08, + "logits/chosen": -1.8113007545471191, + "logits/rejected": -1.7088782787322998, + "logps/chosen": -311.03826904296875, + "logps/rejected": -674.1401977539062, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.035473585128784, + "rewards/margins": 3.345294237136841, + "rewards/rejected": -0.3098205626010895, + "step": 13502 + }, + { + "epoch": 0.79, + "learning_rate": 1.155881232496939e-08, + "logits/chosen": -2.0134708881378174, + "logits/rejected": -2.008570671081543, + "logps/chosen": -22.96324920654297, + "logps/rejected": -133.8040771484375, + "loss": 0.2446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5894569754600525, + "rewards/margins": 1.7379662990570068, + "rewards/rejected": -1.1485092639923096, + "step": 13503 + }, + { + "epoch": 0.79, + "learning_rate": 1.155278671444328e-08, + "logits/chosen": -1.9876196384429932, + "logits/rejected": -1.9678902626037598, + "logps/chosen": -35.799015045166016, + "logps/rejected": -292.3764343261719, + "loss": 0.1114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3496719598770142, + "rewards/margins": 4.985081672668457, + "rewards/rejected": -3.6354095935821533, + "step": 13504 + }, + { + "epoch": 0.79, + "learning_rate": 1.1546762469750243e-08, + "logits/chosen": -1.8386262655258179, + "logits/rejected": -1.8179267644882202, + "logps/chosen": -400.3526916503906, + "logps/rejected": -605.0422973632812, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.85642409324646, + "rewards/margins": 2.593893527984619, + "rewards/rejected": 0.26253053545951843, + "step": 13505 + }, + { + "epoch": 0.79, + "learning_rate": 1.1540739591104303e-08, + "logits/chosen": -1.9952479600906372, + "logits/rejected": -1.995481014251709, + "logps/chosen": -74.95378112792969, + "logps/rejected": -159.57691955566406, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1897674798965454, + "rewards/margins": 0.6052688956260681, + "rewards/rejected": 0.5844985842704773, + "step": 13506 + }, + { + "epoch": 0.79, + "learning_rate": 1.153471807871943e-08, + "logits/chosen": -1.9841994047164917, + "logits/rejected": -1.9747971296310425, + "logps/chosen": -33.24929428100586, + "logps/rejected": -209.48660278320312, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6685425043106079, + "rewards/margins": 4.154026985168457, + "rewards/rejected": -3.4854843616485596, + "step": 13507 + }, + { + "epoch": 0.79, + "learning_rate": 1.15286979328095e-08, + "logits/chosen": -1.944395899772644, + "logits/rejected": -1.9305940866470337, + "logps/chosen": -11.806856155395508, + "logps/rejected": -221.2047119140625, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29098406434059143, + "rewards/margins": 2.8650121688842773, + "rewards/rejected": -2.5740280151367188, + "step": 13508 + }, + { + "epoch": 0.79, + "learning_rate": 1.1522679153588426e-08, + "logits/chosen": -1.7278625965118408, + "logits/rejected": -1.736770749092102, + "logps/chosen": -204.08482360839844, + "logps/rejected": -289.7272644042969, + "loss": 0.0656, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.97197425365448, + "rewards/margins": 2.582158088684082, + "rewards/rejected": -0.6101837158203125, + "step": 13509 + }, + { + "epoch": 0.79, + "learning_rate": 1.1516661741269989e-08, + "logits/chosen": -1.7457596063613892, + "logits/rejected": -1.7415101528167725, + "logps/chosen": -185.98110961914062, + "logps/rejected": -231.5006103515625, + "loss": 0.2192, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4185028076171875, + "rewards/margins": 0.6445831060409546, + "rewards/rejected": 1.773919701576233, + "step": 13510 + }, + { + "epoch": 0.79, + "learning_rate": 1.1510645696067983e-08, + "logits/chosen": -1.9480478763580322, + "logits/rejected": -1.9457610845565796, + "logps/chosen": -22.55243682861328, + "logps/rejected": -68.25889587402344, + "loss": 0.6431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028076553717255592, + "rewards/margins": 0.3131679594516754, + "rewards/rejected": -0.34124451875686646, + "step": 13511 + }, + { + "epoch": 0.79, + "learning_rate": 1.1504631018196087e-08, + "logits/chosen": -1.9365712404251099, + "logits/rejected": -1.9312279224395752, + "logps/chosen": -1.0266674757003784, + "logps/rejected": -128.4227294921875, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07002045959234238, + "rewards/margins": 2.929914712905884, + "rewards/rejected": -2.9999351501464844, + "step": 13512 + }, + { + "epoch": 0.79, + "learning_rate": 1.1498617707868031e-08, + "logits/chosen": -1.930811882019043, + "logits/rejected": -1.9414836168289185, + "logps/chosen": -182.3328399658203, + "logps/rejected": -198.96759033203125, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.870874047279358, + "rewards/margins": 0.21417236328125, + "rewards/rejected": 1.656701683998108, + "step": 13513 + }, + { + "epoch": 0.79, + "learning_rate": 1.149260576529738e-08, + "logits/chosen": -1.7441043853759766, + "logits/rejected": -1.7393040657043457, + "logps/chosen": -38.89339828491211, + "logps/rejected": -229.6810302734375, + "loss": 0.1516, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3853352069854736, + "rewards/margins": 1.9990711212158203, + "rewards/rejected": -0.6137359738349915, + "step": 13514 + }, + { + "epoch": 0.79, + "learning_rate": 1.1486595190697739e-08, + "logits/chosen": -1.7654093503952026, + "logits/rejected": -1.7614974975585938, + "logps/chosen": -274.6253662109375, + "logps/rejected": -415.709716796875, + "loss": 0.1654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4736084043979645, + "rewards/margins": 2.3212127685546875, + "rewards/rejected": -1.8476043939590454, + "step": 13515 + }, + { + "epoch": 0.79, + "learning_rate": 1.1480585984282626e-08, + "logits/chosen": -1.8174843788146973, + "logits/rejected": -1.8347220420837402, + "logps/chosen": -252.51815795898438, + "logps/rejected": -391.60394287109375, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1950409412384033, + "rewards/margins": 4.952905654907227, + "rewards/rejected": -2.757864475250244, + "step": 13516 + }, + { + "epoch": 0.79, + "learning_rate": 1.1474578146265535e-08, + "logits/chosen": -1.9781869649887085, + "logits/rejected": -1.9771790504455566, + "logps/chosen": -319.93878173828125, + "logps/rejected": -372.8987121582031, + "loss": 0.3204, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.719250440597534, + "rewards/margins": 0.1423797607421875, + "rewards/rejected": 2.5768706798553467, + "step": 13517 + }, + { + "epoch": 0.79, + "learning_rate": 1.1468571676859856e-08, + "logits/chosen": -1.7906723022460938, + "logits/rejected": -1.7977334260940552, + "logps/chosen": -122.98299407958984, + "logps/rejected": -408.46856689453125, + "loss": 0.6914, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3369537591934204, + "rewards/margins": 10.302541732788086, + "rewards/rejected": -11.639495849609375, + "step": 13518 + }, + { + "epoch": 0.79, + "learning_rate": 1.1462566576278992e-08, + "logits/chosen": -1.9755340814590454, + "logits/rejected": -1.9697258472442627, + "logps/chosen": -3.960362434387207, + "logps/rejected": -172.3765869140625, + "loss": 0.2719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4144289195537567, + "rewards/margins": 2.4172611236572266, + "rewards/rejected": -2.0028321743011475, + "step": 13519 + }, + { + "epoch": 0.79, + "learning_rate": 1.1456562844736268e-08, + "logits/chosen": -1.9204446077346802, + "logits/rejected": -1.9154719114303589, + "logps/chosen": -5.340498319128528e-05, + "logps/rejected": -141.03585815429688, + "loss": 0.4477, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.203975330099638e-06, + "rewards/margins": 1.4113765954971313, + "rewards/rejected": -1.4113754034042358, + "step": 13520 + }, + { + "epoch": 0.79, + "learning_rate": 1.1450560482444982e-08, + "logits/chosen": -1.971569538116455, + "logits/rejected": -1.9596058130264282, + "logps/chosen": -90.88114166259766, + "logps/rejected": -290.54248046875, + "loss": 0.144, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3214691877365112, + "rewards/margins": 2.2195160388946533, + "rewards/rejected": -0.8980469107627869, + "step": 13521 + }, + { + "epoch": 0.79, + "learning_rate": 1.1444559489618343e-08, + "logits/chosen": -1.9000403881072998, + "logits/rejected": -1.8901656866073608, + "logps/chosen": -11.80510139465332, + "logps/rejected": -300.37017822265625, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08598051220178604, + "rewards/margins": 7.2115936279296875, + "rewards/rejected": -7.297574043273926, + "step": 13522 + }, + { + "epoch": 0.79, + "learning_rate": 1.1438559866469543e-08, + "logits/chosen": -1.8523417711257935, + "logits/rejected": -1.8885319232940674, + "logps/chosen": -241.89637756347656, + "logps/rejected": -469.2129211425781, + "loss": 0.0862, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3124831914901733, + "rewards/margins": 2.1448562145233154, + "rewards/rejected": -0.8323730826377869, + "step": 13523 + }, + { + "epoch": 0.79, + "learning_rate": 1.1432561613211722e-08, + "logits/chosen": -1.6995394229888916, + "logits/rejected": -1.7027957439422607, + "logps/chosen": -0.0034251175820827484, + "logps/rejected": -35.794212341308594, + "loss": 0.4775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033979382715187967, + "rewards/margins": 0.9474402666091919, + "rewards/rejected": -0.9477800726890564, + "step": 13524 + }, + { + "epoch": 0.79, + "learning_rate": 1.142656473005798e-08, + "logits/chosen": -1.592215657234192, + "logits/rejected": -1.5696091651916504, + "logps/chosen": -392.37091064453125, + "logps/rejected": -571.8836059570312, + "loss": 0.1711, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6733155250549316, + "rewards/margins": 0.9326721429824829, + "rewards/rejected": 1.7406433820724487, + "step": 13525 + }, + { + "epoch": 0.79, + "learning_rate": 1.1420569217221326e-08, + "logits/chosen": -2.0721616744995117, + "logits/rejected": -2.086031198501587, + "logps/chosen": -189.5804443359375, + "logps/rejected": -449.9901123046875, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.778167724609375, + "rewards/margins": 3.0521669387817383, + "rewards/rejected": -1.2739990949630737, + "step": 13526 + }, + { + "epoch": 0.79, + "learning_rate": 1.141457507491475e-08, + "logits/chosen": -1.8972569704055786, + "logits/rejected": -1.8933595418930054, + "logps/chosen": -5.969256401062012, + "logps/rejected": -196.76962280273438, + "loss": 0.2617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4864777624607086, + "rewards/margins": 2.581796407699585, + "rewards/rejected": -2.095318555831909, + "step": 13527 + }, + { + "epoch": 0.79, + "learning_rate": 1.1408582303351243e-08, + "logits/chosen": -1.9056708812713623, + "logits/rejected": -1.8118865489959717, + "logps/chosen": -167.7741241455078, + "logps/rejected": -617.0996704101562, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.163433790206909, + "rewards/margins": 4.625658988952637, + "rewards/rejected": -2.4622254371643066, + "step": 13528 + }, + { + "epoch": 0.79, + "learning_rate": 1.1402590902743647e-08, + "logits/chosen": -1.9627736806869507, + "logits/rejected": -1.9710582494735718, + "logps/chosen": -56.82810592651367, + "logps/rejected": -145.9625244140625, + "loss": 0.2393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3671306371688843, + "rewards/margins": 1.296749472618103, + "rewards/rejected": 0.07038116455078125, + "step": 13529 + }, + { + "epoch": 0.79, + "learning_rate": 1.139660087330484e-08, + "logits/chosen": -1.8672758340835571, + "logits/rejected": -1.8612779378890991, + "logps/chosen": -236.501953125, + "logps/rejected": -538.1285400390625, + "loss": 0.1423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9067947268486023, + "rewards/margins": 1.6342605352401733, + "rewards/rejected": -0.727465808391571, + "step": 13530 + }, + { + "epoch": 0.79, + "learning_rate": 1.1390612215247564e-08, + "logits/chosen": -1.7671712636947632, + "logits/rejected": -1.769331455230713, + "logps/chosen": -66.70299530029297, + "logps/rejected": -335.42852783203125, + "loss": 0.2362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0970451831817627, + "rewards/margins": 1.1576652526855469, + "rewards/rejected": -0.06062011793255806, + "step": 13531 + }, + { + "epoch": 0.79, + "learning_rate": 1.1384624928784637e-08, + "logits/chosen": -1.9829548597335815, + "logits/rejected": -1.981203317642212, + "logps/chosen": -34.91315841674805, + "logps/rejected": -257.93695068359375, + "loss": 0.2199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5841812491416931, + "rewards/margins": 3.271472215652466, + "rewards/rejected": -2.687290906906128, + "step": 13532 + }, + { + "epoch": 0.79, + "learning_rate": 1.13786390141287e-08, + "logits/chosen": -2.0849993228912354, + "logits/rejected": -2.0763211250305176, + "logps/chosen": -2.5152992748189718e-05, + "logps/rejected": -225.75555419921875, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6093035810627043e-06, + "rewards/margins": 7.910215854644775, + "rewards/rejected": -7.910214424133301, + "step": 13533 + }, + { + "epoch": 0.79, + "learning_rate": 1.1372654471492427e-08, + "logits/chosen": -1.8704524040222168, + "logits/rejected": -1.880779504776001, + "logps/chosen": -42.783103942871094, + "logps/rejected": -189.03411865234375, + "loss": 0.551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06943664699792862, + "rewards/margins": 0.5179046988487244, + "rewards/rejected": -0.44846802949905396, + "step": 13534 + }, + { + "epoch": 0.79, + "learning_rate": 1.136667130108841e-08, + "logits/chosen": -1.748291254043579, + "logits/rejected": -1.7400816679000854, + "logps/chosen": -241.65261840820312, + "logps/rejected": -363.05419921875, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4695680141448975, + "rewards/margins": 2.7039246559143066, + "rewards/rejected": 0.765643298625946, + "step": 13535 + }, + { + "epoch": 0.79, + "learning_rate": 1.1360689503129223e-08, + "logits/chosen": -1.6991443634033203, + "logits/rejected": -1.717128038406372, + "logps/chosen": -211.0133514404297, + "logps/rejected": -291.22528076171875, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8097946643829346, + "rewards/margins": 1.969630479812622, + "rewards/rejected": -0.1598358154296875, + "step": 13536 + }, + { + "epoch": 0.79, + "learning_rate": 1.1354709077827334e-08, + "logits/chosen": -1.943303108215332, + "logits/rejected": -1.9419339895248413, + "logps/chosen": -133.19491577148438, + "logps/rejected": -341.8934326171875, + "loss": 0.1053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3851836919784546, + "rewards/margins": 1.9248870611190796, + "rewards/rejected": -0.539703369140625, + "step": 13537 + }, + { + "epoch": 0.79, + "learning_rate": 1.1348730025395215e-08, + "logits/chosen": -1.9666129350662231, + "logits/rejected": -1.9576665163040161, + "logps/chosen": -109.50945281982422, + "logps/rejected": -226.36463928222656, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4573242664337158, + "rewards/margins": 0.9093552231788635, + "rewards/rejected": 0.5479690432548523, + "step": 13538 + }, + { + "epoch": 0.79, + "learning_rate": 1.1342752346045265e-08, + "logits/chosen": -1.8779648542404175, + "logits/rejected": -1.8892549276351929, + "logps/chosen": -0.3882095515727997, + "logps/rejected": -178.81365966796875, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03063659928739071, + "rewards/margins": 2.7736449241638184, + "rewards/rejected": -2.7430083751678467, + "step": 13539 + }, + { + "epoch": 0.79, + "learning_rate": 1.1336776039989865e-08, + "logits/chosen": -1.9001935720443726, + "logits/rejected": -1.8837735652923584, + "logps/chosen": -52.37174987792969, + "logps/rejected": -255.74142456054688, + "loss": 0.1403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7552833557128906, + "rewards/margins": 5.896006107330322, + "rewards/rejected": -5.140722751617432, + "step": 13540 + }, + { + "epoch": 0.79, + "learning_rate": 1.1330801107441284e-08, + "logits/chosen": -1.9811081886291504, + "logits/rejected": -1.9830049276351929, + "logps/chosen": -186.73025512695312, + "logps/rejected": -285.38299560546875, + "loss": 0.2099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6287018060684204, + "rewards/margins": 0.8641357421875, + "rewards/rejected": 0.7645660638809204, + "step": 13541 + }, + { + "epoch": 0.79, + "learning_rate": 1.1324827548611798e-08, + "logits/chosen": -1.8422800302505493, + "logits/rejected": -1.8159008026123047, + "logps/chosen": -131.3922882080078, + "logps/rejected": -199.0343780517578, + "loss": 0.3646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6525329947471619, + "rewards/margins": 1.0699448585510254, + "rewards/rejected": -0.41741180419921875, + "step": 13542 + }, + { + "epoch": 0.79, + "learning_rate": 1.131885536371362e-08, + "logits/chosen": -1.7907119989395142, + "logits/rejected": -1.7870558500289917, + "logps/chosen": -14.064024925231934, + "logps/rejected": -207.40097045898438, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07349472492933273, + "rewards/margins": 2.5347740650177, + "rewards/rejected": -2.4612793922424316, + "step": 13543 + }, + { + "epoch": 0.79, + "learning_rate": 1.1312884552958923e-08, + "logits/chosen": -1.9165493249893188, + "logits/rejected": -1.915722131729126, + "logps/chosen": -12.370906829833984, + "logps/rejected": -252.59024047851562, + "loss": 0.3733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036673929542303085, + "rewards/margins": 2.2154886722564697, + "rewards/rejected": -2.178814649581909, + "step": 13544 + }, + { + "epoch": 0.79, + "learning_rate": 1.130691511655979e-08, + "logits/chosen": -1.9082027673721313, + "logits/rejected": -1.8784204721450806, + "logps/chosen": -178.8103790283203, + "logps/rejected": -343.5774841308594, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0940964221954346, + "rewards/margins": 0.4530838131904602, + "rewards/rejected": 0.6410126090049744, + "step": 13545 + }, + { + "epoch": 0.79, + "learning_rate": 1.1300947054728289e-08, + "logits/chosen": -2.1158649921417236, + "logits/rejected": -2.114393711090088, + "logps/chosen": -0.00010633024066919461, + "logps/rejected": -258.23284912109375, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7773401143349474e-06, + "rewards/margins": 6.003866672515869, + "rewards/rejected": -6.003869533538818, + "step": 13546 + }, + { + "epoch": 0.79, + "learning_rate": 1.1294980367676477e-08, + "logits/chosen": -1.8212531805038452, + "logits/rejected": -1.818503499031067, + "logps/chosen": -287.35260009765625, + "logps/rejected": -343.4612731933594, + "loss": 0.2629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9022979736328125, + "rewards/margins": 0.5761535167694092, + "rewards/rejected": 0.32614442706108093, + "step": 13547 + }, + { + "epoch": 0.79, + "learning_rate": 1.1289015055616269e-08, + "logits/chosen": -1.804051160812378, + "logits/rejected": -1.8014074563980103, + "logps/chosen": -4.45364236831665, + "logps/rejected": -124.67327880859375, + "loss": 0.3813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1393566131591797, + "rewards/margins": 1.7636181116104126, + "rewards/rejected": -1.624261498451233, + "step": 13548 + }, + { + "epoch": 0.79, + "learning_rate": 1.1283051118759618e-08, + "logits/chosen": -1.7472556829452515, + "logits/rejected": -1.743264079093933, + "logps/chosen": -36.407196044921875, + "logps/rejected": -249.18487548828125, + "loss": 0.5183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.039699554443359375, + "rewards/margins": 0.5465766787528992, + "rewards/rejected": -0.5862762331962585, + "step": 13549 + }, + { + "epoch": 0.79, + "learning_rate": 1.1277088557318353e-08, + "logits/chosen": -1.9434186220169067, + "logits/rejected": -1.9431577920913696, + "logps/chosen": -5.701447486877441, + "logps/rejected": -169.17282104492188, + "loss": 0.3151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09129500389099121, + "rewards/margins": 2.617994785308838, + "rewards/rejected": -2.5266997814178467, + "step": 13550 + }, + { + "epoch": 0.79, + "learning_rate": 1.127112737150434e-08, + "logits/chosen": -1.822561502456665, + "logits/rejected": -1.8353217840194702, + "logps/chosen": -60.77728271484375, + "logps/rejected": -64.23933410644531, + "loss": 0.8242, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.20037841796875, + "rewards/margins": -0.28953248262405396, + "rewards/rejected": 0.08915405720472336, + "step": 13551 + }, + { + "epoch": 0.79, + "learning_rate": 1.126516756152931e-08, + "logits/chosen": -2.008688449859619, + "logits/rejected": -1.954371452331543, + "logps/chosen": -192.06008911132812, + "logps/rejected": -350.6164855957031, + "loss": 0.055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.95341956615448, + "rewards/margins": 3.811711311340332, + "rewards/rejected": -1.8582916259765625, + "step": 13552 + }, + { + "epoch": 0.79, + "learning_rate": 1.1259209127605008e-08, + "logits/chosen": -1.9662375450134277, + "logits/rejected": -1.9600213766098022, + "logps/chosen": -0.00023840450739953667, + "logps/rejected": -324.85284423828125, + "loss": 0.3445, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5340963727794588e-05, + "rewards/margins": 3.786633253097534, + "rewards/rejected": -3.7866485118865967, + "step": 13553 + }, + { + "epoch": 0.79, + "learning_rate": 1.1253252069943093e-08, + "logits/chosen": -1.9192315340042114, + "logits/rejected": -1.9041292667388916, + "logps/chosen": -21.311630249023438, + "logps/rejected": -212.89674377441406, + "loss": 0.3907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0956493392586708, + "rewards/margins": 1.4185421466827393, + "rewards/rejected": -1.3228927850723267, + "step": 13554 + }, + { + "epoch": 0.79, + "learning_rate": 1.1247296388755212e-08, + "logits/chosen": -1.8507107496261597, + "logits/rejected": -1.8634042739868164, + "logps/chosen": -201.404541015625, + "logps/rejected": -413.4468994140625, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.348052978515625, + "rewards/margins": 3.5751771926879883, + "rewards/rejected": -1.2271240949630737, + "step": 13555 + }, + { + "epoch": 0.79, + "learning_rate": 1.1241342084252907e-08, + "logits/chosen": -1.6517517566680908, + "logits/rejected": -1.6517678499221802, + "logps/chosen": -0.024180706590414047, + "logps/rejected": -225.85406494140625, + "loss": 0.4369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005769931012764573, + "rewards/margins": 1.5395907163619995, + "rewards/rejected": -1.5390137434005737, + "step": 13556 + }, + { + "epoch": 0.79, + "learning_rate": 1.1235389156647723e-08, + "logits/chosen": -1.9717880487442017, + "logits/rejected": -2.013129711151123, + "logps/chosen": -159.4220428466797, + "logps/rejected": -299.2361145019531, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.295088291168213, + "rewards/margins": 5.1262102127075195, + "rewards/rejected": -2.8311219215393066, + "step": 13557 + }, + { + "epoch": 0.79, + "learning_rate": 1.1229437606151131e-08, + "logits/chosen": -1.804844856262207, + "logits/rejected": -1.8068263530731201, + "logps/chosen": -13.797974586486816, + "logps/rejected": -280.1407775878906, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19651269912719727, + "rewards/margins": 3.682615041732788, + "rewards/rejected": -3.486102342605591, + "step": 13558 + }, + { + "epoch": 0.79, + "learning_rate": 1.1223487432974583e-08, + "logits/chosen": -1.8977519273757935, + "logits/rejected": -1.8972495794296265, + "logps/chosen": -20.13360023498535, + "logps/rejected": -37.09823989868164, + "loss": 0.5601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3027631938457489, + "rewards/margins": 0.2998594343662262, + "rewards/rejected": 0.0029037476051598787, + "step": 13559 + }, + { + "epoch": 0.79, + "learning_rate": 1.121753863732942e-08, + "logits/chosen": -1.7575247287750244, + "logits/rejected": -1.7568248510360718, + "logps/chosen": -19.36220359802246, + "logps/rejected": -240.65538024902344, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.150569349527359, + "rewards/margins": 4.308190822601318, + "rewards/rejected": -4.4587602615356445, + "step": 13560 + }, + { + "epoch": 0.79, + "learning_rate": 1.1211591219426992e-08, + "logits/chosen": -1.7548539638519287, + "logits/rejected": -1.7091256380081177, + "logps/chosen": -176.46287536621094, + "logps/rejected": -438.55328369140625, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0238099098205566, + "rewards/margins": 2.9628236293792725, + "rewards/rejected": 0.06098632887005806, + "step": 13561 + }, + { + "epoch": 0.79, + "learning_rate": 1.1205645179478573e-08, + "logits/chosen": -1.9109352827072144, + "logits/rejected": -1.9124242067337036, + "logps/chosen": -0.014272419735789299, + "logps/rejected": -312.5033264160156, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012477881973609328, + "rewards/margins": 5.821457386016846, + "rewards/rejected": -5.822705268859863, + "step": 13562 + }, + { + "epoch": 0.79, + "learning_rate": 1.1199700517695421e-08, + "logits/chosen": -1.6982650756835938, + "logits/rejected": -1.701028823852539, + "logps/chosen": -0.25216394662857056, + "logps/rejected": -194.56671142578125, + "loss": 0.3499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021374665200710297, + "rewards/margins": 3.62813401222229, + "rewards/rejected": -3.6495087146759033, + "step": 13563 + }, + { + "epoch": 0.79, + "learning_rate": 1.119375723428868e-08, + "logits/chosen": -1.828130841255188, + "logits/rejected": -1.8184107542037964, + "logps/chosen": -340.7071838378906, + "logps/rejected": -391.95159912109375, + "loss": 0.3584, + "rewards/accuracies": 0.0, + "rewards/chosen": 3.066946506500244, + "rewards/margins": -0.0037567615509033203, + "rewards/rejected": 3.0707032680511475, + "step": 13564 + }, + { + "epoch": 0.79, + "learning_rate": 1.1187815329469508e-08, + "logits/chosen": -1.7470016479492188, + "logits/rejected": -1.744038462638855, + "logps/chosen": -31.296615600585938, + "logps/rejected": -162.126708984375, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5411915183067322, + "rewards/margins": 1.0787540674209595, + "rewards/rejected": -0.5375625491142273, + "step": 13565 + }, + { + "epoch": 0.79, + "learning_rate": 1.1181874803448981e-08, + "logits/chosen": -1.8534022569656372, + "logits/rejected": -1.8540637493133545, + "logps/chosen": -16.640092849731445, + "logps/rejected": -135.28099060058594, + "loss": 0.4028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12569160759449005, + "rewards/margins": 2.874577045440674, + "rewards/rejected": -3.0002686977386475, + "step": 13566 + }, + { + "epoch": 0.79, + "learning_rate": 1.1175935656438146e-08, + "logits/chosen": -1.9484307765960693, + "logits/rejected": -1.9351682662963867, + "logps/chosen": -38.79522705078125, + "logps/rejected": -225.11691284179688, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20772476494312286, + "rewards/margins": 3.7315406799316406, + "rewards/rejected": -3.523815870285034, + "step": 13567 + }, + { + "epoch": 0.79, + "learning_rate": 1.1169997888648003e-08, + "logits/chosen": -1.765460729598999, + "logits/rejected": -1.7695564031600952, + "logps/chosen": -43.80426025390625, + "logps/rejected": -160.27841186523438, + "loss": 0.4601, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3111492097377777, + "rewards/margins": 1.0510730743408203, + "rewards/rejected": -1.3622223138809204, + "step": 13568 + }, + { + "epoch": 0.79, + "learning_rate": 1.116406150028944e-08, + "logits/chosen": -2.0232322216033936, + "logits/rejected": -2.0273094177246094, + "logps/chosen": -26.483970642089844, + "logps/rejected": -287.40985107421875, + "loss": 0.228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41345444321632385, + "rewards/margins": 3.3661584854125977, + "rewards/rejected": -2.9527039527893066, + "step": 13569 + }, + { + "epoch": 0.79, + "learning_rate": 1.1158126491573406e-08, + "logits/chosen": -1.8835666179656982, + "logits/rejected": -1.8799588680267334, + "logps/chosen": -163.1762237548828, + "logps/rejected": -208.19033813476562, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3291611671447754, + "rewards/margins": 0.30314183235168457, + "rewards/rejected": 2.026019334793091, + "step": 13570 + }, + { + "epoch": 0.79, + "learning_rate": 1.1152192862710703e-08, + "logits/chosen": -1.8612048625946045, + "logits/rejected": -1.905346393585205, + "logps/chosen": -258.2823791503906, + "logps/rejected": -420.95318603515625, + "loss": 0.104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8245269656181335, + "rewards/margins": 2.8020966053009033, + "rewards/rejected": -1.977569580078125, + "step": 13571 + }, + { + "epoch": 0.79, + "learning_rate": 1.114626061391215e-08, + "logits/chosen": -1.8974313735961914, + "logits/rejected": -1.9339087009429932, + "logps/chosen": -268.56671142578125, + "logps/rejected": -382.00341796875, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2038514614105225, + "rewards/margins": 2.779388666152954, + "rewards/rejected": 0.4244628846645355, + "step": 13572 + }, + { + "epoch": 0.79, + "learning_rate": 1.1140329745388444e-08, + "logits/chosen": -1.96668541431427, + "logits/rejected": -1.9579704999923706, + "logps/chosen": -0.1147797554731369, + "logps/rejected": -196.41140747070312, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016647694632411003, + "rewards/margins": 5.1147589683532715, + "rewards/rejected": -5.098111152648926, + "step": 13573 + }, + { + "epoch": 0.79, + "learning_rate": 1.113440025735034e-08, + "logits/chosen": -1.7921009063720703, + "logits/rejected": -1.7948564291000366, + "logps/chosen": -42.90694046020508, + "logps/rejected": -121.59288787841797, + "loss": 0.3396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38317757844924927, + "rewards/margins": 1.5653209686279297, + "rewards/rejected": -1.1821434497833252, + "step": 13574 + }, + { + "epoch": 0.79, + "learning_rate": 1.1128472150008434e-08, + "logits/chosen": -1.7707809209823608, + "logits/rejected": -1.7769194841384888, + "logps/chosen": -186.7271270751953, + "logps/rejected": -429.9644775390625, + "loss": 0.065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1495742797851562, + "rewards/margins": 3.4546830654144287, + "rewards/rejected": -2.3051087856292725, + "step": 13575 + }, + { + "epoch": 0.79, + "learning_rate": 1.1122545423573338e-08, + "logits/chosen": -2.0660383701324463, + "logits/rejected": -2.0650250911712646, + "logps/chosen": -35.29239273071289, + "logps/rejected": -190.4727783203125, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8821060061454773, + "rewards/margins": 3.0672929286956787, + "rewards/rejected": -2.1851868629455566, + "step": 13576 + }, + { + "epoch": 0.79, + "learning_rate": 1.1116620078255601e-08, + "logits/chosen": -1.9381474256515503, + "logits/rejected": -1.9435888528823853, + "logps/chosen": -99.30509948730469, + "logps/rejected": -155.69342041015625, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25060731172561646, + "rewards/margins": 1.9671204090118408, + "rewards/rejected": -1.7165130376815796, + "step": 13577 + }, + { + "epoch": 0.79, + "learning_rate": 1.1110696114265732e-08, + "logits/chosen": -1.784542202949524, + "logits/rejected": -1.8308099508285522, + "logps/chosen": -358.857666015625, + "logps/rejected": -475.609375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.901715040206909, + "rewards/margins": 5.755719184875488, + "rewards/rejected": -2.85400390625, + "step": 13578 + }, + { + "epoch": 0.79, + "learning_rate": 1.1104773531814149e-08, + "logits/chosen": -1.900327444076538, + "logits/rejected": -1.8852678537368774, + "logps/chosen": -115.18248748779297, + "logps/rejected": -284.2945556640625, + "loss": 0.4731, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7880508303642273, + "rewards/margins": 0.12468105554580688, + "rewards/rejected": 0.6633697748184204, + "step": 13579 + }, + { + "epoch": 0.79, + "learning_rate": 1.1098852331111269e-08, + "logits/chosen": -2.121609926223755, + "logits/rejected": -2.116164207458496, + "logps/chosen": -16.285051345825195, + "logps/rejected": -168.54867553710938, + "loss": 0.3687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04523115232586861, + "rewards/margins": 3.147287607192993, + "rewards/rejected": -3.192518711090088, + "step": 13580 + }, + { + "epoch": 0.79, + "learning_rate": 1.1092932512367442e-08, + "logits/chosen": -1.9378598928451538, + "logits/rejected": -1.9285173416137695, + "logps/chosen": -235.44448852539062, + "logps/rejected": -502.424560546875, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7273406982421875, + "rewards/margins": 3.7830352783203125, + "rewards/rejected": -1.055694580078125, + "step": 13581 + }, + { + "epoch": 0.79, + "learning_rate": 1.1087014075792978e-08, + "logits/chosen": -1.8733775615692139, + "logits/rejected": -1.8711212873458862, + "logps/chosen": -163.4261932373047, + "logps/rejected": -208.06521606445312, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.353964328765869, + "rewards/margins": 0.38906872272491455, + "rewards/rejected": 1.9648956060409546, + "step": 13582 + }, + { + "epoch": 0.79, + "learning_rate": 1.1081097021598107e-08, + "logits/chosen": -1.8815284967422485, + "logits/rejected": -1.879343867301941, + "logps/chosen": -10.871725082397461, + "logps/rejected": -25.678787231445312, + "loss": 0.7645, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.12727737426757812, + "rewards/margins": -0.2333787977695465, + "rewards/rejected": 0.10610141605138779, + "step": 13583 + }, + { + "epoch": 0.79, + "learning_rate": 1.107518134999304e-08, + "logits/chosen": -1.8621408939361572, + "logits/rejected": -1.8567311763763428, + "logps/chosen": -0.0011081034317612648, + "logps/rejected": -245.91140747070312, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3990887055115309e-05, + "rewards/margins": 5.0635666847229, + "rewards/rejected": -5.063580513000488, + "step": 13584 + }, + { + "epoch": 0.79, + "learning_rate": 1.1069267061187932e-08, + "logits/chosen": -1.8376885652542114, + "logits/rejected": -1.782818078994751, + "logps/chosen": -149.40960693359375, + "logps/rejected": -306.7857666015625, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.133409261703491, + "rewards/margins": 2.310612678527832, + "rewards/rejected": -0.17720337212085724, + "step": 13585 + }, + { + "epoch": 0.79, + "learning_rate": 1.1063354155392907e-08, + "logits/chosen": -1.9039223194122314, + "logits/rejected": -1.8968502283096313, + "logps/chosen": -121.28358459472656, + "logps/rejected": -366.5171813964844, + "loss": 0.0821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.539544701576233, + "rewards/margins": 10.648075103759766, + "rewards/rejected": -9.108530044555664, + "step": 13586 + }, + { + "epoch": 0.79, + "learning_rate": 1.1057442632817982e-08, + "logits/chosen": -2.1294591426849365, + "logits/rejected": -2.1080195903778076, + "logps/chosen": -37.42948913574219, + "logps/rejected": -119.67713928222656, + "loss": 0.5157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06433868408203125, + "rewards/margins": 0.7749527096748352, + "rewards/rejected": -0.710614025592804, + "step": 13587 + }, + { + "epoch": 0.79, + "learning_rate": 1.1051532493673177e-08, + "logits/chosen": -1.8899829387664795, + "logits/rejected": -1.8858721256256104, + "logps/chosen": -69.00003814697266, + "logps/rejected": -379.8892822265625, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9200355410575867, + "rewards/margins": 1.8263038396835327, + "rewards/rejected": -0.906268298625946, + "step": 13588 + }, + { + "epoch": 0.79, + "learning_rate": 1.104562373816848e-08, + "logits/chosen": -1.8734564781188965, + "logits/rejected": -1.86832594871521, + "logps/chosen": -3.409667491912842, + "logps/rejected": -55.67060470581055, + "loss": 0.513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3303756531968247e-05, + "rewards/margins": 0.9846595525741577, + "rewards/rejected": -0.9846462607383728, + "step": 13589 + }, + { + "epoch": 0.79, + "learning_rate": 1.1039716366513757e-08, + "logits/chosen": -1.8427373170852661, + "logits/rejected": -1.8451038599014282, + "logps/chosen": -148.60992431640625, + "logps/rejected": -234.24188232421875, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49671632051467896, + "rewards/margins": 0.6531982421875, + "rewards/rejected": -0.15648193657398224, + "step": 13590 + }, + { + "epoch": 0.79, + "learning_rate": 1.10338103789189e-08, + "logits/chosen": -1.8502277135849, + "logits/rejected": -1.8655692338943481, + "logps/chosen": -232.1591033935547, + "logps/rejected": -421.31475830078125, + "loss": 0.1677, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.269944906234741, + "rewards/margins": 1.0069199800491333, + "rewards/rejected": 1.263024926185608, + "step": 13591 + }, + { + "epoch": 0.79, + "learning_rate": 1.1027905775593676e-08, + "logits/chosen": -1.8389301300048828, + "logits/rejected": -1.8443666696548462, + "logps/chosen": -245.78997802734375, + "logps/rejected": -592.4581298828125, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8209213614463806, + "rewards/margins": 5.920665264129639, + "rewards/rejected": -5.099743843078613, + "step": 13592 + }, + { + "epoch": 0.79, + "learning_rate": 1.10220025567479e-08, + "logits/chosen": -1.8423593044281006, + "logits/rejected": -1.8345527648925781, + "logps/chosen": -17.068601608276367, + "logps/rejected": -177.36788940429688, + "loss": 0.4323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35533973574638367, + "rewards/margins": 2.091442108154297, + "rewards/rejected": -2.446781873703003, + "step": 13593 + }, + { + "epoch": 0.79, + "learning_rate": 1.1016100722591242e-08, + "logits/chosen": -1.7517223358154297, + "logits/rejected": -1.7831307649612427, + "logps/chosen": -212.94320678710938, + "logps/rejected": -439.4302062988281, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.419398546218872, + "rewards/margins": 4.846189975738525, + "rewards/rejected": -2.4267914295196533, + "step": 13594 + }, + { + "epoch": 0.79, + "learning_rate": 1.1010200273333376e-08, + "logits/chosen": -1.652709722518921, + "logits/rejected": -1.665562629699707, + "logps/chosen": -2.100778579711914, + "logps/rejected": -340.8681335449219, + "loss": 0.3449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0288846492767334, + "rewards/margins": 6.099621772766113, + "rewards/rejected": -6.128506660461426, + "step": 13595 + }, + { + "epoch": 0.79, + "learning_rate": 1.1004301209183914e-08, + "logits/chosen": -1.8555868864059448, + "logits/rejected": -1.8558591604232788, + "logps/chosen": -42.87616729736328, + "logps/rejected": -128.60543823242188, + "loss": 0.4289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2931484282016754, + "rewards/margins": 1.0446614027023315, + "rewards/rejected": -0.7515129446983337, + "step": 13596 + }, + { + "epoch": 0.79, + "learning_rate": 1.099840353035244e-08, + "logits/chosen": -1.9359967708587646, + "logits/rejected": -2.004957914352417, + "logps/chosen": -243.41351318359375, + "logps/rejected": -338.35870361328125, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.661389112472534, + "rewards/margins": 3.0690338611602783, + "rewards/rejected": -0.407644659280777, + "step": 13597 + }, + { + "epoch": 0.79, + "learning_rate": 1.0992507237048432e-08, + "logits/chosen": -1.9768494367599487, + "logits/rejected": -1.9787163734436035, + "logps/chosen": -47.2081298828125, + "logps/rejected": -202.845947265625, + "loss": 0.1921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7531375885009766, + "rewards/margins": 3.735657215118408, + "rewards/rejected": -2.9825196266174316, + "step": 13598 + }, + { + "epoch": 0.79, + "learning_rate": 1.0986612329481377e-08, + "logits/chosen": -1.7140138149261475, + "logits/rejected": -1.7151730060577393, + "logps/chosen": -199.25439453125, + "logps/rejected": -366.06866455078125, + "loss": 0.0615, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.676378011703491, + "rewards/margins": 2.203050374984741, + "rewards/rejected": 0.47332763671875, + "step": 13599 + }, + { + "epoch": 0.79, + "learning_rate": 1.0980718807860683e-08, + "logits/chosen": -1.9641786813735962, + "logits/rejected": -1.9623717069625854, + "logps/chosen": -55.65231704711914, + "logps/rejected": -89.05901336669922, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6365990042686462, + "rewards/margins": 1.8352477550506592, + "rewards/rejected": -1.1986488103866577, + "step": 13600 + }, + { + "epoch": 0.79, + "learning_rate": 1.0974826672395737e-08, + "logits/chosen": -1.8167909383773804, + "logits/rejected": -1.815721869468689, + "logps/chosen": -31.635223388671875, + "logps/rejected": -157.36898803710938, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7522441744804382, + "rewards/margins": 1.0958598852157593, + "rewards/rejected": -0.34361574053764343, + "step": 13601 + }, + { + "epoch": 0.79, + "learning_rate": 1.0968935923295825e-08, + "logits/chosen": -1.8614975214004517, + "logits/rejected": -1.8554986715316772, + "logps/chosen": -208.10760498046875, + "logps/rejected": -466.9151611328125, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.212780714035034, + "rewards/margins": 5.8012237548828125, + "rewards/rejected": -2.5884430408477783, + "step": 13602 + }, + { + "epoch": 0.79, + "learning_rate": 1.0963046560770233e-08, + "logits/chosen": -1.9767416715621948, + "logits/rejected": -1.96079421043396, + "logps/chosen": -85.12625885009766, + "logps/rejected": -228.9940185546875, + "loss": 0.0634, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1279594898223877, + "rewards/margins": 4.025511741638184, + "rewards/rejected": -1.897552490234375, + "step": 13603 + }, + { + "epoch": 0.79, + "learning_rate": 1.0957158585028176e-08, + "logits/chosen": -1.998248815536499, + "logits/rejected": -1.9907567501068115, + "logps/chosen": -47.449462890625, + "logps/rejected": -233.0277557373047, + "loss": 0.1812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7710472345352173, + "rewards/margins": 2.4041452407836914, + "rewards/rejected": -1.6330978870391846, + "step": 13604 + }, + { + "epoch": 0.79, + "learning_rate": 1.0951271996278838e-08, + "logits/chosen": -1.8841791152954102, + "logits/rejected": -1.8938027620315552, + "logps/chosen": -107.53526306152344, + "logps/rejected": -230.97093200683594, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.292314887046814, + "rewards/margins": 2.946835994720459, + "rewards/rejected": -1.6545212268829346, + "step": 13605 + }, + { + "epoch": 0.79, + "learning_rate": 1.0945386794731315e-08, + "logits/chosen": -1.9607919454574585, + "logits/rejected": -1.9662584066390991, + "logps/chosen": -204.82656860351562, + "logps/rejected": -468.2864685058594, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5611664056777954, + "rewards/margins": 3.9467592239379883, + "rewards/rejected": -2.3855926990509033, + "step": 13606 + }, + { + "epoch": 0.79, + "learning_rate": 1.0939502980594678e-08, + "logits/chosen": -1.765403389930725, + "logits/rejected": -1.7864837646484375, + "logps/chosen": -264.3419189453125, + "logps/rejected": -490.7872314453125, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1261353492736816, + "rewards/margins": 5.902585029602051, + "rewards/rejected": -2.776449680328369, + "step": 13607 + }, + { + "epoch": 0.79, + "learning_rate": 1.0933620554077982e-08, + "logits/chosen": -2.043638229370117, + "logits/rejected": -2.027906656265259, + "logps/chosen": -31.38456916809082, + "logps/rejected": -351.6949157714844, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46168556809425354, + "rewards/margins": 4.634893417358398, + "rewards/rejected": -5.096579074859619, + "step": 13608 + }, + { + "epoch": 0.79, + "learning_rate": 1.0927739515390172e-08, + "logits/chosen": -2.04730486869812, + "logits/rejected": -2.020853042602539, + "logps/chosen": -245.42164611816406, + "logps/rejected": -383.4659423828125, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.101201057434082, + "rewards/margins": 4.272349834442139, + "rewards/rejected": -0.17114868760108948, + "step": 13609 + }, + { + "epoch": 0.79, + "learning_rate": 1.0921859864740186e-08, + "logits/chosen": -2.0427567958831787, + "logits/rejected": -2.0384724140167236, + "logps/chosen": -3.1626336574554443, + "logps/rejected": -335.28265380859375, + "loss": 0.2602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.268574982881546, + "rewards/margins": 7.6316704750061035, + "rewards/rejected": -7.363095283508301, + "step": 13610 + }, + { + "epoch": 0.79, + "learning_rate": 1.091598160233686e-08, + "logits/chosen": -1.7664461135864258, + "logits/rejected": -1.7716803550720215, + "logps/chosen": -215.28640747070312, + "logps/rejected": -277.1339111328125, + "loss": 0.4246, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.939239501953125, + "rewards/margins": -0.28042006492614746, + "rewards/rejected": 3.2196595668792725, + "step": 13611 + }, + { + "epoch": 0.79, + "learning_rate": 1.091010472838908e-08, + "logits/chosen": -1.6848593950271606, + "logits/rejected": -1.6672905683517456, + "logps/chosen": -262.35406494140625, + "logps/rejected": -398.93035888671875, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0037782192230225, + "rewards/margins": 2.939657688140869, + "rewards/rejected": -0.9358795285224915, + "step": 13612 + }, + { + "epoch": 0.79, + "learning_rate": 1.0904229243105573e-08, + "logits/chosen": -1.9908547401428223, + "logits/rejected": -1.9828757047653198, + "logps/chosen": -4.013706207275391, + "logps/rejected": -299.39764404296875, + "loss": 0.7954, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3837069869041443, + "rewards/margins": -0.06018403172492981, + "rewards/rejected": -0.3235229551792145, + "step": 13613 + }, + { + "epoch": 0.79, + "learning_rate": 1.0898355146695098e-08, + "logits/chosen": -1.834277868270874, + "logits/rejected": -1.8390413522720337, + "logps/chosen": -0.00015436949615832418, + "logps/rejected": -60.788082122802734, + "loss": 0.6413, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1945048704073997e-06, + "rewards/margins": 0.08550362288951874, + "rewards/rejected": -0.08550681918859482, + "step": 13614 + }, + { + "epoch": 0.79, + "learning_rate": 1.0892482439366285e-08, + "logits/chosen": -2.0251362323760986, + "logits/rejected": -2.0173027515411377, + "logps/chosen": -35.376346588134766, + "logps/rejected": -153.27362060546875, + "loss": 0.1029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1497303247451782, + "rewards/margins": 3.016139507293701, + "rewards/rejected": -1.8664093017578125, + "step": 13615 + }, + { + "epoch": 0.79, + "learning_rate": 1.0886611121327822e-08, + "logits/chosen": -1.8422307968139648, + "logits/rejected": -1.8401521444320679, + "logps/chosen": -18.32917022705078, + "logps/rejected": -253.17990112304688, + "loss": 0.2365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47924327850341797, + "rewards/margins": 8.421358108520508, + "rewards/rejected": -7.942114353179932, + "step": 13616 + }, + { + "epoch": 0.79, + "learning_rate": 1.088074119278824e-08, + "logits/chosen": -2.0129363536834717, + "logits/rejected": -2.006833076477051, + "logps/chosen": -0.03057321161031723, + "logps/rejected": -158.615234375, + "loss": 0.3528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023967837914824486, + "rewards/margins": 4.026051998138428, + "rewards/rejected": -4.002084255218506, + "step": 13617 + }, + { + "epoch": 0.79, + "learning_rate": 1.0874872653956085e-08, + "logits/chosen": -2.004636287689209, + "logits/rejected": -1.9863591194152832, + "logps/chosen": -11.359766006469727, + "logps/rejected": -300.4197998046875, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05680341646075249, + "rewards/margins": 3.8684916496276855, + "rewards/rejected": -3.811688184738159, + "step": 13618 + }, + { + "epoch": 0.79, + "learning_rate": 1.086900550503983e-08, + "logits/chosen": -1.8338775634765625, + "logits/rejected": -1.8243563175201416, + "logps/chosen": -0.0002111031935783103, + "logps/rejected": -251.89903259277344, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.545471661756892e-07, + "rewards/margins": 6.343916416168213, + "rewards/rejected": -6.343916416168213, + "step": 13619 + }, + { + "epoch": 0.79, + "learning_rate": 1.0863139746247929e-08, + "logits/chosen": -1.7716199159622192, + "logits/rejected": -1.770841360092163, + "logps/chosen": -49.69609832763672, + "logps/rejected": -118.13297271728516, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1467208862304688, + "rewards/margins": 0.8928000926971436, + "rewards/rejected": 0.2539207637310028, + "step": 13620 + }, + { + "epoch": 0.79, + "learning_rate": 1.0857275377788727e-08, + "logits/chosen": -2.0644125938415527, + "logits/rejected": -2.0515971183776855, + "logps/chosen": -23.50429916381836, + "logps/rejected": -190.81182861328125, + "loss": 0.6383, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3234144151210785, + "rewards/margins": -0.10749685764312744, + "rewards/rejected": 0.43091127276420593, + "step": 13621 + }, + { + "epoch": 0.79, + "learning_rate": 1.0851412399870574e-08, + "logits/chosen": -1.8303250074386597, + "logits/rejected": -1.8026645183563232, + "logps/chosen": -39.99032974243164, + "logps/rejected": -238.60826110839844, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7047416567802429, + "rewards/margins": 3.597126007080078, + "rewards/rejected": -2.8923842906951904, + "step": 13622 + }, + { + "epoch": 0.79, + "learning_rate": 1.0845550812701748e-08, + "logits/chosen": -1.678553581237793, + "logits/rejected": -1.6891164779663086, + "logps/chosen": -179.07455444335938, + "logps/rejected": -333.46820068359375, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1313767433166504, + "rewards/margins": 2.860743761062622, + "rewards/rejected": -0.7293670773506165, + "step": 13623 + }, + { + "epoch": 0.79, + "learning_rate": 1.08396906164905e-08, + "logits/chosen": -1.8308871984481812, + "logits/rejected": -1.8303024768829346, + "logps/chosen": -23.272674560546875, + "logps/rejected": -187.0413055419922, + "loss": 0.5155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09143257141113281, + "rewards/margins": 0.7250606417655945, + "rewards/rejected": -0.8164932131767273, + "step": 13624 + }, + { + "epoch": 0.79, + "learning_rate": 1.083383181144497e-08, + "logits/chosen": -2.061129331588745, + "logits/rejected": -2.050990104675293, + "logps/chosen": -19.637929916381836, + "logps/rejected": -198.9718780517578, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4536804258823395, + "rewards/margins": 2.5551393032073975, + "rewards/rejected": -2.101458787918091, + "step": 13625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0827974397773327e-08, + "logits/chosen": -1.8084394931793213, + "logits/rejected": -1.8354891538619995, + "logps/chosen": -320.7730712890625, + "logps/rejected": -371.29638671875, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0345215797424316, + "rewards/margins": 2.9249329566955566, + "rewards/rejected": -0.890411376953125, + "step": 13626 + }, + { + "epoch": 0.79, + "learning_rate": 1.0822118375683642e-08, + "logits/chosen": -1.9826538562774658, + "logits/rejected": -1.974502682685852, + "logps/chosen": -22.350183486938477, + "logps/rejected": -101.64947509765625, + "loss": 0.6274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28437596559524536, + "rewards/margins": 0.3058370351791382, + "rewards/rejected": -0.5902130007743835, + "step": 13627 + }, + { + "epoch": 0.79, + "learning_rate": 1.0816263745383946e-08, + "logits/chosen": -2.0266215801239014, + "logits/rejected": -2.0303993225097656, + "logps/chosen": -13.285201072692871, + "logps/rejected": -158.61630249023438, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2353283017873764, + "rewards/margins": 2.7904245853424072, + "rewards/rejected": -2.555096387863159, + "step": 13628 + }, + { + "epoch": 0.79, + "learning_rate": 1.0810410507082252e-08, + "logits/chosen": -1.9424420595169067, + "logits/rejected": -1.9267501831054688, + "logps/chosen": -35.51639175415039, + "logps/rejected": -220.10121154785156, + "loss": 0.4435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3085552155971527, + "rewards/margins": 2.480675220489502, + "rewards/rejected": -2.7892303466796875, + "step": 13629 + }, + { + "epoch": 0.79, + "learning_rate": 1.0804558660986446e-08, + "logits/chosen": -1.9570658206939697, + "logits/rejected": -1.933748483657837, + "logps/chosen": -48.669334411621094, + "logps/rejected": -409.0052185058594, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1463844776153564, + "rewards/margins": 10.745850563049316, + "rewards/rejected": -9.599466323852539, + "step": 13630 + }, + { + "epoch": 0.79, + "learning_rate": 1.0798708207304464e-08, + "logits/chosen": -1.7995840311050415, + "logits/rejected": -1.8034729957580566, + "logps/chosen": -24.084644317626953, + "logps/rejected": -198.70547485351562, + "loss": 0.2026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6200687289237976, + "rewards/margins": 4.189993858337402, + "rewards/rejected": -3.56992506980896, + "step": 13631 + }, + { + "epoch": 0.79, + "learning_rate": 1.079285914624411e-08, + "logits/chosen": -2.001387119293213, + "logits/rejected": -1.99637770652771, + "logps/chosen": -38.80394744873047, + "logps/rejected": -154.39376831054688, + "loss": 0.2265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5543907284736633, + "rewards/margins": 2.995577335357666, + "rewards/rejected": -2.4411866664886475, + "step": 13632 + }, + { + "epoch": 0.79, + "learning_rate": 1.0787011478013196e-08, + "logits/chosen": -1.7788052558898926, + "logits/rejected": -1.780741810798645, + "logps/chosen": -34.29383087158203, + "logps/rejected": -310.96209716796875, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4242721498012543, + "rewards/margins": 3.1603591442108154, + "rewards/rejected": -2.7360870838165283, + "step": 13633 + }, + { + "epoch": 0.79, + "learning_rate": 1.0781165202819414e-08, + "logits/chosen": -1.8439626693725586, + "logits/rejected": -1.842798113822937, + "logps/chosen": -1.3198983669281006, + "logps/rejected": -167.0745086669922, + "loss": 0.326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0705496072769165, + "rewards/margins": 4.049818515777588, + "rewards/rejected": -3.979268789291382, + "step": 13634 + }, + { + "epoch": 0.79, + "learning_rate": 1.0775320320870518e-08, + "logits/chosen": -2.0410735607147217, + "logits/rejected": -2.028761625289917, + "logps/chosen": -43.15746307373047, + "logps/rejected": -361.7127685546875, + "loss": 0.215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3405197262763977, + "rewards/margins": 5.8421525955200195, + "rewards/rejected": -5.5016326904296875, + "step": 13635 + }, + { + "epoch": 0.79, + "learning_rate": 1.0769476832374097e-08, + "logits/chosen": -1.8827216625213623, + "logits/rejected": -1.9038881063461304, + "logps/chosen": -320.98590087890625, + "logps/rejected": -398.7870788574219, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09157104790210724, + "rewards/margins": 1.0484740734100342, + "rewards/rejected": -1.140045166015625, + "step": 13636 + }, + { + "epoch": 0.79, + "learning_rate": 1.0763634737537753e-08, + "logits/chosen": -1.8566386699676514, + "logits/rejected": -1.9676622152328491, + "logps/chosen": -279.4691162109375, + "logps/rejected": -294.9266662597656, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2468140125274658, + "rewards/margins": 2.38690185546875, + "rewards/rejected": -1.1400879621505737, + "step": 13637 + }, + { + "epoch": 0.79, + "learning_rate": 1.0757794036569034e-08, + "logits/chosen": -1.9860354661941528, + "logits/rejected": -2.011828899383545, + "logps/chosen": -151.945556640625, + "logps/rejected": -442.13531494140625, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1610077619552612, + "rewards/margins": 3.588733196258545, + "rewards/rejected": -2.427725315093994, + "step": 13638 + }, + { + "epoch": 0.79, + "learning_rate": 1.0751954729675433e-08, + "logits/chosen": -1.7557952404022217, + "logits/rejected": -1.8198519945144653, + "logps/chosen": -239.48638916015625, + "logps/rejected": -450.65435791015625, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3003785610198975, + "rewards/margins": 4.697238445281982, + "rewards/rejected": -1.3968597650527954, + "step": 13639 + }, + { + "epoch": 0.79, + "learning_rate": 1.0746116817064372e-08, + "logits/chosen": -1.936095118522644, + "logits/rejected": -1.9026623964309692, + "logps/chosen": -220.71551513671875, + "logps/rejected": -341.62884521484375, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5005950927734375, + "rewards/margins": 0.23885798454284668, + "rewards/rejected": 1.2617371082305908, + "step": 13640 + }, + { + "epoch": 0.79, + "learning_rate": 1.0740280298943256e-08, + "logits/chosen": -1.879888892173767, + "logits/rejected": -2.0330121517181396, + "logps/chosen": -371.67327880859375, + "logps/rejected": -373.636962890625, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5864471793174744, + "rewards/margins": 3.4734253883361816, + "rewards/rejected": -2.8869781494140625, + "step": 13641 + }, + { + "epoch": 0.79, + "learning_rate": 1.073444517551942e-08, + "logits/chosen": -2.1140105724334717, + "logits/rejected": -2.108351707458496, + "logps/chosen": -19.79160499572754, + "logps/rejected": -241.56622314453125, + "loss": 0.5598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6266312599182129, + "rewards/margins": 3.808882236480713, + "rewards/rejected": -4.435513496398926, + "step": 13642 + }, + { + "epoch": 0.79, + "learning_rate": 1.0728611447000174e-08, + "logits/chosen": -1.9768117666244507, + "logits/rejected": -1.9680083990097046, + "logps/chosen": -50.35310363769531, + "logps/rejected": -181.2559356689453, + "loss": 0.4629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019112015143036842, + "rewards/margins": 1.1482837200164795, + "rewards/rejected": -1.1291717290878296, + "step": 13643 + }, + { + "epoch": 0.79, + "learning_rate": 1.0722779113592733e-08, + "logits/chosen": -1.7139055728912354, + "logits/rejected": -1.720268726348877, + "logps/chosen": -22.462718963623047, + "logps/rejected": -187.93118286132812, + "loss": 0.4215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27109774947166443, + "rewards/margins": 0.8763498067855835, + "rewards/rejected": -0.6052520871162415, + "step": 13644 + }, + { + "epoch": 0.79, + "learning_rate": 1.071694817550431e-08, + "logits/chosen": -1.8082714080810547, + "logits/rejected": -1.8005099296569824, + "logps/chosen": -181.5355682373047, + "logps/rejected": -215.05487060546875, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7273666858673096, + "rewards/margins": 0.15062415599822998, + "rewards/rejected": 1.5767425298690796, + "step": 13645 + }, + { + "epoch": 0.79, + "learning_rate": 1.0711118632942035e-08, + "logits/chosen": -1.950905442237854, + "logits/rejected": -1.9443131685256958, + "logps/chosen": -0.28929316997528076, + "logps/rejected": -130.7548828125, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022168470546603203, + "rewards/margins": 1.0877512693405151, + "rewards/rejected": -1.109919786453247, + "step": 13646 + }, + { + "epoch": 0.79, + "learning_rate": 1.0705290486113023e-08, + "logits/chosen": -1.878341794013977, + "logits/rejected": -1.8703796863555908, + "logps/chosen": -8.118729591369629, + "logps/rejected": -123.6168441772461, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25715169310569763, + "rewards/margins": 1.8993308544158936, + "rewards/rejected": -1.6421791315078735, + "step": 13647 + }, + { + "epoch": 0.79, + "learning_rate": 1.069946373522429e-08, + "logits/chosen": -1.9013049602508545, + "logits/rejected": -1.8958295583724976, + "logps/chosen": -175.49066162109375, + "logps/rejected": -273.0653076171875, + "loss": 0.1327, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1935226917266846, + "rewards/margins": 1.933125376701355, + "rewards/rejected": -0.7396026849746704, + "step": 13648 + }, + { + "epoch": 0.79, + "learning_rate": 1.0693638380482834e-08, + "logits/chosen": -1.7505234479904175, + "logits/rejected": -1.7442060708999634, + "logps/chosen": -147.46343994140625, + "logps/rejected": -320.20318603515625, + "loss": 0.0608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6475448608398438, + "rewards/margins": 2.664006233215332, + "rewards/rejected": -1.0164612531661987, + "step": 13649 + }, + { + "epoch": 0.79, + "learning_rate": 1.0687814422095637e-08, + "logits/chosen": -1.9124011993408203, + "logits/rejected": -1.8971683979034424, + "logps/chosen": -48.2805061340332, + "logps/rejected": -258.50732421875, + "loss": 0.4459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8860222101211548, + "rewards/margins": 0.04253464937210083, + "rewards/rejected": 0.843487560749054, + "step": 13650 + }, + { + "epoch": 0.79, + "learning_rate": 1.068199186026955e-08, + "logits/chosen": -1.6938540935516357, + "logits/rejected": -1.6802514791488647, + "logps/chosen": -27.954349517822266, + "logps/rejected": -297.5054931640625, + "loss": 0.4799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6137714385986328, + "rewards/margins": 6.652308464050293, + "rewards/rejected": -7.266079902648926, + "step": 13651 + }, + { + "epoch": 0.79, + "learning_rate": 1.0676170695211462e-08, + "logits/chosen": -2.064457654953003, + "logits/rejected": -2.066293239593506, + "logps/chosen": -0.0013735033571720123, + "logps/rejected": -85.16344451904297, + "loss": 0.6077, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0686296162893996e-05, + "rewards/margins": 0.282792329788208, + "rewards/rejected": -0.2828430235385895, + "step": 13652 + }, + { + "epoch": 0.79, + "learning_rate": 1.0670350927128108e-08, + "logits/chosen": -1.7067153453826904, + "logits/rejected": -1.6748896837234497, + "logps/chosen": -134.37112426757812, + "logps/rejected": -498.854736328125, + "loss": 0.0799, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9196761846542358, + "rewards/margins": 3.6966261863708496, + "rewards/rejected": -1.7769501209259033, + "step": 13653 + }, + { + "epoch": 0.79, + "learning_rate": 1.0664532556226302e-08, + "logits/chosen": -1.7027320861816406, + "logits/rejected": -1.6454641819000244, + "logps/chosen": -156.70962524414062, + "logps/rejected": -307.850341796875, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4216521978378296, + "rewards/margins": 0.7540954351425171, + "rewards/rejected": 0.6675567626953125, + "step": 13654 + }, + { + "epoch": 0.79, + "learning_rate": 1.0658715582712696e-08, + "logits/chosen": -1.6585031747817993, + "logits/rejected": -1.6637609004974365, + "logps/chosen": -193.63272094726562, + "logps/rejected": -247.60499572753906, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.45111083984375, + "rewards/margins": 0.6098861694335938, + "rewards/rejected": 0.8412246704101562, + "step": 13655 + }, + { + "epoch": 0.79, + "learning_rate": 1.0652900006793953e-08, + "logits/chosen": -1.833168864250183, + "logits/rejected": -1.8359054327011108, + "logps/chosen": -25.78924560546875, + "logps/rejected": -126.038330078125, + "loss": 0.4325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5276323556900024, + "rewards/margins": 0.5317949652671814, + "rewards/rejected": -0.004162597935646772, + "step": 13656 + }, + { + "epoch": 0.79, + "learning_rate": 1.0647085828676666e-08, + "logits/chosen": -1.962074637413025, + "logits/rejected": -1.9416311979293823, + "logps/chosen": -25.751140594482422, + "logps/rejected": -327.15087890625, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2723585069179535, + "rewards/margins": 4.064594745635986, + "rewards/rejected": -3.792236328125, + "step": 13657 + }, + { + "epoch": 0.79, + "learning_rate": 1.06412730485674e-08, + "logits/chosen": -1.7607872486114502, + "logits/rejected": -1.7028619050979614, + "logps/chosen": -112.37123107910156, + "logps/rejected": -373.2970886230469, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9368156790733337, + "rewards/margins": 0.7713615894317627, + "rewards/rejected": 0.16545410454273224, + "step": 13658 + }, + { + "epoch": 0.79, + "learning_rate": 1.0635461666672629e-08, + "logits/chosen": -1.9652734994888306, + "logits/rejected": -1.9689881801605225, + "logps/chosen": -0.2523082494735718, + "logps/rejected": -298.11700439453125, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012468595989048481, + "rewards/margins": 10.750269889831543, + "rewards/rejected": -10.762738227844238, + "step": 13659 + }, + { + "epoch": 0.79, + "learning_rate": 1.062965168319881e-08, + "logits/chosen": -1.952818512916565, + "logits/rejected": -1.9681938886642456, + "logps/chosen": -84.30401611328125, + "logps/rejected": -129.6911163330078, + "loss": 0.5585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18555298447608948, + "rewards/margins": 0.67431640625, + "rewards/rejected": -0.8598694205284119, + "step": 13660 + }, + { + "epoch": 0.79, + "learning_rate": 1.0623843098352348e-08, + "logits/chosen": -1.9221138954162598, + "logits/rejected": -1.909082055091858, + "logps/chosen": -146.7332305908203, + "logps/rejected": -301.0042724609375, + "loss": 0.0693, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0647354125976562, + "rewards/margins": 3.5345170497894287, + "rewards/rejected": -2.4697816371917725, + "step": 13661 + }, + { + "epoch": 0.8, + "learning_rate": 1.0618035912339595e-08, + "logits/chosen": -1.7724900245666504, + "logits/rejected": -1.7652192115783691, + "logps/chosen": -170.42074584960938, + "logps/rejected": -299.4335632324219, + "loss": 0.4494, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.430596947669983, + "rewards/margins": -0.2869293689727783, + "rewards/rejected": 1.7175263166427612, + "step": 13662 + }, + { + "epoch": 0.8, + "learning_rate": 1.0612230125366838e-08, + "logits/chosen": -2.0668094158172607, + "logits/rejected": -2.056243419647217, + "logps/chosen": -28.18693733215332, + "logps/rejected": -192.37985229492188, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2979551553726196, + "rewards/margins": 0.6305784583091736, + "rewards/rejected": 0.667376697063446, + "step": 13663 + }, + { + "epoch": 0.8, + "learning_rate": 1.0606425737640329e-08, + "logits/chosen": -1.7585883140563965, + "logits/rejected": -1.7594280242919922, + "logps/chosen": -278.06005859375, + "logps/rejected": -418.5469055175781, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.064892530441284, + "rewards/margins": 4.148135185241699, + "rewards/rejected": -1.0832427740097046, + "step": 13664 + }, + { + "epoch": 0.8, + "learning_rate": 1.0600622749366272e-08, + "logits/chosen": -2.083622455596924, + "logits/rejected": -2.079324245452881, + "logps/chosen": -53.50239562988281, + "logps/rejected": -260.16094970703125, + "loss": 0.2363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3637527525424957, + "rewards/margins": 5.241575717926025, + "rewards/rejected": -4.8778228759765625, + "step": 13665 + }, + { + "epoch": 0.8, + "learning_rate": 1.0594821160750838e-08, + "logits/chosen": -1.8824615478515625, + "logits/rejected": -1.8855552673339844, + "logps/chosen": -38.19114685058594, + "logps/rejected": -265.0752868652344, + "loss": 0.0751, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.140932083129883, + "rewards/margins": 3.229771852493286, + "rewards/rejected": -1.0888397693634033, + "step": 13666 + }, + { + "epoch": 0.8, + "learning_rate": 1.058902097200009e-08, + "logits/chosen": -1.7665162086486816, + "logits/rejected": -1.686306118965149, + "logps/chosen": -174.71246337890625, + "logps/rejected": -519.1676025390625, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2262665033340454, + "rewards/margins": 2.7513976097106934, + "rewards/rejected": -1.5251312255859375, + "step": 13667 + }, + { + "epoch": 0.8, + "learning_rate": 1.0583222183320096e-08, + "logits/chosen": -1.9272714853286743, + "logits/rejected": -1.9230613708496094, + "logps/chosen": -91.8856201171875, + "logps/rejected": -262.5703430175781, + "loss": 0.4974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4361770749092102, + "rewards/margins": 1.8534409999847412, + "rewards/rejected": -2.2896180152893066, + "step": 13668 + }, + { + "epoch": 0.8, + "learning_rate": 1.0577424794916862e-08, + "logits/chosen": -2.038907527923584, + "logits/rejected": -2.031665563583374, + "logps/chosen": -25.658023834228516, + "logps/rejected": -203.42080688476562, + "loss": 0.1978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5931480526924133, + "rewards/margins": 3.8426291942596436, + "rewards/rejected": -3.249481201171875, + "step": 13669 + }, + { + "epoch": 0.8, + "learning_rate": 1.057162880699633e-08, + "logits/chosen": -1.798983097076416, + "logits/rejected": -1.7794013023376465, + "logps/chosen": -0.00012016072287224233, + "logps/rejected": -99.74920654296875, + "loss": 0.5206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00011354932939866558, + "rewards/margins": 0.8593405485153198, + "rewards/rejected": -0.8592270016670227, + "step": 13670 + }, + { + "epoch": 0.8, + "learning_rate": 1.056583421976443e-08, + "logits/chosen": -1.6615970134735107, + "logits/rejected": -1.6679959297180176, + "logps/chosen": -162.26107788085938, + "logps/rejected": -270.6190185546875, + "loss": 0.5116, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6047424674034119, + "rewards/margins": -0.3127868175506592, + "rewards/rejected": 0.917529284954071, + "step": 13671 + }, + { + "epoch": 0.8, + "learning_rate": 1.0560041033426959e-08, + "logits/chosen": -2.018324851989746, + "logits/rejected": -2.0147624015808105, + "logps/chosen": -3.431262731552124, + "logps/rejected": -120.85987854003906, + "loss": 0.4059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12707388401031494, + "rewards/margins": 1.4686052799224854, + "rewards/rejected": -1.3415313959121704, + "step": 13672 + }, + { + "epoch": 0.8, + "learning_rate": 1.0554249248189779e-08, + "logits/chosen": -2.008842945098877, + "logits/rejected": -2.015428304672241, + "logps/chosen": -80.90375518798828, + "logps/rejected": -274.5086669921875, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6855606436729431, + "rewards/margins": 0.7596848011016846, + "rewards/rejected": -0.07412414997816086, + "step": 13673 + }, + { + "epoch": 0.8, + "learning_rate": 1.0548458864258603e-08, + "logits/chosen": -1.9499361515045166, + "logits/rejected": -1.9326190948486328, + "logps/chosen": -137.23011779785156, + "logps/rejected": -196.04953002929688, + "loss": 0.2788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36817628145217896, + "rewards/margins": 1.7363801002502441, + "rewards/rejected": -1.3682037591934204, + "step": 13674 + }, + { + "epoch": 0.8, + "learning_rate": 1.0542669881839161e-08, + "logits/chosen": -1.8208428621292114, + "logits/rejected": -1.7855501174926758, + "logps/chosen": -214.69387817382812, + "logps/rejected": -308.47137451171875, + "loss": 0.1272, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.028212070465088, + "rewards/margins": 1.8082382678985596, + "rewards/rejected": 0.21997375786304474, + "step": 13675 + }, + { + "epoch": 0.8, + "learning_rate": 1.0536882301137062e-08, + "logits/chosen": -1.7360820770263672, + "logits/rejected": -1.7243916988372803, + "logps/chosen": -121.05728149414062, + "logps/rejected": -396.8486022949219, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1785491704940796, + "rewards/margins": 4.320580959320068, + "rewards/rejected": -3.1420319080352783, + "step": 13676 + }, + { + "epoch": 0.8, + "learning_rate": 1.0531096122357968e-08, + "logits/chosen": -1.748552918434143, + "logits/rejected": -1.7279529571533203, + "logps/chosen": -331.9883117675781, + "logps/rejected": -531.6494750976562, + "loss": 0.0599, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6014435291290283, + "rewards/margins": 2.5020720958709717, + "rewards/rejected": 0.09937133640050888, + "step": 13677 + }, + { + "epoch": 0.8, + "learning_rate": 1.0525311345707388e-08, + "logits/chosen": -1.9018926620483398, + "logits/rejected": -1.9065665006637573, + "logps/chosen": -3.8005478382110596, + "logps/rejected": -82.61195373535156, + "loss": 0.4829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01585102081298828, + "rewards/margins": 1.2222799062728882, + "rewards/rejected": -1.2381309270858765, + "step": 13678 + }, + { + "epoch": 0.8, + "learning_rate": 1.051952797139084e-08, + "logits/chosen": -1.9048030376434326, + "logits/rejected": -1.8949532508850098, + "logps/chosen": -12.704769134521484, + "logps/rejected": -143.3831329345703, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3445381224155426, + "rewards/margins": 1.9306495189666748, + "rewards/rejected": -1.5861114263534546, + "step": 13679 + }, + { + "epoch": 0.8, + "learning_rate": 1.0513745999613782e-08, + "logits/chosen": -1.8966593742370605, + "logits/rejected": -1.8939014673233032, + "logps/chosen": -60.19544219970703, + "logps/rejected": -206.31967163085938, + "loss": 0.1434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7570205926895142, + "rewards/margins": 3.4993362426757812, + "rewards/rejected": -2.7423157691955566, + "step": 13680 + }, + { + "epoch": 0.8, + "learning_rate": 1.0507965430581628e-08, + "logits/chosen": -1.7443546056747437, + "logits/rejected": -1.7414978742599487, + "logps/chosen": -62.036476135253906, + "logps/rejected": -431.1347351074219, + "loss": 0.1948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5789856314659119, + "rewards/margins": 5.391995429992676, + "rewards/rejected": -4.813009738922119, + "step": 13681 + }, + { + "epoch": 0.8, + "learning_rate": 1.0502186264499701e-08, + "logits/chosen": -1.8721007108688354, + "logits/rejected": -1.8704454898834229, + "logps/chosen": -116.25081634521484, + "logps/rejected": -262.779052734375, + "loss": 0.1513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.254560112953186, + "rewards/margins": 1.6618813276290894, + "rewards/rejected": -0.40732118487358093, + "step": 13682 + }, + { + "epoch": 0.8, + "learning_rate": 1.0496408501573322e-08, + "logits/chosen": -1.8884717226028442, + "logits/rejected": -1.8714215755462646, + "logps/chosen": -29.29863929748535, + "logps/rejected": -207.00381469726562, + "loss": 0.4932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26796817779541016, + "rewards/margins": 0.2960626482963562, + "rewards/rejected": -0.02809448353946209, + "step": 13683 + }, + { + "epoch": 0.8, + "learning_rate": 1.049063214200775e-08, + "logits/chosen": -2.1097769737243652, + "logits/rejected": -2.1155214309692383, + "logps/chosen": -20.36400032043457, + "logps/rejected": -89.30281066894531, + "loss": 0.3953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5580112338066101, + "rewards/margins": 0.7763172388076782, + "rewards/rejected": -0.21830597519874573, + "step": 13684 + }, + { + "epoch": 0.8, + "learning_rate": 1.0484857186008195e-08, + "logits/chosen": -1.857045292854309, + "logits/rejected": -1.802155613899231, + "logps/chosen": -236.94691467285156, + "logps/rejected": -309.7001953125, + "loss": 0.208, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6717957258224487, + "rewards/margins": 1.1093385219573975, + "rewards/rejected": 0.562457263469696, + "step": 13685 + }, + { + "epoch": 0.8, + "learning_rate": 1.0479083633779789e-08, + "logits/chosen": -1.9428473711013794, + "logits/rejected": -1.9334611892700195, + "logps/chosen": -56.38932418823242, + "logps/rejected": -150.51815795898438, + "loss": 0.3735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6946945190429688, + "rewards/margins": 1.0001418590545654, + "rewards/rejected": -0.30544739961624146, + "step": 13686 + }, + { + "epoch": 0.8, + "learning_rate": 1.0473311485527654e-08, + "logits/chosen": -1.7957241535186768, + "logits/rejected": -1.7588697671890259, + "logps/chosen": -356.80108642578125, + "logps/rejected": -501.81585693359375, + "loss": 0.1194, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.31353759765625, + "rewards/margins": 1.7805359363555908, + "rewards/rejected": 0.533001720905304, + "step": 13687 + }, + { + "epoch": 0.8, + "learning_rate": 1.0467540741456837e-08, + "logits/chosen": -1.935336947441101, + "logits/rejected": -1.9321593046188354, + "logps/chosen": -65.63299560546875, + "logps/rejected": -348.99700927734375, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2550949156284332, + "rewards/margins": 4.472526550292969, + "rewards/rejected": -4.217431545257568, + "step": 13688 + }, + { + "epoch": 0.8, + "learning_rate": 1.0461771401772351e-08, + "logits/chosen": -1.9258054494857788, + "logits/rejected": -1.9831818342208862, + "logps/chosen": -160.99127197265625, + "logps/rejected": -203.67816162109375, + "loss": 0.0631, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.242314100265503, + "rewards/margins": 2.3638579845428467, + "rewards/rejected": -0.12154388427734375, + "step": 13689 + }, + { + "epoch": 0.8, + "learning_rate": 1.0456003466679159e-08, + "logits/chosen": -1.8275244235992432, + "logits/rejected": -1.8366059064865112, + "logps/chosen": -25.66754150390625, + "logps/rejected": -196.84645080566406, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08441772311925888, + "rewards/margins": 3.017544746398926, + "rewards/rejected": -2.933126926422119, + "step": 13690 + }, + { + "epoch": 0.8, + "learning_rate": 1.0450236936382128e-08, + "logits/chosen": -1.925129771232605, + "logits/rejected": -1.8938907384872437, + "logps/chosen": -264.1017150878906, + "logps/rejected": -487.9020080566406, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6578766107559204, + "rewards/margins": 4.948263645172119, + "rewards/rejected": -4.290387153625488, + "step": 13691 + }, + { + "epoch": 0.8, + "learning_rate": 1.0444471811086165e-08, + "logits/chosen": -1.7763396501541138, + "logits/rejected": -1.776161789894104, + "logps/chosen": -11.92573356628418, + "logps/rejected": -153.39044189453125, + "loss": 0.3633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2573964297771454, + "rewards/margins": 1.4531301259994507, + "rewards/rejected": -1.195733666419983, + "step": 13692 + }, + { + "epoch": 0.8, + "learning_rate": 1.0438708090996034e-08, + "logits/chosen": -1.823845624923706, + "logits/rejected": -1.9090081453323364, + "logps/chosen": -170.40884399414062, + "logps/rejected": -190.81448364257812, + "loss": 0.2502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9934662580490112, + "rewards/margins": 0.5484223365783691, + "rewards/rejected": 1.445043921470642, + "step": 13693 + }, + { + "epoch": 0.8, + "learning_rate": 1.0432945776316526e-08, + "logits/chosen": -1.998238205909729, + "logits/rejected": -1.99941086769104, + "logps/chosen": -0.9082616567611694, + "logps/rejected": -192.88015747070312, + "loss": 0.3434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015708327293395996, + "rewards/margins": 2.622699737548828, + "rewards/rejected": -2.6384079456329346, + "step": 13694 + }, + { + "epoch": 0.8, + "learning_rate": 1.0427184867252297e-08, + "logits/chosen": -1.8862552642822266, + "logits/rejected": -1.8870352506637573, + "logps/chosen": -92.3988037109375, + "logps/rejected": -344.4366760253906, + "loss": 0.101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.033843994140625, + "rewards/margins": 3.4724395275115967, + "rewards/rejected": -2.4385955333709717, + "step": 13695 + }, + { + "epoch": 0.8, + "learning_rate": 1.0421425364008062e-08, + "logits/chosen": -1.8542741537094116, + "logits/rejected": -1.8228719234466553, + "logps/chosen": -253.00067138671875, + "logps/rejected": -397.8741760253906, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.617440938949585, + "rewards/margins": 4.566394329071045, + "rewards/rejected": -0.9489532709121704, + "step": 13696 + }, + { + "epoch": 0.8, + "learning_rate": 1.0415667266788386e-08, + "logits/chosen": -1.8327127695083618, + "logits/rejected": -1.8371305465698242, + "logps/chosen": -0.0007197030354291201, + "logps/rejected": -181.8170928955078, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.792789306724444e-05, + "rewards/margins": 4.58078670501709, + "rewards/rejected": -4.580844402313232, + "step": 13697 + }, + { + "epoch": 0.8, + "learning_rate": 1.0409910575797832e-08, + "logits/chosen": -2.060987710952759, + "logits/rejected": -2.031599760055542, + "logps/chosen": -191.27767944335938, + "logps/rejected": -523.7400512695312, + "loss": 0.1964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1788589507341385, + "rewards/margins": 5.662804126739502, + "rewards/rejected": -5.841662883758545, + "step": 13698 + }, + { + "epoch": 0.8, + "learning_rate": 1.0404155291240913e-08, + "logits/chosen": -1.850948691368103, + "logits/rejected": -1.9061837196350098, + "logps/chosen": -181.81475830078125, + "logps/rejected": -231.4917449951172, + "loss": 0.2635, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5896514654159546, + "rewards/margins": 0.9766555428504944, + "rewards/rejected": 0.6129959225654602, + "step": 13699 + }, + { + "epoch": 0.8, + "learning_rate": 1.0398401413322095e-08, + "logits/chosen": -1.9688905477523804, + "logits/rejected": -1.9645406007766724, + "logps/chosen": -0.11060942709445953, + "logps/rejected": -30.391393661499023, + "loss": 0.6893, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0033936151303350925, + "rewards/margins": -0.02426210790872574, + "rewards/rejected": 0.020868493244051933, + "step": 13700 + }, + { + "epoch": 0.8, + "learning_rate": 1.0392648942245758e-08, + "logits/chosen": -1.8935835361480713, + "logits/rejected": -1.8793606758117676, + "logps/chosen": -95.96304321289062, + "logps/rejected": -231.8774871826172, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6227859854698181, + "rewards/margins": 1.182000756263733, + "rewards/rejected": -0.5592147707939148, + "step": 13701 + }, + { + "epoch": 0.8, + "learning_rate": 1.0386897878216273e-08, + "logits/chosen": -2.1239449977874756, + "logits/rejected": -2.100525140762329, + "logps/chosen": -0.07224523276090622, + "logps/rejected": -282.7979736328125, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05128617212176323, + "rewards/margins": 7.001634120941162, + "rewards/rejected": -6.950347900390625, + "step": 13702 + }, + { + "epoch": 0.8, + "learning_rate": 1.038114822143794e-08, + "logits/chosen": -1.8763307332992554, + "logits/rejected": -1.8824090957641602, + "logps/chosen": -64.91715240478516, + "logps/rejected": -375.6824951171875, + "loss": 0.1987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3123680055141449, + "rewards/margins": 5.577083587646484, + "rewards/rejected": -5.264715671539307, + "step": 13703 + }, + { + "epoch": 0.8, + "learning_rate": 1.0375399972115034e-08, + "logits/chosen": -2.053353786468506, + "logits/rejected": -2.0278356075286865, + "logps/chosen": -67.74369049072266, + "logps/rejected": -278.12213134765625, + "loss": 0.0519, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.104909658432007, + "rewards/margins": 8.10676097869873, + "rewards/rejected": -6.0018510818481445, + "step": 13704 + }, + { + "epoch": 0.8, + "learning_rate": 1.0369653130451733e-08, + "logits/chosen": -2.006885528564453, + "logits/rejected": -2.0066983699798584, + "logps/chosen": -24.718711853027344, + "logps/rejected": -149.6259307861328, + "loss": 0.4478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016501998528838158, + "rewards/margins": 1.1492962837219238, + "rewards/rejected": -1.1327942609786987, + "step": 13705 + }, + { + "epoch": 0.8, + "learning_rate": 1.0363907696652208e-08, + "logits/chosen": -1.9955370426177979, + "logits/rejected": -1.9928412437438965, + "logps/chosen": -89.46446228027344, + "logps/rejected": -283.6507263183594, + "loss": 0.2234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.564868152141571, + "rewards/margins": 5.058190822601318, + "rewards/rejected": -4.493322849273682, + "step": 13706 + }, + { + "epoch": 0.8, + "learning_rate": 1.0358163670920555e-08, + "logits/chosen": -1.891580581665039, + "logits/rejected": -1.8699337244033813, + "logps/chosen": -136.2674560546875, + "logps/rejected": -237.26861572265625, + "loss": 0.069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.935400366783142, + "rewards/margins": 2.7034575939178467, + "rewards/rejected": -0.7680572867393494, + "step": 13707 + }, + { + "epoch": 0.8, + "learning_rate": 1.0352421053460858e-08, + "logits/chosen": -1.7903858423233032, + "logits/rejected": -1.806673288345337, + "logps/chosen": -203.56492614746094, + "logps/rejected": -380.14825439453125, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1421220302581787, + "rewards/margins": 2.953993320465088, + "rewards/rejected": 0.18812866508960724, + "step": 13708 + }, + { + "epoch": 0.8, + "learning_rate": 1.0346679844477085e-08, + "logits/chosen": -1.7377079725265503, + "logits/rejected": -1.6994448900222778, + "logps/chosen": -157.64031982421875, + "logps/rejected": -356.3890380859375, + "loss": 0.0591, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6822571754455566, + "rewards/margins": 2.713223457336426, + "rewards/rejected": -0.03096618689596653, + "step": 13709 + }, + { + "epoch": 0.8, + "learning_rate": 1.0340940044173202e-08, + "logits/chosen": -1.7994844913482666, + "logits/rejected": -1.8515987396240234, + "logps/chosen": -316.45440673828125, + "logps/rejected": -306.1216125488281, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8521820306777954, + "rewards/margins": 2.8290343284606934, + "rewards/rejected": -0.9768524169921875, + "step": 13710 + }, + { + "epoch": 0.8, + "learning_rate": 1.0335201652753145e-08, + "logits/chosen": -1.940281629562378, + "logits/rejected": -1.9336186647415161, + "logps/chosen": -0.15830537676811218, + "logps/rejected": -221.07183837890625, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037793681025505066, + "rewards/margins": 6.101284027099609, + "rewards/rejected": -6.063490390777588, + "step": 13711 + }, + { + "epoch": 0.8, + "learning_rate": 1.0329464670420735e-08, + "logits/chosen": -1.7022154331207275, + "logits/rejected": -1.7635418176651, + "logps/chosen": -273.8044738769531, + "logps/rejected": -245.52198791503906, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.1042938232421875, + "rewards/margins": 2.7204999923706055, + "rewards/rejected": 1.3837937116622925, + "step": 13712 + }, + { + "epoch": 0.8, + "learning_rate": 1.0323729097379796e-08, + "logits/chosen": -1.9610469341278076, + "logits/rejected": -1.95125150680542, + "logps/chosen": -10.95511245727539, + "logps/rejected": -135.84085083007812, + "loss": 0.478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.205134779214859, + "rewards/margins": 0.3891664743423462, + "rewards/rejected": -0.184031680226326, + "step": 13713 + }, + { + "epoch": 0.8, + "learning_rate": 1.0317994933834056e-08, + "logits/chosen": -1.8328365087509155, + "logits/rejected": -1.8182231187820435, + "logps/chosen": -61.411949157714844, + "logps/rejected": -386.44805908203125, + "loss": 0.3904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07209739834070206, + "rewards/margins": 1.0448611974716187, + "rewards/rejected": -1.1169586181640625, + "step": 13714 + }, + { + "epoch": 0.8, + "learning_rate": 1.031226217998727e-08, + "logits/chosen": -1.8970831632614136, + "logits/rejected": -1.8924942016601562, + "logps/chosen": -207.08938598632812, + "logps/rejected": -260.5857849121094, + "loss": 0.3664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8156296014785767, + "rewards/margins": 0.3504470884799957, + "rewards/rejected": 0.46518251299858093, + "step": 13715 + }, + { + "epoch": 0.8, + "learning_rate": 1.0306530836043049e-08, + "logits/chosen": -1.9083315134048462, + "logits/rejected": -1.907739520072937, + "logps/chosen": -11.125052452087402, + "logps/rejected": -135.31021118164062, + "loss": 0.3172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1374988555908203, + "rewards/margins": 3.671522855758667, + "rewards/rejected": -3.5340240001678467, + "step": 13716 + }, + { + "epoch": 0.8, + "learning_rate": 1.0300800902205032e-08, + "logits/chosen": -1.8875950574874878, + "logits/rejected": -1.886394739151001, + "logps/chosen": -19.358346939086914, + "logps/rejected": -114.31087493896484, + "loss": 0.2197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8800079226493835, + "rewards/margins": 1.859636664390564, + "rewards/rejected": -0.9796287417411804, + "step": 13717 + }, + { + "epoch": 0.8, + "learning_rate": 1.0295072378676722e-08, + "logits/chosen": -1.6951560974121094, + "logits/rejected": -1.675253987312317, + "logps/chosen": -410.9029541015625, + "logps/rejected": -547.5738525390625, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.510601818561554, + "rewards/margins": 0.688031017780304, + "rewards/rejected": -0.17742919921875, + "step": 13718 + }, + { + "epoch": 0.8, + "learning_rate": 1.0289345265661693e-08, + "logits/chosen": -2.1664843559265137, + "logits/rejected": -2.1581814289093018, + "logps/chosen": -2.714141607284546, + "logps/rejected": -110.0300064086914, + "loss": 0.1589, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1982271671295166, + "rewards/margins": 2.6087381839752197, + "rewards/rejected": -1.4105110168457031, + "step": 13719 + }, + { + "epoch": 0.8, + "learning_rate": 1.028361956336335e-08, + "logits/chosen": -1.965002179145813, + "logits/rejected": -1.9775394201278687, + "logps/chosen": -113.5108413696289, + "logps/rejected": -298.27020263671875, + "loss": 0.2355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47228318452835083, + "rewards/margins": 1.6392266750335693, + "rewards/rejected": -1.1669434309005737, + "step": 13720 + }, + { + "epoch": 0.8, + "learning_rate": 1.0277895271985116e-08, + "logits/chosen": -1.9166781902313232, + "logits/rejected": -1.923377513885498, + "logps/chosen": -21.197452545166016, + "logps/rejected": -112.65487670898438, + "loss": 0.4028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3438602387905121, + "rewards/margins": 0.9183895587921143, + "rewards/rejected": -0.5745292901992798, + "step": 13721 + }, + { + "epoch": 0.8, + "learning_rate": 1.0272172391730338e-08, + "logits/chosen": -1.7656800746917725, + "logits/rejected": -1.7518967390060425, + "logps/chosen": -263.92547607421875, + "logps/rejected": -456.28790283203125, + "loss": 0.1724, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.185049533843994, + "rewards/margins": 1.0289521217346191, + "rewards/rejected": 1.156097412109375, + "step": 13722 + }, + { + "epoch": 0.8, + "learning_rate": 1.0266450922802344e-08, + "logits/chosen": -1.9893759489059448, + "logits/rejected": -1.983255386352539, + "logps/chosen": -0.8449388742446899, + "logps/rejected": -184.42654418945312, + "loss": 0.3154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08717135339975357, + "rewards/margins": 4.311889171600342, + "rewards/rejected": -4.224717617034912, + "step": 13723 + }, + { + "epoch": 0.8, + "learning_rate": 1.0260730865404354e-08, + "logits/chosen": -2.156299114227295, + "logits/rejected": -2.144789934158325, + "logps/chosen": -142.2271728515625, + "logps/rejected": -363.1148681640625, + "loss": 0.0958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9137634634971619, + "rewards/margins": 4.177697658538818, + "rewards/rejected": -3.263934373855591, + "step": 13724 + }, + { + "epoch": 0.8, + "learning_rate": 1.0255012219739595e-08, + "logits/chosen": -1.7394261360168457, + "logits/rejected": -1.741805076599121, + "logps/chosen": -0.0002519929548725486, + "logps/rejected": -336.1096496582031, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2825726116716396e-05, + "rewards/margins": 7.232491493225098, + "rewards/rejected": -7.232504367828369, + "step": 13725 + }, + { + "epoch": 0.8, + "learning_rate": 1.0249294986011214e-08, + "logits/chosen": -1.7877100706100464, + "logits/rejected": -1.7894620895385742, + "logps/chosen": -210.91114807128906, + "logps/rejected": -481.4971008300781, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.403796434402466, + "rewards/margins": 5.310519218444824, + "rewards/rejected": -2.9067230224609375, + "step": 13726 + }, + { + "epoch": 0.8, + "learning_rate": 1.0243579164422329e-08, + "logits/chosen": -1.9075113534927368, + "logits/rejected": -1.9091845750808716, + "logps/chosen": -0.05530954524874687, + "logps/rejected": -195.87774658203125, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027019453700631857, + "rewards/margins": 2.3045032024383545, + "rewards/rejected": -2.3072052001953125, + "step": 13727 + }, + { + "epoch": 0.8, + "learning_rate": 1.0237864755175968e-08, + "logits/chosen": -1.964624285697937, + "logits/rejected": -1.9488624334335327, + "logps/chosen": -102.7941665649414, + "logps/rejected": -333.9692687988281, + "loss": 0.1517, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2422599792480469, + "rewards/margins": 3.24870228767395, + "rewards/rejected": -2.0064423084259033, + "step": 13728 + }, + { + "epoch": 0.8, + "learning_rate": 1.023215175847515e-08, + "logits/chosen": -1.632454752922058, + "logits/rejected": -1.6348397731781006, + "logps/chosen": -0.32266831398010254, + "logps/rejected": -291.46417236328125, + "loss": 0.2663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22631140053272247, + "rewards/margins": 6.595430374145508, + "rewards/rejected": -6.369119167327881, + "step": 13729 + }, + { + "epoch": 0.8, + "learning_rate": 1.0226440174522827e-08, + "logits/chosen": -1.8030931949615479, + "logits/rejected": -1.7913709878921509, + "logps/chosen": -163.61038208007812, + "logps/rejected": -484.9217224121094, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6477203369140625, + "rewards/margins": 5.375317573547363, + "rewards/rejected": -3.7275969982147217, + "step": 13730 + }, + { + "epoch": 0.8, + "learning_rate": 1.02207300035219e-08, + "logits/chosen": -1.9922819137573242, + "logits/rejected": -2.048814535140991, + "logps/chosen": -177.01864624023438, + "logps/rejected": -342.2341003417969, + "loss": 0.0766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9418884515762329, + "rewards/margins": 2.595755100250244, + "rewards/rejected": -1.6538666486740112, + "step": 13731 + }, + { + "epoch": 0.8, + "learning_rate": 1.0215021245675237e-08, + "logits/chosen": -1.9091845750808716, + "logits/rejected": -1.942772388458252, + "logps/chosen": -337.3193054199219, + "logps/rejected": -394.23773193359375, + "loss": 0.0507, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.932626485824585, + "rewards/margins": 2.709134101867676, + "rewards/rejected": 0.22349242866039276, + "step": 13732 + }, + { + "epoch": 0.8, + "learning_rate": 1.0209313901185606e-08, + "logits/chosen": -1.9850165843963623, + "logits/rejected": -1.9671921730041504, + "logps/chosen": -60.203826904296875, + "logps/rejected": -248.07989501953125, + "loss": 0.3164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16357918083667755, + "rewards/margins": 3.8493261337280273, + "rewards/rejected": -3.685746908187866, + "step": 13733 + }, + { + "epoch": 0.8, + "learning_rate": 1.0203607970255812e-08, + "logits/chosen": -1.9830609560012817, + "logits/rejected": -1.9797310829162598, + "logps/chosen": -84.44392395019531, + "logps/rejected": -409.16827392578125, + "loss": 0.1094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1245803833007812, + "rewards/margins": 3.506260633468628, + "rewards/rejected": -2.3816802501678467, + "step": 13734 + }, + { + "epoch": 0.8, + "learning_rate": 1.0197903453088508e-08, + "logits/chosen": -1.931918978691101, + "logits/rejected": -1.9325659275054932, + "logps/chosen": -15.514533042907715, + "logps/rejected": -47.153873443603516, + "loss": 0.6282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07538118213415146, + "rewards/margins": 0.17033357918262482, + "rewards/rejected": -0.09495239704847336, + "step": 13735 + }, + { + "epoch": 0.8, + "learning_rate": 1.0192200349886381e-08, + "logits/chosen": -1.9306379556655884, + "logits/rejected": -1.936012625694275, + "logps/chosen": -26.68412208557129, + "logps/rejected": -181.1903076171875, + "loss": 0.2071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4613214433193207, + "rewards/margins": 2.287299633026123, + "rewards/rejected": -1.82597815990448, + "step": 13736 + }, + { + "epoch": 0.8, + "learning_rate": 1.0186498660851995e-08, + "logits/chosen": -1.781072735786438, + "logits/rejected": -1.6846297979354858, + "logps/chosen": -405.0431823730469, + "logps/rejected": -585.6716918945312, + "loss": 0.0799, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9169129133224487, + "rewards/margins": 2.5154175758361816, + "rewards/rejected": -0.5985046625137329, + "step": 13737 + }, + { + "epoch": 0.8, + "learning_rate": 1.018079838618795e-08, + "logits/chosen": -2.095287561416626, + "logits/rejected": -2.0956501960754395, + "logps/chosen": -50.52216339111328, + "logps/rejected": -234.35552978515625, + "loss": 0.1759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8736568689346313, + "rewards/margins": 3.6635265350341797, + "rewards/rejected": -2.789869785308838, + "step": 13738 + }, + { + "epoch": 0.8, + "learning_rate": 1.0175099526096715e-08, + "logits/chosen": -1.9413167238235474, + "logits/rejected": -1.927059292793274, + "logps/chosen": -233.83349609375, + "logps/rejected": -438.5977478027344, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.397869825363159, + "rewards/margins": 3.615399122238159, + "rewards/rejected": -1.217529296875, + "step": 13739 + }, + { + "epoch": 0.8, + "learning_rate": 1.0169402080780742e-08, + "logits/chosen": -1.8413348197937012, + "logits/rejected": -1.8466124534606934, + "logps/chosen": -13.657541275024414, + "logps/rejected": -144.44412231445312, + "loss": 0.4409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14557257294654846, + "rewards/margins": 1.0432151556015015, + "rewards/rejected": -0.8976425528526306, + "step": 13740 + }, + { + "epoch": 0.8, + "learning_rate": 1.0163706050442445e-08, + "logits/chosen": -1.9017072916030884, + "logits/rejected": -1.8960965871810913, + "logps/chosen": -50.19036102294922, + "logps/rejected": -291.8663330078125, + "loss": 0.2321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2712535858154297, + "rewards/margins": 5.457172870635986, + "rewards/rejected": -5.185919284820557, + "step": 13741 + }, + { + "epoch": 0.8, + "learning_rate": 1.0158011435284174e-08, + "logits/chosen": -1.9116264581680298, + "logits/rejected": -1.884175181388855, + "logps/chosen": -173.1378173828125, + "logps/rejected": -357.0765380859375, + "loss": 0.0769, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8948075771331787, + "rewards/margins": 2.1759629249572754, + "rewards/rejected": 0.7188445925712585, + "step": 13742 + }, + { + "epoch": 0.8, + "learning_rate": 1.0152318235508223e-08, + "logits/chosen": -1.867724895477295, + "logits/rejected": -1.8668118715286255, + "logps/chosen": -20.233631134033203, + "logps/rejected": -176.30340576171875, + "loss": 0.3577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40870991349220276, + "rewards/margins": 1.1243135929107666, + "rewards/rejected": -0.7156036496162415, + "step": 13743 + }, + { + "epoch": 0.8, + "learning_rate": 1.0146626451316836e-08, + "logits/chosen": -2.1327462196350098, + "logits/rejected": -2.128465175628662, + "logps/chosen": -5.007333755493164, + "logps/rejected": -193.82208251953125, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17185214161872864, + "rewards/margins": 4.124327182769775, + "rewards/rejected": -4.296179294586182, + "step": 13744 + }, + { + "epoch": 0.8, + "learning_rate": 1.0140936082912228e-08, + "logits/chosen": -1.7873104810714722, + "logits/rejected": -1.7601712942123413, + "logps/chosen": -112.88809204101562, + "logps/rejected": -400.3341064453125, + "loss": 0.5126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5051239132881165, + "rewards/margins": 6.444589138031006, + "rewards/rejected": -6.949713230133057, + "step": 13745 + }, + { + "epoch": 0.8, + "learning_rate": 1.013524713049655e-08, + "logits/chosen": -1.9755092859268188, + "logits/rejected": -1.9785467386245728, + "logps/chosen": -30.74067497253418, + "logps/rejected": -152.18309020996094, + "loss": 0.2289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6517000198364258, + "rewards/margins": 2.1472368240356445, + "rewards/rejected": -1.4955368041992188, + "step": 13746 + }, + { + "epoch": 0.8, + "learning_rate": 1.012955959427188e-08, + "logits/chosen": -1.9578086137771606, + "logits/rejected": -1.9577645063400269, + "logps/chosen": -125.11679077148438, + "logps/rejected": -151.1054229736328, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9936553835868835, + "rewards/margins": 0.40661007165908813, + "rewards/rejected": 0.5870453119277954, + "step": 13747 + }, + { + "epoch": 0.8, + "learning_rate": 1.0123873474440286e-08, + "logits/chosen": -1.715922474861145, + "logits/rejected": -1.717880368232727, + "logps/chosen": -54.8547477722168, + "logps/rejected": -223.20309448242188, + "loss": 0.1963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6239986419677734, + "rewards/margins": 4.496407508850098, + "rewards/rejected": -3.8724091053009033, + "step": 13748 + }, + { + "epoch": 0.8, + "learning_rate": 1.0118188771203756e-08, + "logits/chosen": -1.736382246017456, + "logits/rejected": -1.7213054895401, + "logps/chosen": -152.88363647460938, + "logps/rejected": -199.74267578125, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7775466442108154, + "rewards/margins": 2.370011806488037, + "rewards/rejected": 1.4075348377227783, + "step": 13749 + }, + { + "epoch": 0.8, + "learning_rate": 1.011250548476425e-08, + "logits/chosen": -1.6419848203659058, + "logits/rejected": -1.688798427581787, + "logps/chosen": -287.4915771484375, + "logps/rejected": -303.748291015625, + "loss": 0.0909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8604828119277954, + "rewards/margins": 1.8892182111740112, + "rewards/rejected": -0.02873535268008709, + "step": 13750 + }, + { + "epoch": 0.8, + "learning_rate": 1.0106823615323667e-08, + "logits/chosen": -1.703596830368042, + "logits/rejected": -1.7045966386795044, + "logps/chosen": -1.208994746208191, + "logps/rejected": -58.46070861816406, + "loss": 0.5264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08990901708602905, + "rewards/margins": 0.6276718378067017, + "rewards/rejected": -0.5377628207206726, + "step": 13751 + }, + { + "epoch": 0.8, + "learning_rate": 1.0101143163083826e-08, + "logits/chosen": -1.9131476879119873, + "logits/rejected": -1.9121030569076538, + "logps/chosen": -5.651577949523926, + "logps/rejected": -44.87481689453125, + "loss": 0.774, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.08684825897216797, + "rewards/margins": -0.3999517560005188, + "rewards/rejected": 0.48680001497268677, + "step": 13752 + }, + { + "epoch": 0.8, + "learning_rate": 1.0095464128246566e-08, + "logits/chosen": -1.9188334941864014, + "logits/rejected": -1.9098824262619019, + "logps/chosen": -0.21062447130680084, + "logps/rejected": -151.7691192626953, + "loss": 0.6676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008823652751743793, + "rewards/margins": 0.07241008430719376, + "rewards/rejected": -0.06358642876148224, + "step": 13753 + }, + { + "epoch": 0.8, + "learning_rate": 1.008978651101361e-08, + "logits/chosen": -1.9251912832260132, + "logits/rejected": -1.909643292427063, + "logps/chosen": -0.14349320530891418, + "logps/rejected": -181.98309326171875, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10829772800207138, + "rewards/margins": 3.733337640762329, + "rewards/rejected": -3.62503981590271, + "step": 13754 + }, + { + "epoch": 0.8, + "learning_rate": 1.008411031158667e-08, + "logits/chosen": -1.5952345132827759, + "logits/rejected": -1.5973150730133057, + "logps/chosen": -233.26205444335938, + "logps/rejected": -501.7581787109375, + "loss": 0.1528, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0188705921173096, + "rewards/margins": 1.4800643920898438, + "rewards/rejected": 0.538806140422821, + "step": 13755 + }, + { + "epoch": 0.8, + "learning_rate": 1.007843553016735e-08, + "logits/chosen": -1.8951542377471924, + "logits/rejected": -1.8883239030838013, + "logps/chosen": -178.75936889648438, + "logps/rejected": -361.429443359375, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3219757080078125, + "rewards/margins": 0.33029478788375854, + "rewards/rejected": 0.991680920124054, + "step": 13756 + }, + { + "epoch": 0.8, + "learning_rate": 1.0072762166957305e-08, + "logits/chosen": -1.9794925451278687, + "logits/rejected": -1.9920233488082886, + "logps/chosen": -197.828857421875, + "logps/rejected": -422.8684387207031, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.164015293121338, + "rewards/margins": 5.100395202636719, + "rewards/rejected": -2.93638014793396, + "step": 13757 + }, + { + "epoch": 0.8, + "learning_rate": 1.0067090222158042e-08, + "logits/chosen": -1.8034183979034424, + "logits/rejected": -1.8065967559814453, + "logps/chosen": -136.29238891601562, + "logps/rejected": -352.13592529296875, + "loss": 0.2524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6774703860282898, + "rewards/margins": 1.743360996246338, + "rewards/rejected": -1.0658905506134033, + "step": 13758 + }, + { + "epoch": 0.8, + "learning_rate": 1.0061419695971068e-08, + "logits/chosen": -1.6291455030441284, + "logits/rejected": -1.7310993671417236, + "logps/chosen": -238.9903106689453, + "logps/rejected": -402.6654052734375, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2322921752929688, + "rewards/margins": 6.770018100738525, + "rewards/rejected": -4.537725925445557, + "step": 13759 + }, + { + "epoch": 0.8, + "learning_rate": 1.0055750588597827e-08, + "logits/chosen": -1.816136360168457, + "logits/rejected": -1.8159284591674805, + "logps/chosen": -22.126134872436523, + "logps/rejected": -126.9516372680664, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7301006317138672, + "rewards/margins": 0.8530826568603516, + "rewards/rejected": -0.12298202514648438, + "step": 13760 + }, + { + "epoch": 0.8, + "learning_rate": 1.0050082900239726e-08, + "logits/chosen": -2.0171642303466797, + "logits/rejected": -2.0274527072906494, + "logps/chosen": -82.58590698242188, + "logps/rejected": -186.3461151123047, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3591880798339844, + "rewards/margins": 1.892163872718811, + "rewards/rejected": -1.5329757928848267, + "step": 13761 + }, + { + "epoch": 0.8, + "learning_rate": 1.004441663109808e-08, + "logits/chosen": -1.9285327196121216, + "logits/rejected": -1.9095627069473267, + "logps/chosen": -106.78605651855469, + "logps/rejected": -201.05723571777344, + "loss": 0.2019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.30804443359375, + "rewards/margins": 1.7460159063339233, + "rewards/rejected": -0.4379715025424957, + "step": 13762 + }, + { + "epoch": 0.8, + "learning_rate": 1.0038751781374205e-08, + "logits/chosen": -1.9073514938354492, + "logits/rejected": -1.8872852325439453, + "logps/chosen": -156.50482177734375, + "logps/rejected": -267.384033203125, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6264251470565796, + "rewards/margins": 1.1245238780975342, + "rewards/rejected": 0.5019012689590454, + "step": 13763 + }, + { + "epoch": 0.8, + "learning_rate": 1.0033088351269337e-08, + "logits/chosen": -1.637355089187622, + "logits/rejected": -1.6322053670883179, + "logps/chosen": -0.0039456808008253574, + "logps/rejected": -246.63917541503906, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5693089962005615e-06, + "rewards/margins": 3.7362449169158936, + "rewards/rejected": -3.736250400543213, + "step": 13764 + }, + { + "epoch": 0.8, + "learning_rate": 1.002742634098468e-08, + "logits/chosen": -2.113448143005371, + "logits/rejected": -2.125199556350708, + "logps/chosen": -76.968994140625, + "logps/rejected": -236.2555694580078, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4859771728515625, + "rewards/margins": 2.256577968597412, + "rewards/rejected": 0.22939911484718323, + "step": 13765 + }, + { + "epoch": 0.8, + "learning_rate": 1.0021765750721355e-08, + "logits/chosen": -1.7715736627578735, + "logits/rejected": -1.7618341445922852, + "logps/chosen": -10.198617935180664, + "logps/rejected": -64.16351318359375, + "loss": 0.9072, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.35506579279899597, + "rewards/margins": -0.45090243220329285, + "rewards/rejected": 0.09583663940429688, + "step": 13766 + }, + { + "epoch": 0.8, + "learning_rate": 1.0016106580680467e-08, + "logits/chosen": -2.0105862617492676, + "logits/rejected": -2.013378381729126, + "logps/chosen": -0.14097702503204346, + "logps/rejected": -132.72525024414062, + "loss": 0.5081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003237813652958721, + "rewards/margins": 0.9949283003807068, + "rewards/rejected": -0.9946045279502869, + "step": 13767 + }, + { + "epoch": 0.8, + "learning_rate": 1.0010448831063051e-08, + "logits/chosen": -1.8986730575561523, + "logits/rejected": -1.9936459064483643, + "logps/chosen": -260.6200256347656, + "logps/rejected": -464.40869140625, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0019867420196533, + "rewards/margins": 7.286063194274902, + "rewards/rejected": -5.28407621383667, + "step": 13768 + }, + { + "epoch": 0.8, + "learning_rate": 1.000479250207012e-08, + "logits/chosen": -1.960907220840454, + "logits/rejected": -1.9471161365509033, + "logps/chosen": -53.69693374633789, + "logps/rejected": -193.53184509277344, + "loss": 0.2158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9711376428604126, + "rewards/margins": 1.5802273750305176, + "rewards/rejected": -0.6090896725654602, + "step": 13769 + }, + { + "epoch": 0.8, + "learning_rate": 9.999137593902585e-09, + "logits/chosen": -1.9526151418685913, + "logits/rejected": -1.944183111190796, + "logps/chosen": -46.16814422607422, + "logps/rejected": -160.16525268554688, + "loss": 0.6905, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07172279804944992, + "rewards/margins": -0.023191452026367188, + "rewards/rejected": 0.09491425007581711, + "step": 13770 + }, + { + "epoch": 0.8, + "learning_rate": 9.99348410676134e-09, + "logits/chosen": -1.893293857574463, + "logits/rejected": -1.8902534246444702, + "logps/chosen": -8.499501564074308e-05, + "logps/rejected": -271.4070739746094, + "loss": 0.3206, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.361613153174403e-06, + "rewards/margins": 8.78360366821289, + "rewards/rejected": -8.783607482910156, + "step": 13771 + }, + { + "epoch": 0.8, + "learning_rate": 9.987832040847238e-09, + "logits/chosen": -1.820523738861084, + "logits/rejected": -1.8125649690628052, + "logps/chosen": -129.29055786132812, + "logps/rejected": -267.44866943359375, + "loss": 0.131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1774262189865112, + "rewards/margins": 1.5955811738967896, + "rewards/rejected": -0.41815492510795593, + "step": 13772 + }, + { + "epoch": 0.8, + "learning_rate": 9.982181396361067e-09, + "logits/chosen": -1.922536849975586, + "logits/rejected": -1.936743974685669, + "logps/chosen": -88.2025375366211, + "logps/rejected": -238.51321411132812, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07751236110925674, + "rewards/margins": 2.058164358139038, + "rewards/rejected": -2.135676622390747, + "step": 13773 + }, + { + "epoch": 0.8, + "learning_rate": 9.976532173503566e-09, + "logits/chosen": -1.902868390083313, + "logits/rejected": -1.89292573928833, + "logps/chosen": -22.508262634277344, + "logps/rejected": -138.46908569335938, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38705483078956604, + "rewards/margins": 0.9609425067901611, + "rewards/rejected": -0.5738876461982727, + "step": 13774 + }, + { + "epoch": 0.8, + "learning_rate": 9.970884372475396e-09, + "logits/chosen": -1.6832771301269531, + "logits/rejected": -1.6855511665344238, + "logps/chosen": -155.49501037597656, + "logps/rejected": -367.4696350097656, + "loss": 0.1807, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7709884643554688, + "rewards/margins": 1.016209363937378, + "rewards/rejected": 0.754779040813446, + "step": 13775 + }, + { + "epoch": 0.8, + "learning_rate": 9.965237993477244e-09, + "logits/chosen": -1.848930835723877, + "logits/rejected": -1.8346269130706787, + "logps/chosen": -66.03768920898438, + "logps/rejected": -200.0894012451172, + "loss": 0.6336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5690784454345703, + "rewards/margins": 1.387569785118103, + "rewards/rejected": -1.9566482305526733, + "step": 13776 + }, + { + "epoch": 0.8, + "learning_rate": 9.959593036709657e-09, + "logits/chosen": -1.8187183141708374, + "logits/rejected": -1.820643424987793, + "logps/chosen": -50.224456787109375, + "logps/rejected": -271.4830322265625, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14808540046215057, + "rewards/margins": 3.674072742462158, + "rewards/rejected": -3.525987386703491, + "step": 13777 + }, + { + "epoch": 0.8, + "learning_rate": 9.953949502373194e-09, + "logits/chosen": -2.045558452606201, + "logits/rejected": -2.0494749546051025, + "logps/chosen": -11.273865699768066, + "logps/rejected": -94.70046997070312, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24589510262012482, + "rewards/margins": 1.7200597524642944, + "rewards/rejected": -1.474164605140686, + "step": 13778 + }, + { + "epoch": 0.8, + "learning_rate": 9.948307390668309e-09, + "logits/chosen": -1.74556565284729, + "logits/rejected": -1.7452943325042725, + "logps/chosen": -3.980196714401245, + "logps/rejected": -80.26844787597656, + "loss": 0.4558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13052575290203094, + "rewards/margins": 1.1967869997024536, + "rewards/rejected": -1.0662612915039062, + "step": 13779 + }, + { + "epoch": 0.8, + "learning_rate": 9.942666701795483e-09, + "logits/chosen": -1.9346297979354858, + "logits/rejected": -1.9660214185714722, + "logps/chosen": -135.44387817382812, + "logps/rejected": -208.91134643554688, + "loss": 0.1793, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6982024908065796, + "rewards/margins": 1.0435333251953125, + "rewards/rejected": 0.6546692252159119, + "step": 13780 + }, + { + "epoch": 0.8, + "learning_rate": 9.937027435955065e-09, + "logits/chosen": -1.7054277658462524, + "logits/rejected": -1.6986045837402344, + "logps/chosen": -191.901123046875, + "logps/rejected": -327.37408447265625, + "loss": 0.0612, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5058975219726562, + "rewards/margins": 2.753828525543213, + "rewards/rejected": -1.247930884361267, + "step": 13781 + }, + { + "epoch": 0.8, + "learning_rate": 9.9313895933474e-09, + "logits/chosen": -1.7505401372909546, + "logits/rejected": -1.7470624446868896, + "logps/chosen": -5.295900344848633, + "logps/rejected": -72.72808074951172, + "loss": 0.6758, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.584947407245636, + "rewards/margins": -0.49018388986587524, + "rewards/rejected": 1.0751312971115112, + "step": 13782 + }, + { + "epoch": 0.8, + "learning_rate": 9.925753174172774e-09, + "logits/chosen": -1.8814435005187988, + "logits/rejected": -1.86916184425354, + "logps/chosen": -245.98287963867188, + "logps/rejected": -305.1905517578125, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.56534743309021, + "rewards/margins": 0.09481215476989746, + "rewards/rejected": 2.4705352783203125, + "step": 13783 + }, + { + "epoch": 0.8, + "learning_rate": 9.920118178631432e-09, + "logits/chosen": -1.8034619092941284, + "logits/rejected": -1.7948213815689087, + "logps/chosen": -0.000726482190657407, + "logps/rejected": -173.1218719482422, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0991152016213164e-05, + "rewards/margins": 4.010501861572266, + "rewards/rejected": -4.010470867156982, + "step": 13784 + }, + { + "epoch": 0.8, + "learning_rate": 9.914484606923529e-09, + "logits/chosen": -1.7567956447601318, + "logits/rejected": -1.756123661994934, + "logps/chosen": -3.9326324462890625, + "logps/rejected": -59.39293670654297, + "loss": 0.5843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11259100586175919, + "rewards/margins": 0.6089391112327576, + "rewards/rejected": -0.7215301394462585, + "step": 13785 + }, + { + "epoch": 0.8, + "learning_rate": 9.908852459249217e-09, + "logits/chosen": -1.5904978513717651, + "logits/rejected": -1.5928089618682861, + "logps/chosen": -171.33013916015625, + "logps/rejected": -345.6654968261719, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7446273565292358, + "rewards/margins": 3.833815097808838, + "rewards/rejected": -2.0891876220703125, + "step": 13786 + }, + { + "epoch": 0.8, + "learning_rate": 9.90322173580857e-09, + "logits/chosen": -1.7785286903381348, + "logits/rejected": -1.7581932544708252, + "logps/chosen": -132.52084350585938, + "logps/rejected": -245.47390747070312, + "loss": 0.2593, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7805923223495483, + "rewards/margins": 0.8314330577850342, + "rewards/rejected": 0.9491592645645142, + "step": 13787 + }, + { + "epoch": 0.8, + "learning_rate": 9.897592436801639e-09, + "logits/chosen": -1.716975212097168, + "logits/rejected": -1.7159793376922607, + "logps/chosen": -22.215927124023438, + "logps/rejected": -223.38629150390625, + "loss": 0.2296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5082859396934509, + "rewards/margins": 2.347726821899414, + "rewards/rejected": -1.839440941810608, + "step": 13788 + }, + { + "epoch": 0.8, + "learning_rate": 9.891964562428367e-09, + "logits/chosen": -1.888825535774231, + "logits/rejected": -1.8668627738952637, + "logps/chosen": -56.0875244140625, + "logps/rejected": -247.22019958496094, + "loss": 0.1442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8481780886650085, + "rewards/margins": 2.9020509719848633, + "rewards/rejected": -2.05387282371521, + "step": 13789 + }, + { + "epoch": 0.8, + "learning_rate": 9.886338112888708e-09, + "logits/chosen": -1.7836024761199951, + "logits/rejected": -1.7838268280029297, + "logps/chosen": -195.54583740234375, + "logps/rejected": -494.51641845703125, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.036932349205017, + "rewards/margins": 5.978662490844727, + "rewards/rejected": -4.94173002243042, + "step": 13790 + }, + { + "epoch": 0.8, + "learning_rate": 9.880713088382537e-09, + "logits/chosen": -2.121464252471924, + "logits/rejected": -2.1216752529144287, + "logps/chosen": -46.1922721862793, + "logps/rejected": -166.48646545410156, + "loss": 0.1559, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1727837324142456, + "rewards/margins": 2.4487481117248535, + "rewards/rejected": -1.275964379310608, + "step": 13791 + }, + { + "epoch": 0.8, + "learning_rate": 9.875089489109679e-09, + "logits/chosen": -1.897462010383606, + "logits/rejected": -1.900875210762024, + "logps/chosen": -0.029985975474119186, + "logps/rejected": -196.23272705078125, + "loss": 0.3666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013584772124886513, + "rewards/margins": 2.6260757446289062, + "rewards/rejected": -2.6124908924102783, + "step": 13792 + }, + { + "epoch": 0.8, + "learning_rate": 9.869467315269925e-09, + "logits/chosen": -1.9001415967941284, + "logits/rejected": -1.905126929283142, + "logps/chosen": -1.0384411811828613, + "logps/rejected": -52.323143005371094, + "loss": 0.4766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06710274517536163, + "rewards/margins": 0.9512580037117004, + "rewards/rejected": -0.8841552734375, + "step": 13793 + }, + { + "epoch": 0.8, + "learning_rate": 9.863846567062972e-09, + "logits/chosen": -1.8655025959014893, + "logits/rejected": -1.8664664030075073, + "logps/chosen": -75.50627136230469, + "logps/rejected": -171.72691345214844, + "loss": 0.612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20107880234718323, + "rewards/margins": 0.3176208436489105, + "rewards/rejected": -0.5186996459960938, + "step": 13794 + }, + { + "epoch": 0.8, + "learning_rate": 9.858227244688538e-09, + "logits/chosen": -1.8294345140457153, + "logits/rejected": -1.8300142288208008, + "logps/chosen": -48.65324020385742, + "logps/rejected": -248.2865753173828, + "loss": 0.2067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6168174743652344, + "rewards/margins": 6.08071231842041, + "rewards/rejected": -5.463894844055176, + "step": 13795 + }, + { + "epoch": 0.8, + "learning_rate": 9.852609348346214e-09, + "logits/chosen": -2.014132499694824, + "logits/rejected": -2.015072822570801, + "logps/chosen": -2.4892003536224365, + "logps/rejected": -149.5457763671875, + "loss": 0.3501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05702665075659752, + "rewards/margins": 3.1754379272460938, + "rewards/rejected": -3.1184113025665283, + "step": 13796 + }, + { + "epoch": 0.8, + "learning_rate": 9.846992878235606e-09, + "logits/chosen": -1.9263081550598145, + "logits/rejected": -1.9523118734359741, + "logps/chosen": -163.1875, + "logps/rejected": -446.1064758300781, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.548727512359619, + "rewards/margins": 6.223706245422363, + "rewards/rejected": -3.674978733062744, + "step": 13797 + }, + { + "epoch": 0.8, + "learning_rate": 9.841377834556186e-09, + "logits/chosen": -2.023709774017334, + "logits/rejected": -1.9824252128601074, + "logps/chosen": -104.07821655273438, + "logps/rejected": -236.69552612304688, + "loss": 0.4043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.354016900062561, + "rewards/margins": 0.16781079769134521, + "rewards/rejected": 1.1862061023712158, + "step": 13798 + }, + { + "epoch": 0.8, + "learning_rate": 9.835764217507492e-09, + "logits/chosen": -1.724613070487976, + "logits/rejected": -1.7310733795166016, + "logps/chosen": -0.7170341610908508, + "logps/rejected": -74.86658477783203, + "loss": 0.5355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20851004123687744, + "rewards/margins": 0.5076586008071899, + "rewards/rejected": -0.2991485595703125, + "step": 13799 + }, + { + "epoch": 0.8, + "learning_rate": 9.830152027288907e-09, + "logits/chosen": -2.085193634033203, + "logits/rejected": -2.0794551372528076, + "logps/chosen": -65.04730224609375, + "logps/rejected": -196.97998046875, + "loss": 0.127, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.081379771232605, + "rewards/margins": 3.4655261039733887, + "rewards/rejected": -2.384146213531494, + "step": 13800 + }, + { + "epoch": 0.8, + "learning_rate": 9.824541264099812e-09, + "logits/chosen": -1.973623275756836, + "logits/rejected": -1.9700534343719482, + "logps/chosen": -16.771833419799805, + "logps/rejected": -212.64187622070312, + "loss": 0.4243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18906450271606445, + "rewards/margins": 2.757323741912842, + "rewards/rejected": -2.9463882446289062, + "step": 13801 + }, + { + "epoch": 0.8, + "learning_rate": 9.81893192813953e-09, + "logits/chosen": -1.986556887626648, + "logits/rejected": -2.0111851692199707, + "logps/chosen": -197.9337158203125, + "logps/rejected": -292.3259582519531, + "loss": 0.1339, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.406988501548767, + "rewards/margins": 1.6192901134490967, + "rewards/rejected": -0.21230164170265198, + "step": 13802 + }, + { + "epoch": 0.8, + "learning_rate": 9.81332401960735e-09, + "logits/chosen": -1.9981422424316406, + "logits/rejected": -1.9237816333770752, + "logps/chosen": -123.66201782226562, + "logps/rejected": -375.80712890625, + "loss": 0.0944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.03485107421875, + "rewards/margins": 2.425769090652466, + "rewards/rejected": -1.3909180164337158, + "step": 13803 + }, + { + "epoch": 0.8, + "learning_rate": 9.807717538702454e-09, + "logits/chosen": -2.0063304901123047, + "logits/rejected": -2.0061216354370117, + "logps/chosen": -8.71214771270752, + "logps/rejected": -183.00733947753906, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4071922302246094, + "rewards/margins": 3.4320411682128906, + "rewards/rejected": -3.0248489379882812, + "step": 13804 + }, + { + "epoch": 0.8, + "learning_rate": 9.802112485624043e-09, + "logits/chosen": -2.002385377883911, + "logits/rejected": -1.9993458986282349, + "logps/chosen": -36.36793899536133, + "logps/rejected": -122.75163269042969, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18175926804542542, + "rewards/margins": 3.045447826385498, + "rewards/rejected": -3.2272071838378906, + "step": 13805 + }, + { + "epoch": 0.8, + "learning_rate": 9.796508860571217e-09, + "logits/chosen": -1.8082575798034668, + "logits/rejected": -1.8160911798477173, + "logps/chosen": -157.1923065185547, + "logps/rejected": -232.66339111328125, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5947144031524658, + "rewards/margins": 0.6964142322540283, + "rewards/rejected": 0.8983001708984375, + "step": 13806 + }, + { + "epoch": 0.8, + "learning_rate": 9.79090666374307e-09, + "logits/chosen": -1.9392834901809692, + "logits/rejected": -1.9303464889526367, + "logps/chosen": -0.000901946856174618, + "logps/rejected": -231.3247833251953, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1758503357414156e-05, + "rewards/margins": 3.78357195854187, + "rewards/rejected": -3.783613681793213, + "step": 13807 + }, + { + "epoch": 0.8, + "learning_rate": 9.785305895338585e-09, + "logits/chosen": -1.9509154558181763, + "logits/rejected": -2.0028133392333984, + "logps/chosen": -298.3631591796875, + "logps/rejected": -438.2778015136719, + "loss": 0.0517, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.511221408843994, + "rewards/margins": 2.80889892578125, + "rewards/rejected": -0.297677606344223, + "step": 13808 + }, + { + "epoch": 0.8, + "learning_rate": 9.77970655555675e-09, + "logits/chosen": -1.8692378997802734, + "logits/rejected": -1.856961965560913, + "logps/chosen": -0.06470059603452682, + "logps/rejected": -106.46792602539062, + "loss": 0.6709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01467683631926775, + "rewards/margins": 0.044053059071302414, + "rewards/rejected": -0.02937622182071209, + "step": 13809 + }, + { + "epoch": 0.8, + "learning_rate": 9.77410864459648e-09, + "logits/chosen": -1.8151501417160034, + "logits/rejected": -1.811753749847412, + "logps/chosen": -23.05365753173828, + "logps/rejected": -263.57647705078125, + "loss": 0.2571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4651157557964325, + "rewards/margins": 2.9996144771575928, + "rewards/rejected": -2.534498691558838, + "step": 13810 + }, + { + "epoch": 0.8, + "learning_rate": 9.768512162656628e-09, + "logits/chosen": -1.8403154611587524, + "logits/rejected": -1.8442387580871582, + "logps/chosen": -30.943262100219727, + "logps/rejected": -133.96676635742188, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.171918272972107, + "rewards/margins": 0.5161001086235046, + "rewards/rejected": 0.6558181643486023, + "step": 13811 + }, + { + "epoch": 0.8, + "learning_rate": 9.762917109936037e-09, + "logits/chosen": -1.7679386138916016, + "logits/rejected": -1.7727153301239014, + "logps/chosen": -0.010885834693908691, + "logps/rejected": -152.69692993164062, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035365825169719756, + "rewards/margins": 3.434448719024658, + "rewards/rejected": -3.434802293777466, + "step": 13812 + }, + { + "epoch": 0.8, + "learning_rate": 9.757323486633423e-09, + "logits/chosen": -1.83137047290802, + "logits/rejected": -1.802814245223999, + "logps/chosen": -170.388916015625, + "logps/rejected": -291.8572082519531, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5685410499572754, + "rewards/margins": 3.182844638824463, + "rewards/rejected": -0.6143035888671875, + "step": 13813 + }, + { + "epoch": 0.8, + "learning_rate": 9.751731292947556e-09, + "logits/chosen": -1.8307113647460938, + "logits/rejected": -1.8149659633636475, + "logps/chosen": -244.0936279296875, + "logps/rejected": -386.48602294921875, + "loss": 0.2014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4971923828125, + "rewards/margins": 0.976971447467804, + "rewards/rejected": 0.520220935344696, + "step": 13814 + }, + { + "epoch": 0.8, + "learning_rate": 9.746140529077051e-09, + "logits/chosen": -2.030721664428711, + "logits/rejected": -2.016129970550537, + "logps/chosen": -74.8228530883789, + "logps/rejected": -242.5209503173828, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6690208911895752, + "rewards/margins": 4.233628273010254, + "rewards/rejected": -2.5646073818206787, + "step": 13815 + }, + { + "epoch": 0.8, + "learning_rate": 9.740551195220554e-09, + "logits/chosen": -2.1239261627197266, + "logits/rejected": -2.1159417629241943, + "logps/chosen": -26.063541412353516, + "logps/rejected": -229.7326202392578, + "loss": 0.3703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30476054549217224, + "rewards/margins": 1.3908705711364746, + "rewards/rejected": -1.08610999584198, + "step": 13816 + }, + { + "epoch": 0.8, + "learning_rate": 9.73496329157658e-09, + "logits/chosen": -1.6562443971633911, + "logits/rejected": -1.672669529914856, + "logps/chosen": -250.35296630859375, + "logps/rejected": -426.8751220703125, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1025664806365967, + "rewards/margins": 4.179751396179199, + "rewards/rejected": -2.0771851539611816, + "step": 13817 + }, + { + "epoch": 0.8, + "learning_rate": 9.729376818343698e-09, + "logits/chosen": -1.9308727979660034, + "logits/rejected": -1.9940863847732544, + "logps/chosen": -117.18704986572266, + "logps/rejected": -641.1289672851562, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7273887395858765, + "rewards/margins": 9.663949012756348, + "rewards/rejected": -7.936560153961182, + "step": 13818 + }, + { + "epoch": 0.8, + "learning_rate": 9.72379177572032e-09, + "logits/chosen": -1.9383347034454346, + "logits/rejected": -1.9359594583511353, + "logps/chosen": -73.61994934082031, + "logps/rejected": -415.2569580078125, + "loss": 0.0695, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9124459028244019, + "rewards/margins": 7.79284143447876, + "rewards/rejected": -5.880395412445068, + "step": 13819 + }, + { + "epoch": 0.8, + "learning_rate": 9.718208163904884e-09, + "logits/chosen": -1.7603843212127686, + "logits/rejected": -1.7615270614624023, + "logps/chosen": -13.648513793945312, + "logps/rejected": -165.45309448242188, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06323490291833878, + "rewards/margins": 3.5024025440216064, + "rewards/rejected": -3.4391677379608154, + "step": 13820 + }, + { + "epoch": 0.8, + "learning_rate": 9.712625983095706e-09, + "logits/chosen": -1.74839186668396, + "logits/rejected": -1.7880427837371826, + "logps/chosen": -192.1875457763672, + "logps/rejected": -368.3037109375, + "loss": 0.1739, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2754043340682983, + "rewards/margins": 1.1536513566970825, + "rewards/rejected": 0.12175293266773224, + "step": 13821 + }, + { + "epoch": 0.8, + "learning_rate": 9.707045233491145e-09, + "logits/chosen": -1.8748418092727661, + "logits/rejected": -1.8477243185043335, + "logps/chosen": -138.19203186035156, + "logps/rejected": -415.70330810546875, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7848007678985596, + "rewards/margins": 3.898655891418457, + "rewards/rejected": -2.1138551235198975, + "step": 13822 + }, + { + "epoch": 0.8, + "learning_rate": 9.701465915289424e-09, + "logits/chosen": -1.835671305656433, + "logits/rejected": -1.842472791671753, + "logps/chosen": -37.908084869384766, + "logps/rejected": -212.907470703125, + "loss": 0.5124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44152411818504333, + "rewards/margins": 0.043849557638168335, + "rewards/rejected": 0.397674560546875, + "step": 13823 + }, + { + "epoch": 0.8, + "learning_rate": 9.695888028688753e-09, + "logits/chosen": -1.890264868736267, + "logits/rejected": -1.8811722993850708, + "logps/chosen": -309.9322814941406, + "logps/rejected": -447.11016845703125, + "loss": 0.2067, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.127859592437744, + "rewards/margins": 0.6877350807189941, + "rewards/rejected": 3.44012451171875, + "step": 13824 + }, + { + "epoch": 0.8, + "learning_rate": 9.690311573887284e-09, + "logits/chosen": -1.8425946235656738, + "logits/rejected": -1.8788548707962036, + "logps/chosen": -222.20736694335938, + "logps/rejected": -710.07470703125, + "loss": 0.0515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9352661371231079, + "rewards/margins": 16.406463623046875, + "rewards/rejected": -15.471198081970215, + "step": 13825 + }, + { + "epoch": 0.8, + "learning_rate": 9.684736551083138e-09, + "logits/chosen": -2.027691602706909, + "logits/rejected": -2.0364508628845215, + "logps/chosen": -0.9142985343933105, + "logps/rejected": -58.4015998840332, + "loss": 0.5901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11134608089923859, + "rewards/margins": 0.31993335485458374, + "rewards/rejected": -0.20858727395534515, + "step": 13826 + }, + { + "epoch": 0.8, + "learning_rate": 9.679162960474347e-09, + "logits/chosen": -1.8863872289657593, + "logits/rejected": -1.895495891571045, + "logps/chosen": -23.249828338623047, + "logps/rejected": -113.77243041992188, + "loss": 0.3942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01852588728070259, + "rewards/margins": 1.726431131362915, + "rewards/rejected": -1.7449569702148438, + "step": 13827 + }, + { + "epoch": 0.8, + "learning_rate": 9.673590802258913e-09, + "logits/chosen": -1.745802879333496, + "logits/rejected": -1.747198224067688, + "logps/chosen": -0.0010360523592680693, + "logps/rejected": -172.8213348388672, + "loss": 0.3316, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.138455996231642e-06, + "rewards/margins": 3.5325138568878174, + "rewards/rejected": -3.532505750656128, + "step": 13828 + }, + { + "epoch": 0.8, + "learning_rate": 9.668020076634792e-09, + "logits/chosen": -1.9541270732879639, + "logits/rejected": -1.960715413093567, + "logps/chosen": -24.653703689575195, + "logps/rejected": -217.23574829101562, + "loss": 0.2397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6447141766548157, + "rewards/margins": 4.761788845062256, + "rewards/rejected": -4.117074489593506, + "step": 13829 + }, + { + "epoch": 0.8, + "learning_rate": 9.662450783799892e-09, + "logits/chosen": -2.004483699798584, + "logits/rejected": -1.9982362985610962, + "logps/chosen": -21.230831146240234, + "logps/rejected": -149.2058868408203, + "loss": 0.3898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7643892168998718, + "rewards/margins": 0.6434343457221985, + "rewards/rejected": 0.12095489352941513, + "step": 13830 + }, + { + "epoch": 0.8, + "learning_rate": 9.656882923952043e-09, + "logits/chosen": -1.9609774351119995, + "logits/rejected": -1.9492489099502563, + "logps/chosen": -90.657470703125, + "logps/rejected": -362.5724792480469, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09442138671875, + "rewards/margins": 8.130050659179688, + "rewards/rejected": -8.035629272460938, + "step": 13831 + }, + { + "epoch": 0.8, + "learning_rate": 9.651316497289053e-09, + "logits/chosen": -1.8305375576019287, + "logits/rejected": -1.83546781539917, + "logps/chosen": -195.49649047851562, + "logps/rejected": -389.16265869140625, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2222687005996704, + "rewards/margins": 4.161746501922607, + "rewards/rejected": -2.9394776821136475, + "step": 13832 + }, + { + "epoch": 0.8, + "learning_rate": 9.645751504008664e-09, + "logits/chosen": -1.7839035987854004, + "logits/rejected": -1.7564061880111694, + "logps/chosen": -335.05072021484375, + "logps/rejected": -455.9723205566406, + "loss": 0.206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33176881074905396, + "rewards/margins": 1.1041595935821533, + "rewards/rejected": -0.7723907828330994, + "step": 13833 + }, + { + "epoch": 0.81, + "learning_rate": 9.64018794430858e-09, + "logits/chosen": -1.639406442642212, + "logits/rejected": -1.6283758878707886, + "logps/chosen": -50.37278747558594, + "logps/rejected": -244.95584106445312, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3462856411933899, + "rewards/margins": 1.5282909870147705, + "rewards/rejected": -1.1820052862167358, + "step": 13834 + }, + { + "epoch": 0.81, + "learning_rate": 9.634625818386455e-09, + "logits/chosen": -2.0758345127105713, + "logits/rejected": -2.0587494373321533, + "logps/chosen": -222.80641174316406, + "logps/rejected": -535.0479736328125, + "loss": 0.0446, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9720810651779175, + "rewards/margins": 7.014726638793945, + "rewards/rejected": -5.042645454406738, + "step": 13835 + }, + { + "epoch": 0.81, + "learning_rate": 9.62906512643984e-09, + "logits/chosen": -1.8649640083312988, + "logits/rejected": -1.8424794673919678, + "logps/chosen": -195.0486297607422, + "logps/rejected": -281.3702697753906, + "loss": 0.2041, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3130996227264404, + "rewards/margins": 0.7771559953689575, + "rewards/rejected": 1.535943627357483, + "step": 13836 + }, + { + "epoch": 0.81, + "learning_rate": 9.623505868666333e-09, + "logits/chosen": -2.107588768005371, + "logits/rejected": -2.103933334350586, + "logps/chosen": -14.269268035888672, + "logps/rejected": -93.3454360961914, + "loss": 0.5805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02866525761783123, + "rewards/margins": 0.33072730898857117, + "rewards/rejected": -0.35939255356788635, + "step": 13837 + }, + { + "epoch": 0.81, + "learning_rate": 9.617948045263391e-09, + "logits/chosen": -1.9269766807556152, + "logits/rejected": -1.9213253259658813, + "logps/chosen": -116.78271484375, + "logps/rejected": -318.9459228515625, + "loss": 0.1378, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0852035284042358, + "rewards/margins": 2.6075942516326904, + "rewards/rejected": -1.5223907232284546, + "step": 13838 + }, + { + "epoch": 0.81, + "learning_rate": 9.61239165642847e-09, + "logits/chosen": -1.877801775932312, + "logits/rejected": -1.8854234218597412, + "logps/chosen": -81.53032684326172, + "logps/rejected": -266.2366943359375, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5926437377929688, + "rewards/margins": 5.917149543762207, + "rewards/rejected": -4.324505805969238, + "step": 13839 + }, + { + "epoch": 0.81, + "learning_rate": 9.606836702358934e-09, + "logits/chosen": -1.8711211681365967, + "logits/rejected": -1.8570986986160278, + "logps/chosen": -48.201072692871094, + "logps/rejected": -237.2203369140625, + "loss": 0.1251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7980964779853821, + "rewards/margins": 4.168947696685791, + "rewards/rejected": -3.3708512783050537, + "step": 13840 + }, + { + "epoch": 0.81, + "learning_rate": 9.601283183252167e-09, + "logits/chosen": -1.792090654373169, + "logits/rejected": -1.8171271085739136, + "logps/chosen": -139.7367706298828, + "logps/rejected": -263.6043395996094, + "loss": 0.2507, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5444473028182983, + "rewards/margins": 0.9927718639373779, + "rewards/rejected": 0.5516754388809204, + "step": 13841 + }, + { + "epoch": 0.81, + "learning_rate": 9.595731099305427e-09, + "logits/chosen": -2.1295340061187744, + "logits/rejected": -2.116516590118408, + "logps/chosen": -12.927327156066895, + "logps/rejected": -215.39028930664062, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1600971221923828, + "rewards/margins": 2.9525837898254395, + "rewards/rejected": -2.7924866676330566, + "step": 13842 + }, + { + "epoch": 0.81, + "learning_rate": 9.590180450715945e-09, + "logits/chosen": -2.0877602100372314, + "logits/rejected": -2.078855514526367, + "logps/chosen": -25.264102935791016, + "logps/rejected": -173.70364379882812, + "loss": 0.3733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13659973442554474, + "rewards/margins": 3.4275436401367188, + "rewards/rejected": -3.564143419265747, + "step": 13843 + }, + { + "epoch": 0.81, + "learning_rate": 9.584631237680928e-09, + "logits/chosen": -1.926743745803833, + "logits/rejected": -1.927343487739563, + "logps/chosen": -2.996758222579956, + "logps/rejected": -97.268798828125, + "loss": 0.6095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06988801807165146, + "rewards/margins": 0.5175784230232239, + "rewards/rejected": -0.5874664187431335, + "step": 13844 + }, + { + "epoch": 0.81, + "learning_rate": 9.579083460397513e-09, + "logits/chosen": -2.1138741970062256, + "logits/rejected": -2.1056525707244873, + "logps/chosen": -22.05570411682129, + "logps/rejected": -164.75970458984375, + "loss": 0.2085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.198242425918579, + "rewards/margins": 1.3740938901901245, + "rewards/rejected": -0.17585144937038422, + "step": 13845 + }, + { + "epoch": 0.81, + "learning_rate": 9.573537119062758e-09, + "logits/chosen": -1.881061315536499, + "logits/rejected": -1.8608942031860352, + "logps/chosen": -4.8756020987639204e-05, + "logps/rejected": -203.89901733398438, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0132538363905041e-06, + "rewards/margins": 6.443816661834717, + "rewards/rejected": -6.4438157081604, + "step": 13846 + }, + { + "epoch": 0.81, + "learning_rate": 9.567992213873716e-09, + "logits/chosen": -1.993752360343933, + "logits/rejected": -1.9905929565429688, + "logps/chosen": -16.72266960144043, + "logps/rejected": -107.26658630371094, + "loss": 0.4428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08562984317541122, + "rewards/margins": 1.203534483909607, + "rewards/rejected": -1.1179046630859375, + "step": 13847 + }, + { + "epoch": 0.81, + "learning_rate": 9.562448745027362e-09, + "logits/chosen": -1.890501856803894, + "logits/rejected": -1.888377070426941, + "logps/chosen": -93.72590637207031, + "logps/rejected": -237.26824951171875, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10245056450366974, + "rewards/margins": 2.3406801223754883, + "rewards/rejected": -2.238229513168335, + "step": 13848 + }, + { + "epoch": 0.81, + "learning_rate": 9.55690671272064e-09, + "logits/chosen": -1.991494059562683, + "logits/rejected": -1.9830198287963867, + "logps/chosen": -118.90106201171875, + "logps/rejected": -230.170166015625, + "loss": 0.0843, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.868303060531616, + "rewards/margins": 2.0669374465942383, + "rewards/rejected": 0.8013656735420227, + "step": 13849 + }, + { + "epoch": 0.81, + "learning_rate": 9.551366117150412e-09, + "logits/chosen": -1.8359664678573608, + "logits/rejected": -1.8463386297225952, + "logps/chosen": -7.637688159942627, + "logps/rejected": -189.36451721191406, + "loss": 0.2766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3367789685726166, + "rewards/margins": 4.101193904876709, + "rewards/rejected": -3.7644150257110596, + "step": 13850 + }, + { + "epoch": 0.81, + "learning_rate": 9.545826958513515e-09, + "logits/chosen": -1.906827449798584, + "logits/rejected": -1.905240774154663, + "logps/chosen": -0.004132356494665146, + "logps/rejected": -124.58879089355469, + "loss": 0.4391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00037965812953189015, + "rewards/margins": 1.569018840789795, + "rewards/rejected": -1.5693985223770142, + "step": 13851 + }, + { + "epoch": 0.81, + "learning_rate": 9.540289237006726e-09, + "logits/chosen": -1.8528556823730469, + "logits/rejected": -1.8577033281326294, + "logps/chosen": -24.074390411376953, + "logps/rejected": -220.23989868164062, + "loss": 0.2327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3537025451660156, + "rewards/margins": 3.054236650466919, + "rewards/rejected": -2.7005341053009033, + "step": 13852 + }, + { + "epoch": 0.81, + "learning_rate": 9.534752952826774e-09, + "logits/chosen": -1.8179999589920044, + "logits/rejected": -1.7991328239440918, + "logps/chosen": -198.28558349609375, + "logps/rejected": -581.6270751953125, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4038727283477783, + "rewards/margins": 8.474130630493164, + "rewards/rejected": -6.070257663726807, + "step": 13853 + }, + { + "epoch": 0.81, + "learning_rate": 9.529218106170344e-09, + "logits/chosen": -2.0150392055511475, + "logits/rejected": -2.034975528717041, + "logps/chosen": -163.21751403808594, + "logps/rejected": -388.5099182128906, + "loss": 0.0883, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5349701046943665, + "rewards/margins": 3.4309144020080566, + "rewards/rejected": -2.895944356918335, + "step": 13854 + }, + { + "epoch": 0.81, + "learning_rate": 9.52368469723403e-09, + "logits/chosen": -1.769999384880066, + "logits/rejected": -1.7712136507034302, + "logps/chosen": -186.24209594726562, + "logps/rejected": -409.4091796875, + "loss": 0.0449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8977569937705994, + "rewards/margins": 4.847250461578369, + "rewards/rejected": -3.949493408203125, + "step": 13855 + }, + { + "epoch": 0.81, + "learning_rate": 9.51815272621445e-09, + "logits/chosen": -1.9729852676391602, + "logits/rejected": -1.9807987213134766, + "logps/chosen": -12.617849349975586, + "logps/rejected": -220.04095458984375, + "loss": 0.4597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009853363735601306, + "rewards/margins": 1.2604587078094482, + "rewards/rejected": -1.261444091796875, + "step": 13856 + }, + { + "epoch": 0.81, + "learning_rate": 9.51262219330809e-09, + "logits/chosen": -2.0170841217041016, + "logits/rejected": -2.016265392303467, + "logps/chosen": -0.0004926977562718093, + "logps/rejected": -177.0450439453125, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6384772607125342e-05, + "rewards/margins": 4.449556827545166, + "rewards/rejected": -4.449573040008545, + "step": 13857 + }, + { + "epoch": 0.81, + "learning_rate": 9.50709309871145e-09, + "logits/chosen": -1.9361162185668945, + "logits/rejected": -1.9368842840194702, + "logps/chosen": -184.74234008789062, + "logps/rejected": -199.4112091064453, + "loss": 0.3744, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3718384504318237, + "rewards/margins": -0.00898122787475586, + "rewards/rejected": 1.3808196783065796, + "step": 13858 + }, + { + "epoch": 0.81, + "learning_rate": 9.501565442620901e-09, + "logits/chosen": -1.9289076328277588, + "logits/rejected": -1.9261780977249146, + "logps/chosen": -37.158851623535156, + "logps/rejected": -203.073974609375, + "loss": 0.1142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2490005493164062, + "rewards/margins": 2.799038887023926, + "rewards/rejected": -1.55003821849823, + "step": 13859 + }, + { + "epoch": 0.81, + "learning_rate": 9.496039225232882e-09, + "logits/chosen": -1.7284997701644897, + "logits/rejected": -1.7145079374313354, + "logps/chosen": -122.069580078125, + "logps/rejected": -216.99923706054688, + "loss": 0.0823, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0131256580352783, + "rewards/margins": 2.5605058670043945, + "rewards/rejected": -0.5473800897598267, + "step": 13860 + }, + { + "epoch": 0.81, + "learning_rate": 9.490514446743658e-09, + "logits/chosen": -1.9553782939910889, + "logits/rejected": -1.926943063735962, + "logps/chosen": -6.452723026275635, + "logps/rejected": -342.3173522949219, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21617208421230316, + "rewards/margins": 5.872483253479004, + "rewards/rejected": -5.65631103515625, + "step": 13861 + }, + { + "epoch": 0.81, + "learning_rate": 9.484991107349515e-09, + "logits/chosen": -1.9760926961898804, + "logits/rejected": -2.0479373931884766, + "logps/chosen": -211.98245239257812, + "logps/rejected": -405.05718994140625, + "loss": 0.0551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9716949462890625, + "rewards/margins": 3.7755095958709717, + "rewards/rejected": -2.803814649581909, + "step": 13862 + }, + { + "epoch": 0.81, + "learning_rate": 9.47946920724667e-09, + "logits/chosen": -1.9194297790527344, + "logits/rejected": -1.9078084230422974, + "logps/chosen": -166.04563903808594, + "logps/rejected": -223.7933349609375, + "loss": 0.3722, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.578688144683838, + "rewards/margins": -0.05890488624572754, + "rewards/rejected": 2.6375930309295654, + "step": 13863 + }, + { + "epoch": 0.81, + "learning_rate": 9.473948746631299e-09, + "logits/chosen": -1.7720407247543335, + "logits/rejected": -1.7636910676956177, + "logps/chosen": -66.4671630859375, + "logps/rejected": -219.96263122558594, + "loss": 0.206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4760726988315582, + "rewards/margins": 3.455166816711426, + "rewards/rejected": -2.9790940284729004, + "step": 13864 + }, + { + "epoch": 0.81, + "learning_rate": 9.46842972569949e-09, + "logits/chosen": -2.0574283599853516, + "logits/rejected": -2.059216022491455, + "logps/chosen": -21.10733985900879, + "logps/rejected": -146.4734649658203, + "loss": 0.3833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04956016689538956, + "rewards/margins": 2.106513500213623, + "rewards/rejected": -2.0569534301757812, + "step": 13865 + }, + { + "epoch": 0.81, + "learning_rate": 9.462912144647317e-09, + "logits/chosen": -1.7777178287506104, + "logits/rejected": -1.7745361328125, + "logps/chosen": -37.770347595214844, + "logps/rejected": -181.1739501953125, + "loss": 0.1835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4229316711425781, + "rewards/margins": 1.4582115411758423, + "rewards/rejected": -0.03527984768152237, + "step": 13866 + }, + { + "epoch": 0.81, + "learning_rate": 9.457396003670798e-09, + "logits/chosen": -1.7998275756835938, + "logits/rejected": -1.8017349243164062, + "logps/chosen": -0.13635484874248505, + "logps/rejected": -161.62582397460938, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.024143764690962e-06, + "rewards/margins": 3.318722724914551, + "rewards/rejected": -3.3187317848205566, + "step": 13867 + }, + { + "epoch": 0.81, + "learning_rate": 9.451881302965897e-09, + "logits/chosen": -1.8932996988296509, + "logits/rejected": -1.888463020324707, + "logps/chosen": -7.192951202392578, + "logps/rejected": -136.19692993164062, + "loss": 0.5118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.331094354391098, + "rewards/margins": 0.3474670350551605, + "rewards/rejected": -0.0163726806640625, + "step": 13868 + }, + { + "epoch": 0.81, + "learning_rate": 9.446368042728498e-09, + "logits/chosen": -1.8832306861877441, + "logits/rejected": -1.8762458562850952, + "logps/chosen": -36.94041442871094, + "logps/rejected": -247.1359405517578, + "loss": 0.1772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4546268582344055, + "rewards/margins": 2.395395278930664, + "rewards/rejected": -1.9407684803009033, + "step": 13869 + }, + { + "epoch": 0.81, + "learning_rate": 9.440856223154482e-09, + "logits/chosen": -1.806673526763916, + "logits/rejected": -1.8083118200302124, + "logps/chosen": -0.10489343106746674, + "logps/rejected": -54.72964859008789, + "loss": 0.6321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04083612561225891, + "rewards/margins": 0.07198350876569748, + "rewards/rejected": -0.031147385016083717, + "step": 13870 + }, + { + "epoch": 0.81, + "learning_rate": 9.435345844439646e-09, + "logits/chosen": -1.873859167098999, + "logits/rejected": -1.8750886917114258, + "logps/chosen": -273.29681396484375, + "logps/rejected": -296.75384521484375, + "loss": 0.4206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0028046369552612, + "rewards/margins": 0.3639069199562073, + "rewards/rejected": 0.638897716999054, + "step": 13871 + }, + { + "epoch": 0.81, + "learning_rate": 9.429836906779748e-09, + "logits/chosen": -1.7928823232650757, + "logits/rejected": -1.794816017150879, + "logps/chosen": -272.005615234375, + "logps/rejected": -503.87969970703125, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.173071265220642, + "rewards/margins": 7.244650363922119, + "rewards/rejected": -6.0715789794921875, + "step": 13872 + }, + { + "epoch": 0.81, + "learning_rate": 9.424329410370507e-09, + "logits/chosen": -1.820644497871399, + "logits/rejected": -1.8219469785690308, + "logps/chosen": -0.23049306869506836, + "logps/rejected": -194.3838348388672, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004091005306690931, + "rewards/margins": 3.65840744972229, + "rewards/rejected": -3.6624984741210938, + "step": 13873 + }, + { + "epoch": 0.81, + "learning_rate": 9.418823355407547e-09, + "logits/chosen": -2.009464740753174, + "logits/rejected": -2.0046474933624268, + "logps/chosen": -32.803810119628906, + "logps/rejected": -389.4296875, + "loss": 0.1333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7541874051094055, + "rewards/margins": 6.663602352142334, + "rewards/rejected": -5.909414768218994, + "step": 13874 + }, + { + "epoch": 0.81, + "learning_rate": 9.41331874208649e-09, + "logits/chosen": -1.950219750404358, + "logits/rejected": -1.9476295709609985, + "logps/chosen": -4.14845380873885e-05, + "logps/rejected": -72.05342102050781, + "loss": 0.3976, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.053090378874913e-07, + "rewards/margins": 2.1990044116973877, + "rewards/rejected": -2.199004888534546, + "step": 13875 + }, + { + "epoch": 0.81, + "learning_rate": 9.407815570602878e-09, + "logits/chosen": -1.8684457540512085, + "logits/rejected": -1.871014952659607, + "logps/chosen": -6.395957946777344, + "logps/rejected": -105.15576934814453, + "loss": 0.5679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.190507173538208, + "rewards/margins": 0.7260302901268005, + "rewards/rejected": -0.9165374636650085, + "step": 13876 + }, + { + "epoch": 0.81, + "learning_rate": 9.402313841152232e-09, + "logits/chosen": -1.932669997215271, + "logits/rejected": -1.9361727237701416, + "logps/chosen": -38.760013580322266, + "logps/rejected": -152.5596923828125, + "loss": 0.4567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1674884855747223, + "rewards/margins": 0.5785312652587891, + "rewards/rejected": -0.4110427796840668, + "step": 13877 + }, + { + "epoch": 0.81, + "learning_rate": 9.396813553929956e-09, + "logits/chosen": -2.0997815132141113, + "logits/rejected": -2.0960233211517334, + "logps/chosen": -75.04725646972656, + "logps/rejected": -352.398193359375, + "loss": 0.3635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12552490830421448, + "rewards/margins": 7.070364475250244, + "rewards/rejected": -6.9448394775390625, + "step": 13878 + }, + { + "epoch": 0.81, + "learning_rate": 9.391314709131499e-09, + "logits/chosen": -1.8330023288726807, + "logits/rejected": -1.8382606506347656, + "logps/chosen": -28.75023651123047, + "logps/rejected": -134.52134704589844, + "loss": 0.2185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5378066897392273, + "rewards/margins": 2.7480669021606445, + "rewards/rejected": -2.2102601528167725, + "step": 13879 + }, + { + "epoch": 0.81, + "learning_rate": 9.385817306952165e-09, + "logits/chosen": -1.7697244882583618, + "logits/rejected": -1.7782490253448486, + "logps/chosen": -270.7542419433594, + "logps/rejected": -512.05712890625, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.34513258934021, + "rewards/margins": 6.140878677368164, + "rewards/rejected": -3.795745849609375, + "step": 13880 + }, + { + "epoch": 0.81, + "learning_rate": 9.380321347587284e-09, + "logits/chosen": -1.712459683418274, + "logits/rejected": -1.7184967994689941, + "logps/chosen": -12.778909683227539, + "logps/rejected": -229.20297241210938, + "loss": 0.2444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3920761048793793, + "rewards/margins": 2.0536789894104004, + "rewards/rejected": -1.6616028547286987, + "step": 13881 + }, + { + "epoch": 0.81, + "learning_rate": 9.374826831232053e-09, + "logits/chosen": -1.8392354249954224, + "logits/rejected": -1.829565167427063, + "logps/chosen": -3.850408029393293e-05, + "logps/rejected": -131.96136474609375, + "loss": 0.3693, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.602702231044532e-07, + "rewards/margins": 2.9804487228393555, + "rewards/rejected": -2.9804482460021973, + "step": 13882 + }, + { + "epoch": 0.81, + "learning_rate": 9.36933375808172e-09, + "logits/chosen": -1.5817519426345825, + "logits/rejected": -1.5944738388061523, + "logps/chosen": -0.009744999930262566, + "logps/rejected": -163.68270874023438, + "loss": 0.4555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008381350198760629, + "rewards/margins": 1.1225504875183105, + "rewards/rejected": -1.123388648033142, + "step": 13883 + }, + { + "epoch": 0.81, + "learning_rate": 9.363842128331384e-09, + "logits/chosen": -1.8464449644088745, + "logits/rejected": -1.8279706239700317, + "logps/chosen": -38.362342834472656, + "logps/rejected": -380.4693298339844, + "loss": 0.2214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5673503875732422, + "rewards/margins": 5.337360858917236, + "rewards/rejected": -4.770010471343994, + "step": 13884 + }, + { + "epoch": 0.81, + "learning_rate": 9.358351942176146e-09, + "logits/chosen": -1.8996968269348145, + "logits/rejected": -1.8900219202041626, + "logps/chosen": -178.25985717773438, + "logps/rejected": -503.7171630859375, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7478058338165283, + "rewards/margins": 4.09039306640625, + "rewards/rejected": -1.3425873517990112, + "step": 13885 + }, + { + "epoch": 0.81, + "learning_rate": 9.352863199811051e-09, + "logits/chosen": -1.629098892211914, + "logits/rejected": -1.6014659404754639, + "logps/chosen": -143.16026306152344, + "logps/rejected": -369.6612548828125, + "loss": 0.0875, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.404710531234741, + "rewards/margins": 2.506706476211548, + "rewards/rejected": -0.10199584811925888, + "step": 13886 + }, + { + "epoch": 0.81, + "learning_rate": 9.347375901431092e-09, + "logits/chosen": -1.9272266626358032, + "logits/rejected": -1.928292155265808, + "logps/chosen": -161.732421875, + "logps/rejected": -241.32754516601562, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7733154296875, + "rewards/margins": 0.46605223417282104, + "rewards/rejected": 0.30726319551467896, + "step": 13887 + }, + { + "epoch": 0.81, + "learning_rate": 9.341890047231182e-09, + "logits/chosen": -1.8846510648727417, + "logits/rejected": -1.8776445388793945, + "logps/chosen": -31.323305130004883, + "logps/rejected": -151.61602783203125, + "loss": 0.5496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20706577599048615, + "rewards/margins": 0.9158468842506409, + "rewards/rejected": -1.1229126453399658, + "step": 13888 + }, + { + "epoch": 0.81, + "learning_rate": 9.33640563740622e-09, + "logits/chosen": -2.0168557167053223, + "logits/rejected": -1.9446231126785278, + "logps/chosen": -273.3436279296875, + "logps/rejected": -705.8318481445312, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0508790016174316, + "rewards/margins": 7.190472602844238, + "rewards/rejected": -5.139593601226807, + "step": 13889 + }, + { + "epoch": 0.81, + "learning_rate": 9.330922672151037e-09, + "logits/chosen": -1.8659104108810425, + "logits/rejected": -1.8695753812789917, + "logps/chosen": -0.44282689690589905, + "logps/rejected": -119.2402114868164, + "loss": 0.4007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0629437193274498, + "rewards/margins": 1.8861477375030518, + "rewards/rejected": -1.8232040405273438, + "step": 13890 + }, + { + "epoch": 0.81, + "learning_rate": 9.325441151660424e-09, + "logits/chosen": -1.8345787525177002, + "logits/rejected": -1.8358241319656372, + "logps/chosen": -67.18891143798828, + "logps/rejected": -157.0677490234375, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.58461993932724, + "rewards/margins": 1.660733938217163, + "rewards/rejected": -1.0761139392852783, + "step": 13891 + }, + { + "epoch": 0.81, + "learning_rate": 9.31996107612909e-09, + "logits/chosen": -1.9311891794204712, + "logits/rejected": -1.9926402568817139, + "logps/chosen": -179.41177368164062, + "logps/rejected": -332.44244384765625, + "loss": 0.2188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3827362060546875, + "rewards/margins": 3.943686008453369, + "rewards/rejected": -3.5609498023986816, + "step": 13892 + }, + { + "epoch": 0.81, + "learning_rate": 9.314482445751732e-09, + "logits/chosen": -1.81389319896698, + "logits/rejected": -1.8274227380752563, + "logps/chosen": -264.0836181640625, + "logps/rejected": -365.8844909667969, + "loss": 0.0773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8730804920196533, + "rewards/margins": 1.9231019020080566, + "rewards/rejected": -0.05002136155962944, + "step": 13893 + }, + { + "epoch": 0.81, + "learning_rate": 9.309005260722962e-09, + "logits/chosen": -2.0109386444091797, + "logits/rejected": -1.9953619241714478, + "logps/chosen": -2.6517765522003174, + "logps/rejected": -229.72323608398438, + "loss": 0.3738, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00412518996745348, + "rewards/margins": 5.163272857666016, + "rewards/rejected": -5.167397975921631, + "step": 13894 + }, + { + "epoch": 0.81, + "learning_rate": 9.303529521237375e-09, + "logits/chosen": -1.7614871263504028, + "logits/rejected": -1.756883144378662, + "logps/chosen": -245.32882690429688, + "logps/rejected": -556.7222290039062, + "loss": 0.1047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2852783203125, + "rewards/margins": 4.786703586578369, + "rewards/rejected": -4.501425266265869, + "step": 13895 + }, + { + "epoch": 0.81, + "learning_rate": 9.298055227489492e-09, + "logits/chosen": -1.7958470582962036, + "logits/rejected": -1.766022801399231, + "logps/chosen": -203.3781280517578, + "logps/rejected": -292.5924072265625, + "loss": 0.2164, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4987274408340454, + "rewards/margins": 1.217919945716858, + "rewards/rejected": 0.2808074951171875, + "step": 13896 + }, + { + "epoch": 0.81, + "learning_rate": 9.292582379673764e-09, + "logits/chosen": -1.7039347887039185, + "logits/rejected": -1.7295747995376587, + "logps/chosen": -193.3192138671875, + "logps/rejected": -432.6441345214844, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7830262184143066, + "rewards/margins": 7.850039958953857, + "rewards/rejected": -5.067013740539551, + "step": 13897 + }, + { + "epoch": 0.81, + "learning_rate": 9.287110977984652e-09, + "logits/chosen": -1.919450283050537, + "logits/rejected": -1.9095958471298218, + "logps/chosen": -24.287899017333984, + "logps/rejected": -173.37600708007812, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4599850177764893, + "rewards/margins": 2.9344875812530518, + "rewards/rejected": -1.4745025634765625, + "step": 13898 + }, + { + "epoch": 0.81, + "learning_rate": 9.281641022616499e-09, + "logits/chosen": -2.019843578338623, + "logits/rejected": -2.0076043605804443, + "logps/chosen": -40.07765197753906, + "logps/rejected": -281.2119445800781, + "loss": 0.2541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.046269990503787994, + "rewards/margins": 3.3133018016815186, + "rewards/rejected": -3.2670319080352783, + "step": 13899 + }, + { + "epoch": 0.81, + "learning_rate": 9.276172513763642e-09, + "logits/chosen": -1.8428399562835693, + "logits/rejected": -1.7877901792526245, + "logps/chosen": -285.501708984375, + "logps/rejected": -401.73577880859375, + "loss": 0.1283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8108032941818237, + "rewards/margins": 2.0976014137268066, + "rewards/rejected": -0.2867980897426605, + "step": 13900 + }, + { + "epoch": 0.81, + "learning_rate": 9.270705451620308e-09, + "logits/chosen": -1.9303470849990845, + "logits/rejected": -1.9174226522445679, + "logps/chosen": -92.93778991699219, + "logps/rejected": -338.1390380859375, + "loss": 0.1701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6318931579589844, + "rewards/margins": 6.332448482513428, + "rewards/rejected": -5.700555324554443, + "step": 13901 + }, + { + "epoch": 0.81, + "learning_rate": 9.265239836380779e-09, + "logits/chosen": -1.8582826852798462, + "logits/rejected": -1.8582754135131836, + "logps/chosen": -33.43111801147461, + "logps/rejected": -194.58497619628906, + "loss": 0.4479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2658771574497223, + "rewards/margins": 1.9847713708877563, + "rewards/rejected": -2.2506484985351562, + "step": 13902 + }, + { + "epoch": 0.81, + "learning_rate": 9.259775668239167e-09, + "logits/chosen": -1.8981086015701294, + "logits/rejected": -1.8910696506500244, + "logps/chosen": -13.824369430541992, + "logps/rejected": -57.75434875488281, + "loss": 0.4913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3080694377422333, + "rewards/margins": 0.2653665840625763, + "rewards/rejected": 0.04270286485552788, + "step": 13903 + }, + { + "epoch": 0.81, + "learning_rate": 9.25431294738961e-09, + "logits/chosen": -2.1372056007385254, + "logits/rejected": -2.1337857246398926, + "logps/chosen": -37.34665298461914, + "logps/rejected": -98.65044403076172, + "loss": 0.6113, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1564968079328537, + "rewards/margins": -0.06990852952003479, + "rewards/rejected": 0.2264053374528885, + "step": 13904 + }, + { + "epoch": 0.81, + "learning_rate": 9.248851674026164e-09, + "logits/chosen": -1.8750253915786743, + "logits/rejected": -1.880301833152771, + "logps/chosen": -294.93505859375, + "logps/rejected": -429.156494140625, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1546905040740967, + "rewards/margins": 0.36750173568725586, + "rewards/rejected": 1.7871887683868408, + "step": 13905 + }, + { + "epoch": 0.81, + "learning_rate": 9.243391848342852e-09, + "logits/chosen": -2.0250394344329834, + "logits/rejected": -1.9169716835021973, + "logps/chosen": -225.99456787109375, + "logps/rejected": -715.8654174804688, + "loss": 0.0595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8970047235488892, + "rewards/margins": 11.60222339630127, + "rewards/rejected": -10.705218315124512, + "step": 13906 + }, + { + "epoch": 0.81, + "learning_rate": 9.237933470533615e-09, + "logits/chosen": -1.5948983430862427, + "logits/rejected": -1.59791100025177, + "logps/chosen": -0.00021039102284703404, + "logps/rejected": -45.695823669433594, + "loss": 0.6479, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.22567596717272e-06, + "rewards/margins": 0.18036606907844543, + "rewards/rejected": -0.18037529289722443, + "step": 13907 + }, + { + "epoch": 0.81, + "learning_rate": 9.232476540792366e-09, + "logits/chosen": -1.7792527675628662, + "logits/rejected": -1.8204782009124756, + "logps/chosen": -204.1759033203125, + "logps/rejected": -494.44097900390625, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.394235372543335, + "rewards/margins": 5.1594390869140625, + "rewards/rejected": -2.7652039527893066, + "step": 13908 + }, + { + "epoch": 0.81, + "learning_rate": 9.227021059312973e-09, + "logits/chosen": -1.7661203145980835, + "logits/rejected": -1.783313274383545, + "logps/chosen": -217.49456787109375, + "logps/rejected": -229.4561767578125, + "loss": 0.1068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.441305637359619, + "rewards/margins": 1.7781524658203125, + "rewards/rejected": 0.6631531119346619, + "step": 13909 + }, + { + "epoch": 0.81, + "learning_rate": 9.22156702628924e-09, + "logits/chosen": -1.8081730604171753, + "logits/rejected": -1.8080987930297852, + "logps/chosen": -27.503278732299805, + "logps/rejected": -115.39755249023438, + "loss": 0.3799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23685264587402344, + "rewards/margins": 1.590287446975708, + "rewards/rejected": -1.3534348011016846, + "step": 13910 + }, + { + "epoch": 0.81, + "learning_rate": 9.216114441914902e-09, + "logits/chosen": -1.7757235765457153, + "logits/rejected": -1.776423454284668, + "logps/chosen": -201.15811157226562, + "logps/rejected": -462.44525146484375, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.393476963043213, + "rewards/margins": 4.166456699371338, + "rewards/rejected": -1.772979736328125, + "step": 13911 + }, + { + "epoch": 0.81, + "learning_rate": 9.210663306383681e-09, + "logits/chosen": -1.8095623254776, + "logits/rejected": -1.8073713779449463, + "logps/chosen": -230.7421875, + "logps/rejected": -364.87127685546875, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3760955333709717, + "rewards/margins": 0.1389617919921875, + "rewards/rejected": 2.237133741378784, + "step": 13912 + }, + { + "epoch": 0.81, + "learning_rate": 9.205213619889218e-09, + "logits/chosen": -1.997223973274231, + "logits/rejected": -1.986203908920288, + "logps/chosen": -129.9510498046875, + "logps/rejected": -291.85040283203125, + "loss": 0.1502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.846795678138733, + "rewards/margins": 1.28033447265625, + "rewards/rejected": 0.5664612054824829, + "step": 13913 + }, + { + "epoch": 0.81, + "learning_rate": 9.199765382625114e-09, + "logits/chosen": -2.0615415573120117, + "logits/rejected": -2.057251453399658, + "logps/chosen": -6.643084526062012, + "logps/rejected": -277.9102783203125, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03821439668536186, + "rewards/margins": 2.7813901901245117, + "rewards/rejected": -2.8196046352386475, + "step": 13914 + }, + { + "epoch": 0.81, + "learning_rate": 9.194318594784934e-09, + "logits/chosen": -1.889266014099121, + "logits/rejected": -1.8894314765930176, + "logps/chosen": -53.762779235839844, + "logps/rejected": -259.76043701171875, + "loss": 0.198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6247852444648743, + "rewards/margins": 2.673018217086792, + "rewards/rejected": -2.0482330322265625, + "step": 13915 + }, + { + "epoch": 0.81, + "learning_rate": 9.188873256562135e-09, + "logits/chosen": -1.7894619703292847, + "logits/rejected": -1.7962276935577393, + "logps/chosen": -218.88253784179688, + "logps/rejected": -336.13336181640625, + "loss": 0.4635, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1065399646759033, + "rewards/margins": 0.9147918224334717, + "rewards/rejected": -2.021331787109375, + "step": 13916 + }, + { + "epoch": 0.81, + "learning_rate": 9.183429368150208e-09, + "logits/chosen": -1.8284885883331299, + "logits/rejected": -1.8362679481506348, + "logps/chosen": -4.529893703875132e-05, + "logps/rejected": -440.1312255859375, + "loss": 0.3294, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1086198128396063e-06, + "rewards/margins": 12.144466400146484, + "rewards/rejected": -12.1444673538208, + "step": 13917 + }, + { + "epoch": 0.81, + "learning_rate": 9.177986929742515e-09, + "logits/chosen": -2.01139760017395, + "logits/rejected": -1.9937447309494019, + "logps/chosen": -39.98724365234375, + "logps/rejected": -207.7641143798828, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4226493835449219, + "rewards/margins": 4.740647315979004, + "rewards/rejected": -3.317997694015503, + "step": 13918 + }, + { + "epoch": 0.81, + "learning_rate": 9.172545941532422e-09, + "logits/chosen": -1.8316227197647095, + "logits/rejected": -1.8276931047439575, + "logps/chosen": -44.019927978515625, + "logps/rejected": -195.70846557617188, + "loss": 0.3151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3978286683559418, + "rewards/margins": 2.2843642234802246, + "rewards/rejected": -1.88653564453125, + "step": 13919 + }, + { + "epoch": 0.81, + "learning_rate": 9.167106403713182e-09, + "logits/chosen": -1.6735727787017822, + "logits/rejected": -1.6759299039840698, + "logps/chosen": -48.354957580566406, + "logps/rejected": -258.320068359375, + "loss": 0.132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8900436758995056, + "rewards/margins": 3.4520065784454346, + "rewards/rejected": -2.561962842941284, + "step": 13920 + }, + { + "epoch": 0.81, + "learning_rate": 9.161668316478078e-09, + "logits/chosen": -1.517572283744812, + "logits/rejected": -1.5268785953521729, + "logps/chosen": -17.56597328186035, + "logps/rejected": -141.39083862304688, + "loss": 0.6027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06487884372472763, + "rewards/margins": 0.5470626950263977, + "rewards/rejected": -0.6119415163993835, + "step": 13921 + }, + { + "epoch": 0.81, + "learning_rate": 9.156231680020272e-09, + "logits/chosen": -2.051713705062866, + "logits/rejected": -2.04567813873291, + "logps/chosen": -80.76878356933594, + "logps/rejected": -146.91598510742188, + "loss": 0.7167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44857025146484375, + "rewards/margins": 0.21376800537109375, + "rewards/rejected": -0.6623382568359375, + "step": 13922 + }, + { + "epoch": 0.81, + "learning_rate": 9.150796494532909e-09, + "logits/chosen": -1.4947513341903687, + "logits/rejected": -1.4909977912902832, + "logps/chosen": -0.3277493715286255, + "logps/rejected": -86.23462677001953, + "loss": 0.6218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02918108180165291, + "rewards/margins": 0.3252096176147461, + "rewards/rejected": -0.3543907105922699, + "step": 13923 + }, + { + "epoch": 0.81, + "learning_rate": 9.145362760209052e-09, + "logits/chosen": -1.836716890335083, + "logits/rejected": -1.8450144529342651, + "logps/chosen": -22.90389060974121, + "logps/rejected": -171.5353546142578, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26760560274124146, + "rewards/margins": 2.614859104156494, + "rewards/rejected": -2.882464647293091, + "step": 13924 + }, + { + "epoch": 0.81, + "learning_rate": 9.13993047724177e-09, + "logits/chosen": -2.0491421222686768, + "logits/rejected": -2.0449390411376953, + "logps/chosen": -0.15448491275310516, + "logps/rejected": -93.56076049804688, + "loss": 0.5111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011572607792913914, + "rewards/margins": 0.7301785349845886, + "rewards/rejected": -0.7417511343955994, + "step": 13925 + }, + { + "epoch": 0.81, + "learning_rate": 9.134499645824017e-09, + "logits/chosen": -1.9582005739212036, + "logits/rejected": -1.9431350231170654, + "logps/chosen": -311.517578125, + "logps/rejected": -527.6260986328125, + "loss": 0.048, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5512330532073975, + "rewards/margins": 2.4555726051330566, + "rewards/rejected": 0.09566040337085724, + "step": 13926 + }, + { + "epoch": 0.81, + "learning_rate": 9.129070266148731e-09, + "logits/chosen": -1.9728026390075684, + "logits/rejected": -1.9637645483016968, + "logps/chosen": -34.28557205200195, + "logps/rejected": -213.1479034423828, + "loss": 0.1317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.267306923866272, + "rewards/margins": 2.78590726852417, + "rewards/rejected": -1.5186004638671875, + "step": 13927 + }, + { + "epoch": 0.81, + "learning_rate": 9.123642338408793e-09, + "logits/chosen": -1.8868342638015747, + "logits/rejected": -1.8715096712112427, + "logps/chosen": -84.6074447631836, + "logps/rejected": -306.0335693359375, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31720733642578125, + "rewards/margins": 2.5206315517425537, + "rewards/rejected": -2.2034242153167725, + "step": 13928 + }, + { + "epoch": 0.81, + "learning_rate": 9.118215862797034e-09, + "logits/chosen": -2.067274808883667, + "logits/rejected": -2.072167158126831, + "logps/chosen": -26.251550674438477, + "logps/rejected": -152.48045349121094, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08050765842199326, + "rewards/margins": 2.6099693775177, + "rewards/rejected": -2.529461622238159, + "step": 13929 + }, + { + "epoch": 0.81, + "learning_rate": 9.112790839506212e-09, + "logits/chosen": -1.8714520931243896, + "logits/rejected": -1.8805325031280518, + "logps/chosen": -160.08766174316406, + "logps/rejected": -295.2891845703125, + "loss": 0.0751, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1588242053985596, + "rewards/margins": 2.4205398559570312, + "rewards/rejected": -0.26171571016311646, + "step": 13930 + }, + { + "epoch": 0.81, + "learning_rate": 9.10736726872906e-09, + "logits/chosen": -2.010908842086792, + "logits/rejected": -2.0121219158172607, + "logps/chosen": -0.35357123613357544, + "logps/rejected": -178.31182861328125, + "loss": 0.6416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01593015156686306, + "rewards/margins": 0.13762210309505463, + "rewards/rejected": -0.15355224907398224, + "step": 13931 + }, + { + "epoch": 0.81, + "learning_rate": 9.101945150658253e-09, + "logits/chosen": -1.9990105628967285, + "logits/rejected": -1.997926950454712, + "logps/chosen": -50.16899108886719, + "logps/rejected": -125.22454833984375, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5552211999893188, + "rewards/margins": 1.0839245319366455, + "rewards/rejected": -0.5287033319473267, + "step": 13932 + }, + { + "epoch": 0.81, + "learning_rate": 9.096524485486406e-09, + "logits/chosen": -1.7910609245300293, + "logits/rejected": -1.7779788970947266, + "logps/chosen": -170.13705444335938, + "logps/rejected": -292.1389465332031, + "loss": 0.1186, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9233429431915283, + "rewards/margins": 1.7537598609924316, + "rewards/rejected": 0.16958312690258026, + "step": 13933 + }, + { + "epoch": 0.81, + "learning_rate": 9.091105273406103e-09, + "logits/chosen": -2.0162315368652344, + "logits/rejected": -2.014791250228882, + "logps/chosen": -0.0002000256790779531, + "logps/rejected": -85.97401428222656, + "loss": 0.6925, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0013000974140596e-05, + "rewards/margins": -0.017765140160918236, + "rewards/rejected": 0.01775512658059597, + "step": 13934 + }, + { + "epoch": 0.81, + "learning_rate": 9.085687514609835e-09, + "logits/chosen": -1.768688678741455, + "logits/rejected": -1.6973557472229004, + "logps/chosen": -283.0108642578125, + "logps/rejected": -546.3803100585938, + "loss": 0.0597, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.170724630355835, + "rewards/margins": 3.0899691581726074, + "rewards/rejected": -0.9192444086074829, + "step": 13935 + }, + { + "epoch": 0.81, + "learning_rate": 9.08027120929008e-09, + "logits/chosen": -1.8801175355911255, + "logits/rejected": -1.8923852443695068, + "logps/chosen": -34.00824737548828, + "logps/rejected": -177.8074951171875, + "loss": 0.4341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8437652587890625, + "rewards/margins": 0.30348509550094604, + "rewards/rejected": 0.5402801632881165, + "step": 13936 + }, + { + "epoch": 0.81, + "learning_rate": 9.07485635763925e-09, + "logits/chosen": -1.8548614978790283, + "logits/rejected": -1.8512941598892212, + "logps/chosen": -47.36981964111328, + "logps/rejected": -235.91256713867188, + "loss": 0.1933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7800697684288025, + "rewards/margins": 1.944089651107788, + "rewards/rejected": -1.1640198230743408, + "step": 13937 + }, + { + "epoch": 0.81, + "learning_rate": 9.06944295984972e-09, + "logits/chosen": -1.9788695573806763, + "logits/rejected": -1.9713995456695557, + "logps/chosen": -30.393905639648438, + "logps/rejected": -241.4900360107422, + "loss": 0.2919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5317813754081726, + "rewards/margins": 1.2622836828231812, + "rewards/rejected": -0.7305023074150085, + "step": 13938 + }, + { + "epoch": 0.81, + "learning_rate": 9.064031016113765e-09, + "logits/chosen": -1.975650429725647, + "logits/rejected": -2.033479928970337, + "logps/chosen": -201.40869140625, + "logps/rejected": -464.780029296875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6738967895507812, + "rewards/margins": 8.001859664916992, + "rewards/rejected": -5.327963352203369, + "step": 13939 + }, + { + "epoch": 0.81, + "learning_rate": 9.058620526623695e-09, + "logits/chosen": -1.8245913982391357, + "logits/rejected": -1.8674561977386475, + "logps/chosen": -208.17843627929688, + "logps/rejected": -423.62567138671875, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2938904762268066, + "rewards/margins": 4.742462158203125, + "rewards/rejected": -2.4485719203948975, + "step": 13940 + }, + { + "epoch": 0.81, + "learning_rate": 9.053211491571678e-09, + "logits/chosen": -1.8230103254318237, + "logits/rejected": -1.805694818496704, + "logps/chosen": -86.13850402832031, + "logps/rejected": -210.0086669921875, + "loss": 0.0944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7027450799942017, + "rewards/margins": 2.961491584777832, + "rewards/rejected": -1.2587463855743408, + "step": 13941 + }, + { + "epoch": 0.81, + "learning_rate": 9.047803911149893e-09, + "logits/chosen": -1.994736909866333, + "logits/rejected": -1.9524836540222168, + "logps/chosen": -201.24606323242188, + "logps/rejected": -545.482421875, + "loss": 0.0481, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.462963819503784, + "rewards/margins": 3.003552198410034, + "rewards/rejected": -0.54058837890625, + "step": 13942 + }, + { + "epoch": 0.81, + "learning_rate": 9.042397785550404e-09, + "logits/chosen": -1.8853174448013306, + "logits/rejected": -1.885689377784729, + "logps/chosen": -22.56884765625, + "logps/rejected": -33.18605422973633, + "loss": 0.5297, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.601086437702179, + "rewards/margins": -0.03445017337799072, + "rewards/rejected": 0.6355366110801697, + "step": 13943 + }, + { + "epoch": 0.81, + "learning_rate": 9.036993114965318e-09, + "logits/chosen": -1.9901082515716553, + "logits/rejected": -1.9606648683547974, + "logps/chosen": -96.95883178710938, + "logps/rejected": -397.76617431640625, + "loss": 0.115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.398363471031189, + "rewards/margins": 6.696074962615967, + "rewards/rejected": -5.297711372375488, + "step": 13944 + }, + { + "epoch": 0.81, + "learning_rate": 9.031589899586601e-09, + "logits/chosen": -1.8082497119903564, + "logits/rejected": -1.8431473970413208, + "logps/chosen": -167.1326446533203, + "logps/rejected": -222.88021850585938, + "loss": 0.2601, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3846161365509033, + "rewards/margins": 0.9272140860557556, + "rewards/rejected": 0.4574020504951477, + "step": 13945 + }, + { + "epoch": 0.81, + "learning_rate": 9.0261881396062e-09, + "logits/chosen": -2.089826822280884, + "logits/rejected": -2.077392339706421, + "logps/chosen": -0.00035616118111647666, + "logps/rejected": -211.8736572265625, + "loss": 0.3499, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2670416001346894e-05, + "rewards/margins": 4.802886962890625, + "rewards/rejected": -4.802909851074219, + "step": 13946 + }, + { + "epoch": 0.81, + "learning_rate": 9.020787835216025e-09, + "logits/chosen": -1.8501099348068237, + "logits/rejected": -1.8492974042892456, + "logps/chosen": -76.34589385986328, + "logps/rejected": -166.4684600830078, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7839813232421875, + "rewards/margins": 1.2387511730194092, + "rewards/rejected": -0.45476990938186646, + "step": 13947 + }, + { + "epoch": 0.81, + "learning_rate": 9.015388986607931e-09, + "logits/chosen": -1.7996339797973633, + "logits/rejected": -1.8057690858840942, + "logps/chosen": -341.4754638671875, + "logps/rejected": -511.24591064453125, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.388964891433716, + "rewards/margins": 3.2134034633636475, + "rewards/rejected": -0.8244385123252869, + "step": 13948 + }, + { + "epoch": 0.81, + "learning_rate": 9.009991593973681e-09, + "logits/chosen": -1.9563740491867065, + "logits/rejected": -1.9759631156921387, + "logps/chosen": -173.75546264648438, + "logps/rejected": -422.7769775390625, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6528609991073608, + "rewards/margins": 4.455741882324219, + "rewards/rejected": -2.8028810024261475, + "step": 13949 + }, + { + "epoch": 0.81, + "learning_rate": 9.004595657505037e-09, + "logits/chosen": -2.022825002670288, + "logits/rejected": -2.016002893447876, + "logps/chosen": -118.26153564453125, + "logps/rejected": -291.587890625, + "loss": 0.1224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1378662586212158, + "rewards/margins": 3.1554384231567383, + "rewards/rejected": -2.0175721645355225, + "step": 13950 + }, + { + "epoch": 0.81, + "learning_rate": 8.999201177393685e-09, + "logits/chosen": -1.8511264324188232, + "logits/rejected": -1.7864370346069336, + "logps/chosen": -312.8475341796875, + "logps/rejected": -499.9292297363281, + "loss": 0.156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1797058582305908, + "rewards/margins": 2.987600803375244, + "rewards/rejected": -1.8078949451446533, + "step": 13951 + }, + { + "epoch": 0.81, + "learning_rate": 8.993808153831273e-09, + "logits/chosen": -1.9464266300201416, + "logits/rejected": -1.9498422145843506, + "logps/chosen": -278.8885192871094, + "logps/rejected": -388.1930847167969, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7903534173965454, + "rewards/margins": 0.4539550542831421, + "rewards/rejected": 1.3363983631134033, + "step": 13952 + }, + { + "epoch": 0.81, + "learning_rate": 8.988416587009368e-09, + "logits/chosen": -1.887803316116333, + "logits/rejected": -1.9175913333892822, + "logps/chosen": -163.74722290039062, + "logps/rejected": -482.5823059082031, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.022757053375244, + "rewards/margins": 5.606329441070557, + "rewards/rejected": -3.5835723876953125, + "step": 13953 + }, + { + "epoch": 0.81, + "learning_rate": 8.983026477119515e-09, + "logits/chosen": -1.8717567920684814, + "logits/rejected": -1.8866100311279297, + "logps/chosen": -47.996620178222656, + "logps/rejected": -145.72288513183594, + "loss": 0.4041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22181282937526703, + "rewards/margins": 0.8674213886260986, + "rewards/rejected": -0.6456085443496704, + "step": 13954 + }, + { + "epoch": 0.81, + "learning_rate": 8.977637824353201e-09, + "logits/chosen": -1.9444429874420166, + "logits/rejected": -1.9094047546386719, + "logps/chosen": -132.07666015625, + "logps/rejected": -350.17340087890625, + "loss": 0.0744, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6264023780822754, + "rewards/margins": 2.4314072132110596, + "rewards/rejected": 0.19499512016773224, + "step": 13955 + }, + { + "epoch": 0.81, + "learning_rate": 8.97225062890185e-09, + "logits/chosen": -1.8646584749221802, + "logits/rejected": -1.8698532581329346, + "logps/chosen": -103.63211822509766, + "logps/rejected": -312.50830078125, + "loss": 0.3925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2731063961982727, + "rewards/margins": 5.682731628417969, + "rewards/rejected": -5.955838203430176, + "step": 13956 + }, + { + "epoch": 0.81, + "learning_rate": 8.96686489095686e-09, + "logits/chosen": -2.0976457595825195, + "logits/rejected": -2.0987548828125, + "logps/chosen": -46.83274459838867, + "logps/rejected": -178.09255981445312, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6226417422294617, + "rewards/margins": 1.315650224685669, + "rewards/rejected": -0.6930084228515625, + "step": 13957 + }, + { + "epoch": 0.81, + "learning_rate": 8.961480610709515e-09, + "logits/chosen": -2.0530126094818115, + "logits/rejected": -2.050823926925659, + "logps/chosen": -182.83688354492188, + "logps/rejected": -255.39990234375, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.557574510574341, + "rewards/margins": 1.944329857826233, + "rewards/rejected": 0.6132446527481079, + "step": 13958 + }, + { + "epoch": 0.81, + "learning_rate": 8.956097788351147e-09, + "logits/chosen": -2.0665767192840576, + "logits/rejected": -2.053549289703369, + "logps/chosen": -13.142802238464355, + "logps/rejected": -199.59747314453125, + "loss": 0.3277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1829543113708496, + "rewards/margins": 4.499293327331543, + "rewards/rejected": -4.316339015960693, + "step": 13959 + }, + { + "epoch": 0.81, + "learning_rate": 8.95071642407294e-09, + "logits/chosen": -1.9385563135147095, + "logits/rejected": -1.9516109228134155, + "logps/chosen": -53.00674819946289, + "logps/rejected": -342.719970703125, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09998512268066406, + "rewards/margins": 8.59317398071289, + "rewards/rejected": -8.493188858032227, + "step": 13960 + }, + { + "epoch": 0.81, + "learning_rate": 8.945336518066088e-09, + "logits/chosen": -1.7040824890136719, + "logits/rejected": -1.7068520784378052, + "logps/chosen": -22.037826538085938, + "logps/rejected": -192.2587890625, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16407184302806854, + "rewards/margins": 2.9566054344177246, + "rewards/rejected": -3.1206772327423096, + "step": 13961 + }, + { + "epoch": 0.81, + "learning_rate": 8.939958070521686e-09, + "logits/chosen": -1.7674866914749146, + "logits/rejected": -1.7042138576507568, + "logps/chosen": -242.00987243652344, + "logps/rejected": -454.96588134765625, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6210113763809204, + "rewards/margins": 0.9358917474746704, + "rewards/rejected": -0.31488037109375, + "step": 13962 + }, + { + "epoch": 0.81, + "learning_rate": 8.934581081630837e-09, + "logits/chosen": -2.021310806274414, + "logits/rejected": -1.9777823686599731, + "logps/chosen": -234.56362915039062, + "logps/rejected": -493.39605712890625, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0486725568771362, + "rewards/margins": 3.618804931640625, + "rewards/rejected": -2.5701324939727783, + "step": 13963 + }, + { + "epoch": 0.81, + "learning_rate": 8.929205551584534e-09, + "logits/chosen": -1.9703576564788818, + "logits/rejected": -1.9587656259536743, + "logps/chosen": -22.639266967773438, + "logps/rejected": -150.01345825195312, + "loss": 0.4571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5950118899345398, + "rewards/margins": 0.3827682137489319, + "rewards/rejected": 0.21224366128444672, + "step": 13964 + }, + { + "epoch": 0.81, + "learning_rate": 8.923831480573746e-09, + "logits/chosen": -1.9654847383499146, + "logits/rejected": -1.9686450958251953, + "logps/chosen": -150.92706298828125, + "logps/rejected": -350.0146789550781, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9683197736740112, + "rewards/margins": 3.6125917434692383, + "rewards/rejected": -1.6442718505859375, + "step": 13965 + }, + { + "epoch": 0.81, + "learning_rate": 8.918458868789392e-09, + "logits/chosen": -1.8961719274520874, + "logits/rejected": -1.8917042016983032, + "logps/chosen": -30.48124885559082, + "logps/rejected": -201.7078399658203, + "loss": 0.1945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6005411148071289, + "rewards/margins": 2.528106212615967, + "rewards/rejected": -1.9275649785995483, + "step": 13966 + }, + { + "epoch": 0.81, + "learning_rate": 8.913087716422334e-09, + "logits/chosen": -1.919612169265747, + "logits/rejected": -1.9105215072631836, + "logps/chosen": -26.409299850463867, + "logps/rejected": -164.16122436523438, + "loss": 0.5733, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7611234784126282, + "rewards/margins": -0.28135544061660767, + "rewards/rejected": 1.0424789190292358, + "step": 13967 + }, + { + "epoch": 0.81, + "learning_rate": 8.907718023663375e-09, + "logits/chosen": -1.73569917678833, + "logits/rejected": -1.7240639925003052, + "logps/chosen": -143.91334533691406, + "logps/rejected": -185.8538360595703, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9158310294151306, + "rewards/margins": 0.7716736197471619, + "rewards/rejected": 0.14415740966796875, + "step": 13968 + }, + { + "epoch": 0.81, + "learning_rate": 8.902349790703272e-09, + "logits/chosen": -1.8959835767745972, + "logits/rejected": -1.8816481828689575, + "logps/chosen": -243.127685546875, + "logps/rejected": -521.3414306640625, + "loss": 0.0488, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5221588611602783, + "rewards/margins": 4.476122856140137, + "rewards/rejected": -2.9539642333984375, + "step": 13969 + }, + { + "epoch": 0.81, + "learning_rate": 8.89698301773274e-09, + "logits/chosen": -1.8823521137237549, + "logits/rejected": -1.8861643075942993, + "logps/chosen": -184.57015991210938, + "logps/rejected": -282.55157470703125, + "loss": 0.1203, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.411144971847534, + "rewards/margins": 1.5841948986053467, + "rewards/rejected": 0.8269500732421875, + "step": 13970 + }, + { + "epoch": 0.81, + "learning_rate": 8.891617704942434e-09, + "logits/chosen": -1.6280088424682617, + "logits/rejected": -1.6354116201400757, + "logps/chosen": -184.89974975585938, + "logps/rejected": -418.9297180175781, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3842743635177612, + "rewards/margins": 2.833261251449585, + "rewards/rejected": -1.4489868879318237, + "step": 13971 + }, + { + "epoch": 0.81, + "learning_rate": 8.88625385252294e-09, + "logits/chosen": -1.9376071691513062, + "logits/rejected": -1.9461164474487305, + "logps/chosen": -151.86578369140625, + "logps/rejected": -286.74676513671875, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.28556227684021, + "rewards/margins": 2.63163161277771, + "rewards/rejected": -0.3460693359375, + "step": 13972 + }, + { + "epoch": 0.81, + "learning_rate": 8.88089146066482e-09, + "logits/chosen": -1.7824026346206665, + "logits/rejected": -1.7598909139633179, + "logps/chosen": -52.970367431640625, + "logps/rejected": -286.27703857421875, + "loss": 0.1986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49746400117874146, + "rewards/margins": 4.609955310821533, + "rewards/rejected": -4.112491130828857, + "step": 13973 + }, + { + "epoch": 0.81, + "learning_rate": 8.875530529558567e-09, + "logits/chosen": -1.8491888046264648, + "logits/rejected": -1.853490948677063, + "logps/chosen": -2.057448387145996, + "logps/rejected": -224.1315155029297, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09815897792577744, + "rewards/margins": 4.5847697257995605, + "rewards/rejected": -4.682928562164307, + "step": 13974 + }, + { + "epoch": 0.81, + "learning_rate": 8.870171059394633e-09, + "logits/chosen": -1.8945584297180176, + "logits/rejected": -1.8827426433563232, + "logps/chosen": -47.44383239746094, + "logps/rejected": -245.24923706054688, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8706688284873962, + "rewards/margins": 2.6200082302093506, + "rewards/rejected": -1.7493393421173096, + "step": 13975 + }, + { + "epoch": 0.81, + "learning_rate": 8.864813050363418e-09, + "logits/chosen": -1.715894341468811, + "logits/rejected": -1.668449878692627, + "logps/chosen": -177.91600036621094, + "logps/rejected": -336.4248352050781, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8467941284179688, + "rewards/margins": 4.511366367340088, + "rewards/rejected": -0.6645721793174744, + "step": 13976 + }, + { + "epoch": 0.81, + "learning_rate": 8.859456502655255e-09, + "logits/chosen": -1.8342936038970947, + "logits/rejected": -1.82663893699646, + "logps/chosen": -281.6771240234375, + "logps/rejected": -446.01336669921875, + "loss": 0.1415, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.050628662109375, + "rewards/margins": 1.24261474609375, + "rewards/rejected": 0.808013916015625, + "step": 13977 + }, + { + "epoch": 0.81, + "learning_rate": 8.85410141646043e-09, + "logits/chosen": -2.038139820098877, + "logits/rejected": -2.000577688217163, + "logps/chosen": -44.29010009765625, + "logps/rejected": -274.4390563964844, + "loss": 0.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1664302796125412, + "rewards/margins": 2.629638195037842, + "rewards/rejected": -2.463207960128784, + "step": 13978 + }, + { + "epoch": 0.81, + "learning_rate": 8.84874779196919e-09, + "logits/chosen": -2.0465567111968994, + "logits/rejected": -2.0358550548553467, + "logps/chosen": -3.552579879760742, + "logps/rejected": -400.8533935546875, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012176013551652431, + "rewards/margins": 7.524381160736084, + "rewards/rejected": -7.536557197570801, + "step": 13979 + }, + { + "epoch": 0.81, + "learning_rate": 8.843395629371737e-09, + "logits/chosen": -1.8516433238983154, + "logits/rejected": -1.8517698049545288, + "logps/chosen": -0.005097812972962856, + "logps/rejected": -156.89462280273438, + "loss": 0.5051, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.461210457724519e-05, + "rewards/margins": 0.9823055863380432, + "rewards/rejected": -0.982250988483429, + "step": 13980 + }, + { + "epoch": 0.81, + "learning_rate": 8.838044928858157e-09, + "logits/chosen": -1.883552074432373, + "logits/rejected": -1.8943939208984375, + "logps/chosen": -216.95932006835938, + "logps/rejected": -465.201904296875, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8162964582443237, + "rewards/margins": 4.1862335205078125, + "rewards/rejected": -2.3699371814727783, + "step": 13981 + }, + { + "epoch": 0.81, + "learning_rate": 8.832695690618597e-09, + "logits/chosen": -2.0780818462371826, + "logits/rejected": -2.0830237865448, + "logps/chosen": -162.09446716308594, + "logps/rejected": -261.0211181640625, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8551543951034546, + "rewards/margins": 0.23782038688659668, + "rewards/rejected": 1.617334008216858, + "step": 13982 + }, + { + "epoch": 0.81, + "learning_rate": 8.827347914843047e-09, + "logits/chosen": -1.7668461799621582, + "logits/rejected": -1.7835869789123535, + "logps/chosen": -220.83447265625, + "logps/rejected": -348.7716369628906, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.640460252761841, + "rewards/margins": 5.304827690124512, + "rewards/rejected": -1.66436767578125, + "step": 13983 + }, + { + "epoch": 0.81, + "learning_rate": 8.822001601721502e-09, + "logits/chosen": -2.0429537296295166, + "logits/rejected": -1.9797173738479614, + "logps/chosen": -164.65280151367188, + "logps/rejected": -356.3879089355469, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0898300409317017, + "rewards/margins": 0.9195022583007812, + "rewards/rejected": 0.17032776772975922, + "step": 13984 + }, + { + "epoch": 0.81, + "learning_rate": 8.816656751443863e-09, + "logits/chosen": -1.8657431602478027, + "logits/rejected": -1.93487548828125, + "logps/chosen": -291.3245544433594, + "logps/rejected": -470.442138671875, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2671539783477783, + "rewards/margins": 5.4525909423828125, + "rewards/rejected": -4.185437202453613, + "step": 13985 + }, + { + "epoch": 0.81, + "learning_rate": 8.811313364200051e-09, + "logits/chosen": -1.7487883567810059, + "logits/rejected": -1.7296308279037476, + "logps/chosen": -174.98257446289062, + "logps/rejected": -323.7269287109375, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.789628744125366, + "rewards/margins": 2.1470627784729004, + "rewards/rejected": 0.642565906047821, + "step": 13986 + }, + { + "epoch": 0.81, + "learning_rate": 8.805971440179849e-09, + "logits/chosen": -1.878431797027588, + "logits/rejected": -1.8769303560256958, + "logps/chosen": -20.65517807006836, + "logps/rejected": -230.52224731445312, + "loss": 0.1875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7259525656700134, + "rewards/margins": 3.1658620834350586, + "rewards/rejected": -2.4399094581604004, + "step": 13987 + }, + { + "epoch": 0.81, + "learning_rate": 8.800630979573048e-09, + "logits/chosen": -2.0515692234039307, + "logits/rejected": -2.0282673835754395, + "logps/chosen": -148.2987518310547, + "logps/rejected": -212.28897094726562, + "loss": 0.1509, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2263550758361816, + "rewards/margins": 1.0804901123046875, + "rewards/rejected": 2.145864963531494, + "step": 13988 + }, + { + "epoch": 0.81, + "learning_rate": 8.795291982569358e-09, + "logits/chosen": -1.746616005897522, + "logits/rejected": -1.7520750761032104, + "logps/chosen": -287.7755126953125, + "logps/rejected": -498.2053527832031, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.735852003097534, + "rewards/margins": 4.684884548187256, + "rewards/rejected": -1.9490326642990112, + "step": 13989 + }, + { + "epoch": 0.81, + "learning_rate": 8.789954449358461e-09, + "logits/chosen": -1.7880563735961914, + "logits/rejected": -1.7903815507888794, + "logps/chosen": -1.007423996925354, + "logps/rejected": -90.24661254882812, + "loss": 0.4754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0760851725935936, + "rewards/margins": 1.1993114948272705, + "rewards/rejected": -1.275396704673767, + "step": 13990 + }, + { + "epoch": 0.81, + "learning_rate": 8.784618380129955e-09, + "logits/chosen": -2.081723928451538, + "logits/rejected": -2.073585271835327, + "logps/chosen": -24.57590675354004, + "logps/rejected": -188.04629516601562, + "loss": 0.3684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14970187842845917, + "rewards/margins": 4.594194412231445, + "rewards/rejected": -4.743896484375, + "step": 13991 + }, + { + "epoch": 0.81, + "learning_rate": 8.77928377507341e-09, + "logits/chosen": -1.8907402753829956, + "logits/rejected": -1.8985494375228882, + "logps/chosen": -210.55108642578125, + "logps/rejected": -464.89971923828125, + "loss": 0.0626, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2680511474609375, + "rewards/margins": 4.146761894226074, + "rewards/rejected": -2.878710985183716, + "step": 13992 + }, + { + "epoch": 0.81, + "learning_rate": 8.773950634378335e-09, + "logits/chosen": -1.9751020669937134, + "logits/rejected": -2.0265069007873535, + "logps/chosen": -192.48257446289062, + "logps/rejected": -213.2725830078125, + "loss": 0.1034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0130646228790283, + "rewards/margins": 1.6357834339141846, + "rewards/rejected": 0.37728118896484375, + "step": 13993 + }, + { + "epoch": 0.81, + "learning_rate": 8.768618958234192e-09, + "logits/chosen": -1.7704625129699707, + "logits/rejected": -1.7649065256118774, + "logps/chosen": -0.511062502861023, + "logps/rejected": -204.47805786132812, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08327801525592804, + "rewards/margins": 2.6764907836914062, + "rewards/rejected": -2.593212842941284, + "step": 13994 + }, + { + "epoch": 0.81, + "learning_rate": 8.7632887468304e-09, + "logits/chosen": -1.9320930242538452, + "logits/rejected": -1.9322216510772705, + "logps/chosen": -9.105178833007812, + "logps/rejected": -139.32911682128906, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1515493392944336, + "rewards/margins": 1.5938482284545898, + "rewards/rejected": -1.4422988891601562, + "step": 13995 + }, + { + "epoch": 0.81, + "learning_rate": 8.75796000035629e-09, + "logits/chosen": -1.90217125415802, + "logits/rejected": -1.9001439809799194, + "logps/chosen": -5.3064680099487305, + "logps/rejected": -85.35677337646484, + "loss": 0.391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1858595460653305, + "rewards/margins": 1.4850409030914307, + "rewards/rejected": -1.299181342124939, + "step": 13996 + }, + { + "epoch": 0.81, + "learning_rate": 8.75263271900118e-09, + "logits/chosen": -2.0351052284240723, + "logits/rejected": -2.0291218757629395, + "logps/chosen": -0.0009490796364843845, + "logps/rejected": -190.13473510742188, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.941158401081339e-05, + "rewards/margins": 4.561832904815674, + "rewards/rejected": -4.561912536621094, + "step": 13997 + }, + { + "epoch": 0.81, + "learning_rate": 8.747306902954316e-09, + "logits/chosen": -1.869124412536621, + "logits/rejected": -1.8669897317886353, + "logps/chosen": -0.0265146866440773, + "logps/rejected": -212.96018981933594, + "loss": 0.3193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006731160101480782, + "rewards/margins": 5.339395046234131, + "rewards/rejected": -5.340068340301514, + "step": 13998 + }, + { + "epoch": 0.81, + "learning_rate": 8.741982552404914e-09, + "logits/chosen": -1.883758544921875, + "logits/rejected": -1.8748127222061157, + "logps/chosen": -192.35592651367188, + "logps/rejected": -254.4454345703125, + "loss": 0.1266, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1214661598205566, + "rewards/margins": 1.911026120185852, + "rewards/rejected": 0.21044006943702698, + "step": 13999 + }, + { + "epoch": 0.81, + "learning_rate": 8.73665966754208e-09, + "logits/chosen": -1.9354761838912964, + "logits/rejected": -1.9222421646118164, + "logps/chosen": -210.21090698242188, + "logps/rejected": -361.0843811035156, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.295867919921875, + "rewards/margins": 0.49688416719436646, + "rewards/rejected": 0.7989837527275085, + "step": 14000 + }, + { + "epoch": 0.81, + "learning_rate": 8.731338248554959e-09, + "logits/chosen": -1.9178731441497803, + "logits/rejected": -1.8799256086349487, + "logps/chosen": -227.55911254882812, + "logps/rejected": -317.7253112792969, + "loss": 0.0427, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1631195545196533, + "rewards/margins": 2.819357395172119, + "rewards/rejected": 0.34376221895217896, + "step": 14001 + }, + { + "epoch": 0.81, + "learning_rate": 8.726018295632565e-09, + "logits/chosen": -1.739206314086914, + "logits/rejected": -1.7444546222686768, + "logps/chosen": -45.75853729248047, + "logps/rejected": -173.14578247070312, + "loss": 0.4059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6914863586425781, + "rewards/margins": 0.5615242123603821, + "rewards/rejected": 0.12996216118335724, + "step": 14002 + }, + { + "epoch": 0.81, + "learning_rate": 8.7206998089639e-09, + "logits/chosen": -1.830069661140442, + "logits/rejected": -1.823562502861023, + "logps/chosen": -290.38763427734375, + "logps/rejected": -500.5037841796875, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8092286586761475, + "rewards/margins": 3.2759461402893066, + "rewards/rejected": -0.46671754121780396, + "step": 14003 + }, + { + "epoch": 0.81, + "learning_rate": 8.715382788737873e-09, + "logits/chosen": -1.7304712533950806, + "logits/rejected": -1.7246298789978027, + "logps/chosen": -119.39191436767578, + "logps/rejected": -219.64279174804688, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5363975763320923, + "rewards/margins": 1.4375388622283936, + "rewards/rejected": -0.901141345500946, + "step": 14004 + }, + { + "epoch": 0.82, + "learning_rate": 8.71006723514342e-09, + "logits/chosen": -1.782025694847107, + "logits/rejected": -1.7595404386520386, + "logps/chosen": -279.19879150390625, + "logps/rejected": -483.40814208984375, + "loss": 0.1502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.333868384361267, + "rewards/margins": 1.9870758056640625, + "rewards/rejected": -0.6532074213027954, + "step": 14005 + }, + { + "epoch": 0.82, + "learning_rate": 8.704753148369332e-09, + "logits/chosen": -1.8590614795684814, + "logits/rejected": -1.8523610830307007, + "logps/chosen": -6.663264274597168, + "logps/rejected": -154.86041259765625, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09572148323059082, + "rewards/margins": 2.2250993251800537, + "rewards/rejected": -2.129377841949463, + "step": 14006 + }, + { + "epoch": 0.82, + "learning_rate": 8.699440528604419e-09, + "logits/chosen": -1.891178846359253, + "logits/rejected": -1.8931138515472412, + "logps/chosen": -20.85908317565918, + "logps/rejected": -116.15213775634766, + "loss": 0.5463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39746513962745667, + "rewards/margins": 1.2306530475616455, + "rewards/rejected": -1.6281181573867798, + "step": 14007 + }, + { + "epoch": 0.82, + "learning_rate": 8.69412937603739e-09, + "logits/chosen": -2.0048720836639404, + "logits/rejected": -1.9592519998550415, + "logps/chosen": -182.23326110839844, + "logps/rejected": -371.38250732421875, + "loss": 0.0737, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.039569139480591, + "rewards/margins": 2.0538086891174316, + "rewards/rejected": 0.985760509967804, + "step": 14008 + }, + { + "epoch": 0.82, + "learning_rate": 8.688819690856952e-09, + "logits/chosen": -2.0680835247039795, + "logits/rejected": -2.061569929122925, + "logps/chosen": -11.44717025756836, + "logps/rejected": -122.24021911621094, + "loss": 0.1525, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2030938863754272, + "rewards/margins": 2.2173495292663574, + "rewards/rejected": -1.0142555236816406, + "step": 14009 + }, + { + "epoch": 0.82, + "learning_rate": 8.683511473251704e-09, + "logits/chosen": -1.8328182697296143, + "logits/rejected": -1.8297199010849, + "logps/chosen": -24.372411727905273, + "logps/rejected": -140.04226684570312, + "loss": 0.7476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8288347125053406, + "rewards/margins": 0.8002119660377502, + "rewards/rejected": -1.6290466785430908, + "step": 14010 + }, + { + "epoch": 0.82, + "learning_rate": 8.678204723410226e-09, + "logits/chosen": -1.9548112154006958, + "logits/rejected": -1.936435580253601, + "logps/chosen": -84.62731170654297, + "logps/rejected": -256.1964111328125, + "loss": 0.7222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3530174493789673, + "rewards/margins": 2.2733068466186523, + "rewards/rejected": -3.626324415206909, + "step": 14011 + }, + { + "epoch": 0.82, + "learning_rate": 8.672899441521043e-09, + "logits/chosen": -1.9780341386795044, + "logits/rejected": -1.9752031564712524, + "logps/chosen": -13.770475387573242, + "logps/rejected": -227.08267211914062, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03019428253173828, + "rewards/margins": 2.8026645183563232, + "rewards/rejected": -2.772470235824585, + "step": 14012 + }, + { + "epoch": 0.82, + "learning_rate": 8.667595627772639e-09, + "logits/chosen": -2.015023946762085, + "logits/rejected": -2.011715888977051, + "logps/chosen": -1.9381439685821533, + "logps/rejected": -92.69823455810547, + "loss": 0.5058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0764395222067833, + "rewards/margins": 0.9955305457115173, + "rewards/rejected": -0.9190910458564758, + "step": 14013 + }, + { + "epoch": 0.82, + "learning_rate": 8.662293282353406e-09, + "logits/chosen": -2.01831316947937, + "logits/rejected": -2.00417160987854, + "logps/chosen": -63.015621185302734, + "logps/rejected": -297.1746826171875, + "loss": 0.2235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15912361443042755, + "rewards/margins": 3.3465137481689453, + "rewards/rejected": -3.187390089035034, + "step": 14014 + }, + { + "epoch": 0.82, + "learning_rate": 8.656992405451713e-09, + "logits/chosen": -1.8387577533721924, + "logits/rejected": -1.8403804302215576, + "logps/chosen": -5.524487495422363, + "logps/rejected": -196.885009765625, + "loss": 0.4586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.371750146150589, + "rewards/margins": 4.182156085968018, + "rewards/rejected": -4.553906440734863, + "step": 14015 + }, + { + "epoch": 0.82, + "learning_rate": 8.651692997255888e-09, + "logits/chosen": -1.9028187990188599, + "logits/rejected": -1.909454345703125, + "logps/chosen": -14.982669830322266, + "logps/rejected": -108.26924133300781, + "loss": 0.5002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11816950142383575, + "rewards/margins": 0.5587356090545654, + "rewards/rejected": -0.67690509557724, + "step": 14016 + }, + { + "epoch": 0.82, + "learning_rate": 8.646395057954182e-09, + "logits/chosen": -2.0897068977355957, + "logits/rejected": -2.0849435329437256, + "logps/chosen": -56.004581451416016, + "logps/rejected": -306.49310302734375, + "loss": 0.1966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6133468747138977, + "rewards/margins": 6.8256516456604, + "rewards/rejected": -6.212304592132568, + "step": 14017 + }, + { + "epoch": 0.82, + "learning_rate": 8.641098587734813e-09, + "logits/chosen": -1.8737280368804932, + "logits/rejected": -1.8679667711257935, + "logps/chosen": -3.6358578654471785e-05, + "logps/rejected": -146.6136474609375, + "loss": 0.3476, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2768617782276124e-06, + "rewards/margins": 4.028087615966797, + "rewards/rejected": -4.028085231781006, + "step": 14018 + }, + { + "epoch": 0.82, + "learning_rate": 8.635803586785911e-09, + "logits/chosen": -1.9254683256149292, + "logits/rejected": -1.9206469058990479, + "logps/chosen": -21.562686920166016, + "logps/rejected": -93.30927276611328, + "loss": 0.4925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3222213685512543, + "rewards/margins": 0.35439223051071167, + "rewards/rejected": -0.032170869410037994, + "step": 14019 + }, + { + "epoch": 0.82, + "learning_rate": 8.630510055295625e-09, + "logits/chosen": -1.889134168624878, + "logits/rejected": -1.8728784322738647, + "logps/chosen": -326.3662109375, + "logps/rejected": -581.2318115234375, + "loss": 0.0689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20488281548023224, + "rewards/margins": 6.363574028015137, + "rewards/rejected": -6.15869140625, + "step": 14020 + }, + { + "epoch": 0.82, + "learning_rate": 8.625217993451967e-09, + "logits/chosen": -1.557286262512207, + "logits/rejected": -1.575639247894287, + "logps/chosen": -231.33883666992188, + "logps/rejected": -264.36273193359375, + "loss": 0.1698, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4264068603515625, + "rewards/margins": 1.1354583501815796, + "rewards/rejected": 0.2909484803676605, + "step": 14021 + }, + { + "epoch": 0.82, + "learning_rate": 8.619927401442967e-09, + "logits/chosen": -2.040100574493408, + "logits/rejected": -2.032792806625366, + "logps/chosen": -162.4394073486328, + "logps/rejected": -263.97564697265625, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.30908203125, + "rewards/margins": 1.7938110828399658, + "rewards/rejected": 0.515271008014679, + "step": 14022 + }, + { + "epoch": 0.82, + "learning_rate": 8.614638279456526e-09, + "logits/chosen": -1.760219931602478, + "logits/rejected": -1.7667269706726074, + "logps/chosen": -59.35871887207031, + "logps/rejected": -212.31405639648438, + "loss": 0.1825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5276634097099304, + "rewards/margins": 2.3753654956817627, + "rewards/rejected": -1.8477020263671875, + "step": 14023 + }, + { + "epoch": 0.82, + "learning_rate": 8.6093506276806e-09, + "logits/chosen": -1.7643104791641235, + "logits/rejected": -1.7622615098953247, + "logps/chosen": -134.67092895507812, + "logps/rejected": -204.69288635253906, + "loss": 0.1902, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7876510620117188, + "rewards/margins": 1.0841796398162842, + "rewards/rejected": 0.7034713625907898, + "step": 14024 + }, + { + "epoch": 0.82, + "learning_rate": 8.604064446302994e-09, + "logits/chosen": -1.926145076751709, + "logits/rejected": -1.9219090938568115, + "logps/chosen": -199.05921936035156, + "logps/rejected": -519.879638671875, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7544052600860596, + "rewards/margins": 5.711809158325195, + "rewards/rejected": -3.9574036598205566, + "step": 14025 + }, + { + "epoch": 0.82, + "learning_rate": 8.59877973551152e-09, + "logits/chosen": -2.011418581008911, + "logits/rejected": -2.014683723449707, + "logps/chosen": -0.00023804721422493458, + "logps/rejected": -293.3232727050781, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.601119710263447e-07, + "rewards/margins": 5.204046249389648, + "rewards/rejected": -5.204046726226807, + "step": 14026 + }, + { + "epoch": 0.82, + "learning_rate": 8.59349649549388e-09, + "logits/chosen": -2.151656150817871, + "logits/rejected": -2.1468887329101562, + "logps/chosen": -4.250086307525635, + "logps/rejected": -184.0096435546875, + "loss": 0.379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026630306616425514, + "rewards/margins": 2.207552433013916, + "rewards/rejected": -2.234182834625244, + "step": 14027 + }, + { + "epoch": 0.82, + "learning_rate": 8.588214726437804e-09, + "logits/chosen": -1.8953052759170532, + "logits/rejected": -1.9013813734054565, + "logps/chosen": -170.50051879882812, + "logps/rejected": -485.16400146484375, + "loss": 0.0599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8460678458213806, + "rewards/margins": 5.3731889724731445, + "rewards/rejected": -4.527121067047119, + "step": 14028 + }, + { + "epoch": 0.82, + "learning_rate": 8.582934428530902e-09, + "logits/chosen": -2.0353426933288574, + "logits/rejected": -2.0298285484313965, + "logps/chosen": -0.0028193150646984577, + "logps/rejected": -140.62783813476562, + "loss": 0.3746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022545864339917898, + "rewards/margins": 2.288052797317505, + "rewards/rejected": -2.288278341293335, + "step": 14029 + }, + { + "epoch": 0.82, + "learning_rate": 8.577655601960759e-09, + "logits/chosen": -2.0111305713653564, + "logits/rejected": -2.0092203617095947, + "logps/chosen": -85.38256072998047, + "logps/rejected": -167.63792419433594, + "loss": 0.267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8917999267578125, + "rewards/margins": 0.8111648559570312, + "rewards/rejected": 0.08063507080078125, + "step": 14030 + }, + { + "epoch": 0.82, + "learning_rate": 8.572378246914907e-09, + "logits/chosen": -1.810816764831543, + "logits/rejected": -1.7657369375228882, + "logps/chosen": -271.65313720703125, + "logps/rejected": -535.4088745117188, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5830750465393066, + "rewards/margins": 7.430990695953369, + "rewards/rejected": -4.8479156494140625, + "step": 14031 + }, + { + "epoch": 0.82, + "learning_rate": 8.567102363580842e-09, + "logits/chosen": -1.965417742729187, + "logits/rejected": -1.9595822095870972, + "logps/chosen": -5.757743929279968e-05, + "logps/rejected": -168.60693359375, + "loss": 0.3071, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.721994966734201e-07, + "rewards/margins": 3.7345311641693115, + "rewards/rejected": -3.7345306873321533, + "step": 14032 + }, + { + "epoch": 0.82, + "learning_rate": 8.561827952145956e-09, + "logits/chosen": -1.9185478687286377, + "logits/rejected": -1.9202907085418701, + "logps/chosen": -41.85219192504883, + "logps/rejected": -174.5130615234375, + "loss": 0.1369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.361296534538269, + "rewards/margins": 2.565605640411377, + "rewards/rejected": -1.204309105873108, + "step": 14033 + }, + { + "epoch": 0.82, + "learning_rate": 8.556555012797634e-09, + "logits/chosen": -1.7685118913650513, + "logits/rejected": -1.7611008882522583, + "logps/chosen": -225.17254638671875, + "logps/rejected": -363.80682373046875, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.3876800537109375, + "rewards/margins": 4.036862373352051, + "rewards/rejected": 0.35081788897514343, + "step": 14034 + }, + { + "epoch": 0.82, + "learning_rate": 8.551283545723203e-09, + "logits/chosen": -1.648830771446228, + "logits/rejected": -1.7039706707000732, + "logps/chosen": -187.265380859375, + "logps/rejected": -356.37457275390625, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.707611083984375, + "rewards/margins": 5.328943252563477, + "rewards/rejected": -3.6213319301605225, + "step": 14035 + }, + { + "epoch": 0.82, + "learning_rate": 8.546013551109927e-09, + "logits/chosen": -2.026648759841919, + "logits/rejected": -2.0115625858306885, + "logps/chosen": -45.16831588745117, + "logps/rejected": -247.9915008544922, + "loss": 0.1706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.835605263710022, + "rewards/margins": 3.1688742637634277, + "rewards/rejected": -2.333268880844116, + "step": 14036 + }, + { + "epoch": 0.82, + "learning_rate": 8.540745029145036e-09, + "logits/chosen": -2.1537601947784424, + "logits/rejected": -2.1487419605255127, + "logps/chosen": -53.712650299072266, + "logps/rejected": -203.63368225097656, + "loss": 0.2572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3950779139995575, + "rewards/margins": 5.184269905090332, + "rewards/rejected": -4.789192199707031, + "step": 14037 + }, + { + "epoch": 0.82, + "learning_rate": 8.535477980015666e-09, + "logits/chosen": -1.6963056325912476, + "logits/rejected": -1.6687657833099365, + "logps/chosen": -172.2434539794922, + "logps/rejected": -345.6119384765625, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8240280151367188, + "rewards/margins": 5.806831359863281, + "rewards/rejected": -2.9828033447265625, + "step": 14038 + }, + { + "epoch": 0.82, + "learning_rate": 8.530212403908944e-09, + "logits/chosen": -1.891710638999939, + "logits/rejected": -1.9039051532745361, + "logps/chosen": -240.589599609375, + "logps/rejected": -382.40472412109375, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1033997535705566, + "rewards/margins": 3.1216583251953125, + "rewards/rejected": -1.0182586908340454, + "step": 14039 + }, + { + "epoch": 0.82, + "learning_rate": 8.52494830101193e-09, + "logits/chosen": -1.9083735942840576, + "logits/rejected": -1.9069715738296509, + "logps/chosen": -0.06024615839123726, + "logps/rejected": -211.5838623046875, + "loss": 0.3546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04158565774559975, + "rewards/margins": 4.137261390686035, + "rewards/rejected": -4.095675945281982, + "step": 14040 + }, + { + "epoch": 0.82, + "learning_rate": 8.51968567151164e-09, + "logits/chosen": -1.9247572422027588, + "logits/rejected": -1.9295235872268677, + "logps/chosen": -271.0849304199219, + "logps/rejected": -417.9065246582031, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.139294385910034, + "rewards/margins": 2.27931809425354, + "rewards/rejected": 0.8599762320518494, + "step": 14041 + }, + { + "epoch": 0.82, + "learning_rate": 8.514424515594992e-09, + "logits/chosen": -2.04575777053833, + "logits/rejected": -2.03324031829834, + "logps/chosen": -39.61266326904297, + "logps/rejected": -306.7142639160156, + "loss": 0.1703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6497913599014282, + "rewards/margins": 3.9220175743103027, + "rewards/rejected": -3.272226095199585, + "step": 14042 + }, + { + "epoch": 0.82, + "learning_rate": 8.509164833448934e-09, + "logits/chosen": -1.920628309249878, + "logits/rejected": -1.9248428344726562, + "logps/chosen": -32.11602783203125, + "logps/rejected": -76.75102233886719, + "loss": 0.5884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03268928453326225, + "rewards/margins": 0.19516640901565552, + "rewards/rejected": -0.16247712075710297, + "step": 14043 + }, + { + "epoch": 0.82, + "learning_rate": 8.503906625260287e-09, + "logits/chosen": -1.9523621797561646, + "logits/rejected": -1.9472335577011108, + "logps/chosen": -291.7978210449219, + "logps/rejected": -349.412109375, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.723593235015869, + "rewards/margins": 0.3103301525115967, + "rewards/rejected": 2.4132630825042725, + "step": 14044 + }, + { + "epoch": 0.82, + "learning_rate": 8.49864989121587e-09, + "logits/chosen": -1.9579896926879883, + "logits/rejected": -1.9494057893753052, + "logps/chosen": -19.327896118164062, + "logps/rejected": -97.23786163330078, + "loss": 0.3651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12973804771900177, + "rewards/margins": 1.7726404666900635, + "rewards/rejected": -1.6429023742675781, + "step": 14045 + }, + { + "epoch": 0.82, + "learning_rate": 8.493394631502388e-09, + "logits/chosen": -1.7836676836013794, + "logits/rejected": -1.7731103897094727, + "logps/chosen": -38.08393478393555, + "logps/rejected": -260.3299255371094, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0641246810555458, + "rewards/margins": 2.7959651947021484, + "rewards/rejected": -2.7318406105041504, + "step": 14046 + }, + { + "epoch": 0.82, + "learning_rate": 8.48814084630658e-09, + "logits/chosen": -1.8580560684204102, + "logits/rejected": -1.873737096786499, + "logps/chosen": -213.2236785888672, + "logps/rejected": -315.6871032714844, + "loss": 0.13, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4430099725723267, + "rewards/margins": 2.2583725452423096, + "rewards/rejected": -0.8153625726699829, + "step": 14047 + }, + { + "epoch": 0.82, + "learning_rate": 8.482888535815057e-09, + "logits/chosen": -1.9829258918762207, + "logits/rejected": -1.9776493310928345, + "logps/chosen": -35.486900329589844, + "logps/rejected": -97.08453369140625, + "loss": 0.722, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12427139282226562, + "rewards/margins": -0.2608444392681122, + "rewards/rejected": 0.3851158320903778, + "step": 14048 + }, + { + "epoch": 0.82, + "learning_rate": 8.477637700214419e-09, + "logits/chosen": -1.8558632135391235, + "logits/rejected": -1.8337205648422241, + "logps/chosen": -77.13510131835938, + "logps/rejected": -377.8244934082031, + "loss": 0.0744, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6915940046310425, + "rewards/margins": 3.240037679672241, + "rewards/rejected": -1.5484436750411987, + "step": 14049 + }, + { + "epoch": 0.82, + "learning_rate": 8.472388339691189e-09, + "logits/chosen": -1.8635598421096802, + "logits/rejected": -1.8617007732391357, + "logps/chosen": -28.20981216430664, + "logps/rejected": -242.97592163085938, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1288047879934311, + "rewards/margins": 2.5695457458496094, + "rewards/rejected": -2.4407410621643066, + "step": 14050 + }, + { + "epoch": 0.82, + "learning_rate": 8.467140454431876e-09, + "logits/chosen": -2.0212223529815674, + "logits/rejected": -2.0657639503479004, + "logps/chosen": -208.86953735351562, + "logps/rejected": -426.77593994140625, + "loss": 0.0866, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6329971551895142, + "rewards/margins": 2.0578932762145996, + "rewards/rejected": -0.424896240234375, + "step": 14051 + }, + { + "epoch": 0.82, + "learning_rate": 8.461894044622881e-09, + "logits/chosen": -1.77238929271698, + "logits/rejected": -1.7937607765197754, + "logps/chosen": -217.38247680664062, + "logps/rejected": -530.9508056640625, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.646252393722534, + "rewards/margins": 6.86273193359375, + "rewards/rejected": -4.216479778289795, + "step": 14052 + }, + { + "epoch": 0.82, + "learning_rate": 8.456649110450591e-09, + "logits/chosen": -1.9356212615966797, + "logits/rejected": -2.0609078407287598, + "logps/chosen": -245.0067596435547, + "logps/rejected": -372.3646545410156, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9927383661270142, + "rewards/margins": 4.384034633636475, + "rewards/rejected": -2.39129638671875, + "step": 14053 + }, + { + "epoch": 0.82, + "learning_rate": 8.45140565210134e-09, + "logits/chosen": -1.8765805959701538, + "logits/rejected": -1.864341139793396, + "logps/chosen": -258.338623046875, + "logps/rejected": -368.9594421386719, + "loss": 0.157, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.406768798828125, + "rewards/margins": 1.2364928722381592, + "rewards/rejected": 0.17027588188648224, + "step": 14054 + }, + { + "epoch": 0.82, + "learning_rate": 8.446163669761396e-09, + "logits/chosen": -1.7895992994308472, + "logits/rejected": -1.7831488847732544, + "logps/chosen": -150.4136505126953, + "logps/rejected": -263.85235595703125, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3894058465957642, + "rewards/margins": 2.5023834705352783, + "rewards/rejected": -1.1129776239395142, + "step": 14055 + }, + { + "epoch": 0.82, + "learning_rate": 8.440923163616991e-09, + "logits/chosen": -2.01707124710083, + "logits/rejected": -2.017077684402466, + "logps/chosen": -0.9762508869171143, + "logps/rejected": -134.2765350341797, + "loss": 0.4835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028161222115159035, + "rewards/margins": 1.0972371101379395, + "rewards/rejected": -1.1253982782363892, + "step": 14056 + }, + { + "epoch": 0.82, + "learning_rate": 8.435684133854271e-09, + "logits/chosen": -2.0697433948516846, + "logits/rejected": -2.057812213897705, + "logps/chosen": -0.17418941855430603, + "logps/rejected": -282.28204345703125, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009469589218497276, + "rewards/margins": 7.189081192016602, + "rewards/rejected": -7.198550701141357, + "step": 14057 + }, + { + "epoch": 0.82, + "learning_rate": 8.430446580659361e-09, + "logits/chosen": -1.9370427131652832, + "logits/rejected": -1.9250342845916748, + "logps/chosen": -102.00813293457031, + "logps/rejected": -313.86468505859375, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.542492687702179, + "rewards/margins": 1.3327423334121704, + "rewards/rejected": -0.7902496457099915, + "step": 14058 + }, + { + "epoch": 0.82, + "learning_rate": 8.425210504218327e-09, + "logits/chosen": -1.8393741846084595, + "logits/rejected": -1.8315633535385132, + "logps/chosen": -5.988097667694092, + "logps/rejected": -121.48042297363281, + "loss": 0.4725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34847936034202576, + "rewards/margins": 2.1402597427368164, + "rewards/rejected": -2.488739013671875, + "step": 14059 + }, + { + "epoch": 0.82, + "learning_rate": 8.419975904717197e-09, + "logits/chosen": -1.7119486331939697, + "logits/rejected": -1.7173010110855103, + "logps/chosen": -105.28074645996094, + "logps/rejected": -219.09071350097656, + "loss": 0.6875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7643806338310242, + "rewards/margins": 1.0580787658691406, + "rewards/rejected": -1.8224594593048096, + "step": 14060 + }, + { + "epoch": 0.82, + "learning_rate": 8.414742782341883e-09, + "logits/chosen": -1.959845781326294, + "logits/rejected": -1.9727671146392822, + "logps/chosen": -224.93679809570312, + "logps/rejected": -471.2804260253906, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4504272937774658, + "rewards/margins": 6.586020469665527, + "rewards/rejected": -5.135592937469482, + "step": 14061 + }, + { + "epoch": 0.82, + "learning_rate": 8.409511137278347e-09, + "logits/chosen": -1.9051584005355835, + "logits/rejected": -1.9039915800094604, + "logps/chosen": -0.00016951042925938964, + "logps/rejected": -158.42408752441406, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8290456763643306e-06, + "rewards/margins": 3.0188984870910645, + "rewards/rejected": -3.018904209136963, + "step": 14062 + }, + { + "epoch": 0.82, + "learning_rate": 8.404280969712407e-09, + "logits/chosen": -2.0289928913116455, + "logits/rejected": -2.029836893081665, + "logps/chosen": -12.500126838684082, + "logps/rejected": -143.77938842773438, + "loss": 0.3458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16250257194042206, + "rewards/margins": 2.3919572830200195, + "rewards/rejected": -2.229454755783081, + "step": 14063 + }, + { + "epoch": 0.82, + "learning_rate": 8.399052279829882e-09, + "logits/chosen": -2.0407207012176514, + "logits/rejected": -2.006734609603882, + "logps/chosen": -208.10531616210938, + "logps/rejected": -311.6819763183594, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5608460903167725, + "rewards/margins": 3.2438690662384033, + "rewards/rejected": 0.316976934671402, + "step": 14064 + }, + { + "epoch": 0.82, + "learning_rate": 8.39382506781649e-09, + "logits/chosen": -1.9152108430862427, + "logits/rejected": -1.9786823987960815, + "logps/chosen": -142.66217041015625, + "logps/rejected": -242.41648864746094, + "loss": 0.0835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.302893042564392, + "rewards/margins": 2.602046251296997, + "rewards/rejected": -1.299153208732605, + "step": 14065 + }, + { + "epoch": 0.82, + "learning_rate": 8.388599333857976e-09, + "logits/chosen": -1.8707561492919922, + "logits/rejected": -1.820163607597351, + "logps/chosen": -209.03997802734375, + "logps/rejected": -324.96832275390625, + "loss": 0.2273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.37933349609375, + "rewards/margins": 1.1857483386993408, + "rewards/rejected": 0.19358520209789276, + "step": 14066 + }, + { + "epoch": 0.82, + "learning_rate": 8.383375078139943e-09, + "logits/chosen": -1.6881976127624512, + "logits/rejected": -1.6957379579544067, + "logps/chosen": -197.28646850585938, + "logps/rejected": -337.19866943359375, + "loss": 0.0548, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.115922689437866, + "rewards/margins": 2.672752618789673, + "rewards/rejected": -0.5568298697471619, + "step": 14067 + }, + { + "epoch": 0.82, + "learning_rate": 8.378152300848007e-09, + "logits/chosen": -1.8580039739608765, + "logits/rejected": -1.8230962753295898, + "logps/chosen": -197.98919677734375, + "logps/rejected": -341.01806640625, + "loss": 0.1347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6711349487304688, + "rewards/margins": 1.879695177078247, + "rewards/rejected": -0.20856018364429474, + "step": 14068 + }, + { + "epoch": 0.82, + "learning_rate": 8.372931002167671e-09, + "logits/chosen": -1.8139394521713257, + "logits/rejected": -1.8075854778289795, + "logps/chosen": -42.506988525390625, + "logps/rejected": -353.1736145019531, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014894485473632812, + "rewards/margins": 8.44838809967041, + "rewards/rejected": -8.433493614196777, + "step": 14069 + }, + { + "epoch": 0.82, + "learning_rate": 8.367711182284471e-09, + "logits/chosen": -1.9320300817489624, + "logits/rejected": -1.919655203819275, + "logps/chosen": -142.9805908203125, + "logps/rejected": -194.3729705810547, + "loss": 0.2811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4470916986465454, + "rewards/margins": 0.8324753046035767, + "rewards/rejected": 0.6146163940429688, + "step": 14070 + }, + { + "epoch": 0.82, + "learning_rate": 8.362492841383805e-09, + "logits/chosen": -1.9690529108047485, + "logits/rejected": -1.9305814504623413, + "logps/chosen": -221.39178466796875, + "logps/rejected": -447.4272766113281, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3141831159591675, + "rewards/margins": 4.660200595855713, + "rewards/rejected": -3.346017599105835, + "step": 14071 + }, + { + "epoch": 0.82, + "learning_rate": 8.357275979651063e-09, + "logits/chosen": -1.8098030090332031, + "logits/rejected": -1.8312408924102783, + "logps/chosen": -195.2667999267578, + "logps/rejected": -421.13018798828125, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2674453258514404, + "rewards/margins": 6.623323440551758, + "rewards/rejected": -4.355877876281738, + "step": 14072 + }, + { + "epoch": 0.82, + "learning_rate": 8.352060597271577e-09, + "logits/chosen": -1.7827117443084717, + "logits/rejected": -1.7931208610534668, + "logps/chosen": -159.55093383789062, + "logps/rejected": -399.47772216796875, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0632965564727783, + "rewards/margins": 4.879171848297119, + "rewards/rejected": -2.815875291824341, + "step": 14073 + }, + { + "epoch": 0.82, + "learning_rate": 8.346846694430615e-09, + "logits/chosen": -1.7970138788223267, + "logits/rejected": -1.801211953163147, + "logps/chosen": -159.55911254882812, + "logps/rejected": -335.24688720703125, + "loss": 0.112, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.285360813140869, + "rewards/margins": 1.4964172840118408, + "rewards/rejected": 0.7889434695243835, + "step": 14074 + }, + { + "epoch": 0.82, + "learning_rate": 8.34163427131342e-09, + "logits/chosen": -1.8815280199050903, + "logits/rejected": -1.8734480142593384, + "logps/chosen": -26.54804801940918, + "logps/rejected": -245.8734588623047, + "loss": 0.2521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34294071793556213, + "rewards/margins": 5.409711837768555, + "rewards/rejected": -5.066771030426025, + "step": 14075 + }, + { + "epoch": 0.82, + "learning_rate": 8.33642332810514e-09, + "logits/chosen": -1.9103777408599854, + "logits/rejected": -1.9110575914382935, + "logps/chosen": -5.55565881729126, + "logps/rejected": -184.68423461914062, + "loss": 0.2679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43911463022232056, + "rewards/margins": 2.9183199405670166, + "rewards/rejected": -2.479205369949341, + "step": 14076 + }, + { + "epoch": 0.82, + "learning_rate": 8.3312138649909e-09, + "logits/chosen": -1.8671520948410034, + "logits/rejected": -1.8640661239624023, + "logps/chosen": -20.864362716674805, + "logps/rejected": -152.7171173095703, + "loss": 0.3885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028894424438476562, + "rewards/margins": 2.378822088241577, + "rewards/rejected": -2.4077165126800537, + "step": 14077 + }, + { + "epoch": 0.82, + "learning_rate": 8.326005882155768e-09, + "logits/chosen": -2.0902481079101562, + "logits/rejected": -2.100909948348999, + "logps/chosen": -47.69390106201172, + "logps/rejected": -181.44287109375, + "loss": 0.2374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3528579771518707, + "rewards/margins": 3.0522310733795166, + "rewards/rejected": -2.6993730068206787, + "step": 14078 + }, + { + "epoch": 0.82, + "learning_rate": 8.320799379784766e-09, + "logits/chosen": -1.9130849838256836, + "logits/rejected": -1.9068139791488647, + "logps/chosen": -246.9957275390625, + "logps/rejected": -279.9512939453125, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.821575880050659, + "rewards/margins": 3.116262674331665, + "rewards/rejected": 0.7053131461143494, + "step": 14079 + }, + { + "epoch": 0.82, + "learning_rate": 8.31559435806284e-09, + "logits/chosen": -1.9248524904251099, + "logits/rejected": -1.7886524200439453, + "logps/chosen": -175.9125213623047, + "logps/rejected": -374.30242919921875, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5778825283050537, + "rewards/margins": 2.4570207595825195, + "rewards/rejected": 0.12086182087659836, + "step": 14080 + }, + { + "epoch": 0.82, + "learning_rate": 8.310390817174895e-09, + "logits/chosen": -2.0688576698303223, + "logits/rejected": -2.0578482151031494, + "logps/chosen": -161.39175415039062, + "logps/rejected": -270.3263854980469, + "loss": 0.155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7391616702079773, + "rewards/margins": 2.040998935699463, + "rewards/rejected": -1.3018372058868408, + "step": 14081 + }, + { + "epoch": 0.82, + "learning_rate": 8.305188757305799e-09, + "logits/chosen": -2.0118699073791504, + "logits/rejected": -2.014514923095703, + "logps/chosen": -0.003389465855434537, + "logps/rejected": -231.88909912109375, + "loss": 0.3523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001554308255435899, + "rewards/margins": 3.86716628074646, + "rewards/rejected": -3.867321729660034, + "step": 14082 + }, + { + "epoch": 0.82, + "learning_rate": 8.299988178640355e-09, + "logits/chosen": -1.9572761058807373, + "logits/rejected": -1.9471369981765747, + "logps/chosen": -9.804993629455566, + "logps/rejected": -245.11618041992188, + "loss": 0.2085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6142978072166443, + "rewards/margins": 4.181509494781494, + "rewards/rejected": -3.567211866378784, + "step": 14083 + }, + { + "epoch": 0.82, + "learning_rate": 8.29478908136329e-09, + "logits/chosen": -1.927298903465271, + "logits/rejected": -1.9707449674606323, + "logps/chosen": -366.2808837890625, + "logps/rejected": -412.2819519042969, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4638946056365967, + "rewards/margins": 4.693005561828613, + "rewards/rejected": -1.2291107177734375, + "step": 14084 + }, + { + "epoch": 0.82, + "learning_rate": 8.28959146565934e-09, + "logits/chosen": -1.9477254152297974, + "logits/rejected": -1.932822823524475, + "logps/chosen": -27.475658416748047, + "logps/rejected": -210.59579467773438, + "loss": 0.1369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.017511010169983, + "rewards/margins": 3.9488067626953125, + "rewards/rejected": -2.931295871734619, + "step": 14085 + }, + { + "epoch": 0.82, + "learning_rate": 8.284395331713124e-09, + "logits/chosen": -1.7688312530517578, + "logits/rejected": -1.8043246269226074, + "logps/chosen": -243.1918182373047, + "logps/rejected": -389.11944580078125, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9493133425712585, + "rewards/margins": 0.1311492919921875, + "rewards/rejected": 0.818164050579071, + "step": 14086 + }, + { + "epoch": 0.82, + "learning_rate": 8.279200679709242e-09, + "logits/chosen": -1.9418294429779053, + "logits/rejected": -1.9550559520721436, + "logps/chosen": -267.3791198730469, + "logps/rejected": -345.0328369140625, + "loss": 0.0733, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.68326735496521, + "rewards/margins": 1.993850827217102, + "rewards/rejected": 0.6894165277481079, + "step": 14087 + }, + { + "epoch": 0.82, + "learning_rate": 8.274007509832209e-09, + "logits/chosen": -1.994195580482483, + "logits/rejected": -2.001293659210205, + "logps/chosen": -12.92082405090332, + "logps/rejected": -135.42367553710938, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17248402535915375, + "rewards/margins": 1.0976030826568604, + "rewards/rejected": -0.9251190423965454, + "step": 14088 + }, + { + "epoch": 0.82, + "learning_rate": 8.268815822266561e-09, + "logits/chosen": -1.9656500816345215, + "logits/rejected": -1.962181806564331, + "logps/chosen": -58.48652648925781, + "logps/rejected": -254.6392364501953, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8175193667411804, + "rewards/margins": 0.5419044494628906, + "rewards/rejected": 0.2756149470806122, + "step": 14089 + }, + { + "epoch": 0.82, + "learning_rate": 8.263625617196685e-09, + "logits/chosen": -1.9949371814727783, + "logits/rejected": -1.997755765914917, + "logps/chosen": -127.35697174072266, + "logps/rejected": -294.92230224609375, + "loss": 0.1335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9617546200752258, + "rewards/margins": 2.8056113719940186, + "rewards/rejected": -1.8438568115234375, + "step": 14090 + }, + { + "epoch": 0.82, + "learning_rate": 8.258436894806985e-09, + "logits/chosen": -1.8479154109954834, + "logits/rejected": -1.8395180702209473, + "logps/chosen": -33.68990707397461, + "logps/rejected": -184.49057006835938, + "loss": 0.4445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14962653815746307, + "rewards/margins": 1.3018139600753784, + "rewards/rejected": -1.451440453529358, + "step": 14091 + }, + { + "epoch": 0.82, + "learning_rate": 8.25324965528179e-09, + "logits/chosen": -1.8852125406265259, + "logits/rejected": -1.8741495609283447, + "logps/chosen": -176.0298614501953, + "logps/rejected": -307.986572265625, + "loss": 0.0904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6521621942520142, + "rewards/margins": 3.0611281394958496, + "rewards/rejected": -1.408966064453125, + "step": 14092 + }, + { + "epoch": 0.82, + "learning_rate": 8.24806389880538e-09, + "logits/chosen": -1.864985466003418, + "logits/rejected": -1.8660157918930054, + "logps/chosen": -21.087318420410156, + "logps/rejected": -86.45899963378906, + "loss": 0.3571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48572275042533875, + "rewards/margins": 1.0779002904891968, + "rewards/rejected": -0.5921775698661804, + "step": 14093 + }, + { + "epoch": 0.82, + "learning_rate": 8.242879625561967e-09, + "logits/chosen": -1.9263534545898438, + "logits/rejected": -1.9221434593200684, + "logps/chosen": -0.6980289816856384, + "logps/rejected": -168.896728515625, + "loss": 0.4006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016705607995390892, + "rewards/margins": 2.0366454124450684, + "rewards/rejected": -2.0533509254455566, + "step": 14094 + }, + { + "epoch": 0.82, + "learning_rate": 8.23769683573572e-09, + "logits/chosen": -2.0325028896331787, + "logits/rejected": -2.0471580028533936, + "logps/chosen": -65.86954498291016, + "logps/rejected": -261.2637939453125, + "loss": 0.0655, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5109825134277344, + "rewards/margins": 3.9408531188964844, + "rewards/rejected": -2.42987060546875, + "step": 14095 + }, + { + "epoch": 0.82, + "learning_rate": 8.232515529510758e-09, + "logits/chosen": -1.9363999366760254, + "logits/rejected": -1.917731523513794, + "logps/chosen": -174.92214965820312, + "logps/rejected": -307.8470458984375, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.018913269042969, + "rewards/margins": 2.185246229171753, + "rewards/rejected": 1.8336670398712158, + "step": 14096 + }, + { + "epoch": 0.82, + "learning_rate": 8.22733570707116e-09, + "logits/chosen": -2.0534512996673584, + "logits/rejected": -2.0488052368164062, + "logps/chosen": -17.856996536254883, + "logps/rejected": -190.73316955566406, + "loss": 0.391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1419273465871811, + "rewards/margins": 3.14180850982666, + "rewards/rejected": -3.283735752105713, + "step": 14097 + }, + { + "epoch": 0.82, + "learning_rate": 8.222157368600935e-09, + "logits/chosen": -1.6774803400039673, + "logits/rejected": -1.6752468347549438, + "logps/chosen": -194.48321533203125, + "logps/rejected": -414.27099609375, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6374053955078125, + "rewards/margins": 6.655817031860352, + "rewards/rejected": -3.01841139793396, + "step": 14098 + }, + { + "epoch": 0.82, + "learning_rate": 8.216980514284028e-09, + "logits/chosen": -1.8688875436782837, + "logits/rejected": -1.7985374927520752, + "logps/chosen": -166.70062255859375, + "logps/rejected": -293.7253723144531, + "loss": 0.1645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.889154076576233, + "rewards/margins": 1.4760711193084717, + "rewards/rejected": 0.41308289766311646, + "step": 14099 + }, + { + "epoch": 0.82, + "learning_rate": 8.211805144304356e-09, + "logits/chosen": -1.8125534057617188, + "logits/rejected": -1.821079969406128, + "logps/chosen": -207.75164794921875, + "logps/rejected": -409.9246520996094, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1725709438323975, + "rewards/margins": 5.574045181274414, + "rewards/rejected": -3.4014739990234375, + "step": 14100 + }, + { + "epoch": 0.82, + "learning_rate": 8.206631258845775e-09, + "logits/chosen": -2.029298782348633, + "logits/rejected": -2.022261381149292, + "logps/chosen": -9.443031311035156, + "logps/rejected": -129.49002075195312, + "loss": 0.3595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40249738097190857, + "rewards/margins": 1.4662410020828247, + "rewards/rejected": -1.0637435913085938, + "step": 14101 + }, + { + "epoch": 0.82, + "learning_rate": 8.201458858092098e-09, + "logits/chosen": -2.0148096084594727, + "logits/rejected": -2.0067713260650635, + "logps/chosen": -0.006823073606938124, + "logps/rejected": -72.04795837402344, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.011434725485742092, + "rewards/margins": -0.017833922058343887, + "rewards/rejected": 0.029268646612763405, + "step": 14102 + }, + { + "epoch": 0.82, + "learning_rate": 8.196287942227038e-09, + "logits/chosen": -1.9296917915344238, + "logits/rejected": -1.9216808080673218, + "logps/chosen": -39.821842193603516, + "logps/rejected": -184.58917236328125, + "loss": 0.3314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11767082661390305, + "rewards/margins": 2.84784197807312, + "rewards/rejected": -2.7301712036132812, + "step": 14103 + }, + { + "epoch": 0.82, + "learning_rate": 8.191118511434336e-09, + "logits/chosen": -1.9907046556472778, + "logits/rejected": -1.9780205488204956, + "logps/chosen": -17.371543884277344, + "logps/rejected": -217.14813232421875, + "loss": 0.2589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1339740753173828, + "rewards/margins": 3.9604389667510986, + "rewards/rejected": -3.826464891433716, + "step": 14104 + }, + { + "epoch": 0.82, + "learning_rate": 8.185950565897599e-09, + "logits/chosen": -1.9221808910369873, + "logits/rejected": -1.9927384853363037, + "logps/chosen": -225.29872131347656, + "logps/rejected": -534.2501831054688, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.43796706199646, + "rewards/margins": 3.8343539237976074, + "rewards/rejected": -1.396386742591858, + "step": 14105 + }, + { + "epoch": 0.82, + "learning_rate": 8.180784105800448e-09, + "logits/chosen": -1.8619049787521362, + "logits/rejected": -1.846479058265686, + "logps/chosen": -58.35098648071289, + "logps/rejected": -397.71209716796875, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3257282972335815, + "rewards/margins": 4.7512898445129395, + "rewards/rejected": -3.4255616664886475, + "step": 14106 + }, + { + "epoch": 0.82, + "learning_rate": 8.175619131326378e-09, + "logits/chosen": -1.8645579814910889, + "logits/rejected": -1.854697585105896, + "logps/chosen": -229.9148712158203, + "logps/rejected": -502.6138610839844, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.000035047531128, + "rewards/margins": 5.8528733253479, + "rewards/rejected": -3.8528382778167725, + "step": 14107 + }, + { + "epoch": 0.82, + "learning_rate": 8.17045564265893e-09, + "logits/chosen": -1.8896437883377075, + "logits/rejected": -1.8868153095245361, + "logps/chosen": -0.5197248458862305, + "logps/rejected": -156.02304077148438, + "loss": 0.3662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015385204926133156, + "rewards/margins": 2.9345524311065674, + "rewards/rejected": -2.949937582015991, + "step": 14108 + }, + { + "epoch": 0.82, + "learning_rate": 8.165293639981497e-09, + "logits/chosen": -1.8478412628173828, + "logits/rejected": -1.84622323513031, + "logps/chosen": -233.89183044433594, + "logps/rejected": -266.56402587890625, + "loss": 0.0797, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5418686866760254, + "rewards/margins": 1.8397202491760254, + "rewards/rejected": 1.7021484375, + "step": 14109 + }, + { + "epoch": 0.82, + "learning_rate": 8.160133123477465e-09, + "logits/chosen": -1.9394949674606323, + "logits/rejected": -1.9355409145355225, + "logps/chosen": -3.8758158683776855, + "logps/rejected": -101.71316528320312, + "loss": 0.4134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09939422458410263, + "rewards/margins": 1.3403908014297485, + "rewards/rejected": -1.2409965991973877, + "step": 14110 + }, + { + "epoch": 0.82, + "learning_rate": 8.154974093330169e-09, + "logits/chosen": -1.8887135982513428, + "logits/rejected": -1.9263793230056763, + "logps/chosen": -199.53256225585938, + "logps/rejected": -290.17718505859375, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4942153692245483, + "rewards/margins": 3.103590488433838, + "rewards/rejected": -1.609375, + "step": 14111 + }, + { + "epoch": 0.82, + "learning_rate": 8.149816549722888e-09, + "logits/chosen": -1.858961820602417, + "logits/rejected": -1.8481385707855225, + "logps/chosen": -71.80217742919922, + "logps/rejected": -275.2454833984375, + "loss": 0.4096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36563339829444885, + "rewards/margins": 3.3205161094665527, + "rewards/rejected": -3.6861495971679688, + "step": 14112 + }, + { + "epoch": 0.82, + "learning_rate": 8.144660492838823e-09, + "logits/chosen": -1.748107671737671, + "logits/rejected": -1.7373580932617188, + "logps/chosen": -150.00880432128906, + "logps/rejected": -487.9568176269531, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2528640925884247, + "rewards/margins": 13.400367736816406, + "rewards/rejected": -13.653231620788574, + "step": 14113 + }, + { + "epoch": 0.82, + "learning_rate": 8.139505922861156e-09, + "logits/chosen": -2.0387520790100098, + "logits/rejected": -2.055079936981201, + "logps/chosen": -208.3994598388672, + "logps/rejected": -292.44964599609375, + "loss": 0.2227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7113372683525085, + "rewards/margins": 1.7255644798278809, + "rewards/rejected": -1.014227271080017, + "step": 14114 + }, + { + "epoch": 0.82, + "learning_rate": 8.134352839972997e-09, + "logits/chosen": -1.5704996585845947, + "logits/rejected": -1.564574122428894, + "logps/chosen": -239.2193145751953, + "logps/rejected": -426.1543884277344, + "loss": 0.1114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5263046026229858, + "rewards/margins": 1.7719619274139404, + "rewards/rejected": -0.24565735459327698, + "step": 14115 + }, + { + "epoch": 0.82, + "learning_rate": 8.129201244357414e-09, + "logits/chosen": -1.8149125576019287, + "logits/rejected": -1.836728811264038, + "logps/chosen": -152.35816955566406, + "logps/rejected": -320.4585266113281, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2289704084396362, + "rewards/margins": 2.490628242492676, + "rewards/rejected": -1.26165771484375, + "step": 14116 + }, + { + "epoch": 0.82, + "learning_rate": 8.124051136197429e-09, + "logits/chosen": -1.8800541162490845, + "logits/rejected": -1.9074980020523071, + "logps/chosen": -218.82241821289062, + "logps/rejected": -345.6946105957031, + "loss": 0.1085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6772171258926392, + "rewards/margins": 1.7755173444747925, + "rewards/rejected": -0.09830017387866974, + "step": 14117 + }, + { + "epoch": 0.82, + "learning_rate": 8.118902515675974e-09, + "logits/chosen": -1.978598952293396, + "logits/rejected": -2.009103298187256, + "logps/chosen": -342.6536865234375, + "logps/rejected": -465.807861328125, + "loss": 0.0691, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.116906762123108, + "rewards/margins": 2.4777863025665283, + "rewards/rejected": -1.3608795404434204, + "step": 14118 + }, + { + "epoch": 0.82, + "learning_rate": 8.113755382975962e-09, + "logits/chosen": -1.88870370388031, + "logits/rejected": -1.949710726737976, + "logps/chosen": -138.58041381835938, + "logps/rejected": -276.82513427734375, + "loss": 0.1839, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6058562994003296, + "rewards/margins": 1.02569580078125, + "rewards/rejected": 0.5801605582237244, + "step": 14119 + }, + { + "epoch": 0.82, + "learning_rate": 8.108609738280248e-09, + "logits/chosen": -1.969109058380127, + "logits/rejected": -1.9694938659667969, + "logps/chosen": -26.56039047241211, + "logps/rejected": -85.35765838623047, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23445244133472443, + "rewards/margins": 1.947654366493225, + "rewards/rejected": -1.713201880455017, + "step": 14120 + }, + { + "epoch": 0.82, + "learning_rate": 8.103465581771635e-09, + "logits/chosen": -1.8495227098464966, + "logits/rejected": -1.7285375595092773, + "logps/chosen": -164.68841552734375, + "logps/rejected": -506.95654296875, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6108306646347046, + "rewards/margins": 3.505279541015625, + "rewards/rejected": -1.8944488763809204, + "step": 14121 + }, + { + "epoch": 0.82, + "learning_rate": 8.098322913632844e-09, + "logits/chosen": -1.9407342672348022, + "logits/rejected": -1.9382270574569702, + "logps/chosen": -63.50300598144531, + "logps/rejected": -194.37088012695312, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6585708856582642, + "rewards/margins": 0.8183456659317017, + "rewards/rejected": -0.1597747802734375, + "step": 14122 + }, + { + "epoch": 0.82, + "learning_rate": 8.093181734046612e-09, + "logits/chosen": -1.9697033166885376, + "logits/rejected": -2.00295352935791, + "logps/chosen": -214.83489990234375, + "logps/rejected": -466.34130859375, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6013306379318237, + "rewards/margins": 6.120203018188477, + "rewards/rejected": -4.518872261047363, + "step": 14123 + }, + { + "epoch": 0.82, + "learning_rate": 8.088042043195537e-09, + "logits/chosen": -1.7310850620269775, + "logits/rejected": -1.7408119440078735, + "logps/chosen": -339.0849609375, + "logps/rejected": -398.1815490722656, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8718414306640625, + "rewards/margins": 2.160205125808716, + "rewards/rejected": -0.28836366534233093, + "step": 14124 + }, + { + "epoch": 0.82, + "learning_rate": 8.082903841262233e-09, + "logits/chosen": -1.8395249843597412, + "logits/rejected": -1.7997803688049316, + "logps/chosen": -216.37713623046875, + "logps/rejected": -494.47613525390625, + "loss": 0.0507, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6589417457580566, + "rewards/margins": 2.878283739089966, + "rewards/rejected": -0.21934203803539276, + "step": 14125 + }, + { + "epoch": 0.82, + "learning_rate": 8.077767128429203e-09, + "logits/chosen": -2.035708427429199, + "logits/rejected": -2.0371646881103516, + "logps/chosen": -18.30765151977539, + "logps/rejected": -190.3688507080078, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21966324746608734, + "rewards/margins": 2.88835072517395, + "rewards/rejected": -2.668687582015991, + "step": 14126 + }, + { + "epoch": 0.82, + "learning_rate": 8.072631904878973e-09, + "logits/chosen": -2.058967351913452, + "logits/rejected": -2.054733991622925, + "logps/chosen": -16.985502243041992, + "logps/rejected": -46.111454010009766, + "loss": 0.5002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17957916855812073, + "rewards/margins": 0.7073898315429688, + "rewards/rejected": -0.5278106927871704, + "step": 14127 + }, + { + "epoch": 0.82, + "learning_rate": 8.067498170793935e-09, + "logits/chosen": -1.9592416286468506, + "logits/rejected": -1.9448463916778564, + "logps/chosen": -217.0782928466797, + "logps/rejected": -280.8003234863281, + "loss": 0.4393, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9833176136016846, + "rewards/margins": -0.2817978858947754, + "rewards/rejected": 2.26511549949646, + "step": 14128 + }, + { + "epoch": 0.82, + "learning_rate": 8.062365926356484e-09, + "logits/chosen": -1.87659752368927, + "logits/rejected": -1.8709441423416138, + "logps/chosen": -83.09449768066406, + "logps/rejected": -175.96072387695312, + "loss": 0.1607, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8810280561447144, + "rewards/margins": 1.4634957313537598, + "rewards/rejected": 0.417532354593277, + "step": 14129 + }, + { + "epoch": 0.82, + "learning_rate": 8.057235171748911e-09, + "logits/chosen": -1.7505431175231934, + "logits/rejected": -1.729222297668457, + "logps/chosen": -231.68638610839844, + "logps/rejected": -321.276611328125, + "loss": 0.0869, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0611252784729004, + "rewards/margins": 1.7878190279006958, + "rewards/rejected": 1.2733062505722046, + "step": 14130 + }, + { + "epoch": 0.82, + "learning_rate": 8.05210590715354e-09, + "logits/chosen": -1.9636449813842773, + "logits/rejected": -1.9517874717712402, + "logps/chosen": -20.332866668701172, + "logps/rejected": -249.5364990234375, + "loss": 0.1588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6859645843505859, + "rewards/margins": 2.729801654815674, + "rewards/rejected": -2.043837070465088, + "step": 14131 + }, + { + "epoch": 0.82, + "learning_rate": 8.046978132752536e-09, + "logits/chosen": -1.7802015542984009, + "logits/rejected": -1.7770012617111206, + "logps/chosen": -181.64547729492188, + "logps/rejected": -390.48187255859375, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4717133045196533, + "rewards/margins": 4.555422782897949, + "rewards/rejected": -2.083709716796875, + "step": 14132 + }, + { + "epoch": 0.82, + "learning_rate": 8.041851848728087e-09, + "logits/chosen": -1.8993645906448364, + "logits/rejected": -1.8980205059051514, + "logps/chosen": -0.0006065329071134329, + "logps/rejected": -85.46666717529297, + "loss": 0.5459, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1918711354373954e-05, + "rewards/margins": 0.6254095435142517, + "rewards/rejected": -0.6253876090049744, + "step": 14133 + }, + { + "epoch": 0.82, + "learning_rate": 8.036727055262299e-09, + "logits/chosen": -2.0173723697662354, + "logits/rejected": -2.013763189315796, + "logps/chosen": -41.41322326660156, + "logps/rejected": -116.53014373779297, + "loss": 0.9696, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8899299502372742, + "rewards/margins": -0.3782355785369873, + "rewards/rejected": -0.5116943717002869, + "step": 14134 + }, + { + "epoch": 0.82, + "learning_rate": 8.03160375253723e-09, + "logits/chosen": -1.788610816001892, + "logits/rejected": -1.7564371824264526, + "logps/chosen": -240.7822265625, + "logps/rejected": -648.7412719726562, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.120797872543335, + "rewards/margins": 6.855203628540039, + "rewards/rejected": -3.734405517578125, + "step": 14135 + }, + { + "epoch": 0.82, + "learning_rate": 8.026481940734896e-09, + "logits/chosen": -2.081421375274658, + "logits/rejected": -2.0766665935516357, + "logps/chosen": -162.84585571289062, + "logps/rejected": -321.54168701171875, + "loss": 0.0775, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.257031202316284, + "rewards/margins": 1.9430724382400513, + "rewards/rejected": 0.3139587342739105, + "step": 14136 + }, + { + "epoch": 0.82, + "learning_rate": 8.021361620037232e-09, + "logits/chosen": -1.8984566926956177, + "logits/rejected": -1.8955901861190796, + "logps/chosen": -49.79199981689453, + "logps/rejected": -241.80642700195312, + "loss": 0.1575, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.355233073234558, + "rewards/margins": 2.5447473526000977, + "rewards/rejected": -1.18951416015625, + "step": 14137 + }, + { + "epoch": 0.82, + "learning_rate": 8.016242790626138e-09, + "logits/chosen": -1.8918640613555908, + "logits/rejected": -1.8852864503860474, + "logps/chosen": -1.1086433005402796e-05, + "logps/rejected": -172.56552124023438, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.24516906352801e-07, + "rewards/margins": 5.079653739929199, + "rewards/rejected": -5.079653263092041, + "step": 14138 + }, + { + "epoch": 0.82, + "learning_rate": 8.011125452683464e-09, + "logits/chosen": -1.715085744857788, + "logits/rejected": -1.690065622329712, + "logps/chosen": -154.54168701171875, + "logps/rejected": -291.011962890625, + "loss": 0.2089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.284844994544983, + "rewards/margins": 1.3701934814453125, + "rewards/rejected": -0.08534850925207138, + "step": 14139 + }, + { + "epoch": 0.82, + "learning_rate": 8.006009606391018e-09, + "logits/chosen": -1.9251567125320435, + "logits/rejected": -1.9182854890823364, + "logps/chosen": -1.9431008695391938e-05, + "logps/rejected": -285.32269287109375, + "loss": 0.3381, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6689045878592879e-07, + "rewards/margins": 6.874749660491943, + "rewards/rejected": -6.874749660491943, + "step": 14140 + }, + { + "epoch": 0.82, + "learning_rate": 8.000895251930511e-09, + "logits/chosen": -1.5759663581848145, + "logits/rejected": -1.5652388334274292, + "logps/chosen": -36.33491516113281, + "logps/rejected": -174.25897216796875, + "loss": 0.2055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6540245413780212, + "rewards/margins": 3.5429024696350098, + "rewards/rejected": -2.8888778686523438, + "step": 14141 + }, + { + "epoch": 0.82, + "learning_rate": 7.995782389483647e-09, + "logits/chosen": -1.9242905378341675, + "logits/rejected": -1.9025216102600098, + "logps/chosen": -117.12653350830078, + "logps/rejected": -278.92401123046875, + "loss": 0.0654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5934799313545227, + "rewards/margins": 3.131190538406372, + "rewards/rejected": -2.537710666656494, + "step": 14142 + }, + { + "epoch": 0.82, + "learning_rate": 7.99067101923206e-09, + "logits/chosen": -1.900576114654541, + "logits/rejected": -1.8776899576187134, + "logps/chosen": -180.07452392578125, + "logps/rejected": -378.8311767578125, + "loss": 0.1844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4826126098632812, + "rewards/margins": 0.9679183959960938, + "rewards/rejected": 0.5146942138671875, + "step": 14143 + }, + { + "epoch": 0.82, + "learning_rate": 7.985561141357338e-09, + "logits/chosen": -1.8712232112884521, + "logits/rejected": -1.855079174041748, + "logps/chosen": -203.53546142578125, + "logps/rejected": -448.1233215332031, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.014697313308716, + "rewards/margins": 4.412200927734375, + "rewards/rejected": -2.397503614425659, + "step": 14144 + }, + { + "epoch": 0.82, + "learning_rate": 7.980452756040973e-09, + "logits/chosen": -1.7167766094207764, + "logits/rejected": -1.696318507194519, + "logps/chosen": -190.31985473632812, + "logps/rejected": -350.0385437011719, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.470259189605713, + "rewards/margins": 3.334965705871582, + "rewards/rejected": 0.13529358804225922, + "step": 14145 + }, + { + "epoch": 0.82, + "learning_rate": 7.97534586346449e-09, + "logits/chosen": -1.9667068719863892, + "logits/rejected": -1.9675629138946533, + "logps/chosen": -0.0015872081276029348, + "logps/rejected": -162.92990112304688, + "loss": 0.4262, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3638673357927473e-06, + "rewards/margins": 1.60404634475708, + "rewards/rejected": -1.6040496826171875, + "step": 14146 + }, + { + "epoch": 0.82, + "learning_rate": 7.970240463809275e-09, + "logits/chosen": -1.7284400463104248, + "logits/rejected": -1.7186750173568726, + "logps/chosen": -173.84002685546875, + "logps/rejected": -370.8205871582031, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.429479956626892, + "rewards/margins": 3.8305788040161133, + "rewards/rejected": -2.4010987281799316, + "step": 14147 + }, + { + "epoch": 0.82, + "learning_rate": 7.965136557256723e-09, + "logits/chosen": -1.8077770471572876, + "logits/rejected": -1.8001035451889038, + "logps/chosen": -209.03756713867188, + "logps/rejected": -354.1593017578125, + "loss": 0.1475, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3741607666015625, + "rewards/margins": 1.3873076438903809, + "rewards/rejected": 0.9868530631065369, + "step": 14148 + }, + { + "epoch": 0.82, + "learning_rate": 7.960034143988109e-09, + "logits/chosen": -1.758354902267456, + "logits/rejected": -1.7622469663619995, + "logps/chosen": -144.20388793945312, + "logps/rejected": -326.1303405761719, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2620179653167725, + "rewards/margins": 4.875744819641113, + "rewards/rejected": -2.613726854324341, + "step": 14149 + }, + { + "epoch": 0.82, + "learning_rate": 7.954933224184745e-09, + "logits/chosen": -1.904364824295044, + "logits/rejected": -1.9004055261611938, + "logps/chosen": -28.789596557617188, + "logps/rejected": -149.1258087158203, + "loss": 0.2062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7597160339355469, + "rewards/margins": 2.1891274452209473, + "rewards/rejected": -1.4294112920761108, + "step": 14150 + }, + { + "epoch": 0.82, + "learning_rate": 7.949833798027811e-09, + "logits/chosen": -1.9970136880874634, + "logits/rejected": -2.0429539680480957, + "logps/chosen": -130.7545623779297, + "logps/rejected": -393.4504699707031, + "loss": 0.0956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1247771978378296, + "rewards/margins": 2.2049498558044434, + "rewards/rejected": -1.0801727771759033, + "step": 14151 + }, + { + "epoch": 0.82, + "learning_rate": 7.944735865698466e-09, + "logits/chosen": -1.848284125328064, + "logits/rejected": -1.8519021272659302, + "logps/chosen": -61.9691276550293, + "logps/rejected": -198.3590087890625, + "loss": 0.2313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7923725247383118, + "rewards/margins": 2.2600619792938232, + "rewards/rejected": -1.4676895141601562, + "step": 14152 + }, + { + "epoch": 0.82, + "learning_rate": 7.939639427377815e-09, + "logits/chosen": -1.824807047843933, + "logits/rejected": -1.8643878698349, + "logps/chosen": -253.45315551757812, + "logps/rejected": -650.8941650390625, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.03849196434021, + "rewards/margins": 12.053757667541504, + "rewards/rejected": -10.015265464782715, + "step": 14153 + }, + { + "epoch": 0.82, + "learning_rate": 7.93454448324693e-09, + "logits/chosen": -1.932913064956665, + "logits/rejected": -1.9310283660888672, + "logps/chosen": -57.93644332885742, + "logps/rejected": -91.93763732910156, + "loss": 0.8623, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0357894897460938, + "rewards/margins": 0.7153152227401733, + "rewards/rejected": -1.751104712486267, + "step": 14154 + }, + { + "epoch": 0.82, + "learning_rate": 7.929451033486772e-09, + "logits/chosen": -2.0307679176330566, + "logits/rejected": -2.0348455905914307, + "logps/chosen": -112.04295349121094, + "logps/rejected": -138.1273956298828, + "loss": 0.5742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5017440915107727, + "rewards/margins": 1.214338779449463, + "rewards/rejected": -1.7160828113555908, + "step": 14155 + }, + { + "epoch": 0.82, + "learning_rate": 7.924359078278304e-09, + "logits/chosen": -1.833877444267273, + "logits/rejected": -1.834965705871582, + "logps/chosen": -41.7727165222168, + "logps/rejected": -251.46058654785156, + "loss": 0.2597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27966806292533875, + "rewards/margins": 4.294500827789307, + "rewards/rejected": -4.014832973480225, + "step": 14156 + }, + { + "epoch": 0.82, + "learning_rate": 7.919268617802417e-09, + "logits/chosen": -1.7638657093048096, + "logits/rejected": -1.7949403524398804, + "logps/chosen": -274.581298828125, + "logps/rejected": -596.3291015625, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.691625952720642, + "rewards/margins": 10.392965316772461, + "rewards/rejected": -8.701339721679688, + "step": 14157 + }, + { + "epoch": 0.82, + "learning_rate": 7.914179652239945e-09, + "logits/chosen": -2.009627342224121, + "logits/rejected": -2.0110082626342773, + "logps/chosen": -0.02209700271487236, + "logps/rejected": -182.90646362304688, + "loss": 0.521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009169402532279491, + "rewards/margins": 0.7288624048233032, + "rewards/rejected": -0.7196930050849915, + "step": 14158 + }, + { + "epoch": 0.82, + "learning_rate": 7.909092181771687e-09, + "logits/chosen": -1.7830575704574585, + "logits/rejected": -1.8431018590927124, + "logps/chosen": -122.47862243652344, + "logps/rejected": -241.25469970703125, + "loss": 0.2039, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0495712757110596, + "rewards/margins": 1.0933700799942017, + "rewards/rejected": -0.04379883036017418, + "step": 14159 + }, + { + "epoch": 0.82, + "learning_rate": 7.904006206578357e-09, + "logits/chosen": -1.784562587738037, + "logits/rejected": -1.7717609405517578, + "logps/chosen": -189.1164093017578, + "logps/rejected": -266.73095703125, + "loss": 0.2088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7003631591796875, + "rewards/margins": 1.2288177013397217, + "rewards/rejected": -0.528454601764679, + "step": 14160 + }, + { + "epoch": 0.82, + "learning_rate": 7.898921726840634e-09, + "logits/chosen": -1.7226464748382568, + "logits/rejected": -1.7169123888015747, + "logps/chosen": -160.95452880859375, + "logps/rejected": -317.7860107421875, + "loss": 0.0776, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3613343238830566, + "rewards/margins": 2.166830539703369, + "rewards/rejected": 0.1945037841796875, + "step": 14161 + }, + { + "epoch": 0.82, + "learning_rate": 7.893838742739151e-09, + "logits/chosen": -1.7930389642715454, + "logits/rejected": -1.7923524379730225, + "logps/chosen": -0.4531656503677368, + "logps/rejected": -89.3765869140625, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028255779296159744, + "rewards/margins": 2.8203036785125732, + "rewards/rejected": -2.7920479774475098, + "step": 14162 + }, + { + "epoch": 0.82, + "learning_rate": 7.888757254454492e-09, + "logits/chosen": -1.8626136779785156, + "logits/rejected": -1.852284550666809, + "logps/chosen": -275.95556640625, + "logps/rejected": -491.631103515625, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7129151821136475, + "rewards/margins": 4.548407077789307, + "rewards/rejected": -0.835491955280304, + "step": 14163 + }, + { + "epoch": 0.82, + "learning_rate": 7.883677262167144e-09, + "logits/chosen": -1.9181442260742188, + "logits/rejected": -1.9234914779663086, + "logps/chosen": -201.4573974609375, + "logps/rejected": -335.56903076171875, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.824267625808716, + "rewards/margins": 3.4017884731292725, + "rewards/rejected": 0.4224792420864105, + "step": 14164 + }, + { + "epoch": 0.82, + "learning_rate": 7.878598766057615e-09, + "logits/chosen": -1.7864940166473389, + "logits/rejected": -1.7803682088851929, + "logps/chosen": -8.964401058619842e-05, + "logps/rejected": -152.3115997314453, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.622218744363636e-07, + "rewards/margins": 0.6580156683921814, + "rewards/rejected": -0.6580154299736023, + "step": 14165 + }, + { + "epoch": 0.82, + "learning_rate": 7.87352176630628e-09, + "logits/chosen": -1.8896745443344116, + "logits/rejected": -1.8999626636505127, + "logps/chosen": -257.4134826660156, + "logps/rejected": -367.6706848144531, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.425009250640869, + "rewards/margins": 3.7372803688049316, + "rewards/rejected": -1.3122711181640625, + "step": 14166 + }, + { + "epoch": 0.82, + "learning_rate": 7.868446263093532e-09, + "logits/chosen": -1.9637023210525513, + "logits/rejected": -1.9567615985870361, + "logps/chosen": -31.293529510498047, + "logps/rejected": -188.6458740234375, + "loss": 0.1558, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1521847248077393, + "rewards/margins": 1.853074312210083, + "rewards/rejected": -0.7008895874023438, + "step": 14167 + }, + { + "epoch": 0.82, + "learning_rate": 7.863372256599632e-09, + "logits/chosen": -1.9303656816482544, + "logits/rejected": -1.915149211883545, + "logps/chosen": -45.30522537231445, + "logps/rejected": -291.9564514160156, + "loss": 0.1791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7722427248954773, + "rewards/margins": 5.370744228363037, + "rewards/rejected": -4.598501682281494, + "step": 14168 + }, + { + "epoch": 0.82, + "learning_rate": 7.858299747004887e-09, + "logits/chosen": -2.0664145946502686, + "logits/rejected": -2.038527488708496, + "logps/chosen": -62.59699249267578, + "logps/rejected": -396.6156005859375, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2122466564178467, + "rewards/margins": 8.097521781921387, + "rewards/rejected": -5.885275363922119, + "step": 14169 + }, + { + "epoch": 0.82, + "learning_rate": 7.853228734489464e-09, + "logits/chosen": -1.8958055973052979, + "logits/rejected": -1.9339592456817627, + "logps/chosen": -179.0107879638672, + "logps/rejected": -409.13037109375, + "loss": 0.0414, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3899338245391846, + "rewards/margins": 4.357420444488525, + "rewards/rejected": -2.967486619949341, + "step": 14170 + }, + { + "epoch": 0.82, + "learning_rate": 7.848159219233524e-09, + "logits/chosen": -2.094132423400879, + "logits/rejected": -2.087862253189087, + "logps/chosen": -175.58212280273438, + "logps/rejected": -326.28228759765625, + "loss": 0.1053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2193878889083862, + "rewards/margins": 2.5914673805236816, + "rewards/rejected": -1.3720794916152954, + "step": 14171 + }, + { + "epoch": 0.82, + "learning_rate": 7.843091201417135e-09, + "logits/chosen": -1.8879694938659668, + "logits/rejected": -1.8980259895324707, + "logps/chosen": -242.74961853027344, + "logps/rejected": -455.2593078613281, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.91515052318573, + "rewards/margins": 4.357475280761719, + "rewards/rejected": -2.4423248767852783, + "step": 14172 + }, + { + "epoch": 0.82, + "learning_rate": 7.838024681220385e-09, + "logits/chosen": -1.951535940170288, + "logits/rejected": -1.9410125017166138, + "logps/chosen": -233.33631896972656, + "logps/rejected": -436.89349365234375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.356273174285889, + "rewards/margins": 4.79661750793457, + "rewards/rejected": -0.4403442442417145, + "step": 14173 + }, + { + "epoch": 0.82, + "learning_rate": 7.832959658823219e-09, + "logits/chosen": -1.6446894407272339, + "logits/rejected": -1.6516764163970947, + "logps/chosen": -41.005157470703125, + "logps/rejected": -253.23533630371094, + "loss": 0.1349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1638535261154175, + "rewards/margins": 3.075894355773926, + "rewards/rejected": -1.9120407104492188, + "step": 14174 + }, + { + "epoch": 0.82, + "learning_rate": 7.827896134405587e-09, + "logits/chosen": -1.569671869277954, + "logits/rejected": -1.5493649244308472, + "logps/chosen": -165.23367309570312, + "logps/rejected": -351.5545349121094, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0806045532226562, + "rewards/margins": 0.5095016360282898, + "rewards/rejected": 0.5711029171943665, + "step": 14175 + }, + { + "epoch": 0.82, + "learning_rate": 7.822834108147375e-09, + "logits/chosen": -1.9651355743408203, + "logits/rejected": -1.9561522006988525, + "logps/chosen": -207.8444366455078, + "logps/rejected": -383.84063720703125, + "loss": 0.1512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4561280012130737, + "rewards/margins": 2.21305251121521, + "rewards/rejected": -0.7569244503974915, + "step": 14176 + }, + { + "epoch": 0.83, + "learning_rate": 7.817773580228398e-09, + "logits/chosen": -1.9458779096603394, + "logits/rejected": -1.946112036705017, + "logps/chosen": -0.0022787582129240036, + "logps/rejected": -249.01524353027344, + "loss": 0.3547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015724365948699415, + "rewards/margins": 4.783334255218506, + "rewards/rejected": -4.783491611480713, + "step": 14177 + }, + { + "epoch": 0.83, + "learning_rate": 7.812714550828459e-09, + "logits/chosen": -1.6901005506515503, + "logits/rejected": -1.6934789419174194, + "logps/chosen": -40.53194808959961, + "logps/rejected": -72.87491607666016, + "loss": 0.7255, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2238895446062088, + "rewards/margins": -0.12099266052246094, + "rewards/rejected": -0.10289688408374786, + "step": 14178 + }, + { + "epoch": 0.83, + "learning_rate": 7.807657020127246e-09, + "logits/chosen": -2.0514986515045166, + "logits/rejected": -2.0510897636413574, + "logps/chosen": -0.061666302382946014, + "logps/rejected": -107.18989562988281, + "loss": 0.5572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004545439034700394, + "rewards/margins": 0.5711656212806702, + "rewards/rejected": -0.5757110714912415, + "step": 14179 + }, + { + "epoch": 0.83, + "learning_rate": 7.802600988304436e-09, + "logits/chosen": -2.1822099685668945, + "logits/rejected": -2.1804440021514893, + "logps/chosen": -6.882832050323486, + "logps/rejected": -227.2563018798828, + "loss": 0.3622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20284771919250488, + "rewards/margins": 1.9473698139190674, + "rewards/rejected": -1.7445220947265625, + "step": 14180 + }, + { + "epoch": 0.83, + "learning_rate": 7.797546455539655e-09, + "logits/chosen": -1.9240063428878784, + "logits/rejected": -1.9211304187774658, + "logps/chosen": -52.452880859375, + "logps/rejected": -293.8610534667969, + "loss": 0.0981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.824558675289154, + "rewards/margins": 5.73689079284668, + "rewards/rejected": -4.912332057952881, + "step": 14181 + }, + { + "epoch": 0.83, + "learning_rate": 7.792493422012464e-09, + "logits/chosen": -1.947843074798584, + "logits/rejected": -1.9249355792999268, + "logps/chosen": -0.10282639414072037, + "logps/rejected": -122.0750732421875, + "loss": 0.3998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06009579449892044, + "rewards/margins": 1.8866407871246338, + "rewards/rejected": -1.826545000076294, + "step": 14182 + }, + { + "epoch": 0.83, + "learning_rate": 7.787441887902357e-09, + "logits/chosen": -1.9219304323196411, + "logits/rejected": -1.9536784887313843, + "logps/chosen": -263.0789489746094, + "logps/rejected": -363.1485595703125, + "loss": 0.0576, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3441070318222046, + "rewards/margins": 3.160543918609619, + "rewards/rejected": -1.816436767578125, + "step": 14183 + }, + { + "epoch": 0.83, + "learning_rate": 7.782391853388798e-09, + "logits/chosen": -2.0693910121917725, + "logits/rejected": -2.064312219619751, + "logps/chosen": -1.996716856956482, + "logps/rejected": -286.0413513183594, + "loss": 0.3362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09009703248739243, + "rewards/margins": 4.6836137771606445, + "rewards/rejected": -4.593516826629639, + "step": 14184 + }, + { + "epoch": 0.83, + "learning_rate": 7.777343318651192e-09, + "logits/chosen": -1.899266004562378, + "logits/rejected": -1.897675633430481, + "logps/chosen": -14.359748840332031, + "logps/rejected": -171.43734741210938, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04999838024377823, + "rewards/margins": 3.374519109725952, + "rewards/rejected": -3.3245208263397217, + "step": 14185 + }, + { + "epoch": 0.83, + "learning_rate": 7.772296283868895e-09, + "logits/chosen": -1.7077770233154297, + "logits/rejected": -1.7023729085922241, + "logps/chosen": -0.4890737533569336, + "logps/rejected": -174.9625701904297, + "loss": 0.3488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001708036637865007, + "rewards/margins": 3.983954429626465, + "rewards/rejected": -3.9822463989257812, + "step": 14186 + }, + { + "epoch": 0.83, + "learning_rate": 7.767250749221165e-09, + "logits/chosen": -1.6500049829483032, + "logits/rejected": -1.6564090251922607, + "logps/chosen": -241.5889892578125, + "logps/rejected": -416.45172119140625, + "loss": 0.1001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1506593227386475, + "rewards/margins": 2.358246088027954, + "rewards/rejected": -0.20758667588233948, + "step": 14187 + }, + { + "epoch": 0.83, + "learning_rate": 7.762206714887299e-09, + "logits/chosen": -2.056797504425049, + "logits/rejected": -2.0477635860443115, + "logps/chosen": -38.660892486572266, + "logps/rejected": -230.72483825683594, + "loss": 0.2572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39721259474754333, + "rewards/margins": 3.2940189838409424, + "rewards/rejected": -2.896806478500366, + "step": 14188 + }, + { + "epoch": 0.83, + "learning_rate": 7.757164181046444e-09, + "logits/chosen": -1.7189126014709473, + "logits/rejected": -1.710411787033081, + "logps/chosen": -3.792440176010132, + "logps/rejected": -207.15576171875, + "loss": 0.3319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056566692888736725, + "rewards/margins": 4.124166488647461, + "rewards/rejected": -4.067599773406982, + "step": 14189 + }, + { + "epoch": 0.83, + "learning_rate": 7.75212314787776e-09, + "logits/chosen": -2.024939775466919, + "logits/rejected": -2.023517608642578, + "logps/chosen": -37.07009506225586, + "logps/rejected": -172.62274169921875, + "loss": 0.1299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0463260412216187, + "rewards/margins": 3.4053869247436523, + "rewards/rejected": -2.359060764312744, + "step": 14190 + }, + { + "epoch": 0.83, + "learning_rate": 7.747083615560296e-09, + "logits/chosen": -1.9109492301940918, + "logits/rejected": -1.9008593559265137, + "logps/chosen": -221.96832275390625, + "logps/rejected": -242.76382446289062, + "loss": 0.4117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.204681396484375, + "rewards/margins": 0.13751831650733948, + "rewards/rejected": 0.06716308742761612, + "step": 14191 + }, + { + "epoch": 0.83, + "learning_rate": 7.742045584273126e-09, + "logits/chosen": -1.9896830320358276, + "logits/rejected": -1.9862502813339233, + "logps/chosen": -169.666748046875, + "logps/rejected": -291.40411376953125, + "loss": 0.1355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5189193487167358, + "rewards/margins": 1.4769973754882812, + "rewards/rejected": 0.04192199930548668, + "step": 14192 + }, + { + "epoch": 0.83, + "learning_rate": 7.737009054195193e-09, + "logits/chosen": -1.8687480688095093, + "logits/rejected": -1.8641597032546997, + "logps/chosen": -170.27633666992188, + "logps/rejected": -271.52105712890625, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.483746290206909, + "rewards/margins": 2.421704053878784, + "rewards/rejected": 0.062042236328125, + "step": 14193 + }, + { + "epoch": 0.83, + "learning_rate": 7.731974025505427e-09, + "logits/chosen": -1.7527693510055542, + "logits/rejected": -1.7632311582565308, + "logps/chosen": -128.08834838867188, + "logps/rejected": -304.251220703125, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.936236619949341, + "rewards/margins": 1.983514428138733, + "rewards/rejected": 0.9527221918106079, + "step": 14194 + }, + { + "epoch": 0.83, + "learning_rate": 7.726940498382701e-09, + "logits/chosen": -1.9600614309310913, + "logits/rejected": -1.945035457611084, + "logps/chosen": -26.365442276000977, + "logps/rejected": -289.2904052734375, + "loss": 0.2244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42882654070854187, + "rewards/margins": 3.2520992755889893, + "rewards/rejected": -2.823272705078125, + "step": 14195 + }, + { + "epoch": 0.83, + "learning_rate": 7.721908473005828e-09, + "logits/chosen": -1.9324437379837036, + "logits/rejected": -1.8245224952697754, + "logps/chosen": -188.946533203125, + "logps/rejected": -261.4422607421875, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0014235973358154, + "rewards/margins": 0.5079482793807983, + "rewards/rejected": 1.493475317955017, + "step": 14196 + }, + { + "epoch": 0.83, + "learning_rate": 7.71687794955358e-09, + "logits/chosen": -1.88057541847229, + "logits/rejected": -1.8811124563217163, + "logps/chosen": -34.79051971435547, + "logps/rejected": -141.64730834960938, + "loss": 0.1787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8169353604316711, + "rewards/margins": 2.9970295429229736, + "rewards/rejected": -2.1800942420959473, + "step": 14197 + }, + { + "epoch": 0.83, + "learning_rate": 7.711848928204645e-09, + "logits/chosen": -1.9573835134506226, + "logits/rejected": -1.9660905599594116, + "logps/chosen": -171.80426025390625, + "logps/rejected": -291.5278015136719, + "loss": 0.0428, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.265576124191284, + "rewards/margins": 2.8425049781799316, + "rewards/rejected": -0.5769287347793579, + "step": 14198 + }, + { + "epoch": 0.83, + "learning_rate": 7.706821409137692e-09, + "logits/chosen": -1.831353783607483, + "logits/rejected": -1.8300557136535645, + "logps/chosen": -227.52719116210938, + "logps/rejected": -269.4349365234375, + "loss": 0.2423, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.224740743637085, + "rewards/margins": 0.5241608619689941, + "rewards/rejected": 1.7005798816680908, + "step": 14199 + }, + { + "epoch": 0.83, + "learning_rate": 7.701795392531318e-09, + "logits/chosen": -1.9881783723831177, + "logits/rejected": -1.9877864122390747, + "logps/chosen": -48.171791076660156, + "logps/rejected": -190.8829345703125, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4621395170688629, + "rewards/margins": 1.9168529510498047, + "rewards/rejected": -1.4547134637832642, + "step": 14200 + }, + { + "epoch": 0.83, + "learning_rate": 7.69677087856409e-09, + "logits/chosen": -1.8509557247161865, + "logits/rejected": -1.8419567346572876, + "logps/chosen": -1.0931596755981445, + "logps/rejected": -120.78309631347656, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04986642673611641, + "rewards/margins": 2.680129289627075, + "rewards/rejected": -2.630262851715088, + "step": 14201 + }, + { + "epoch": 0.83, + "learning_rate": 7.691747867414471e-09, + "logits/chosen": -2.0600080490112305, + "logits/rejected": -2.0650553703308105, + "logps/chosen": -106.30789947509766, + "logps/rejected": -250.83578491210938, + "loss": 0.6768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.572675347328186, + "rewards/margins": 0.49289631843566895, + "rewards/rejected": -1.065571665763855, + "step": 14202 + }, + { + "epoch": 0.83, + "learning_rate": 7.686726359260925e-09, + "logits/chosen": -1.8662868738174438, + "logits/rejected": -1.8691123723983765, + "logps/chosen": -36.05863952636719, + "logps/rejected": -216.58294677734375, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08863677829504013, + "rewards/margins": 0.8325912952423096, + "rewards/rejected": -0.9212280511856079, + "step": 14203 + }, + { + "epoch": 0.83, + "learning_rate": 7.681706354281836e-09, + "logits/chosen": -1.8682442903518677, + "logits/rejected": -1.8611663579940796, + "logps/chosen": -204.23715209960938, + "logps/rejected": -316.1481018066406, + "loss": 0.094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5223388671875, + "rewards/margins": 1.9707305431365967, + "rewards/rejected": -0.44839173555374146, + "step": 14204 + }, + { + "epoch": 0.83, + "learning_rate": 7.676687852655545e-09, + "logits/chosen": -2.0538830757141113, + "logits/rejected": -2.0336391925811768, + "logps/chosen": -19.49861717224121, + "logps/rejected": -313.8786315917969, + "loss": 0.2635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18561363220214844, + "rewards/margins": 6.7120890617370605, + "rewards/rejected": -6.526475429534912, + "step": 14205 + }, + { + "epoch": 0.83, + "learning_rate": 7.671670854560309e-09, + "logits/chosen": -1.8104658126831055, + "logits/rejected": -1.8111687898635864, + "logps/chosen": -179.42747497558594, + "logps/rejected": -264.2159729003906, + "loss": 0.1504, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.440338134765625, + "rewards/margins": 1.1294708251953125, + "rewards/rejected": 2.3108673095703125, + "step": 14206 + }, + { + "epoch": 0.83, + "learning_rate": 7.66665536017439e-09, + "logits/chosen": -1.8205217123031616, + "logits/rejected": -1.808728575706482, + "logps/chosen": -268.22186279296875, + "logps/rejected": -477.3457946777344, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.919561743736267, + "rewards/margins": 6.532876968383789, + "rewards/rejected": -4.613315105438232, + "step": 14207 + }, + { + "epoch": 0.83, + "learning_rate": 7.661641369675942e-09, + "logits/chosen": -1.7347441911697388, + "logits/rejected": -1.7421544790267944, + "logps/chosen": -247.84988403320312, + "logps/rejected": -303.2779235839844, + "loss": 0.1583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6440154910087585, + "rewards/margins": 2.29473876953125, + "rewards/rejected": -1.6507233381271362, + "step": 14208 + }, + { + "epoch": 0.83, + "learning_rate": 7.656628883243105e-09, + "logits/chosen": -1.9507644176483154, + "logits/rejected": -1.951438307762146, + "logps/chosen": -2.5249950885772705, + "logps/rejected": -82.89759826660156, + "loss": 0.5213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1330866813659668, + "rewards/margins": 0.4575137197971344, + "rewards/rejected": -0.3244270384311676, + "step": 14209 + }, + { + "epoch": 0.83, + "learning_rate": 7.651617901053908e-09, + "logits/chosen": -1.926446795463562, + "logits/rejected": -1.9717851877212524, + "logps/chosen": -194.17112731933594, + "logps/rejected": -374.25146484375, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8773666620254517, + "rewards/margins": 4.612242221832275, + "rewards/rejected": -2.734875440597534, + "step": 14210 + }, + { + "epoch": 0.83, + "learning_rate": 7.64660842328641e-09, + "logits/chosen": -1.9727259874343872, + "logits/rejected": -1.9780480861663818, + "logps/chosen": -191.56527709960938, + "logps/rejected": -468.0845947265625, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.907385230064392, + "rewards/margins": 5.600653171539307, + "rewards/rejected": -3.693267822265625, + "step": 14211 + }, + { + "epoch": 0.83, + "learning_rate": 7.641600450118552e-09, + "logits/chosen": -1.876582384109497, + "logits/rejected": -1.8826402425765991, + "logps/chosen": -0.030060095712542534, + "logps/rejected": -120.02072143554688, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015021044528111815, + "rewards/margins": 2.4996097087860107, + "rewards/rejected": -2.5011117458343506, + "step": 14212 + }, + { + "epoch": 0.83, + "learning_rate": 7.636593981728234e-09, + "logits/chosen": -1.8189709186553955, + "logits/rejected": -1.8854347467422485, + "logps/chosen": -219.97152709960938, + "logps/rejected": -393.1539306640625, + "loss": 0.0496, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9761642217636108, + "rewards/margins": 3.266078233718872, + "rewards/rejected": -1.2899140119552612, + "step": 14213 + }, + { + "epoch": 0.83, + "learning_rate": 7.631589018293322e-09, + "logits/chosen": -2.0283429622650146, + "logits/rejected": -2.0317680835723877, + "logps/chosen": -0.00012409423652570695, + "logps/rejected": -202.78524780273438, + "loss": 0.357, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.179275366477668e-07, + "rewards/margins": 4.15862512588501, + "rewards/rejected": -4.158626079559326, + "step": 14214 + }, + { + "epoch": 0.83, + "learning_rate": 7.626585559991628e-09, + "logits/chosen": -1.7976125478744507, + "logits/rejected": -1.7896400690078735, + "logps/chosen": -253.2389678955078, + "logps/rejected": -446.2970275878906, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4475998878479004, + "rewards/margins": 3.519374370574951, + "rewards/rejected": -1.0717743635177612, + "step": 14215 + }, + { + "epoch": 0.83, + "learning_rate": 7.621583607000876e-09, + "logits/chosen": -2.015951633453369, + "logits/rejected": -2.018537998199463, + "logps/chosen": -0.745472252368927, + "logps/rejected": -201.9100799560547, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08979629725217819, + "rewards/margins": 3.24574875831604, + "rewards/rejected": -3.1559524536132812, + "step": 14216 + }, + { + "epoch": 0.83, + "learning_rate": 7.616583159498768e-09, + "logits/chosen": -1.9366313219070435, + "logits/rejected": -1.9122767448425293, + "logps/chosen": -65.3131103515625, + "logps/rejected": -358.4501037597656, + "loss": 0.1573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9594597220420837, + "rewards/margins": 2.598778486251831, + "rewards/rejected": -1.639318823814392, + "step": 14217 + }, + { + "epoch": 0.83, + "learning_rate": 7.611584217662942e-09, + "logits/chosen": -1.8542389869689941, + "logits/rejected": -1.84564208984375, + "logps/chosen": -17.258556365966797, + "logps/rejected": -136.24366760253906, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1051860824227333, + "rewards/margins": 2.1588757038116455, + "rewards/rejected": -2.05368971824646, + "step": 14218 + }, + { + "epoch": 0.83, + "learning_rate": 7.606586781670993e-09, + "logits/chosen": -2.0670204162597656, + "logits/rejected": -2.061944007873535, + "logps/chosen": -20.363422393798828, + "logps/rejected": -364.7492370605469, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21573983132839203, + "rewards/margins": 5.533119201660156, + "rewards/rejected": -5.74885892868042, + "step": 14219 + }, + { + "epoch": 0.83, + "learning_rate": 7.601590851700462e-09, + "logits/chosen": -1.8340927362442017, + "logits/rejected": -1.8049731254577637, + "logps/chosen": -167.20620727539062, + "logps/rejected": -365.46484375, + "loss": 0.132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4807007312774658, + "rewards/margins": 1.4522796869277954, + "rewards/rejected": 0.02842102013528347, + "step": 14220 + }, + { + "epoch": 0.83, + "learning_rate": 7.596596427928803e-09, + "logits/chosen": -1.849698781967163, + "logits/rejected": -1.8391661643981934, + "logps/chosen": -410.1175537109375, + "logps/rejected": -589.54736328125, + "loss": 0.0882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.983642578125, + "rewards/margins": 1.73492431640625, + "rewards/rejected": 0.24871826171875, + "step": 14221 + }, + { + "epoch": 0.83, + "learning_rate": 7.591603510533456e-09, + "logits/chosen": -2.0235631465911865, + "logits/rejected": -2.0210273265838623, + "logps/chosen": -0.024677544832229614, + "logps/rejected": -252.902099609375, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015396717935800552, + "rewards/margins": 4.620002746582031, + "rewards/rejected": -4.621542453765869, + "step": 14222 + }, + { + "epoch": 0.83, + "learning_rate": 7.586612099691797e-09, + "logits/chosen": -1.9938448667526245, + "logits/rejected": -1.9530493021011353, + "logps/chosen": -206.66058349609375, + "logps/rejected": -349.14447021484375, + "loss": 0.069, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.161511182785034, + "rewards/margins": 2.1963071823120117, + "rewards/rejected": 0.9652038812637329, + "step": 14223 + }, + { + "epoch": 0.83, + "learning_rate": 7.58162219558115e-09, + "logits/chosen": -1.8167650699615479, + "logits/rejected": -1.8267099857330322, + "logps/chosen": -179.4946746826172, + "logps/rejected": -220.76356506347656, + "loss": 0.1224, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8090102672576904, + "rewards/margins": 1.4858489036560059, + "rewards/rejected": 1.3231613636016846, + "step": 14224 + }, + { + "epoch": 0.83, + "learning_rate": 7.576633798378757e-09, + "logits/chosen": -1.8548482656478882, + "logits/rejected": -1.8599467277526855, + "logps/chosen": -18.500093460083008, + "logps/rejected": -149.23484802246094, + "loss": 0.4191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031003952026367188, + "rewards/margins": 2.1586520671844482, + "rewards/rejected": -2.161752462387085, + "step": 14225 + }, + { + "epoch": 0.83, + "learning_rate": 7.571646908261863e-09, + "logits/chosen": -1.7055528163909912, + "logits/rejected": -1.7036116123199463, + "logps/chosen": -0.15107761323451996, + "logps/rejected": -41.620365142822266, + "loss": 0.673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002462916076183319, + "rewards/margins": 0.018510203808546066, + "rewards/rejected": -0.016047287732362747, + "step": 14226 + }, + { + "epoch": 0.83, + "learning_rate": 7.566661525407602e-09, + "logits/chosen": -1.7642765045166016, + "logits/rejected": -1.76935875415802, + "logps/chosen": -30.12160301208496, + "logps/rejected": -172.6144256591797, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10313987731933594, + "rewards/margins": 2.56425142288208, + "rewards/rejected": -2.461111545562744, + "step": 14227 + }, + { + "epoch": 0.83, + "learning_rate": 7.561677649993097e-09, + "logits/chosen": -1.9294379949569702, + "logits/rejected": -1.940346360206604, + "logps/chosen": -372.4676208496094, + "logps/rejected": -562.1787109375, + "loss": 0.0838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6166107058525085, + "rewards/margins": 5.821280002593994, + "rewards/rejected": -5.20466947555542, + "step": 14228 + }, + { + "epoch": 0.83, + "learning_rate": 7.556695282195369e-09, + "logits/chosen": -1.9346404075622559, + "logits/rejected": -1.9492515325546265, + "logps/chosen": -57.77928161621094, + "logps/rejected": -245.15371704101562, + "loss": 0.2006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.700207531452179, + "rewards/margins": 5.50740385055542, + "rewards/rejected": -4.807196140289307, + "step": 14229 + }, + { + "epoch": 0.83, + "learning_rate": 7.551714422191458e-09, + "logits/chosen": -1.7148526906967163, + "logits/rejected": -1.7533377408981323, + "logps/chosen": -252.1123046875, + "logps/rejected": -472.18768310546875, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.155017137527466, + "rewards/margins": 7.668536186218262, + "rewards/rejected": -5.513519287109375, + "step": 14230 + }, + { + "epoch": 0.83, + "learning_rate": 7.546735070158278e-09, + "logits/chosen": -1.9086180925369263, + "logits/rejected": -1.8990728855133057, + "logps/chosen": -64.66474151611328, + "logps/rejected": -246.23204040527344, + "loss": 0.2359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4277938902378082, + "rewards/margins": 3.642277717590332, + "rewards/rejected": -3.2144837379455566, + "step": 14231 + }, + { + "epoch": 0.83, + "learning_rate": 7.541757226272743e-09, + "logits/chosen": -1.9586601257324219, + "logits/rejected": -1.9527537822723389, + "logps/chosen": -0.033141691237688065, + "logps/rejected": -286.78253173828125, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00224397424608469, + "rewards/margins": 7.819839954376221, + "rewards/rejected": -7.822083950042725, + "step": 14232 + }, + { + "epoch": 0.83, + "learning_rate": 7.536780890711653e-09, + "logits/chosen": -2.0309879779815674, + "logits/rejected": -2.0348291397094727, + "logps/chosen": -0.0005133525119163096, + "logps/rejected": -151.72088623046875, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.121400393662043e-05, + "rewards/margins": 4.021254539489746, + "rewards/rejected": -4.021275520324707, + "step": 14233 + }, + { + "epoch": 0.83, + "learning_rate": 7.531806063651836e-09, + "logits/chosen": -2.1279590129852295, + "logits/rejected": -2.136207103729248, + "logps/chosen": -19.254650115966797, + "logps/rejected": -255.80548095703125, + "loss": 0.3387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07413806766271591, + "rewards/margins": 4.256097316741943, + "rewards/rejected": -4.330235481262207, + "step": 14234 + }, + { + "epoch": 0.83, + "learning_rate": 7.52683274526999e-09, + "logits/chosen": -1.8454747200012207, + "logits/rejected": -1.8312792778015137, + "logps/chosen": -24.127119064331055, + "logps/rejected": -305.0060119628906, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0877416655421257, + "rewards/margins": 5.35222864151001, + "rewards/rejected": -5.264486789703369, + "step": 14235 + }, + { + "epoch": 0.83, + "learning_rate": 7.521860935742808e-09, + "logits/chosen": -1.9938541650772095, + "logits/rejected": -1.9781897068023682, + "logps/chosen": -32.071533203125, + "logps/rejected": -225.99179077148438, + "loss": 0.4854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4403352737426758, + "rewards/margins": 2.6351850032806396, + "rewards/rejected": -3.0755202770233154, + "step": 14236 + }, + { + "epoch": 0.83, + "learning_rate": 7.516890635246902e-09, + "logits/chosen": -1.8962715864181519, + "logits/rejected": -1.8763693571090698, + "logps/chosen": -0.0009808624163269997, + "logps/rejected": -242.48126220703125, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001199501333758235, + "rewards/margins": 4.744780540466309, + "rewards/rejected": -4.7435808181762695, + "step": 14237 + }, + { + "epoch": 0.83, + "learning_rate": 7.511921843958857e-09, + "logits/chosen": -2.00980806350708, + "logits/rejected": -2.006681203842163, + "logps/chosen": -32.674346923828125, + "logps/rejected": -252.62808227539062, + "loss": 0.1512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9277176260948181, + "rewards/margins": 4.275306701660156, + "rewards/rejected": -3.3475892543792725, + "step": 14238 + }, + { + "epoch": 0.83, + "learning_rate": 7.506954562055185e-09, + "logits/chosen": -1.9729148149490356, + "logits/rejected": -1.9613761901855469, + "logps/chosen": -0.02631140686571598, + "logps/rejected": -111.29258728027344, + "loss": 0.4552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014221136458218098, + "rewards/margins": 1.2890892028808594, + "rewards/rejected": -1.2748680114746094, + "step": 14239 + }, + { + "epoch": 0.83, + "learning_rate": 7.501988789712333e-09, + "logits/chosen": -1.9179311990737915, + "logits/rejected": -1.9381935596466064, + "logps/chosen": -147.77178955078125, + "logps/rejected": -392.62261962890625, + "loss": 0.1238, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3812698125839233, + "rewards/margins": 2.275557041168213, + "rewards/rejected": -0.894287109375, + "step": 14240 + }, + { + "epoch": 0.83, + "learning_rate": 7.49702452710672e-09, + "logits/chosen": -1.9017000198364258, + "logits/rejected": -1.862587332725525, + "logps/chosen": -186.49514770507812, + "logps/rejected": -280.1786804199219, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8547561168670654, + "rewards/margins": 2.678248643875122, + "rewards/rejected": 0.17650757730007172, + "step": 14241 + }, + { + "epoch": 0.83, + "learning_rate": 7.492061774414698e-09, + "logits/chosen": -1.6023919582366943, + "logits/rejected": -1.6037758588790894, + "logps/chosen": -30.231098175048828, + "logps/rejected": -352.1661071777344, + "loss": 0.2277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41630515456199646, + "rewards/margins": 2.20231294631958, + "rewards/rejected": -1.7860077619552612, + "step": 14242 + }, + { + "epoch": 0.83, + "learning_rate": 7.487100531812584e-09, + "logits/chosen": -1.8139474391937256, + "logits/rejected": -1.8048354387283325, + "logps/chosen": -24.848495483398438, + "logps/rejected": -166.91574096679688, + "loss": 0.3658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7574100494384766, + "rewards/margins": 0.6338565945625305, + "rewards/rejected": 0.12355346977710724, + "step": 14243 + }, + { + "epoch": 0.83, + "learning_rate": 7.482140799476605e-09, + "logits/chosen": -2.0273311138153076, + "logits/rejected": -2.0347306728363037, + "logps/chosen": -274.4922180175781, + "logps/rejected": -383.1956787109375, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9411041736602783, + "rewards/margins": 0.4845489263534546, + "rewards/rejected": 1.4565552473068237, + "step": 14244 + }, + { + "epoch": 0.83, + "learning_rate": 7.477182577582957e-09, + "logits/chosen": -1.6429190635681152, + "logits/rejected": -1.6523516178131104, + "logps/chosen": -158.7694549560547, + "logps/rejected": -317.40289306640625, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1020736694335938, + "rewards/margins": 2.7962448596954346, + "rewards/rejected": -1.6941711902618408, + "step": 14245 + }, + { + "epoch": 0.83, + "learning_rate": 7.472225866307785e-09, + "logits/chosen": -1.9411689043045044, + "logits/rejected": -1.9214262962341309, + "logps/chosen": -222.0177459716797, + "logps/rejected": -442.30999755859375, + "loss": 0.0686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2276290655136108, + "rewards/margins": 3.3014144897460938, + "rewards/rejected": -2.0737855434417725, + "step": 14246 + }, + { + "epoch": 0.83, + "learning_rate": 7.46727066582719e-09, + "logits/chosen": -1.8424797058105469, + "logits/rejected": -1.824081540107727, + "logps/chosen": -206.28335571289062, + "logps/rejected": -421.35174560546875, + "loss": 0.178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.875634789466858, + "rewards/margins": 1.228515625, + "rewards/rejected": 0.6471191644668579, + "step": 14247 + }, + { + "epoch": 0.83, + "learning_rate": 7.462316976317168e-09, + "logits/chosen": -1.9325007200241089, + "logits/rejected": -1.7311344146728516, + "logps/chosen": -222.60400390625, + "logps/rejected": -885.878662109375, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.118084669113159, + "rewards/margins": 4.110583305358887, + "rewards/rejected": -1.992498755455017, + "step": 14248 + }, + { + "epoch": 0.83, + "learning_rate": 7.457364797953741e-09, + "logits/chosen": -1.7678821086883545, + "logits/rejected": -1.765318512916565, + "logps/chosen": -231.3792724609375, + "logps/rejected": -361.6402587890625, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.052947998046875, + "rewards/margins": 4.224069118499756, + "rewards/rejected": -1.1711212396621704, + "step": 14249 + }, + { + "epoch": 0.83, + "learning_rate": 7.452414130912804e-09, + "logits/chosen": -1.8852940797805786, + "logits/rejected": -1.887221097946167, + "logps/chosen": -3.8742720789741725e-05, + "logps/rejected": -145.4014434814453, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.629282094967493e-07, + "rewards/margins": 4.166633129119873, + "rewards/rejected": -4.166632175445557, + "step": 14250 + }, + { + "epoch": 0.83, + "learning_rate": 7.4474649753702545e-09, + "logits/chosen": -1.9269840717315674, + "logits/rejected": -1.92547607421875, + "logps/chosen": -56.262855529785156, + "logps/rejected": -156.5028533935547, + "loss": 0.1518, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7252418994903564, + "rewards/margins": 1.7381950616836548, + "rewards/rejected": -0.01295318640768528, + "step": 14251 + }, + { + "epoch": 0.83, + "learning_rate": 7.442517331501868e-09, + "logits/chosen": -1.8847315311431885, + "logits/rejected": -2.0379834175109863, + "logps/chosen": -233.64474487304688, + "logps/rejected": -311.823974609375, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.602935791015625, + "rewards/margins": 5.17010498046875, + "rewards/rejected": -3.567169189453125, + "step": 14252 + }, + { + "epoch": 0.83, + "learning_rate": 7.437571199483467e-09, + "logits/chosen": -1.7467706203460693, + "logits/rejected": -1.749239206314087, + "logps/chosen": -124.23946380615234, + "logps/rejected": -304.39239501953125, + "loss": 0.1729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.791181206703186, + "rewards/margins": 2.7275519371032715, + "rewards/rejected": -1.936370849609375, + "step": 14253 + }, + { + "epoch": 0.83, + "learning_rate": 7.432626579490714e-09, + "logits/chosen": -1.821982979774475, + "logits/rejected": -1.8172284364700317, + "logps/chosen": -21.81646728515625, + "logps/rejected": -249.95452880859375, + "loss": 0.3846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1628248244524002, + "rewards/margins": 1.5804396867752075, + "rewards/rejected": -1.4176148176193237, + "step": 14254 + }, + { + "epoch": 0.83, + "learning_rate": 7.427683471699292e-09, + "logits/chosen": -2.043478012084961, + "logits/rejected": -2.043015480041504, + "logps/chosen": -7.77562141418457, + "logps/rejected": -54.56764221191406, + "loss": 0.6214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25095701217651367, + "rewards/margins": 0.11222583055496216, + "rewards/rejected": -0.36318284273147583, + "step": 14255 + }, + { + "epoch": 0.83, + "learning_rate": 7.422741876284788e-09, + "logits/chosen": -1.6650880575180054, + "logits/rejected": -1.709398627281189, + "logps/chosen": -356.99945068359375, + "logps/rejected": -428.55352783203125, + "loss": 0.1882, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0397796630859375, + "rewards/margins": 1.046905517578125, + "rewards/rejected": 0.9928741455078125, + "step": 14256 + }, + { + "epoch": 0.83, + "learning_rate": 7.417801793422768e-09, + "logits/chosen": -1.8154654502868652, + "logits/rejected": -1.7670857906341553, + "logps/chosen": -175.88787841796875, + "logps/rejected": -308.0907897949219, + "loss": 0.0846, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.174826145172119, + "rewards/margins": 1.923883080482483, + "rewards/rejected": 1.2509430646896362, + "step": 14257 + }, + { + "epoch": 0.83, + "learning_rate": 7.4128632232887285e-09, + "logits/chosen": -1.9305627346038818, + "logits/rejected": -1.9211046695709229, + "logps/chosen": -52.509056091308594, + "logps/rejected": -198.95730590820312, + "loss": 0.7877, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6735332608222961, + "rewards/margins": -0.9234257340431213, + "rewards/rejected": 1.5969589948654175, + "step": 14258 + }, + { + "epoch": 0.83, + "learning_rate": 7.4079261660580925e-09, + "logits/chosen": -1.8788197040557861, + "logits/rejected": -1.8750978708267212, + "logps/chosen": -0.022498754784464836, + "logps/rejected": -110.72377014160156, + "loss": 0.3786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008269065292552114, + "rewards/margins": 2.0300509929656982, + "rewards/rejected": -2.0308778285980225, + "step": 14259 + }, + { + "epoch": 0.83, + "learning_rate": 7.402990621906258e-09, + "logits/chosen": -1.7830231189727783, + "logits/rejected": -1.783074975013733, + "logps/chosen": -398.6172790527344, + "logps/rejected": -415.619873046875, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8968505859375, + "rewards/margins": 0.15898430347442627, + "rewards/rejected": 1.7378662824630737, + "step": 14260 + }, + { + "epoch": 0.83, + "learning_rate": 7.398056591008561e-09, + "logits/chosen": -1.8791359663009644, + "logits/rejected": -1.8758525848388672, + "logps/chosen": -143.18856811523438, + "logps/rejected": -198.0471649169922, + "loss": 0.2731, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8724517822265625, + "rewards/margins": 0.6561126708984375, + "rewards/rejected": 0.216339111328125, + "step": 14261 + }, + { + "epoch": 0.83, + "learning_rate": 7.3931240735402966e-09, + "logits/chosen": -2.0096070766448975, + "logits/rejected": -2.0057103633880615, + "logps/chosen": -69.72322082519531, + "logps/rejected": -293.228271484375, + "loss": 0.0881, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3528931140899658, + "rewards/margins": 5.289671421051025, + "rewards/rejected": -3.9367783069610596, + "step": 14262 + }, + { + "epoch": 0.83, + "learning_rate": 7.388193069676662e-09, + "logits/chosen": -1.9123296737670898, + "logits/rejected": -1.9150394201278687, + "logps/chosen": -68.62189483642578, + "logps/rejected": -139.94752502441406, + "loss": 0.1948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9708511233329773, + "rewards/margins": 2.2041709423065186, + "rewards/rejected": -1.233319878578186, + "step": 14263 + }, + { + "epoch": 0.83, + "learning_rate": 7.3832635795928524e-09, + "logits/chosen": -1.797081470489502, + "logits/rejected": -1.9075756072998047, + "logps/chosen": -259.0733642578125, + "logps/rejected": -299.51031494140625, + "loss": 0.1688, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1800323724746704, + "rewards/margins": 1.3794068098068237, + "rewards/rejected": -0.19937439262866974, + "step": 14264 + }, + { + "epoch": 0.83, + "learning_rate": 7.378335603463981e-09, + "logits/chosen": -1.923905372619629, + "logits/rejected": -1.9225468635559082, + "logps/chosen": -7.830346584320068, + "logps/rejected": -113.77655029296875, + "loss": 0.4943, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1853773593902588, + "rewards/margins": 0.5440672636032104, + "rewards/rejected": -0.3586898744106293, + "step": 14265 + }, + { + "epoch": 0.83, + "learning_rate": 7.37340914146512e-09, + "logits/chosen": -1.9477760791778564, + "logits/rejected": -1.9305918216705322, + "logps/chosen": -106.79197692871094, + "logps/rejected": -325.63714599609375, + "loss": 0.1038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7283340692520142, + "rewards/margins": 2.5214035511016846, + "rewards/rejected": -1.7930694818496704, + "step": 14266 + }, + { + "epoch": 0.83, + "learning_rate": 7.368484193771257e-09, + "logits/chosen": -1.900476098060608, + "logits/rejected": -1.909227728843689, + "logps/chosen": -199.13998413085938, + "logps/rejected": -312.0325927734375, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0074126720428467, + "rewards/margins": 2.5438263416290283, + "rewards/rejected": -0.5364136099815369, + "step": 14267 + }, + { + "epoch": 0.83, + "learning_rate": 7.363560760557391e-09, + "logits/chosen": -1.9717564582824707, + "logits/rejected": -1.9853217601776123, + "logps/chosen": -300.4698181152344, + "logps/rejected": -639.8136596679688, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.4424591064453125, + "rewards/margins": 15.122464179992676, + "rewards/rejected": -10.680005073547363, + "step": 14268 + }, + { + "epoch": 0.83, + "learning_rate": 7.358638841998388e-09, + "logits/chosen": -2.0464577674865723, + "logits/rejected": -2.0437815189361572, + "logps/chosen": -2.469088554382324, + "logps/rejected": -76.0367431640625, + "loss": 0.4792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2644951343536377, + "rewards/margins": 0.5947601795196533, + "rewards/rejected": -0.3302650451660156, + "step": 14269 + }, + { + "epoch": 0.83, + "learning_rate": 7.353718438269135e-09, + "logits/chosen": -1.9271490573883057, + "logits/rejected": -1.9179353713989258, + "logps/chosen": -28.32793426513672, + "logps/rejected": -274.2216796875, + "loss": 0.3025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4672233760356903, + "rewards/margins": 1.9747124910354614, + "rewards/rejected": -1.5074890851974487, + "step": 14270 + }, + { + "epoch": 0.83, + "learning_rate": 7.3487995495443765e-09, + "logits/chosen": -1.9430499076843262, + "logits/rejected": -1.9417861700057983, + "logps/chosen": -0.09413612633943558, + "logps/rejected": -144.21726989746094, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005421753507107496, + "rewards/margins": 4.139594078063965, + "rewards/rejected": -4.145015716552734, + "step": 14271 + }, + { + "epoch": 0.83, + "learning_rate": 7.343882175998917e-09, + "logits/chosen": -2.0899529457092285, + "logits/rejected": -2.09616756439209, + "logps/chosen": -30.76587677001953, + "logps/rejected": -159.11997985839844, + "loss": 0.2167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1649742126464844, + "rewards/margins": 1.4226691722869873, + "rewards/rejected": -0.2576950192451477, + "step": 14272 + }, + { + "epoch": 0.83, + "learning_rate": 7.338966317807405e-09, + "logits/chosen": -1.963848352432251, + "logits/rejected": -1.9620130062103271, + "logps/chosen": -26.06651496887207, + "logps/rejected": -171.3217315673828, + "loss": 0.1543, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4043524265289307, + "rewards/margins": 2.16324520111084, + "rewards/rejected": -0.758892834186554, + "step": 14273 + }, + { + "epoch": 0.83, + "learning_rate": 7.334051975144495e-09, + "logits/chosen": -1.7155178785324097, + "logits/rejected": -1.7403169870376587, + "logps/chosen": -218.18472290039062, + "logps/rejected": -446.05718994140625, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9966766834259033, + "rewards/margins": 3.862912178039551, + "rewards/rejected": -1.866235375404358, + "step": 14274 + }, + { + "epoch": 0.83, + "learning_rate": 7.329139148184738e-09, + "logits/chosen": -2.1437361240386963, + "logits/rejected": -2.139472484588623, + "logps/chosen": -10.25755500793457, + "logps/rejected": -238.73764038085938, + "loss": 0.2124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5461151003837585, + "rewards/margins": 3.2375824451446533, + "rewards/rejected": -2.69146728515625, + "step": 14275 + }, + { + "epoch": 0.83, + "learning_rate": 7.324227837102709e-09, + "logits/chosen": -1.9768626689910889, + "logits/rejected": -1.97639000415802, + "logps/chosen": -0.06259404122829437, + "logps/rejected": -126.71817779541016, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006061436142772436, + "rewards/margins": 3.0250794887542725, + "rewards/rejected": -3.0311410427093506, + "step": 14276 + }, + { + "epoch": 0.83, + "learning_rate": 7.319318042072842e-09, + "logits/chosen": -1.8803781270980835, + "logits/rejected": -1.868607759475708, + "logps/chosen": -31.019813537597656, + "logps/rejected": -283.7041015625, + "loss": 0.1232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2803668975830078, + "rewards/margins": 3.623201370239258, + "rewards/rejected": -2.34283447265625, + "step": 14277 + }, + { + "epoch": 0.83, + "learning_rate": 7.314409763269574e-09, + "logits/chosen": -1.8141731023788452, + "logits/rejected": -1.8600777387619019, + "logps/chosen": -236.30584716796875, + "logps/rejected": -214.06414794921875, + "loss": 0.1614, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.117352247238159, + "rewards/margins": 1.1248106956481934, + "rewards/rejected": 0.992541491985321, + "step": 14278 + }, + { + "epoch": 0.83, + "learning_rate": 7.309503000867268e-09, + "logits/chosen": -1.6877963542938232, + "logits/rejected": -1.669179081916809, + "logps/chosen": -221.2451629638672, + "logps/rejected": -368.482421875, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.426161289215088, + "rewards/margins": 4.370420932769775, + "rewards/rejected": -1.9442596435546875, + "step": 14279 + }, + { + "epoch": 0.83, + "learning_rate": 7.304597755040243e-09, + "logits/chosen": -1.9012627601623535, + "logits/rejected": -1.894261360168457, + "logps/chosen": -17.545217514038086, + "logps/rejected": -274.3645324707031, + "loss": 0.2445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2957853376865387, + "rewards/margins": 4.081637382507324, + "rewards/rejected": -3.7858521938323975, + "step": 14280 + }, + { + "epoch": 0.83, + "learning_rate": 7.299694025962755e-09, + "logits/chosen": -2.0190510749816895, + "logits/rejected": -2.021294355392456, + "logps/chosen": -5.119000434875488, + "logps/rejected": -214.63323974609375, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056131746619939804, + "rewards/margins": 3.7719779014587402, + "rewards/rejected": -3.8281097412109375, + "step": 14281 + }, + { + "epoch": 0.83, + "learning_rate": 7.294791813808998e-09, + "logits/chosen": -1.7460861206054688, + "logits/rejected": -1.8033853769302368, + "logps/chosen": -186.095703125, + "logps/rejected": -371.8796081542969, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.953662157058716, + "rewards/margins": 4.988741874694824, + "rewards/rejected": -2.0350799560546875, + "step": 14282 + }, + { + "epoch": 0.83, + "learning_rate": 7.289891118753128e-09, + "logits/chosen": -1.8262929916381836, + "logits/rejected": -1.8207305669784546, + "logps/chosen": -28.85769271850586, + "logps/rejected": -147.59103393554688, + "loss": 0.2459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4930644929409027, + "rewards/margins": 2.3188259601593018, + "rewards/rejected": -1.8257614374160767, + "step": 14283 + }, + { + "epoch": 0.83, + "learning_rate": 7.284991940969243e-09, + "logits/chosen": -1.725504755973816, + "logits/rejected": -1.7508758306503296, + "logps/chosen": -243.16598510742188, + "logps/rejected": -489.933837890625, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.562426805496216, + "rewards/margins": 5.864935398101807, + "rewards/rejected": -3.302508592605591, + "step": 14284 + }, + { + "epoch": 0.83, + "learning_rate": 7.280094280631399e-09, + "logits/chosen": -1.727960467338562, + "logits/rejected": -1.7081011533737183, + "logps/chosen": -200.2186279296875, + "logps/rejected": -354.0194091796875, + "loss": 0.1011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6761443614959717, + "rewards/margins": 1.5526549816131592, + "rewards/rejected": 1.1234893798828125, + "step": 14285 + }, + { + "epoch": 0.83, + "learning_rate": 7.275198137913563e-09, + "logits/chosen": -2.027132272720337, + "logits/rejected": -2.0311663150787354, + "logps/chosen": -9.12733268737793, + "logps/rejected": -96.05915832519531, + "loss": 0.3729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10046444088220596, + "rewards/margins": 2.209465503692627, + "rewards/rejected": -2.1090011596679688, + "step": 14286 + }, + { + "epoch": 0.83, + "learning_rate": 7.270303512989673e-09, + "logits/chosen": -1.7862776517868042, + "logits/rejected": -1.7954957485198975, + "logps/chosen": -0.0020908089354634285, + "logps/rejected": -181.59487915039062, + "loss": 0.4654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014309743710327893, + "rewards/margins": 1.2521427869796753, + "rewards/rejected": -1.2522858381271362, + "step": 14287 + }, + { + "epoch": 0.83, + "learning_rate": 7.265410406033618e-09, + "logits/chosen": -1.907589316368103, + "logits/rejected": -1.9075961112976074, + "logps/chosen": -45.63062286376953, + "logps/rejected": -260.15740966796875, + "loss": 0.1833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2300834655761719, + "rewards/margins": 1.578966498374939, + "rewards/rejected": -0.3488830626010895, + "step": 14288 + }, + { + "epoch": 0.83, + "learning_rate": 7.260518817219235e-09, + "logits/chosen": -1.6507201194763184, + "logits/rejected": -1.6493315696716309, + "logps/chosen": -28.665637969970703, + "logps/rejected": -176.42547607421875, + "loss": 0.2274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5308731198310852, + "rewards/margins": 2.6837494373321533, + "rewards/rejected": -2.152876377105713, + "step": 14289 + }, + { + "epoch": 0.83, + "learning_rate": 7.25562874672026e-09, + "logits/chosen": -1.9903472661972046, + "logits/rejected": -1.985973834991455, + "logps/chosen": -9.885716438293457, + "logps/rejected": -191.8325653076172, + "loss": 0.4203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08697757869958878, + "rewards/margins": 1.4202371835708618, + "rewards/rejected": -1.3332595825195312, + "step": 14290 + }, + { + "epoch": 0.83, + "learning_rate": 7.250740194710464e-09, + "logits/chosen": -1.9572230577468872, + "logits/rejected": -1.9540389776229858, + "logps/chosen": -11.06424331665039, + "logps/rejected": -146.6336669921875, + "loss": 0.3941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06539440155029297, + "rewards/margins": 1.7731962203979492, + "rewards/rejected": -1.7078018188476562, + "step": 14291 + }, + { + "epoch": 0.83, + "learning_rate": 7.2458531613634684e-09, + "logits/chosen": -2.124861001968384, + "logits/rejected": -2.1144018173217773, + "logps/chosen": -34.05466079711914, + "logps/rejected": -273.5198669433594, + "loss": 0.1544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7008296847343445, + "rewards/margins": 3.647704839706421, + "rewards/rejected": -2.9468750953674316, + "step": 14292 + }, + { + "epoch": 0.83, + "learning_rate": 7.240967646852919e-09, + "logits/chosen": -2.144207239151001, + "logits/rejected": -2.1372768878936768, + "logps/chosen": -17.344846725463867, + "logps/rejected": -127.34176635742188, + "loss": 0.3681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.65194171667099, + "rewards/margins": 0.939002275466919, + "rewards/rejected": -0.28706055879592896, + "step": 14293 + }, + { + "epoch": 0.83, + "learning_rate": 7.236083651352326e-09, + "logits/chosen": -2.0331101417541504, + "logits/rejected": -2.031738042831421, + "logps/chosen": -22.584304809570312, + "logps/rejected": -279.433349609375, + "loss": 0.5177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5505703091621399, + "rewards/margins": 2.1724767684936523, + "rewards/rejected": -2.7230470180511475, + "step": 14294 + }, + { + "epoch": 0.83, + "learning_rate": 7.231201175035251e-09, + "logits/chosen": -1.765390396118164, + "logits/rejected": -1.7314941883087158, + "logps/chosen": -179.0306854248047, + "logps/rejected": -265.73492431640625, + "loss": 0.0869, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.71629798412323, + "rewards/margins": 3.2575395107269287, + "rewards/rejected": -1.5412415266036987, + "step": 14295 + }, + { + "epoch": 0.83, + "learning_rate": 7.226320218075105e-09, + "logits/chosen": -1.7610015869140625, + "logits/rejected": -1.7491014003753662, + "logps/chosen": -45.05125045776367, + "logps/rejected": -178.46661376953125, + "loss": 0.2664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5107700228691101, + "rewards/margins": 2.700348377227783, + "rewards/rejected": -2.1895782947540283, + "step": 14296 + }, + { + "epoch": 0.83, + "learning_rate": 7.2214407806452886e-09, + "logits/chosen": -2.0780131816864014, + "logits/rejected": -2.07719349861145, + "logps/chosen": -14.180032730102539, + "logps/rejected": -243.64776611328125, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18552838265895844, + "rewards/margins": 4.298667907714844, + "rewards/rejected": -4.113139629364014, + "step": 14297 + }, + { + "epoch": 0.83, + "learning_rate": 7.2165628629191515e-09, + "logits/chosen": -1.9914973974227905, + "logits/rejected": -2.016460418701172, + "logps/chosen": -170.06024169921875, + "logps/rejected": -396.71331787109375, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7027374505996704, + "rewards/margins": 4.109286785125732, + "rewards/rejected": -2.4065492153167725, + "step": 14298 + }, + { + "epoch": 0.83, + "learning_rate": 7.211686465069977e-09, + "logits/chosen": -2.082085371017456, + "logits/rejected": -2.059614658355713, + "logps/chosen": -0.23686014115810394, + "logps/rejected": -204.70022583007812, + "loss": 0.418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004621528089046478, + "rewards/margins": 1.7830151319503784, + "rewards/rejected": -1.7783936262130737, + "step": 14299 + }, + { + "epoch": 0.83, + "learning_rate": 7.206811587271011e-09, + "logits/chosen": -1.9332668781280518, + "logits/rejected": -1.8608038425445557, + "logps/chosen": -215.53549194335938, + "logps/rejected": -366.827392578125, + "loss": 0.174, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.038630723953247, + "rewards/margins": 1.3668899536132812, + "rewards/rejected": 0.671740710735321, + "step": 14300 + }, + { + "epoch": 0.83, + "learning_rate": 7.201938229695409e-09, + "logits/chosen": -1.9854702949523926, + "logits/rejected": -1.978680968284607, + "logps/chosen": -0.010651220567524433, + "logps/rejected": -181.18460083007812, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005782960797660053, + "rewards/margins": 5.271629333496094, + "rewards/rejected": -5.272207736968994, + "step": 14301 + }, + { + "epoch": 0.83, + "learning_rate": 7.1970663925163096e-09, + "logits/chosen": -2.086610794067383, + "logits/rejected": -2.07560396194458, + "logps/chosen": -15.452520370483398, + "logps/rejected": -234.93954467773438, + "loss": 0.1645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0461100339889526, + "rewards/margins": 2.9494686126708984, + "rewards/rejected": -1.9033584594726562, + "step": 14302 + }, + { + "epoch": 0.83, + "learning_rate": 7.192196075906787e-09, + "logits/chosen": -1.946276307106018, + "logits/rejected": -1.9463225603103638, + "logps/chosen": -4.8251519203186035, + "logps/rejected": -45.357757568359375, + "loss": 0.5297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018046094104647636, + "rewards/margins": 0.5345934629440308, + "rewards/rejected": -0.5165473818778992, + "step": 14303 + }, + { + "epoch": 0.83, + "learning_rate": 7.187327280039862e-09, + "logits/chosen": -1.6202925443649292, + "logits/rejected": -1.6080470085144043, + "logps/chosen": -18.911449432373047, + "logps/rejected": -109.89073181152344, + "loss": 0.5085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09446258842945099, + "rewards/margins": 0.8121612668037415, + "rewards/rejected": -0.7176986932754517, + "step": 14304 + }, + { + "epoch": 0.83, + "learning_rate": 7.1824600050884806e-09, + "logits/chosen": -1.9194276332855225, + "logits/rejected": -1.923193335533142, + "logps/chosen": -12.27359676361084, + "logps/rejected": -257.3752136230469, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2301138937473297, + "rewards/margins": 2.318319082260132, + "rewards/rejected": -2.088205099105835, + "step": 14305 + }, + { + "epoch": 0.83, + "learning_rate": 7.177594251225566e-09, + "logits/chosen": -2.100827217102051, + "logits/rejected": -2.161046266555786, + "logps/chosen": -185.77003479003906, + "logps/rejected": -398.586181640625, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.361384630203247, + "rewards/margins": 6.259981155395508, + "rewards/rejected": -3.8985962867736816, + "step": 14306 + }, + { + "epoch": 0.83, + "learning_rate": 7.172730018623968e-09, + "logits/chosen": -1.970306158065796, + "logits/rejected": -2.0304551124572754, + "logps/chosen": -227.49551391601562, + "logps/rejected": -282.9539794921875, + "loss": 0.0568, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0538361072540283, + "rewards/margins": 2.639004707336426, + "rewards/rejected": -0.5851684808731079, + "step": 14307 + }, + { + "epoch": 0.83, + "learning_rate": 7.167867307456505e-09, + "logits/chosen": -1.773085355758667, + "logits/rejected": -1.770463228225708, + "logps/chosen": -148.58338928222656, + "logps/rejected": -485.9267883300781, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4625473022460938, + "rewards/margins": 8.414339065551758, + "rewards/rejected": -5.951791286468506, + "step": 14308 + }, + { + "epoch": 0.83, + "learning_rate": 7.163006117895887e-09, + "logits/chosen": -1.8320859670639038, + "logits/rejected": -1.8330482244491577, + "logps/chosen": -197.87075805664062, + "logps/rejected": -606.20068359375, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.26544189453125, + "rewards/margins": 8.730725288391113, + "rewards/rejected": -6.465283393859863, + "step": 14309 + }, + { + "epoch": 0.83, + "learning_rate": 7.158146450114849e-09, + "logits/chosen": -1.8602041006088257, + "logits/rejected": -1.8540762662887573, + "logps/chosen": -155.27487182617188, + "logps/rejected": -347.44354248046875, + "loss": 0.1726, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9578217267990112, + "rewards/margins": 1.3935577869415283, + "rewards/rejected": 0.5642639398574829, + "step": 14310 + }, + { + "epoch": 0.83, + "learning_rate": 7.153288304286004e-09, + "logits/chosen": -1.8612279891967773, + "logits/rejected": -1.8508796691894531, + "logps/chosen": -133.89358520507812, + "logps/rejected": -290.42236328125, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1465699672698975, + "rewards/margins": 3.5065431594848633, + "rewards/rejected": -1.3599731922149658, + "step": 14311 + }, + { + "epoch": 0.83, + "learning_rate": 7.148431680581951e-09, + "logits/chosen": -2.026012897491455, + "logits/rejected": -2.0330052375793457, + "logps/chosen": -220.05337524414062, + "logps/rejected": -390.562744140625, + "loss": 0.1019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8322784304618835, + "rewards/margins": 2.2767181396484375, + "rewards/rejected": -1.4444397687911987, + "step": 14312 + }, + { + "epoch": 0.83, + "learning_rate": 7.143576579175192e-09, + "logits/chosen": -1.8723851442337036, + "logits/rejected": -1.8702930212020874, + "logps/chosen": -29.574539184570312, + "logps/rejected": -383.294189453125, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46233293414115906, + "rewards/margins": 4.69665002822876, + "rewards/rejected": -4.234317302703857, + "step": 14313 + }, + { + "epoch": 0.83, + "learning_rate": 7.138723000238256e-09, + "logits/chosen": -2.1531291007995605, + "logits/rejected": -2.1523282527923584, + "logps/chosen": -7.126061916351318, + "logps/rejected": -154.53892517089844, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01249232329428196, + "rewards/margins": 2.8973968029022217, + "rewards/rejected": -2.9098892211914062, + "step": 14314 + }, + { + "epoch": 0.83, + "learning_rate": 7.133870943943515e-09, + "logits/chosen": -1.9099466800689697, + "logits/rejected": -1.8937783241271973, + "logps/chosen": -166.8829803466797, + "logps/rejected": -218.426025390625, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6035079956054688, + "rewards/margins": 0.7810623049736023, + "rewards/rejected": 0.8224456906318665, + "step": 14315 + }, + { + "epoch": 0.83, + "learning_rate": 7.129020410463365e-09, + "logits/chosen": -1.6469616889953613, + "logits/rejected": -1.6350897550582886, + "logps/chosen": -34.22806930541992, + "logps/rejected": -296.13116455078125, + "loss": 0.1959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7428417205810547, + "rewards/margins": 5.533511161804199, + "rewards/rejected": -4.7906694412231445, + "step": 14316 + }, + { + "epoch": 0.83, + "learning_rate": 7.124171399970114e-09, + "logits/chosen": -2.0031349658966064, + "logits/rejected": -1.994983196258545, + "logps/chosen": -35.577850341796875, + "logps/rejected": -226.97828674316406, + "loss": 0.4037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048752594739198685, + "rewards/margins": 2.171497344970703, + "rewards/rejected": -2.220249891281128, + "step": 14317 + }, + { + "epoch": 0.83, + "learning_rate": 7.1193239126360216e-09, + "logits/chosen": -1.8925803899765015, + "logits/rejected": -1.8786927461624146, + "logps/chosen": -143.6112060546875, + "logps/rejected": -340.6271667480469, + "loss": 0.2319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0370330810546875, + "rewards/margins": 2.2830231189727783, + "rewards/rejected": -2.320056200027466, + "step": 14318 + }, + { + "epoch": 0.83, + "learning_rate": 7.1144779486333125e-09, + "logits/chosen": -1.9860782623291016, + "logits/rejected": -1.9745612144470215, + "logps/chosen": -5.914344787597656, + "logps/rejected": -134.44090270996094, + "loss": 0.4981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.057608701288700104, + "rewards/margins": 0.8966224193572998, + "rewards/rejected": -0.8390136957168579, + "step": 14319 + }, + { + "epoch": 0.83, + "learning_rate": 7.109633508134106e-09, + "logits/chosen": -1.8617528676986694, + "logits/rejected": -1.87066650390625, + "logps/chosen": -215.4126434326172, + "logps/rejected": -345.75421142578125, + "loss": 0.0408, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6316680908203125, + "rewards/margins": 3.1681365966796875, + "rewards/rejected": -1.536468505859375, + "step": 14320 + }, + { + "epoch": 0.83, + "learning_rate": 7.1047905913105165e-09, + "logits/chosen": -1.9058637619018555, + "logits/rejected": -1.8928651809692383, + "logps/chosen": -34.13456344604492, + "logps/rejected": -240.98684692382812, + "loss": 0.2226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41026267409324646, + "rewards/margins": 4.549170970916748, + "rewards/rejected": -4.138908386230469, + "step": 14321 + }, + { + "epoch": 0.83, + "learning_rate": 7.099949198334593e-09, + "logits/chosen": -1.9952795505523682, + "logits/rejected": -1.9884541034698486, + "logps/chosen": -0.00044910862925462425, + "logps/rejected": -182.89590454101562, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00030197910382412374, + "rewards/margins": 5.136274337768555, + "rewards/rejected": -5.135972499847412, + "step": 14322 + }, + { + "epoch": 0.83, + "learning_rate": 7.0951093293783295e-09, + "logits/chosen": -1.9306697845458984, + "logits/rejected": -1.9040027856826782, + "logps/chosen": -234.599853515625, + "logps/rejected": -358.05401611328125, + "loss": 0.1787, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0422608852386475, + "rewards/margins": 0.8967561721801758, + "rewards/rejected": 2.1455047130584717, + "step": 14323 + }, + { + "epoch": 0.83, + "learning_rate": 7.090270984613639e-09, + "logits/chosen": -2.1683237552642822, + "logits/rejected": -2.158186912536621, + "logps/chosen": -5.661007404327393, + "logps/rejected": -100.81986999511719, + "loss": 0.5902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06139722093939781, + "rewards/margins": 0.3853764235973358, + "rewards/rejected": -0.3239791989326477, + "step": 14324 + }, + { + "epoch": 0.83, + "learning_rate": 7.085434164212417e-09, + "logits/chosen": -1.9858529567718506, + "logits/rejected": -1.9878557920455933, + "logps/chosen": -51.79753112792969, + "logps/rejected": -167.24014282226562, + "loss": 0.2117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3211311399936676, + "rewards/margins": 4.225363731384277, + "rewards/rejected": -3.9042327404022217, + "step": 14325 + }, + { + "epoch": 0.83, + "learning_rate": 7.080598868346493e-09, + "logits/chosen": -1.8527402877807617, + "logits/rejected": -1.860845685005188, + "logps/chosen": -210.57107543945312, + "logps/rejected": -376.9805603027344, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4230453968048096, + "rewards/margins": 3.885667324066162, + "rewards/rejected": -1.462622046470642, + "step": 14326 + }, + { + "epoch": 0.83, + "learning_rate": 7.0757650971876525e-09, + "logits/chosen": -2.039442539215088, + "logits/rejected": -2.0405991077423096, + "logps/chosen": -144.30972290039062, + "logps/rejected": -432.8974609375, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9586029052734375, + "rewards/margins": 3.066220283508301, + "rewards/rejected": -1.1076172590255737, + "step": 14327 + }, + { + "epoch": 0.83, + "learning_rate": 7.0709328509075794e-09, + "logits/chosen": -1.884398102760315, + "logits/rejected": -1.8810464143753052, + "logps/chosen": -29.269258499145508, + "logps/rejected": -121.41950988769531, + "loss": 0.4771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1609300673007965, + "rewards/margins": 1.4262586832046509, + "rewards/rejected": -1.587188720703125, + "step": 14328 + }, + { + "epoch": 0.83, + "learning_rate": 7.066102129677981e-09, + "logits/chosen": -1.8941192626953125, + "logits/rejected": -1.8961622714996338, + "logps/chosen": -24.498336791992188, + "logps/rejected": -192.24642944335938, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014771461486816406, + "rewards/margins": 3.5509023666381836, + "rewards/rejected": -3.565673828125, + "step": 14329 + }, + { + "epoch": 0.83, + "learning_rate": 7.061272933670431e-09, + "logits/chosen": -2.0829520225524902, + "logits/rejected": -2.0777924060821533, + "logps/chosen": -14.908784866333008, + "logps/rejected": -181.12203979492188, + "loss": 0.41, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025507068261504173, + "rewards/margins": 1.7973241806030273, + "rewards/rejected": -1.7718170881271362, + "step": 14330 + }, + { + "epoch": 0.83, + "learning_rate": 7.056445263056521e-09, + "logits/chosen": -1.8425500392913818, + "logits/rejected": -1.8451178073883057, + "logps/chosen": -173.56101989746094, + "logps/rejected": -256.42156982421875, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9275223016738892, + "rewards/margins": 0.6129196286201477, + "rewards/rejected": 0.31460267305374146, + "step": 14331 + }, + { + "epoch": 0.83, + "learning_rate": 7.0516191180077064e-09, + "logits/chosen": -1.9659732580184937, + "logits/rejected": -1.9640260934829712, + "logps/chosen": -20.732046127319336, + "logps/rejected": -65.60305786132812, + "loss": 0.4, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4017084240913391, + "rewards/margins": 0.7595399618148804, + "rewards/rejected": -0.35783156752586365, + "step": 14332 + }, + { + "epoch": 0.83, + "learning_rate": 7.046794498695491e-09, + "logits/chosen": -1.9850610494613647, + "logits/rejected": -1.9757914543151855, + "logps/chosen": -54.45378875732422, + "logps/rejected": -290.93353271484375, + "loss": 0.1096, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3186959028244019, + "rewards/margins": 3.451068878173828, + "rewards/rejected": -2.132373094558716, + "step": 14333 + }, + { + "epoch": 0.83, + "learning_rate": 7.041971405291225e-09, + "logits/chosen": -1.9669207334518433, + "logits/rejected": -1.9745315313339233, + "logps/chosen": -53.09205627441406, + "logps/rejected": -224.87408447265625, + "loss": 0.4862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1714450865983963, + "rewards/margins": 0.35665321350097656, + "rewards/rejected": -0.18520812690258026, + "step": 14334 + }, + { + "epoch": 0.83, + "learning_rate": 7.037149837966283e-09, + "logits/chosen": -1.9551714658737183, + "logits/rejected": -1.9560714960098267, + "logps/chosen": -49.39942169189453, + "logps/rejected": -251.03350830078125, + "loss": 0.3318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09249573200941086, + "rewards/margins": 2.0024993419647217, + "rewards/rejected": -1.910003662109375, + "step": 14335 + }, + { + "epoch": 0.83, + "learning_rate": 7.032329796891906e-09, + "logits/chosen": -1.9406630992889404, + "logits/rejected": -1.9449901580810547, + "logps/chosen": -65.22884368896484, + "logps/rejected": -172.75314331054688, + "loss": 0.2194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5879188776016235, + "rewards/margins": 2.205768585205078, + "rewards/rejected": -1.6178497076034546, + "step": 14336 + }, + { + "epoch": 0.83, + "learning_rate": 7.027511282239379e-09, + "logits/chosen": -2.0302233695983887, + "logits/rejected": -2.0152664184570312, + "logps/chosen": -66.9175796508789, + "logps/rejected": -242.48541259765625, + "loss": 0.0921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1172256469726562, + "rewards/margins": 5.159999370574951, + "rewards/rejected": -4.042773723602295, + "step": 14337 + }, + { + "epoch": 0.83, + "learning_rate": 7.022694294179838e-09, + "logits/chosen": -1.7193645238876343, + "logits/rejected": -1.6810338497161865, + "logps/chosen": -196.8560333251953, + "logps/rejected": -412.17742919921875, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2620407342910767, + "rewards/margins": 5.025465488433838, + "rewards/rejected": -3.7634246349334717, + "step": 14338 + }, + { + "epoch": 0.83, + "learning_rate": 7.017878832884416e-09, + "logits/chosen": -1.815505862236023, + "logits/rejected": -1.801979660987854, + "logps/chosen": -0.004823511466383934, + "logps/rejected": -176.25103759765625, + "loss": 0.4953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047258814447559416, + "rewards/margins": 0.962985634803772, + "rewards/rejected": -0.963458240032196, + "step": 14339 + }, + { + "epoch": 0.83, + "learning_rate": 7.013064898524196e-09, + "logits/chosen": -1.8335267305374146, + "logits/rejected": -1.8417558670043945, + "logps/chosen": -78.88298034667969, + "logps/rejected": -291.04742431640625, + "loss": 0.1165, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.284676432609558, + "rewards/margins": 5.651710033416748, + "rewards/rejected": -4.3670334815979, + "step": 14340 + }, + { + "epoch": 0.83, + "learning_rate": 7.0082524912701724e-09, + "logits/chosen": -1.8387019634246826, + "logits/rejected": -1.824465036392212, + "logps/chosen": -194.90684509277344, + "logps/rejected": -255.29693603515625, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9972060918807983, + "rewards/margins": 0.16619408130645752, + "rewards/rejected": 1.8310120105743408, + "step": 14341 + }, + { + "epoch": 0.83, + "learning_rate": 7.003441611293337e-09, + "logits/chosen": -1.5509790182113647, + "logits/rejected": -1.5533894300460815, + "logps/chosen": -187.86477661132812, + "logps/rejected": -232.50624084472656, + "loss": 0.4098, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5348678827285767, + "rewards/margins": -0.14180147647857666, + "rewards/rejected": 1.6766693592071533, + "step": 14342 + }, + { + "epoch": 0.83, + "learning_rate": 6.998632258764553e-09, + "logits/chosen": -1.9195371866226196, + "logits/rejected": -1.9187748432159424, + "logps/chosen": -5.013914585113525, + "logps/rejected": -106.29869842529297, + "loss": 0.6656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07306122779846191, + "rewards/margins": 0.1134309321641922, + "rewards/rejected": -0.1864921599626541, + "step": 14343 + }, + { + "epoch": 0.83, + "learning_rate": 6.9938244338547e-09, + "logits/chosen": -1.565971851348877, + "logits/rejected": -1.5906709432601929, + "logps/chosen": -211.8243408203125, + "logps/rejected": -297.59423828125, + "loss": 0.0926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9217544794082642, + "rewards/margins": 1.8586746454238892, + "rewards/rejected": 0.063079833984375, + "step": 14344 + }, + { + "epoch": 0.83, + "learning_rate": 6.9890181367345704e-09, + "logits/chosen": -1.7956756353378296, + "logits/rejected": -1.7961896657943726, + "logps/chosen": -130.63272094726562, + "logps/rejected": -309.4729919433594, + "loss": 0.1793, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2054200172424316, + "rewards/margins": 1.0612274408340454, + "rewards/rejected": 1.1441925764083862, + "step": 14345 + }, + { + "epoch": 0.83, + "learning_rate": 6.984213367574915e-09, + "logits/chosen": -2.0481245517730713, + "logits/rejected": -2.0518558025360107, + "logps/chosen": -90.1548843383789, + "logps/rejected": -272.69171142578125, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.003063201904297, + "rewards/margins": 3.6354761123657227, + "rewards/rejected": -1.6324127912521362, + "step": 14346 + }, + { + "epoch": 0.83, + "learning_rate": 6.979410126546403e-09, + "logits/chosen": -1.8703515529632568, + "logits/rejected": -1.8655426502227783, + "logps/chosen": -278.91473388671875, + "logps/rejected": -654.053955078125, + "loss": 0.0479, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.569555640220642, + "rewards/margins": 7.944635391235352, + "rewards/rejected": -6.37507963180542, + "step": 14347 + }, + { + "epoch": 0.83, + "learning_rate": 6.974608413819677e-09, + "logits/chosen": -1.7281655073165894, + "logits/rejected": -1.730412244796753, + "logps/chosen": -61.30308532714844, + "logps/rejected": -209.87538146972656, + "loss": 0.935, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5792804956436157, + "rewards/margins": 2.4471678733825684, + "rewards/rejected": -4.0264482498168945, + "step": 14348 + }, + { + "epoch": 0.84, + "learning_rate": 6.969808229565322e-09, + "logits/chosen": -1.8930000066757202, + "logits/rejected": -1.8818374872207642, + "logps/chosen": -36.79136657714844, + "logps/rejected": -258.87969970703125, + "loss": 0.2694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3518756926059723, + "rewards/margins": 4.322972297668457, + "rewards/rejected": -3.9710967540740967, + "step": 14349 + }, + { + "epoch": 0.84, + "learning_rate": 6.965009573953873e-09, + "logits/chosen": -1.6903269290924072, + "logits/rejected": -1.6934521198272705, + "logps/chosen": -5.392768383026123, + "logps/rejected": -89.84385681152344, + "loss": 0.7342, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2740742266178131, + "rewards/margins": -0.3617030680179596, + "rewards/rejected": 0.6357772946357727, + "step": 14350 + }, + { + "epoch": 0.84, + "learning_rate": 6.960212447155767e-09, + "logits/chosen": -1.669199824333191, + "logits/rejected": -1.6689364910125732, + "logps/chosen": -1.8465200662612915, + "logps/rejected": -43.22344207763672, + "loss": 0.487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05035508796572685, + "rewards/margins": 0.970190167427063, + "rewards/rejected": -1.020545244216919, + "step": 14351 + }, + { + "epoch": 0.84, + "learning_rate": 6.955416849341472e-09, + "logits/chosen": -1.7793041467666626, + "logits/rejected": -1.7868574857711792, + "logps/chosen": -195.20188903808594, + "logps/rejected": -270.5450439453125, + "loss": 0.0469, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.078425645828247, + "rewards/margins": 2.673017978668213, + "rewards/rejected": -0.594592273235321, + "step": 14352 + }, + { + "epoch": 0.84, + "learning_rate": 6.950622780681309e-09, + "logits/chosen": -1.5994747877120972, + "logits/rejected": -1.6082701683044434, + "logps/chosen": -10.627744674682617, + "logps/rejected": -242.18539428710938, + "loss": 0.4278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12512493133544922, + "rewards/margins": 3.0331575870513916, + "rewards/rejected": -3.158282518386841, + "step": 14353 + }, + { + "epoch": 0.84, + "learning_rate": 6.945830241345618e-09, + "logits/chosen": -1.737702488899231, + "logits/rejected": -1.733883261680603, + "logps/chosen": -7.03633975982666, + "logps/rejected": -106.04283142089844, + "loss": 0.3707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2789725363254547, + "rewards/margins": 1.6699614524841309, + "rewards/rejected": -1.3909889459609985, + "step": 14354 + }, + { + "epoch": 0.84, + "learning_rate": 6.941039231504609e-09, + "logits/chosen": -1.833333969116211, + "logits/rejected": -1.8003615140914917, + "logps/chosen": -191.93991088867188, + "logps/rejected": -371.80316162109375, + "loss": 0.07, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.443734884262085, + "rewards/margins": 2.6198854446411133, + "rewards/rejected": -0.17615051567554474, + "step": 14355 + }, + { + "epoch": 0.84, + "learning_rate": 6.9362497513285375e-09, + "logits/chosen": -1.8527802228927612, + "logits/rejected": -1.8549413681030273, + "logps/chosen": -71.03792572021484, + "logps/rejected": -261.19659423828125, + "loss": 0.1231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1027015447616577, + "rewards/margins": 5.457477569580078, + "rewards/rejected": -4.354775905609131, + "step": 14356 + }, + { + "epoch": 0.84, + "learning_rate": 6.931461800987509e-09, + "logits/chosen": -1.9138396978378296, + "logits/rejected": -1.913982629776001, + "logps/chosen": -28.31962013244629, + "logps/rejected": -247.5109405517578, + "loss": 0.4085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11216907948255539, + "rewards/margins": 1.9713621139526367, + "rewards/rejected": -2.083531141281128, + "step": 14357 + }, + { + "epoch": 0.84, + "learning_rate": 6.9266753806516306e-09, + "logits/chosen": -1.829305648803711, + "logits/rejected": -1.832842230796814, + "logps/chosen": -132.45037841796875, + "logps/rejected": -296.1701354980469, + "loss": 0.0825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9090454578399658, + "rewards/margins": 1.9910370111465454, + "rewards/rejected": -0.08199157565832138, + "step": 14358 + }, + { + "epoch": 0.84, + "learning_rate": 6.921890490490934e-09, + "logits/chosen": -1.7278958559036255, + "logits/rejected": -1.730635404586792, + "logps/chosen": -14.022760391235352, + "logps/rejected": -104.11952209472656, + "loss": 0.2639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8159490823745728, + "rewards/margins": 1.4703938961029053, + "rewards/rejected": -0.6544448733329773, + "step": 14359 + }, + { + "epoch": 0.84, + "learning_rate": 6.9171071306754035e-09, + "logits/chosen": -1.7848963737487793, + "logits/rejected": -1.764302134513855, + "logps/chosen": -156.1383514404297, + "logps/rejected": -238.9644317626953, + "loss": 0.3134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.541662573814392, + "rewards/margins": 0.2814497947692871, + "rewards/rejected": 1.260212779045105, + "step": 14360 + }, + { + "epoch": 0.84, + "learning_rate": 6.912325301374988e-09, + "logits/chosen": -1.6359251737594604, + "logits/rejected": -1.641981601715088, + "logps/chosen": -210.22305297851562, + "logps/rejected": -403.173828125, + "loss": 0.0926, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3715362548828125, + "rewards/margins": 1.7074096202850342, + "rewards/rejected": 0.6641265749931335, + "step": 14361 + }, + { + "epoch": 0.84, + "learning_rate": 6.907545002759524e-09, + "logits/chosen": -1.883209466934204, + "logits/rejected": -1.8846547603607178, + "logps/chosen": -43.025978088378906, + "logps/rejected": -98.46248626708984, + "loss": 0.5624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19481658935546875, + "rewards/margins": 0.36701279878616333, + "rewards/rejected": -0.1721961945295334, + "step": 14362 + }, + { + "epoch": 0.84, + "learning_rate": 6.902766234998853e-09, + "logits/chosen": -1.9794682264328003, + "logits/rejected": -1.990242838859558, + "logps/chosen": -55.00001907348633, + "logps/rejected": -257.48870849609375, + "loss": 0.1312, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0196247100830078, + "rewards/margins": 2.9826955795288086, + "rewards/rejected": -1.9630707502365112, + "step": 14363 + }, + { + "epoch": 0.84, + "learning_rate": 6.897988998262733e-09, + "logits/chosen": -1.935179352760315, + "logits/rejected": -1.9312177896499634, + "logps/chosen": -50.98318099975586, + "logps/rejected": -190.40069580078125, + "loss": 0.4478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17391549050807953, + "rewards/margins": 1.4147624969482422, + "rewards/rejected": -1.588678002357483, + "step": 14364 + }, + { + "epoch": 0.84, + "learning_rate": 6.893213292720895e-09, + "logits/chosen": -1.9864674806594849, + "logits/rejected": -1.9915778636932373, + "logps/chosen": -241.19921875, + "logps/rejected": -312.60577392578125, + "loss": 0.1119, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1062653064727783, + "rewards/margins": 1.5053253173828125, + "rewards/rejected": 1.6009399890899658, + "step": 14365 + }, + { + "epoch": 0.84, + "learning_rate": 6.888439118542966e-09, + "logits/chosen": -2.0195000171661377, + "logits/rejected": -2.040745973587036, + "logps/chosen": -151.4120635986328, + "logps/rejected": -238.71627807617188, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5873688459396362, + "rewards/margins": 2.962890625, + "rewards/rejected": -1.3755218982696533, + "step": 14366 + }, + { + "epoch": 0.84, + "learning_rate": 6.883666475898559e-09, + "logits/chosen": -1.8126760721206665, + "logits/rejected": -1.8711435794830322, + "logps/chosen": -149.76808166503906, + "logps/rejected": -347.374267578125, + "loss": 0.0829, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.14910888671875, + "rewards/margins": 2.9959840774536133, + "rewards/rejected": -1.8468750715255737, + "step": 14367 + }, + { + "epoch": 0.84, + "learning_rate": 6.878895364957221e-09, + "logits/chosen": -1.8054975271224976, + "logits/rejected": -1.8347996473312378, + "logps/chosen": -199.02804565429688, + "logps/rejected": -413.634765625, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.260650634765625, + "rewards/margins": 3.448089599609375, + "rewards/rejected": -1.18743896484375, + "step": 14368 + }, + { + "epoch": 0.84, + "learning_rate": 6.874125785888463e-09, + "logits/chosen": -1.7533425092697144, + "logits/rejected": -1.7864148616790771, + "logps/chosen": -144.20631408691406, + "logps/rejected": -275.2417907714844, + "loss": 0.1457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.578334093093872, + "rewards/margins": 1.336879014968872, + "rewards/rejected": 0.241455078125, + "step": 14369 + }, + { + "epoch": 0.84, + "learning_rate": 6.869357738861686e-09, + "logits/chosen": -1.935931921005249, + "logits/rejected": -1.9346508979797363, + "logps/chosen": -0.00016128478455357254, + "logps/rejected": -284.80078125, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.682149442989612e-06, + "rewards/margins": 7.726807117462158, + "rewards/rejected": -7.726809978485107, + "step": 14370 + }, + { + "epoch": 0.84, + "learning_rate": 6.8645912240463176e-09, + "logits/chosen": -1.9016127586364746, + "logits/rejected": -1.9046992063522339, + "logps/chosen": -2.336486977583263e-05, + "logps/rejected": -209.24993896484375, + "loss": 0.3462, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.536597644910216e-08, + "rewards/margins": 5.392086982727051, + "rewards/rejected": -5.392086982727051, + "step": 14371 + }, + { + "epoch": 0.84, + "learning_rate": 6.859826241611661e-09, + "logits/chosen": -1.8145440816879272, + "logits/rejected": -1.815524697303772, + "logps/chosen": -269.3611755371094, + "logps/rejected": -408.61236572265625, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0022614002227783, + "rewards/margins": 3.442343235015869, + "rewards/rejected": -0.44008180499076843, + "step": 14372 + }, + { + "epoch": 0.84, + "learning_rate": 6.855062791727001e-09, + "logits/chosen": -2.08254337310791, + "logits/rejected": -2.0718069076538086, + "logps/chosen": -6.6357197761535645, + "logps/rejected": -293.4593505859375, + "loss": 0.2261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4008239209651947, + "rewards/margins": 9.404637336730957, + "rewards/rejected": -9.003813743591309, + "step": 14373 + }, + { + "epoch": 0.84, + "learning_rate": 6.850300874561538e-09, + "logits/chosen": -2.013707399368286, + "logits/rejected": -2.000342607498169, + "logps/chosen": -34.135009765625, + "logps/rejected": -216.72991943359375, + "loss": 0.4058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2566997706890106, + "rewards/margins": 3.2339587211608887, + "rewards/rejected": -3.4906585216522217, + "step": 14374 + }, + { + "epoch": 0.84, + "learning_rate": 6.84554049028448e-09, + "logits/chosen": -1.9971064329147339, + "logits/rejected": -2.012127637863159, + "logps/chosen": -229.98764038085938, + "logps/rejected": -324.06365966796875, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9251067638397217, + "rewards/margins": 2.6292572021484375, + "rewards/rejected": 0.29584962129592896, + "step": 14375 + }, + { + "epoch": 0.84, + "learning_rate": 6.840781639064896e-09, + "logits/chosen": -1.8851861953735352, + "logits/rejected": -1.8830666542053223, + "logps/chosen": -196.84768676757812, + "logps/rejected": -340.96051025390625, + "loss": 0.0746, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.224043369293213, + "rewards/margins": 2.537257432937622, + "rewards/rejected": -0.31321412324905396, + "step": 14376 + }, + { + "epoch": 0.84, + "learning_rate": 6.836024321071881e-09, + "logits/chosen": -1.8868513107299805, + "logits/rejected": -1.8890745639801025, + "logps/chosen": -148.99264526367188, + "logps/rejected": -294.0551452636719, + "loss": 0.0795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8312698602676392, + "rewards/margins": 3.947523593902588, + "rewards/rejected": -3.116253614425659, + "step": 14377 + }, + { + "epoch": 0.84, + "learning_rate": 6.831268536474394e-09, + "logits/chosen": -1.7329633235931396, + "logits/rejected": -1.7234805822372437, + "logps/chosen": -226.97503662109375, + "logps/rejected": -436.50860595703125, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17942504584789276, + "rewards/margins": 0.9763244986534119, + "rewards/rejected": -0.7968994379043579, + "step": 14378 + }, + { + "epoch": 0.84, + "learning_rate": 6.8265142854414255e-09, + "logits/chosen": -1.9919750690460205, + "logits/rejected": -1.9917129278182983, + "logps/chosen": -1.0551424026489258, + "logps/rejected": -305.2832946777344, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14114999771118164, + "rewards/margins": 9.401443481445312, + "rewards/rejected": -9.260293960571289, + "step": 14379 + }, + { + "epoch": 0.84, + "learning_rate": 6.821761568141859e-09, + "logits/chosen": -1.969720482826233, + "logits/rejected": -1.9733532667160034, + "logps/chosen": -50.59747314453125, + "logps/rejected": -267.0570373535156, + "loss": 0.2589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3895297944545746, + "rewards/margins": 4.04648494720459, + "rewards/rejected": -3.6569550037384033, + "step": 14380 + }, + { + "epoch": 0.84, + "learning_rate": 6.817010384744526e-09, + "logits/chosen": -1.9747802019119263, + "logits/rejected": -1.973402738571167, + "logps/chosen": -24.7415771484375, + "logps/rejected": -174.49905395507812, + "loss": 0.2966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19221840798854828, + "rewards/margins": 2.6249148845672607, + "rewards/rejected": -2.432696580886841, + "step": 14381 + }, + { + "epoch": 0.84, + "learning_rate": 6.8122607354182075e-09, + "logits/chosen": -1.8164923191070557, + "logits/rejected": -1.8101919889450073, + "logps/chosen": -0.08584635704755783, + "logps/rejected": -193.65626525878906, + "loss": 0.3675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008131659589707851, + "rewards/margins": 3.0558011531829834, + "rewards/rejected": -3.0639328956604004, + "step": 14382 + }, + { + "epoch": 0.84, + "learning_rate": 6.807512620331646e-09, + "logits/chosen": -1.6752210855484009, + "logits/rejected": -1.6745513677597046, + "logps/chosen": -54.32173538208008, + "logps/rejected": -197.76419067382812, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12745705246925354, + "rewards/margins": 2.154317855834961, + "rewards/rejected": -2.2817749977111816, + "step": 14383 + }, + { + "epoch": 0.84, + "learning_rate": 6.802766039653524e-09, + "logits/chosen": -1.9440637826919556, + "logits/rejected": -1.9360032081604004, + "logps/chosen": -20.038463592529297, + "logps/rejected": -241.4304962158203, + "loss": 0.3734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0525476448237896, + "rewards/margins": 5.89727258682251, + "rewards/rejected": -5.844725131988525, + "step": 14384 + }, + { + "epoch": 0.84, + "learning_rate": 6.798020993552439e-09, + "logits/chosen": -2.099752902984619, + "logits/rejected": -2.105470895767212, + "logps/chosen": -6.998325824737549, + "logps/rejected": -89.36143493652344, + "loss": 0.4511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13937817513942719, + "rewards/margins": 1.2170031070709229, + "rewards/rejected": -1.3563812971115112, + "step": 14385 + }, + { + "epoch": 0.84, + "learning_rate": 6.793277482196974e-09, + "logits/chosen": -1.9201699495315552, + "logits/rejected": -1.9126354455947876, + "logps/chosen": -66.81887817382812, + "logps/rejected": -169.41517639160156, + "loss": 0.2231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8307861685752869, + "rewards/margins": 1.617579698562622, + "rewards/rejected": -0.7867935299873352, + "step": 14386 + }, + { + "epoch": 0.84, + "learning_rate": 6.788535505755638e-09, + "logits/chosen": -1.7390464544296265, + "logits/rejected": -1.7380566596984863, + "logps/chosen": -0.0008004537667147815, + "logps/rejected": -152.32986450195312, + "loss": 0.4533, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.00454927230021e-06, + "rewards/margins": 1.365732192993164, + "rewards/rejected": -1.3657242059707642, + "step": 14387 + }, + { + "epoch": 0.84, + "learning_rate": 6.783795064396902e-09, + "logits/chosen": -1.9997565746307373, + "logits/rejected": -1.998042345046997, + "logps/chosen": -56.09599685668945, + "logps/rejected": -164.7045135498047, + "loss": 0.435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18136100471019745, + "rewards/margins": 2.2429141998291016, + "rewards/rejected": -2.4242751598358154, + "step": 14388 + }, + { + "epoch": 0.84, + "learning_rate": 6.779056158289148e-09, + "logits/chosen": -1.8210722208023071, + "logits/rejected": -1.8078522682189941, + "logps/chosen": -120.00447845458984, + "logps/rejected": -396.5326232910156, + "loss": 0.6951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9970383048057556, + "rewards/margins": 1.1121323108673096, + "rewards/rejected": -2.10917067527771, + "step": 14389 + }, + { + "epoch": 0.84, + "learning_rate": 6.774318787600735e-09, + "logits/chosen": -1.8815217018127441, + "logits/rejected": -1.8700926303863525, + "logps/chosen": -30.481595993041992, + "logps/rejected": -196.80316162109375, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12903766334056854, + "rewards/margins": 2.0722062587738037, + "rewards/rejected": -1.9431686401367188, + "step": 14390 + }, + { + "epoch": 0.84, + "learning_rate": 6.769582952499964e-09, + "logits/chosen": -1.9399094581604004, + "logits/rejected": -1.9381247758865356, + "logps/chosen": -18.948078155517578, + "logps/rejected": -156.34124755859375, + "loss": 0.4362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10374565422534943, + "rewards/margins": 1.2342135906219482, + "rewards/rejected": -1.3379592895507812, + "step": 14391 + }, + { + "epoch": 0.84, + "learning_rate": 6.7648486531550756e-09, + "logits/chosen": -1.824533462524414, + "logits/rejected": -1.8312582969665527, + "logps/chosen": -10.065790176391602, + "logps/rejected": -171.25526428222656, + "loss": 0.4289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01646718941628933, + "rewards/margins": 1.4089980125427246, + "rewards/rejected": -1.3925307989120483, + "step": 14392 + }, + { + "epoch": 0.84, + "learning_rate": 6.760115889734231e-09, + "logits/chosen": -1.604848861694336, + "logits/rejected": -1.6032860279083252, + "logps/chosen": -172.68035888671875, + "logps/rejected": -284.78533935546875, + "loss": 0.3368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9336441159248352, + "rewards/margins": 0.6777603626251221, + "rewards/rejected": 0.2558837831020355, + "step": 14393 + }, + { + "epoch": 0.84, + "learning_rate": 6.755384662405594e-09, + "logits/chosen": -1.9307869672775269, + "logits/rejected": -1.9294459819793701, + "logps/chosen": -13.127617835998535, + "logps/rejected": -206.10818481445312, + "loss": 0.429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1706933081150055, + "rewards/margins": 2.590555429458618, + "rewards/rejected": -2.761248826980591, + "step": 14394 + }, + { + "epoch": 0.84, + "learning_rate": 6.750654971337222e-09, + "logits/chosen": -2.0296778678894043, + "logits/rejected": -2.020554780960083, + "logps/chosen": -19.058080673217773, + "logps/rejected": -320.54180908203125, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19876861572265625, + "rewards/margins": 4.856141567230225, + "rewards/rejected": -4.657372951507568, + "step": 14395 + }, + { + "epoch": 0.84, + "learning_rate": 6.745926816697145e-09, + "logits/chosen": -1.7719494104385376, + "logits/rejected": -1.7524203062057495, + "logps/chosen": -172.47244262695312, + "logps/rejected": -234.0347137451172, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.13421630859375, + "rewards/margins": 0.37320101261138916, + "rewards/rejected": 1.7610152959823608, + "step": 14396 + }, + { + "epoch": 0.84, + "learning_rate": 6.741200198653308e-09, + "logits/chosen": -1.9068409204483032, + "logits/rejected": -1.9112673997879028, + "logps/chosen": -0.012828147038817406, + "logps/rejected": -125.06272888183594, + "loss": 0.3318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007869414403103292, + "rewards/margins": 3.8134655952453613, + "rewards/rejected": -3.8142526149749756, + "step": 14397 + }, + { + "epoch": 0.84, + "learning_rate": 6.736475117373663e-09, + "logits/chosen": -1.8539680242538452, + "logits/rejected": -1.8600986003875732, + "logps/chosen": -53.51340866088867, + "logps/rejected": -208.0991973876953, + "loss": 0.2266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6151859164237976, + "rewards/margins": 3.3054723739624023, + "rewards/rejected": -2.69028639793396, + "step": 14398 + }, + { + "epoch": 0.84, + "learning_rate": 6.73175157302604e-09, + "logits/chosen": -1.814400553703308, + "logits/rejected": -1.8129647970199585, + "logps/chosen": -0.0004202977870590985, + "logps/rejected": -69.65086364746094, + "loss": 0.6275, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.438728915876709e-05, + "rewards/margins": 0.23311617970466614, + "rewards/rejected": -0.23314057290554047, + "step": 14399 + }, + { + "epoch": 0.84, + "learning_rate": 6.727029565778242e-09, + "logits/chosen": -1.9836052656173706, + "logits/rejected": -1.9872488975524902, + "logps/chosen": -0.03533000499010086, + "logps/rejected": -255.63909912109375, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013160370290279388, + "rewards/margins": 4.542673587799072, + "rewards/rejected": -4.543989658355713, + "step": 14400 + }, + { + "epoch": 0.84, + "learning_rate": 6.722309095798029e-09, + "logits/chosen": -1.696076512336731, + "logits/rejected": -1.693448543548584, + "logps/chosen": -35.30171203613281, + "logps/rejected": -243.17388916015625, + "loss": 0.2571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5577980279922485, + "rewards/margins": 1.9356987476348877, + "rewards/rejected": -1.3779007196426392, + "step": 14401 + }, + { + "epoch": 0.84, + "learning_rate": 6.71759016325309e-09, + "logits/chosen": -1.654261589050293, + "logits/rejected": -1.6708134412765503, + "logps/chosen": -178.52874755859375, + "logps/rejected": -358.1009521484375, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7917816638946533, + "rewards/margins": 4.3162689208984375, + "rewards/rejected": -1.5244873762130737, + "step": 14402 + }, + { + "epoch": 0.84, + "learning_rate": 6.712872768311078e-09, + "logits/chosen": -1.921007513999939, + "logits/rejected": -1.9345767498016357, + "logps/chosen": -193.43521118164062, + "logps/rejected": -267.39105224609375, + "loss": 0.0715, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0528855323791504, + "rewards/margins": 2.137037754058838, + "rewards/rejected": -0.0841522216796875, + "step": 14403 + }, + { + "epoch": 0.84, + "learning_rate": 6.708156911139551e-09, + "logits/chosen": -1.9055168628692627, + "logits/rejected": -1.8847304582595825, + "logps/chosen": -162.84950256347656, + "logps/rejected": -329.92498779296875, + "loss": 0.044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.90178382396698, + "rewards/margins": 3.114732503890991, + "rewards/rejected": -1.2129486799240112, + "step": 14404 + }, + { + "epoch": 0.84, + "learning_rate": 6.703442591906055e-09, + "logits/chosen": -2.0170507431030273, + "logits/rejected": -2.017190456390381, + "logps/chosen": -194.81883239746094, + "logps/rejected": -327.68109130859375, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8332595825195312, + "rewards/margins": 4.103132724761963, + "rewards/rejected": -1.269873023033142, + "step": 14405 + }, + { + "epoch": 0.84, + "learning_rate": 6.698729810778064e-09, + "logits/chosen": -1.6404849290847778, + "logits/rejected": -1.6562151908874512, + "logps/chosen": -281.406494140625, + "logps/rejected": -460.36700439453125, + "loss": 0.0564, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5205811262130737, + "rewards/margins": 3.5185089111328125, + "rewards/rejected": -1.9979279041290283, + "step": 14406 + }, + { + "epoch": 0.84, + "learning_rate": 6.694018567923016e-09, + "logits/chosen": -1.9427103996276855, + "logits/rejected": -1.946899652481079, + "logps/chosen": -34.424888610839844, + "logps/rejected": -194.17898559570312, + "loss": 0.1758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49585723876953125, + "rewards/margins": 4.357077121734619, + "rewards/rejected": -3.861219882965088, + "step": 14407 + }, + { + "epoch": 0.84, + "learning_rate": 6.689308863508242e-09, + "logits/chosen": -2.0754501819610596, + "logits/rejected": -2.074193000793457, + "logps/chosen": -0.01648077368736267, + "logps/rejected": -287.68902587890625, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032405051751993597, + "rewards/margins": 7.926088809967041, + "rewards/rejected": -7.926413059234619, + "step": 14408 + }, + { + "epoch": 0.84, + "learning_rate": 6.684600697701076e-09, + "logits/chosen": -1.8668880462646484, + "logits/rejected": -1.8644065856933594, + "logps/chosen": -202.49183654785156, + "logps/rejected": -499.0814208984375, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6034821271896362, + "rewards/margins": 5.792608737945557, + "rewards/rejected": -4.189126491546631, + "step": 14409 + }, + { + "epoch": 0.84, + "learning_rate": 6.679894070668774e-09, + "logits/chosen": -2.0101158618927, + "logits/rejected": -2.007835865020752, + "logps/chosen": -11.487446784973145, + "logps/rejected": -160.40672302246094, + "loss": 0.4397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11667194217443466, + "rewards/margins": 1.8479337692260742, + "rewards/rejected": -1.964605689048767, + "step": 14410 + }, + { + "epoch": 0.84, + "learning_rate": 6.675188982578545e-09, + "logits/chosen": -1.8681892156600952, + "logits/rejected": -1.8722572326660156, + "logps/chosen": -0.02392388880252838, + "logps/rejected": -214.04647827148438, + "loss": 0.339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0053252787329256535, + "rewards/margins": 5.8238372802734375, + "rewards/rejected": -5.818511962890625, + "step": 14411 + }, + { + "epoch": 0.84, + "learning_rate": 6.670485433597506e-09, + "logits/chosen": -1.9772313833236694, + "logits/rejected": -1.976423978805542, + "logps/chosen": -0.0016997752245515585, + "logps/rejected": -155.74234008789062, + "loss": 0.393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008573381346650422, + "rewards/margins": 2.2829482555389404, + "rewards/rejected": -2.282090902328491, + "step": 14412 + }, + { + "epoch": 0.84, + "learning_rate": 6.6657834238927956e-09, + "logits/chosen": -1.91568124294281, + "logits/rejected": -1.918636441230774, + "logps/chosen": -46.260738372802734, + "logps/rejected": -136.58001708984375, + "loss": 0.2537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8056442141532898, + "rewards/margins": 1.3191696405410767, + "rewards/rejected": -0.5135254263877869, + "step": 14413 + }, + { + "epoch": 0.84, + "learning_rate": 6.661082953631414e-09, + "logits/chosen": -1.9399495124816895, + "logits/rejected": -1.9199999570846558, + "logps/chosen": -4.908535003662109, + "logps/rejected": -151.6282501220703, + "loss": 0.242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5886157155036926, + "rewards/margins": 2.907707452774048, + "rewards/rejected": -2.319091796875, + "step": 14414 + }, + { + "epoch": 0.84, + "learning_rate": 6.656384022980371e-09, + "logits/chosen": -2.036513328552246, + "logits/rejected": -2.0207390785217285, + "logps/chosen": -0.01887548714876175, + "logps/rejected": -125.0696029663086, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026793796569108963, + "rewards/margins": 3.1603927612304688, + "rewards/rejected": -3.133599042892456, + "step": 14415 + }, + { + "epoch": 0.84, + "learning_rate": 6.651686632106568e-09, + "logits/chosen": -1.8781834840774536, + "logits/rejected": -1.8646990060806274, + "logps/chosen": -64.27694702148438, + "logps/rejected": -346.11474609375, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02202301099896431, + "rewards/margins": 5.5094099044799805, + "rewards/rejected": -5.53143310546875, + "step": 14416 + }, + { + "epoch": 0.84, + "learning_rate": 6.646990781176909e-09, + "logits/chosen": -1.984122395515442, + "logits/rejected": -1.9726916551589966, + "logps/chosen": -8.329167366027832, + "logps/rejected": -194.724609375, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08723535388708115, + "rewards/margins": 2.7162210941314697, + "rewards/rejected": -2.628985643386841, + "step": 14417 + }, + { + "epoch": 0.84, + "learning_rate": 6.642296470358194e-09, + "logits/chosen": -1.9933655261993408, + "logits/rejected": -2.0001704692840576, + "logps/chosen": -17.131975173950195, + "logps/rejected": -87.87445068359375, + "loss": 0.6529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014199447818100452, + "rewards/margins": 0.12648582458496094, + "rewards/rejected": -0.1406852751970291, + "step": 14418 + }, + { + "epoch": 0.84, + "learning_rate": 6.6376036998171905e-09, + "logits/chosen": -2.037538528442383, + "logits/rejected": -2.033830165863037, + "logps/chosen": -34.456092834472656, + "logps/rejected": -147.6997833251953, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0068501234054565, + "rewards/margins": 0.4452778697013855, + "rewards/rejected": 0.561572253704071, + "step": 14419 + }, + { + "epoch": 0.84, + "learning_rate": 6.6329124697206144e-09, + "logits/chosen": -1.9600470066070557, + "logits/rejected": -1.950156807899475, + "logps/chosen": -80.99226379394531, + "logps/rejected": -155.3890380859375, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5685088634490967, + "rewards/margins": 2.74806809425354, + "rewards/rejected": -0.17955933511257172, + "step": 14420 + }, + { + "epoch": 0.84, + "learning_rate": 6.628222780235121e-09, + "logits/chosen": -1.8984520435333252, + "logits/rejected": -1.8888493776321411, + "logps/chosen": -86.91368103027344, + "logps/rejected": -254.1866455078125, + "loss": 0.1, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.386439561843872, + "rewards/margins": 3.4372634887695312, + "rewards/rejected": -2.050823926925659, + "step": 14421 + }, + { + "epoch": 0.84, + "learning_rate": 6.6235346315273165e-09, + "logits/chosen": -1.851036548614502, + "logits/rejected": -1.8564318418502808, + "logps/chosen": -13.45689582824707, + "logps/rejected": -142.34979248046875, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15614500641822815, + "rewards/margins": 2.4389102458953857, + "rewards/rejected": -2.2827651500701904, + "step": 14422 + }, + { + "epoch": 0.84, + "learning_rate": 6.618848023763729e-09, + "logits/chosen": -1.8598268032073975, + "logits/rejected": -1.846278190612793, + "logps/chosen": -178.51394653320312, + "logps/rejected": -311.11492919921875, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8899810910224915, + "rewards/margins": 0.717022716999054, + "rewards/rejected": 0.1729583740234375, + "step": 14423 + }, + { + "epoch": 0.84, + "learning_rate": 6.6141629571108585e-09, + "logits/chosen": -2.031409502029419, + "logits/rejected": -2.0223066806793213, + "logps/chosen": -0.004976819735020399, + "logps/rejected": -233.4285125732422, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0017247541109099984, + "rewards/margins": 5.034221649169922, + "rewards/rejected": -5.032496929168701, + "step": 14424 + }, + { + "epoch": 0.84, + "learning_rate": 6.609479431735143e-09, + "logits/chosen": -2.0014004707336426, + "logits/rejected": -2.01227068901062, + "logps/chosen": -158.98342895507812, + "logps/rejected": -294.1971740722656, + "loss": 0.0624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5067840814590454, + "rewards/margins": 2.364593505859375, + "rewards/rejected": -0.8578094840049744, + "step": 14425 + }, + { + "epoch": 0.84, + "learning_rate": 6.604797447802979e-09, + "logits/chosen": -1.9231079816818237, + "logits/rejected": -1.9119445085525513, + "logps/chosen": -148.20010375976562, + "logps/rejected": -298.6590576171875, + "loss": 0.256, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.089434862136841, + "rewards/margins": 0.43219900131225586, + "rewards/rejected": 2.657235860824585, + "step": 14426 + }, + { + "epoch": 0.84, + "learning_rate": 6.600117005480665e-09, + "logits/chosen": -1.8798714876174927, + "logits/rejected": -1.8664658069610596, + "logps/chosen": -41.45188903808594, + "logps/rejected": -360.0550842285156, + "loss": 0.3534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036243438720703125, + "rewards/margins": 5.855173587799072, + "rewards/rejected": -5.818930149078369, + "step": 14427 + }, + { + "epoch": 0.84, + "learning_rate": 6.595438104934487e-09, + "logits/chosen": -2.0203659534454346, + "logits/rejected": -2.026118278503418, + "logps/chosen": -17.54205322265625, + "logps/rejected": -60.503658294677734, + "loss": 0.5643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2152935117483139, + "rewards/margins": 0.3442787230014801, + "rewards/rejected": -0.1289852112531662, + "step": 14428 + }, + { + "epoch": 0.84, + "learning_rate": 6.590760746330659e-09, + "logits/chosen": -1.8864645957946777, + "logits/rejected": -1.8801491260528564, + "logps/chosen": -180.97840881347656, + "logps/rejected": -278.98065185546875, + "loss": 0.1103, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.761857748031616, + "rewards/margins": 1.7519516944885254, + "rewards/rejected": 1.0099060535430908, + "step": 14429 + }, + { + "epoch": 0.84, + "learning_rate": 6.586084929835362e-09, + "logits/chosen": -1.6376582384109497, + "logits/rejected": -1.6403231620788574, + "logps/chosen": -78.47358703613281, + "logps/rejected": -202.45884704589844, + "loss": 0.2066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5020515322685242, + "rewards/margins": 2.484543561935425, + "rewards/rejected": -1.9824920892715454, + "step": 14430 + }, + { + "epoch": 0.84, + "learning_rate": 6.581410655614667e-09, + "logits/chosen": -1.9249545335769653, + "logits/rejected": -1.925551176071167, + "logps/chosen": -0.00023935583885759115, + "logps/rejected": -121.32515716552734, + "loss": 0.3848, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.316681151474768e-07, + "rewards/margins": 2.418755531311035, + "rewards/rejected": -2.4187562465667725, + "step": 14431 + }, + { + "epoch": 0.84, + "learning_rate": 6.576737923834663e-09, + "logits/chosen": -1.9163669347763062, + "logits/rejected": -1.8702752590179443, + "logps/chosen": -231.42440795898438, + "logps/rejected": -429.92034912109375, + "loss": 0.0651, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3058747053146362, + "rewards/margins": 3.7079286575317383, + "rewards/rejected": -2.4020538330078125, + "step": 14432 + }, + { + "epoch": 0.84, + "learning_rate": 6.57206673466133e-09, + "logits/chosen": -2.1837575435638428, + "logits/rejected": -2.1873302459716797, + "logps/chosen": -27.083663940429688, + "logps/rejected": -163.9257354736328, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48934993147850037, + "rewards/margins": 1.3915364742279053, + "rewards/rejected": -0.9021865725517273, + "step": 14433 + }, + { + "epoch": 0.84, + "learning_rate": 6.567397088260629e-09, + "logits/chosen": -1.8937019109725952, + "logits/rejected": -1.865043044090271, + "logps/chosen": -182.59275817871094, + "logps/rejected": -290.603515625, + "loss": 0.0568, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.931500196456909, + "rewards/margins": 2.457754373550415, + "rewards/rejected": 0.473745733499527, + "step": 14434 + }, + { + "epoch": 0.84, + "learning_rate": 6.56272898479841e-09, + "logits/chosen": -2.017467975616455, + "logits/rejected": -2.0158565044403076, + "logps/chosen": -42.80995178222656, + "logps/rejected": -264.5153503417969, + "loss": 0.3895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05371895059943199, + "rewards/margins": 2.6238362789154053, + "rewards/rejected": -2.570117235183716, + "step": 14435 + }, + { + "epoch": 0.84, + "learning_rate": 6.5580624244405525e-09, + "logits/chosen": -2.026870012283325, + "logits/rejected": -2.0141351222991943, + "logps/chosen": -19.371488571166992, + "logps/rejected": -298.41796875, + "loss": 0.2365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3276823163032532, + "rewards/margins": 7.745305061340332, + "rewards/rejected": -7.4176225662231445, + "step": 14436 + }, + { + "epoch": 0.84, + "learning_rate": 6.553397407352806e-09, + "logits/chosen": -1.6760852336883545, + "logits/rejected": -1.669279932975769, + "logps/chosen": -142.4476776123047, + "logps/rejected": -439.3697509765625, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9518158435821533, + "rewards/margins": 11.72784423828125, + "rewards/rejected": -9.776028633117676, + "step": 14437 + }, + { + "epoch": 0.84, + "learning_rate": 6.5487339337009114e-09, + "logits/chosen": -1.650253415107727, + "logits/rejected": -1.6562093496322632, + "logps/chosen": -9.629472732543945, + "logps/rejected": -32.04679489135742, + "loss": 0.6643, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05383196100592613, + "rewards/margins": -0.08869656920433044, + "rewards/rejected": 0.14252853393554688, + "step": 14438 + }, + { + "epoch": 0.84, + "learning_rate": 6.544072003650514e-09, + "logits/chosen": -1.7898370027542114, + "logits/rejected": -1.8001781702041626, + "logps/chosen": -7.295396062545478e-05, + "logps/rejected": -173.6885223388672, + "loss": 0.3798, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8357285398451495e-06, + "rewards/margins": 2.10528826713562, + "rewards/rejected": -2.105290174484253, + "step": 14439 + }, + { + "epoch": 0.84, + "learning_rate": 6.539411617367252e-09, + "logits/chosen": -1.7957477569580078, + "logits/rejected": -1.7657850980758667, + "logps/chosen": -360.26141357421875, + "logps/rejected": -580.8890991210938, + "loss": 0.1533, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.574462890625, + "rewards/margins": 1.242974877357483, + "rewards/rejected": 0.3314880430698395, + "step": 14440 + }, + { + "epoch": 0.84, + "learning_rate": 6.534752775016689e-09, + "logits/chosen": -1.7548305988311768, + "logits/rejected": -1.744870901107788, + "logps/chosen": -34.707157135009766, + "logps/rejected": -176.8555450439453, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17507782578468323, + "rewards/margins": 1.8475159406661987, + "rewards/rejected": -2.0225937366485596, + "step": 14441 + }, + { + "epoch": 0.84, + "learning_rate": 6.530095476764313e-09, + "logits/chosen": -1.944410800933838, + "logits/rejected": -1.941906213760376, + "logps/chosen": -34.88832473754883, + "logps/rejected": -254.94468688964844, + "loss": 0.2201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5645179748535156, + "rewards/margins": 3.3357017040252686, + "rewards/rejected": -2.771183729171753, + "step": 14442 + }, + { + "epoch": 0.84, + "learning_rate": 6.525439722775578e-09, + "logits/chosen": -1.7547879219055176, + "logits/rejected": -1.7480480670928955, + "logps/chosen": -232.54617309570312, + "logps/rejected": -351.2670593261719, + "loss": 0.1942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9716339111328125, + "rewards/margins": 0.8714781999588013, + "rewards/rejected": 1.1001557111740112, + "step": 14443 + }, + { + "epoch": 0.84, + "learning_rate": 6.520785513215877e-09, + "logits/chosen": -1.8830589056015015, + "logits/rejected": -1.9334553480148315, + "logps/chosen": -225.83395385742188, + "logps/rejected": -304.1024169921875, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4442765712738037, + "rewards/margins": 3.591566562652588, + "rewards/rejected": -1.1472901105880737, + "step": 14444 + }, + { + "epoch": 0.84, + "learning_rate": 6.516132848250566e-09, + "logits/chosen": -2.0258538722991943, + "logits/rejected": -2.02167010307312, + "logps/chosen": -46.49507141113281, + "logps/rejected": -305.4671630859375, + "loss": 0.278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10473518818616867, + "rewards/margins": 5.9336018562316895, + "rewards/rejected": -5.828866481781006, + "step": 14445 + }, + { + "epoch": 0.84, + "learning_rate": 6.511481728044909e-09, + "logits/chosen": -1.9710078239440918, + "logits/rejected": -1.9455803632736206, + "logps/chosen": -39.70566177368164, + "logps/rejected": -591.8826293945312, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2869033813476562, + "rewards/margins": 8.123517990112305, + "rewards/rejected": -6.836615085601807, + "step": 14446 + }, + { + "epoch": 0.84, + "learning_rate": 6.5068321527641525e-09, + "logits/chosen": -1.9282296895980835, + "logits/rejected": -1.9187893867492676, + "logps/chosen": -5.43588466825895e-05, + "logps/rejected": -224.09608459472656, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960413886896276e-07, + "rewards/margins": 5.02895450592041, + "rewards/rejected": -5.028954982757568, + "step": 14447 + }, + { + "epoch": 0.84, + "learning_rate": 6.502184122573456e-09, + "logits/chosen": -1.7589263916015625, + "logits/rejected": -1.7670698165893555, + "logps/chosen": -38.365966796875, + "logps/rejected": -216.42205810546875, + "loss": 0.3793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18854065239429474, + "rewards/margins": 1.6535248756408691, + "rewards/rejected": -1.4649841785430908, + "step": 14448 + }, + { + "epoch": 0.84, + "learning_rate": 6.497537637637967e-09, + "logits/chosen": -1.7527427673339844, + "logits/rejected": -1.679975152015686, + "logps/chosen": -257.60516357421875, + "logps/rejected": -428.47821044921875, + "loss": 0.0381, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5432770252227783, + "rewards/margins": 3.0647614002227783, + "rewards/rejected": -0.521484375, + "step": 14449 + }, + { + "epoch": 0.84, + "learning_rate": 6.492892698122726e-09, + "logits/chosen": -1.898821473121643, + "logits/rejected": -1.9082390069961548, + "logps/chosen": -21.760385513305664, + "logps/rejected": -210.45370483398438, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38557568192481995, + "rewards/margins": 1.9739179611206055, + "rewards/rejected": -1.588342308998108, + "step": 14450 + }, + { + "epoch": 0.84, + "learning_rate": 6.4882493041927444e-09, + "logits/chosen": -1.8259551525115967, + "logits/rejected": -1.818758487701416, + "logps/chosen": -0.14989358186721802, + "logps/rejected": -108.64519500732422, + "loss": 0.338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002854700433090329, + "rewards/margins": 3.1114888191223145, + "rewards/rejected": -3.1143434047698975, + "step": 14451 + }, + { + "epoch": 0.84, + "learning_rate": 6.483607456012996e-09, + "logits/chosen": -1.9583500623703003, + "logits/rejected": -1.9461467266082764, + "logps/chosen": -9.955846786499023, + "logps/rejected": -103.19776916503906, + "loss": 0.6281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10292138904333115, + "rewards/margins": 0.1395951211452484, + "rewards/rejected": -0.036673735827207565, + "step": 14452 + }, + { + "epoch": 0.84, + "learning_rate": 6.478967153748377e-09, + "logits/chosen": -1.6099141836166382, + "logits/rejected": -1.6203174591064453, + "logps/chosen": -20.63509750366211, + "logps/rejected": -182.98455810546875, + "loss": 0.3615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10509414970874786, + "rewards/margins": 4.752151012420654, + "rewards/rejected": -4.857244968414307, + "step": 14453 + }, + { + "epoch": 0.84, + "learning_rate": 6.474328397563711e-09, + "logits/chosen": -1.6139698028564453, + "logits/rejected": -1.5874943733215332, + "logps/chosen": -205.68212890625, + "logps/rejected": -358.109130859375, + "loss": 0.0861, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2841553688049316, + "rewards/margins": 2.091305732727051, + "rewards/rejected": 0.19284974038600922, + "step": 14454 + }, + { + "epoch": 0.84, + "learning_rate": 6.469691187623827e-09, + "logits/chosen": -1.8432406187057495, + "logits/rejected": -1.8443114757537842, + "logps/chosen": -3.8209965229034424, + "logps/rejected": -106.52323150634766, + "loss": 0.6444, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.07136929035186768, + "rewards/margins": -0.03376605361700058, + "rewards/rejected": 0.10513534396886826, + "step": 14455 + }, + { + "epoch": 0.84, + "learning_rate": 6.465055524093427e-09, + "logits/chosen": -1.9544157981872559, + "logits/rejected": -1.951337456703186, + "logps/chosen": -21.256689071655273, + "logps/rejected": -94.16464233398438, + "loss": 0.4434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01945667341351509, + "rewards/margins": 1.4884873628616333, + "rewards/rejected": -1.469030737876892, + "step": 14456 + }, + { + "epoch": 0.84, + "learning_rate": 6.460421407137223e-09, + "logits/chosen": -2.012812376022339, + "logits/rejected": -2.0118355751037598, + "logps/chosen": -168.14073181152344, + "logps/rejected": -213.04019165039062, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7382248640060425, + "rewards/margins": 0.25778353214263916, + "rewards/rejected": 1.4804413318634033, + "step": 14457 + }, + { + "epoch": 0.84, + "learning_rate": 6.455788836919807e-09, + "logits/chosen": -1.516043782234192, + "logits/rejected": -1.4576088190078735, + "logps/chosen": -230.22845458984375, + "logps/rejected": -481.98187255859375, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8134552240371704, + "rewards/margins": 3.3008270263671875, + "rewards/rejected": -1.487371802330017, + "step": 14458 + }, + { + "epoch": 0.84, + "learning_rate": 6.451157813605784e-09, + "logits/chosen": -1.884138822555542, + "logits/rejected": -1.871152639389038, + "logps/chosen": -1.7166048564831726e-05, + "logps/rejected": -205.4189453125, + "loss": 0.3853, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.960537663440846e-08, + "rewards/margins": 2.443890333175659, + "rewards/rejected": -2.443890333175659, + "step": 14459 + }, + { + "epoch": 0.84, + "learning_rate": 6.446528337359647e-09, + "logits/chosen": -1.9440748691558838, + "logits/rejected": -1.9398634433746338, + "logps/chosen": -0.00033398193772882223, + "logps/rejected": -89.46033477783203, + "loss": 0.5311, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.601822987140622e-05, + "rewards/margins": 0.7104438543319702, + "rewards/rejected": -0.7104598879814148, + "step": 14460 + }, + { + "epoch": 0.84, + "learning_rate": 6.441900408345868e-09, + "logits/chosen": -1.976744294166565, + "logits/rejected": -1.9669281244277954, + "logps/chosen": -7.839885234832764, + "logps/rejected": -221.56787109375, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22838549315929413, + "rewards/margins": 3.423914670944214, + "rewards/rejected": -3.1955292224884033, + "step": 14461 + }, + { + "epoch": 0.84, + "learning_rate": 6.4372740267288506e-09, + "logits/chosen": -1.9141443967819214, + "logits/rejected": -1.8617357015609741, + "logps/chosen": -123.02938842773438, + "logps/rejected": -293.34619140625, + "loss": 0.1144, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8393828868865967, + "rewards/margins": 1.662512183189392, + "rewards/rejected": 1.1768707036972046, + "step": 14462 + }, + { + "epoch": 0.84, + "learning_rate": 6.432649192672951e-09, + "logits/chosen": -1.7558130025863647, + "logits/rejected": -1.7563647031784058, + "logps/chosen": -0.0010097157210111618, + "logps/rejected": -262.2071533203125, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.256637425394729e-05, + "rewards/margins": 6.751121520996094, + "rewards/rejected": -6.751183986663818, + "step": 14463 + }, + { + "epoch": 0.84, + "learning_rate": 6.4280259063424716e-09, + "logits/chosen": -1.7336077690124512, + "logits/rejected": -1.7045538425445557, + "logps/chosen": -251.1014862060547, + "logps/rejected": -579.6669921875, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.923976182937622, + "rewards/margins": 3.731794834136963, + "rewards/rejected": 0.19218139350414276, + "step": 14464 + }, + { + "epoch": 0.84, + "learning_rate": 6.423404167901636e-09, + "logits/chosen": -1.7747368812561035, + "logits/rejected": -1.7549501657485962, + "logps/chosen": -255.36781311035156, + "logps/rejected": -520.056884765625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4593522548675537, + "rewards/margins": 4.012983798980713, + "rewards/rejected": -0.553631603717804, + "step": 14465 + }, + { + "epoch": 0.84, + "learning_rate": 6.418783977514641e-09, + "logits/chosen": -1.877634882926941, + "logits/rejected": -1.8722913265228271, + "logps/chosen": -24.230792999267578, + "logps/rejected": -164.71014404296875, + "loss": 0.9796, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8293835520744324, + "rewards/margins": -0.43486449122428894, + "rewards/rejected": -0.39451906085014343, + "step": 14466 + }, + { + "epoch": 0.84, + "learning_rate": 6.414165335345617e-09, + "logits/chosen": -1.977318286895752, + "logits/rejected": -1.9595556259155273, + "logps/chosen": -29.326597213745117, + "logps/rejected": -273.0683898925781, + "loss": 0.1412, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0372194051742554, + "rewards/margins": 3.2622437477111816, + "rewards/rejected": -2.225024461746216, + "step": 14467 + }, + { + "epoch": 0.84, + "learning_rate": 6.409548241558654e-09, + "logits/chosen": -1.9746849536895752, + "logits/rejected": -1.9746880531311035, + "logps/chosen": -73.53384399414062, + "logps/rejected": -185.9864959716797, + "loss": 0.2131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.998626708984375, + "rewards/margins": 1.6352005004882812, + "rewards/rejected": -0.6365737915039062, + "step": 14468 + }, + { + "epoch": 0.84, + "learning_rate": 6.4049326963177445e-09, + "logits/chosen": -1.8470603227615356, + "logits/rejected": -1.8496167659759521, + "logps/chosen": -10.625110626220703, + "logps/rejected": -297.43316650390625, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16249971091747284, + "rewards/margins": 7.181286811828613, + "rewards/rejected": -7.018786907196045, + "step": 14469 + }, + { + "epoch": 0.84, + "learning_rate": 6.400318699786878e-09, + "logits/chosen": -1.8474184274673462, + "logits/rejected": -1.8790227174758911, + "logps/chosen": -134.60321044921875, + "logps/rejected": -312.5137939453125, + "loss": 0.0868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7903854846954346, + "rewards/margins": 1.9488601684570312, + "rewards/rejected": -0.15847472846508026, + "step": 14470 + }, + { + "epoch": 0.84, + "learning_rate": 6.3957062521299624e-09, + "logits/chosen": -1.7722537517547607, + "logits/rejected": -1.8153705596923828, + "logps/chosen": -196.88253784179688, + "logps/rejected": -322.7795104980469, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6293914318084717, + "rewards/margins": 4.974877834320068, + "rewards/rejected": -2.3454864025115967, + "step": 14471 + }, + { + "epoch": 0.84, + "learning_rate": 6.391095353510856e-09, + "logits/chosen": -1.8713592290878296, + "logits/rejected": -1.847375750541687, + "logps/chosen": -137.4210205078125, + "logps/rejected": -253.390869140625, + "loss": 0.1216, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1567628383636475, + "rewards/margins": 1.7550568580627441, + "rewards/rejected": 0.40170595049858093, + "step": 14472 + }, + { + "epoch": 0.84, + "learning_rate": 6.386486004093339e-09, + "logits/chosen": -2.0549402236938477, + "logits/rejected": -2.0364465713500977, + "logps/chosen": -50.758087158203125, + "logps/rejected": -282.2882385253906, + "loss": 0.1865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.373696893453598, + "rewards/margins": 6.131036281585693, + "rewards/rejected": -5.7573394775390625, + "step": 14473 + }, + { + "epoch": 0.84, + "learning_rate": 6.381878204041196e-09, + "logits/chosen": -2.0268568992614746, + "logits/rejected": -1.9792672395706177, + "logps/chosen": -168.60150146484375, + "logps/rejected": -290.4102783203125, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9033753871917725, + "rewards/margins": 4.044803142547607, + "rewards/rejected": -1.1414276361465454, + "step": 14474 + }, + { + "epoch": 0.84, + "learning_rate": 6.377271953518093e-09, + "logits/chosen": -1.7321035861968994, + "logits/rejected": -1.722473382949829, + "logps/chosen": -124.89386749267578, + "logps/rejected": -311.861083984375, + "loss": 0.0772, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4502112865448, + "rewards/margins": 2.139218807220459, + "rewards/rejected": 0.31099244952201843, + "step": 14475 + }, + { + "epoch": 0.84, + "learning_rate": 6.3726672526876745e-09, + "logits/chosen": -2.019028663635254, + "logits/rejected": -1.9772671461105347, + "logps/chosen": -127.06695556640625, + "logps/rejected": -403.1837463378906, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24702148139476776, + "rewards/margins": 3.2746734619140625, + "rewards/rejected": -3.5216948986053467, + "step": 14476 + }, + { + "epoch": 0.84, + "learning_rate": 6.368064101713505e-09, + "logits/chosen": -1.9504421949386597, + "logits/rejected": -1.934861421585083, + "logps/chosen": -197.90066528320312, + "logps/rejected": -402.43060302734375, + "loss": 0.203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017364501953125, + "rewards/margins": 2.9951202869415283, + "rewards/rejected": -2.9777557849884033, + "step": 14477 + }, + { + "epoch": 0.84, + "learning_rate": 6.363462500759142e-09, + "logits/chosen": -1.9953032732009888, + "logits/rejected": -2.0101726055145264, + "logps/chosen": -215.18226623535156, + "logps/rejected": -225.58963012695312, + "loss": 0.6092, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7883651852607727, + "rewards/margins": -0.34999388456344604, + "rewards/rejected": 1.1383590698242188, + "step": 14478 + }, + { + "epoch": 0.84, + "learning_rate": 6.358862449988034e-09, + "logits/chosen": -1.8968287706375122, + "logits/rejected": -1.9011839628219604, + "logps/chosen": -7.865959167480469, + "logps/rejected": -213.92623901367188, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18567009270191193, + "rewards/margins": 1.7734218835830688, + "rewards/rejected": -1.5877517461776733, + "step": 14479 + }, + { + "epoch": 0.84, + "learning_rate": 6.354263949563615e-09, + "logits/chosen": -1.9872108697891235, + "logits/rejected": -1.9798294305801392, + "logps/chosen": -3.3821051120758057, + "logps/rejected": -139.69464111328125, + "loss": 0.4724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0783546194434166, + "rewards/margins": 0.7815864086151123, + "rewards/rejected": -0.7032318115234375, + "step": 14480 + }, + { + "epoch": 0.84, + "learning_rate": 6.3496669996492055e-09, + "logits/chosen": -1.6222631931304932, + "logits/rejected": -1.6168652772903442, + "logps/chosen": -209.69430541992188, + "logps/rejected": -437.2260437011719, + "loss": 0.369, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0902557373046875, + "rewards/margins": 0.14317619800567627, + "rewards/rejected": 1.9470795392990112, + "step": 14481 + }, + { + "epoch": 0.84, + "learning_rate": 6.345071600408159e-09, + "logits/chosen": -1.9141528606414795, + "logits/rejected": -1.912313461303711, + "logps/chosen": -75.97576904296875, + "logps/rejected": -236.286376953125, + "loss": 0.1786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2004806995391846, + "rewards/margins": 1.499531626701355, + "rewards/rejected": -0.299050897359848, + "step": 14482 + }, + { + "epoch": 0.84, + "learning_rate": 6.34047775200372e-09, + "logits/chosen": -1.8953073024749756, + "logits/rejected": -1.9002296924591064, + "logps/chosen": -269.16131591796875, + "logps/rejected": -450.2906494140625, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.946142554283142, + "rewards/margins": 4.429602146148682, + "rewards/rejected": -2.48345947265625, + "step": 14483 + }, + { + "epoch": 0.84, + "learning_rate": 6.33588545459906e-09, + "logits/chosen": -1.7986352443695068, + "logits/rejected": -1.8154805898666382, + "logps/chosen": -221.44644165039062, + "logps/rejected": -334.1761474609375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.62113356590271, + "rewards/margins": 4.624246597290039, + "rewards/rejected": -2.00311279296875, + "step": 14484 + }, + { + "epoch": 0.84, + "learning_rate": 6.331294708357332e-09, + "logits/chosen": -1.7500174045562744, + "logits/rejected": -1.7495394945144653, + "logps/chosen": -14.375396728515625, + "logps/rejected": -244.09716796875, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15001420676708221, + "rewards/margins": 6.5572123527526855, + "rewards/rejected": -6.707226753234863, + "step": 14485 + }, + { + "epoch": 0.84, + "learning_rate": 6.326705513441621e-09, + "logits/chosen": -1.9729020595550537, + "logits/rejected": -1.9758963584899902, + "logps/chosen": -0.060743071138858795, + "logps/rejected": -201.97976684570312, + "loss": 0.3542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0022247738670557737, + "rewards/margins": 5.332499980926514, + "rewards/rejected": -5.330275058746338, + "step": 14486 + }, + { + "epoch": 0.84, + "learning_rate": 6.322117870014976e-09, + "logits/chosen": -1.7782320976257324, + "logits/rejected": -1.7760993242263794, + "logps/chosen": -172.27774047851562, + "logps/rejected": -307.3852844238281, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7769715785980225, + "rewards/margins": 4.390390396118164, + "rewards/rejected": -0.6134185791015625, + "step": 14487 + }, + { + "epoch": 0.84, + "learning_rate": 6.317531778240337e-09, + "logits/chosen": -2.0094029903411865, + "logits/rejected": -2.00180983543396, + "logps/chosen": -0.564539909362793, + "logps/rejected": -100.01416015625, + "loss": 0.4732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034992244094610214, + "rewards/margins": 0.9410881400108337, + "rewards/rejected": -0.9760803580284119, + "step": 14488 + }, + { + "epoch": 0.84, + "learning_rate": 6.312947238280647e-09, + "logits/chosen": -2.017843723297119, + "logits/rejected": -2.0179028511047363, + "logps/chosen": -0.048779528588056564, + "logps/rejected": -53.366851806640625, + "loss": 0.511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001395916217006743, + "rewards/margins": 0.9495161771774292, + "rewards/rejected": -0.9509121179580688, + "step": 14489 + }, + { + "epoch": 0.84, + "learning_rate": 6.30836425029877e-09, + "logits/chosen": -1.9059699773788452, + "logits/rejected": -1.9068750143051147, + "logps/chosen": -91.08316802978516, + "logps/rejected": -186.87408447265625, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6216850280761719, + "rewards/margins": 1.5464653968811035, + "rewards/rejected": -0.9247803092002869, + "step": 14490 + }, + { + "epoch": 0.84, + "learning_rate": 6.303782814457526e-09, + "logits/chosen": -1.8928955793380737, + "logits/rejected": -1.928491473197937, + "logps/chosen": -298.91192626953125, + "logps/rejected": -359.60784912109375, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.668121337890625, + "rewards/margins": 2.7862000465393066, + "rewards/rejected": -0.11807861179113388, + "step": 14491 + }, + { + "epoch": 0.84, + "learning_rate": 6.299202930919639e-09, + "logits/chosen": -1.8557754755020142, + "logits/rejected": -1.8117356300354004, + "logps/chosen": -132.3321990966797, + "logps/rejected": -312.85052490234375, + "loss": 0.2588, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.582891821861267, + "rewards/margins": 0.7800384163856506, + "rewards/rejected": 0.8028534054756165, + "step": 14492 + }, + { + "epoch": 0.84, + "learning_rate": 6.2946245998478366e-09, + "logits/chosen": -2.0438427925109863, + "logits/rejected": -2.041783094406128, + "logps/chosen": -86.38465881347656, + "logps/rejected": -238.922607421875, + "loss": 0.1665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8134163022041321, + "rewards/margins": 4.101288795471191, + "rewards/rejected": -3.287872314453125, + "step": 14493 + }, + { + "epoch": 0.84, + "learning_rate": 6.290047821404748e-09, + "logits/chosen": -1.894132375717163, + "logits/rejected": -1.883660078048706, + "logps/chosen": -55.65300750732422, + "logps/rejected": -156.96554565429688, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9193069338798523, + "rewards/margins": 0.8201095461845398, + "rewards/rejected": 0.0991973876953125, + "step": 14494 + }, + { + "epoch": 0.84, + "learning_rate": 6.285472595752983e-09, + "logits/chosen": -2.0176258087158203, + "logits/rejected": -2.009845733642578, + "logps/chosen": -26.005802154541016, + "logps/rejected": -246.8569793701172, + "loss": 0.232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4944475293159485, + "rewards/margins": 2.8002519607543945, + "rewards/rejected": -2.305804491043091, + "step": 14495 + }, + { + "epoch": 0.84, + "learning_rate": 6.280898923055039e-09, + "logits/chosen": -1.8411760330200195, + "logits/rejected": -1.8442426919937134, + "logps/chosen": -0.6827477812767029, + "logps/rejected": -183.63735961914062, + "loss": 0.329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03805633261799812, + "rewards/margins": 2.6866557598114014, + "rewards/rejected": -2.64859938621521, + "step": 14496 + }, + { + "epoch": 0.84, + "learning_rate": 6.276326803473442e-09, + "logits/chosen": -1.8987164497375488, + "logits/rejected": -1.8906711339950562, + "logps/chosen": -12.025171279907227, + "logps/rejected": -254.952880859375, + "loss": 0.4032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.558546245098114, + "rewards/margins": 0.7203458547592163, + "rewards/rejected": -0.1617996245622635, + "step": 14497 + }, + { + "epoch": 0.84, + "learning_rate": 6.271756237170583e-09, + "logits/chosen": -1.9771125316619873, + "logits/rejected": -2.007800817489624, + "logps/chosen": -146.3798828125, + "logps/rejected": -376.46319580078125, + "loss": 0.0363, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6696624755859375, + "rewards/margins": 5.023935317993164, + "rewards/rejected": -3.3542726039886475, + "step": 14498 + }, + { + "epoch": 0.84, + "learning_rate": 6.267187224308846e-09, + "logits/chosen": -1.770950436592102, + "logits/rejected": -1.7832194566726685, + "logps/chosen": -69.69599914550781, + "logps/rejected": -252.53334045410156, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4641365110874176, + "rewards/margins": 1.8395698070526123, + "rewards/rejected": -1.375433325767517, + "step": 14499 + }, + { + "epoch": 0.84, + "learning_rate": 6.26261976505052e-09, + "logits/chosen": -1.8168903589248657, + "logits/rejected": -1.809437870979309, + "logps/chosen": -0.07919108867645264, + "logps/rejected": -104.90570068359375, + "loss": 0.3951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017024640692397952, + "rewards/margins": 2.191957473754883, + "rewards/rejected": -2.193660020828247, + "step": 14500 + }, + { + "epoch": 0.84, + "learning_rate": 6.258053859557894e-09, + "logits/chosen": -1.7826999425888062, + "logits/rejected": -1.7892895936965942, + "logps/chosen": -159.148193359375, + "logps/rejected": -369.7371520996094, + "loss": 0.095, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2820724248886108, + "rewards/margins": 5.488441467285156, + "rewards/rejected": -4.206368923187256, + "step": 14501 + }, + { + "epoch": 0.84, + "learning_rate": 6.253489507993171e-09, + "logits/chosen": -2.093810558319092, + "logits/rejected": -2.077181816101074, + "logps/chosen": -9.375283241271973, + "logps/rejected": -201.50433349609375, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.200022891163826, + "rewards/margins": 3.2748537063598633, + "rewards/rejected": -3.0748307704925537, + "step": 14502 + }, + { + "epoch": 0.84, + "learning_rate": 6.248926710518476e-09, + "logits/chosen": -2.0489673614501953, + "logits/rejected": -2.040294885635376, + "logps/chosen": -70.53752899169922, + "logps/rejected": -143.43487548828125, + "loss": 0.3987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3762931823730469, + "rewards/margins": 0.6318687200546265, + "rewards/rejected": -0.255575567483902, + "step": 14503 + }, + { + "epoch": 0.84, + "learning_rate": 6.24436546729592e-09, + "logits/chosen": -1.7834630012512207, + "logits/rejected": -1.7727360725402832, + "logps/chosen": -0.42121607065200806, + "logps/rejected": -121.54939270019531, + "loss": 0.4216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005275947041809559, + "rewards/margins": 1.584151268005371, + "rewards/rejected": -1.5894272327423096, + "step": 14504 + }, + { + "epoch": 0.84, + "learning_rate": 6.239805778487528e-09, + "logits/chosen": -1.9481552839279175, + "logits/rejected": -1.9446772336959839, + "logps/chosen": -223.51983642578125, + "logps/rejected": -376.6226806640625, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.84311842918396, + "rewards/margins": 4.063830852508545, + "rewards/rejected": -1.2207123041152954, + "step": 14505 + }, + { + "epoch": 0.84, + "learning_rate": 6.235247644255298e-09, + "logits/chosen": -1.8062684535980225, + "logits/rejected": -1.8069835901260376, + "logps/chosen": -0.007850605063140392, + "logps/rejected": -160.36215209960938, + "loss": 0.358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017868731811176986, + "rewards/margins": 3.130793809890747, + "rewards/rejected": -3.130972385406494, + "step": 14506 + }, + { + "epoch": 0.84, + "learning_rate": 6.23069106476114e-09, + "logits/chosen": -1.8272337913513184, + "logits/rejected": -1.8172472715377808, + "logps/chosen": -74.4720458984375, + "logps/rejected": -185.83651733398438, + "loss": 0.2059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9611534476280212, + "rewards/margins": 2.465749502182007, + "rewards/rejected": -1.5045959949493408, + "step": 14507 + }, + { + "epoch": 0.84, + "learning_rate": 6.226136040166935e-09, + "logits/chosen": -1.7941746711730957, + "logits/rejected": -1.787388801574707, + "logps/chosen": -0.8344457149505615, + "logps/rejected": -125.0258560180664, + "loss": 0.4037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06324541568756104, + "rewards/margins": 1.7912635803222656, + "rewards/rejected": -1.8545089960098267, + "step": 14508 + }, + { + "epoch": 0.84, + "learning_rate": 6.221582570634498e-09, + "logits/chosen": -1.6132068634033203, + "logits/rejected": -1.5998075008392334, + "logps/chosen": -68.56185150146484, + "logps/rejected": -231.0086212158203, + "loss": 0.1726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6907768249511719, + "rewards/margins": 2.643540382385254, + "rewards/rejected": -1.9527634382247925, + "step": 14509 + }, + { + "epoch": 0.84, + "learning_rate": 6.217030656325606e-09, + "logits/chosen": -1.8693435192108154, + "logits/rejected": -1.8557485342025757, + "logps/chosen": -9.512955665588379, + "logps/rejected": -293.766357421875, + "loss": 0.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2586278021335602, + "rewards/margins": 5.237207889556885, + "rewards/rejected": -4.978579998016357, + "step": 14510 + }, + { + "epoch": 0.84, + "learning_rate": 6.212480297401934e-09, + "logits/chosen": -1.6513334512710571, + "logits/rejected": -1.667618989944458, + "logps/chosen": -271.2518005371094, + "logps/rejected": -415.1619873046875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0549652576446533, + "rewards/margins": 3.8822999000549316, + "rewards/rejected": -1.8273346424102783, + "step": 14511 + }, + { + "epoch": 0.84, + "learning_rate": 6.207931494025153e-09, + "logits/chosen": -1.9075204133987427, + "logits/rejected": -1.9006376266479492, + "logps/chosen": -66.0583267211914, + "logps/rejected": -191.891357421875, + "loss": 0.5084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3035324215888977, + "rewards/margins": 0.03538361191749573, + "rewards/rejected": 0.268148809671402, + "step": 14512 + }, + { + "epoch": 0.84, + "learning_rate": 6.203384246356858e-09, + "logits/chosen": -1.9197871685028076, + "logits/rejected": -1.9248701333999634, + "logps/chosen": -12.3721342086792, + "logps/rejected": -83.82323455810547, + "loss": 0.5475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11347341537475586, + "rewards/margins": 0.6342162489891052, + "rewards/rejected": -0.5207428336143494, + "step": 14513 + }, + { + "epoch": 0.84, + "learning_rate": 6.1988385545585944e-09, + "logits/chosen": -1.701694369316101, + "logits/rejected": -1.6844967603683472, + "logps/chosen": -211.9044647216797, + "logps/rejected": -260.4014892578125, + "loss": 0.5889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5080307126045227, + "rewards/margins": 0.23377227783203125, + "rewards/rejected": -0.741802990436554, + "step": 14514 + }, + { + "epoch": 0.84, + "learning_rate": 6.1942944187918175e-09, + "logits/chosen": -2.0851478576660156, + "logits/rejected": -2.0725560188293457, + "logps/chosen": -0.7614473700523376, + "logps/rejected": -277.09844970703125, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11523747444152832, + "rewards/margins": 7.5786590576171875, + "rewards/rejected": -7.463421821594238, + "step": 14515 + }, + { + "epoch": 0.84, + "learning_rate": 6.189751839218005e-09, + "logits/chosen": -1.7949484586715698, + "logits/rejected": -1.7886502742767334, + "logps/chosen": -170.23849487304688, + "logps/rejected": -319.47857666015625, + "loss": 0.0908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9335449934005737, + "rewards/margins": 2.132162570953369, + "rewards/rejected": -0.19861756265163422, + "step": 14516 + }, + { + "epoch": 0.84, + "learning_rate": 6.185210815998493e-09, + "logits/chosen": -1.696295142173767, + "logits/rejected": -1.7007384300231934, + "logps/chosen": -0.06241833046078682, + "logps/rejected": -72.74396514892578, + "loss": 0.4569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005337362643331289, + "rewards/margins": 1.2014977931976318, + "rewards/rejected": -1.206835150718689, + "step": 14517 + }, + { + "epoch": 0.84, + "learning_rate": 6.1806713492946246e-09, + "logits/chosen": -2.095083236694336, + "logits/rejected": -2.044008731842041, + "logps/chosen": -208.11807250976562, + "logps/rejected": -406.4555358886719, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9275054931640625, + "rewards/margins": 0.2707397937774658, + "rewards/rejected": 2.6567656993865967, + "step": 14518 + }, + { + "epoch": 0.84, + "learning_rate": 6.176133439267633e-09, + "logits/chosen": -2.0640764236450195, + "logits/rejected": -2.0595521926879883, + "logps/chosen": -4.455026149749756, + "logps/rejected": -122.03675842285156, + "loss": 0.4316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2028704732656479, + "rewards/margins": 1.119555950164795, + "rewards/rejected": -0.9166855216026306, + "step": 14519 + }, + { + "epoch": 0.84, + "learning_rate": 6.1715970860787666e-09, + "logits/chosen": -1.7839480638504028, + "logits/rejected": -1.781660556793213, + "logps/chosen": -257.75006103515625, + "logps/rejected": -298.00091552734375, + "loss": 0.1514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7834869623184204, + "rewards/margins": 1.531158447265625, + "rewards/rejected": -0.7476715445518494, + "step": 14520 + }, + { + "epoch": 0.85, + "learning_rate": 6.167062289889152e-09, + "logits/chosen": -2.00331449508667, + "logits/rejected": -1.9975467920303345, + "logps/chosen": -31.45646858215332, + "logps/rejected": -163.91622924804688, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38749828934669495, + "rewards/margins": 3.0551161766052246, + "rewards/rejected": -2.6676177978515625, + "step": 14521 + }, + { + "epoch": 0.85, + "learning_rate": 6.162529050859894e-09, + "logits/chosen": -1.7661645412445068, + "logits/rejected": -1.7629625797271729, + "logps/chosen": -191.68521118164062, + "logps/rejected": -373.4013366699219, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.866595506668091, + "rewards/margins": 4.7751922607421875, + "rewards/rejected": -1.9085968732833862, + "step": 14522 + }, + { + "epoch": 0.85, + "learning_rate": 6.157997369152035e-09, + "logits/chosen": -1.8192795515060425, + "logits/rejected": -1.8202022314071655, + "logps/chosen": -54.50337219238281, + "logps/rejected": -233.34927368164062, + "loss": 0.2669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.88912433385849, + "rewards/margins": 1.6346901655197144, + "rewards/rejected": -0.7455658316612244, + "step": 14523 + }, + { + "epoch": 0.85, + "learning_rate": 6.153467244926564e-09, + "logits/chosen": -2.0252902507781982, + "logits/rejected": -2.0257670879364014, + "logps/chosen": -10.987060546875, + "logps/rejected": -163.34967041015625, + "loss": 0.2066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6487960815429688, + "rewards/margins": 4.798469066619873, + "rewards/rejected": -4.149672985076904, + "step": 14524 + }, + { + "epoch": 0.85, + "learning_rate": 6.148938678344423e-09, + "logits/chosen": -1.8662933111190796, + "logits/rejected": -1.8724943399429321, + "logps/chosen": -10.430347442626953, + "logps/rejected": -129.72984313964844, + "loss": 0.5227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021271323785185814, + "rewards/margins": 0.9152357578277588, + "rewards/rejected": -0.893964409828186, + "step": 14525 + }, + { + "epoch": 0.85, + "learning_rate": 6.1444116695664735e-09, + "logits/chosen": -1.991528034210205, + "logits/rejected": -1.9885684251785278, + "logps/chosen": -8.500511169433594, + "logps/rejected": -200.14541625976562, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14998827874660492, + "rewards/margins": 2.499765396118164, + "rewards/rejected": -2.3497772216796875, + "step": 14526 + }, + { + "epoch": 0.85, + "learning_rate": 6.139886218753537e-09, + "logits/chosen": -1.921661615371704, + "logits/rejected": -1.921440601348877, + "logps/chosen": -20.270837783813477, + "logps/rejected": -74.12394714355469, + "loss": 0.6414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12137012928724289, + "rewards/margins": 0.0731775313615799, + "rewards/rejected": 0.048192597925662994, + "step": 14527 + }, + { + "epoch": 0.85, + "learning_rate": 6.135362326066385e-09, + "logits/chosen": -1.8413362503051758, + "logits/rejected": -1.9360244274139404, + "logps/chosen": -273.94183349609375, + "logps/rejected": -383.41766357421875, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7803528308868408, + "rewards/margins": 3.275177001953125, + "rewards/rejected": -1.4948242902755737, + "step": 14528 + }, + { + "epoch": 0.85, + "learning_rate": 6.1308399916657425e-09, + "logits/chosen": -2.082001209259033, + "logits/rejected": -2.077634334564209, + "logps/chosen": -26.356090545654297, + "logps/rejected": -267.0236511230469, + "loss": 0.1815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6948089599609375, + "rewards/margins": 5.0520477294921875, + "rewards/rejected": -4.35723876953125, + "step": 14529 + }, + { + "epoch": 0.85, + "learning_rate": 6.126319215712239e-09, + "logits/chosen": -1.8369863033294678, + "logits/rejected": -1.8323349952697754, + "logps/chosen": -21.45857048034668, + "logps/rejected": -83.27039337158203, + "loss": 0.3506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36142751574516296, + "rewards/margins": 1.6715853214263916, + "rewards/rejected": -1.3101577758789062, + "step": 14530 + }, + { + "epoch": 0.85, + "learning_rate": 6.121799998366484e-09, + "logits/chosen": -1.8529417514801025, + "logits/rejected": -1.8492851257324219, + "logps/chosen": -0.0021983489859849215, + "logps/rejected": -51.18305206298828, + "loss": 0.6049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8634857850847766e-05, + "rewards/margins": 0.39098218083381653, + "rewards/rejected": -0.3910408020019531, + "step": 14531 + }, + { + "epoch": 0.85, + "learning_rate": 6.117282339789026e-09, + "logits/chosen": -1.6832517385482788, + "logits/rejected": -1.6535683870315552, + "logps/chosen": -211.86599731445312, + "logps/rejected": -290.0722961425781, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.078726291656494, + "rewards/margins": 0.32974863052368164, + "rewards/rejected": 2.7489776611328125, + "step": 14532 + }, + { + "epoch": 0.85, + "learning_rate": 6.11276624014036e-09, + "logits/chosen": -2.0172598361968994, + "logits/rejected": -2.0116467475891113, + "logps/chosen": -0.6116840243339539, + "logps/rejected": -225.30393981933594, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11529400199651718, + "rewards/margins": 3.587555170059204, + "rewards/rejected": -3.4722611904144287, + "step": 14533 + }, + { + "epoch": 0.85, + "learning_rate": 6.1082516995808965e-09, + "logits/chosen": -1.9783695936203003, + "logits/rejected": -1.987971544265747, + "logps/chosen": -124.68339538574219, + "logps/rejected": -301.67889404296875, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7087539434432983, + "rewards/margins": 1.7228652238845825, + "rewards/rejected": -0.014111327938735485, + "step": 14534 + }, + { + "epoch": 0.85, + "learning_rate": 6.103738718271051e-09, + "logits/chosen": -2.000504493713379, + "logits/rejected": -1.9920247793197632, + "logps/chosen": -49.50078201293945, + "logps/rejected": -198.57662963867188, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8367248773574829, + "rewards/margins": 0.4833969175815582, + "rewards/rejected": 0.3533279597759247, + "step": 14535 + }, + { + "epoch": 0.85, + "learning_rate": 6.0992272963711125e-09, + "logits/chosen": -1.7920809984207153, + "logits/rejected": -1.8111228942871094, + "logps/chosen": -67.0012435913086, + "logps/rejected": -214.2323760986328, + "loss": 0.417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40503692626953125, + "rewards/margins": 4.198375225067139, + "rewards/rejected": -4.60341215133667, + "step": 14536 + }, + { + "epoch": 0.85, + "learning_rate": 6.094717434041374e-09, + "logits/chosen": -1.926945686340332, + "logits/rejected": -1.9310654401779175, + "logps/chosen": -164.61636352539062, + "logps/rejected": -420.62799072265625, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7286438941955566, + "rewards/margins": 8.19866943359375, + "rewards/rejected": -5.470025539398193, + "step": 14537 + }, + { + "epoch": 0.85, + "learning_rate": 6.09020913144202e-09, + "logits/chosen": -2.071115493774414, + "logits/rejected": -2.072575092315674, + "logps/chosen": -31.881065368652344, + "logps/rejected": -129.6655731201172, + "loss": 1.0129, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7870947122573853, + "rewards/margins": -0.5520727634429932, + "rewards/rejected": -0.23502197861671448, + "step": 14538 + }, + { + "epoch": 0.85, + "learning_rate": 6.0857023887332385e-09, + "logits/chosen": -1.8617422580718994, + "logits/rejected": -1.856691598892212, + "logps/chosen": -0.5497292280197144, + "logps/rejected": -275.4090881347656, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1409839242696762, + "rewards/margins": 6.926085472106934, + "rewards/rejected": -6.785101413726807, + "step": 14539 + }, + { + "epoch": 0.85, + "learning_rate": 6.081197206075106e-09, + "logits/chosen": -1.7654269933700562, + "logits/rejected": -1.7610466480255127, + "logps/chosen": -61.854515075683594, + "logps/rejected": -291.14837646484375, + "loss": 0.2193, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0287666320800781, + "rewards/margins": 1.3509162664413452, + "rewards/rejected": -0.3221496641635895, + "step": 14540 + }, + { + "epoch": 0.85, + "learning_rate": 6.0766935836276956e-09, + "logits/chosen": -1.8165909051895142, + "logits/rejected": -1.808397889137268, + "logps/chosen": -50.91474533081055, + "logps/rejected": -235.1815948486328, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2748631238937378, + "rewards/margins": 0.764189600944519, + "rewards/rejected": 0.5106735229492188, + "step": 14541 + }, + { + "epoch": 0.85, + "learning_rate": 6.072191521550951e-09, + "logits/chosen": -1.9762530326843262, + "logits/rejected": -1.9732842445373535, + "logps/chosen": -42.829498291015625, + "logps/rejected": -292.1388244628906, + "loss": 0.2022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32745590806007385, + "rewards/margins": 3.685650110244751, + "rewards/rejected": -3.35819411277771, + "step": 14542 + }, + { + "epoch": 0.85, + "learning_rate": 6.067691020004856e-09, + "logits/chosen": -2.0329582691192627, + "logits/rejected": -2.0406112670898438, + "logps/chosen": -172.3485107421875, + "logps/rejected": -259.4696960449219, + "loss": 0.5138, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6303665041923523, + "rewards/margins": -0.33811497688293457, + "rewards/rejected": 0.9684814810752869, + "step": 14543 + }, + { + "epoch": 0.85, + "learning_rate": 6.063192079149276e-09, + "logits/chosen": -1.952264666557312, + "logits/rejected": -1.9517003297805786, + "logps/chosen": -13.103994369506836, + "logps/rejected": -193.31594848632812, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3025660514831543, + "rewards/margins": 2.957603931427002, + "rewards/rejected": -3.2601699829101562, + "step": 14544 + }, + { + "epoch": 0.85, + "learning_rate": 6.058694699144029e-09, + "logits/chosen": -2.0682787895202637, + "logits/rejected": -2.0790960788726807, + "logps/chosen": -280.78656005859375, + "logps/rejected": -483.0826416015625, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.995965600013733, + "rewards/margins": 5.33778715133667, + "rewards/rejected": -3.3418214321136475, + "step": 14545 + }, + { + "epoch": 0.85, + "learning_rate": 6.054198880148881e-09, + "logits/chosen": -1.7503503561019897, + "logits/rejected": -1.683190107345581, + "logps/chosen": -201.36810302734375, + "logps/rejected": -534.4772338867188, + "loss": 0.1125, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6312439441680908, + "rewards/margins": 2.901446580886841, + "rewards/rejected": -1.27020263671875, + "step": 14546 + }, + { + "epoch": 0.85, + "learning_rate": 6.049704622323554e-09, + "logits/chosen": -1.7501248121261597, + "logits/rejected": -1.7568702697753906, + "logps/chosen": -14.14124584197998, + "logps/rejected": -279.0863037109375, + "loss": 0.2364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5057851076126099, + "rewards/margins": 2.2055165767669678, + "rewards/rejected": -1.699731469154358, + "step": 14547 + }, + { + "epoch": 0.85, + "learning_rate": 6.045211925827709e-09, + "logits/chosen": -1.8322290182113647, + "logits/rejected": -1.8005807399749756, + "logps/chosen": -123.6436538696289, + "logps/rejected": -449.5665283203125, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.293683648109436, + "rewards/margins": 7.792328834533691, + "rewards/rejected": -6.498645305633545, + "step": 14548 + }, + { + "epoch": 0.85, + "learning_rate": 6.040720790820936e-09, + "logits/chosen": -1.9205437898635864, + "logits/rejected": -1.9174567461013794, + "logps/chosen": -29.74631690979004, + "logps/rejected": -49.90918731689453, + "loss": 0.6021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11311588436365128, + "rewards/margins": 0.18094387650489807, + "rewards/rejected": -0.0678279921412468, + "step": 14549 + }, + { + "epoch": 0.85, + "learning_rate": 6.036231217462784e-09, + "logits/chosen": -1.7298171520233154, + "logits/rejected": -1.806616187095642, + "logps/chosen": -188.54006958007812, + "logps/rejected": -310.1694030761719, + "loss": 0.0496, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.170977830886841, + "rewards/margins": 2.638681173324585, + "rewards/rejected": -0.467703253030777, + "step": 14550 + }, + { + "epoch": 0.85, + "learning_rate": 6.031743205912754e-09, + "logits/chosen": -1.957704782485962, + "logits/rejected": -1.9572458267211914, + "logps/chosen": -0.3964175879955292, + "logps/rejected": -203.8841552734375, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007525924127548933, + "rewards/margins": 5.6782546043396, + "rewards/rejected": -5.6857805252075195, + "step": 14551 + }, + { + "epoch": 0.85, + "learning_rate": 6.027256756330284e-09, + "logits/chosen": -1.8330286741256714, + "logits/rejected": -1.7782576084136963, + "logps/chosen": -369.1121826171875, + "logps/rejected": -550.6011352539062, + "loss": 0.0469, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7650513648986816, + "rewards/margins": 2.75457763671875, + "rewards/rejected": 0.01047363318502903, + "step": 14552 + }, + { + "epoch": 0.85, + "learning_rate": 6.022771868874743e-09, + "logits/chosen": -2.1216509342193604, + "logits/rejected": -2.125270128250122, + "logps/chosen": -23.850305557250977, + "logps/rejected": -171.71759033203125, + "loss": 0.3694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031394004821777344, + "rewards/margins": 1.8718149662017822, + "rewards/rejected": -1.9032089710235596, + "step": 14553 + }, + { + "epoch": 0.85, + "learning_rate": 6.018288543705457e-09, + "logits/chosen": -1.781460165977478, + "logits/rejected": -1.7844418287277222, + "logps/chosen": -96.48007202148438, + "logps/rejected": -152.93472290039062, + "loss": 1.0512, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6765961050987244, + "rewards/margins": -0.7083938717842102, + "rewards/rejected": 0.03179779276251793, + "step": 14554 + }, + { + "epoch": 0.85, + "learning_rate": 6.0138067809817005e-09, + "logits/chosen": -1.9306124448776245, + "logits/rejected": -1.931725263595581, + "logps/chosen": -1.6407535076141357, + "logps/rejected": -189.43101501464844, + "loss": 0.3981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10302553325891495, + "rewards/margins": 1.8690259456634521, + "rewards/rejected": -1.7660003900527954, + "step": 14555 + }, + { + "epoch": 0.85, + "learning_rate": 6.009326580862694e-09, + "logits/chosen": -1.8145533800125122, + "logits/rejected": -1.811260461807251, + "logps/chosen": -88.88284301757812, + "logps/rejected": -246.63902282714844, + "loss": 0.1989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4286560118198395, + "rewards/margins": 3.7749040126800537, + "rewards/rejected": -3.346247911453247, + "step": 14556 + }, + { + "epoch": 0.85, + "learning_rate": 6.004847943507574e-09, + "logits/chosen": -1.8400611877441406, + "logits/rejected": -1.8293077945709229, + "logps/chosen": -16.238845825195312, + "logps/rejected": -148.38555908203125, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09764213860034943, + "rewards/margins": 2.5428380966186523, + "rewards/rejected": -2.4451959133148193, + "step": 14557 + }, + { + "epoch": 0.85, + "learning_rate": 6.000370869075477e-09, + "logits/chosen": -1.8963547945022583, + "logits/rejected": -1.9038984775543213, + "logps/chosen": -6.65267276763916, + "logps/rejected": -139.78622436523438, + "loss": 0.4404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03122706525027752, + "rewards/margins": 1.372270941734314, + "rewards/rejected": -1.4034980535507202, + "step": 14558 + }, + { + "epoch": 0.85, + "learning_rate": 5.995895357725422e-09, + "logits/chosen": -1.7254399061203003, + "logits/rejected": -1.7523529529571533, + "logps/chosen": -220.23190307617188, + "logps/rejected": -346.0522766113281, + "loss": 0.2074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2230347394943237, + "rewards/margins": 1.4834107160568237, + "rewards/rejected": -0.2603759765625, + "step": 14559 + }, + { + "epoch": 0.85, + "learning_rate": 5.9914214096164205e-09, + "logits/chosen": -1.7865666151046753, + "logits/rejected": -1.808997631072998, + "logps/chosen": -244.94064331054688, + "logps/rejected": -508.27099609375, + "loss": 0.0473, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.339324951171875, + "rewards/margins": 3.802447557449341, + "rewards/rejected": -2.463122606277466, + "step": 14560 + }, + { + "epoch": 0.85, + "learning_rate": 5.986949024907378e-09, + "logits/chosen": -1.8241617679595947, + "logits/rejected": -1.8147032260894775, + "logps/chosen": -4.589509262586944e-05, + "logps/rejected": -79.2812271118164, + "loss": 0.5514, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.655771009420278e-07, + "rewards/margins": 0.6700468063354492, + "rewards/rejected": -0.6700477600097656, + "step": 14561 + }, + { + "epoch": 0.85, + "learning_rate": 5.982478203757208e-09, + "logits/chosen": -2.0139458179473877, + "logits/rejected": -1.9935272932052612, + "logps/chosen": -120.75071716308594, + "logps/rejected": -249.632568359375, + "loss": 0.4501, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1415542364120483, + "rewards/margins": 0.04942166805267334, + "rewards/rejected": 1.092132568359375, + "step": 14562 + }, + { + "epoch": 0.85, + "learning_rate": 5.978008946324737e-09, + "logits/chosen": -1.8742796182632446, + "logits/rejected": -1.8832519054412842, + "logps/chosen": -207.6473846435547, + "logps/rejected": -319.5023193359375, + "loss": 0.0759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4542007446289062, + "rewards/margins": 2.3996810913085938, + "rewards/rejected": -0.9454803466796875, + "step": 14563 + }, + { + "epoch": 0.85, + "learning_rate": 5.973541252768715e-09, + "logits/chosen": -2.07181453704834, + "logits/rejected": -2.067808151245117, + "logps/chosen": -17.449209213256836, + "logps/rejected": -104.19542694091797, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.578647792339325, + "rewards/margins": 1.3592851161956787, + "rewards/rejected": -0.7806373834609985, + "step": 14564 + }, + { + "epoch": 0.85, + "learning_rate": 5.9690751232478635e-09, + "logits/chosen": -1.8784842491149902, + "logits/rejected": -1.8800567388534546, + "logps/chosen": -46.12535095214844, + "logps/rejected": -169.46380615234375, + "loss": 0.6518, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04570617899298668, + "rewards/margins": -0.19692839682102203, + "rewards/rejected": 0.242634579539299, + "step": 14565 + }, + { + "epoch": 0.85, + "learning_rate": 5.964610557920841e-09, + "logits/chosen": -1.988305926322937, + "logits/rejected": -1.9865792989730835, + "logps/chosen": -148.5254364013672, + "logps/rejected": -250.23410034179688, + "loss": 0.1963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4281692504882812, + "rewards/margins": 1.0671920776367188, + "rewards/rejected": 0.3609771728515625, + "step": 14566 + }, + { + "epoch": 0.85, + "learning_rate": 5.96014755694626e-09, + "logits/chosen": -1.694750428199768, + "logits/rejected": -1.6963229179382324, + "logps/chosen": -1.9552286863327026, + "logps/rejected": -58.8264274597168, + "loss": 0.4594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.053120460361242294, + "rewards/margins": 1.070518970489502, + "rewards/rejected": -1.017398476600647, + "step": 14567 + }, + { + "epoch": 0.85, + "learning_rate": 5.95568612048265e-09, + "logits/chosen": -1.8721411228179932, + "logits/rejected": -1.8691976070404053, + "logps/chosen": -15.0006685256958, + "logps/rejected": -88.7468032836914, + "loss": 0.5857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.062445737421512604, + "rewards/margins": 0.39046773314476013, + "rewards/rejected": -0.3280220031738281, + "step": 14568 + }, + { + "epoch": 0.85, + "learning_rate": 5.9512262486885126e-09, + "logits/chosen": -1.6521973609924316, + "logits/rejected": -1.6653779745101929, + "logps/chosen": -214.53932189941406, + "logps/rejected": -343.01220703125, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1199021339416504, + "rewards/margins": 3.3427233695983887, + "rewards/rejected": -1.2228211164474487, + "step": 14569 + }, + { + "epoch": 0.85, + "learning_rate": 5.946767941722286e-09, + "logits/chosen": -1.672150731086731, + "logits/rejected": -1.6430845260620117, + "logps/chosen": -263.2923583984375, + "logps/rejected": -566.852294921875, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.707080066204071, + "rewards/margins": 3.1565065383911133, + "rewards/rejected": -2.4494264125823975, + "step": 14570 + }, + { + "epoch": 0.85, + "learning_rate": 5.942311199742362e-09, + "logits/chosen": -1.7903783321380615, + "logits/rejected": -1.813849925994873, + "logps/chosen": -158.88580322265625, + "logps/rejected": -398.2958984375, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.303381323814392, + "rewards/margins": 4.956793308258057, + "rewards/rejected": -3.653411865234375, + "step": 14571 + }, + { + "epoch": 0.85, + "learning_rate": 5.937856022907045e-09, + "logits/chosen": -1.8001571893692017, + "logits/rejected": -1.7996875047683716, + "logps/chosen": -33.790523529052734, + "logps/rejected": -101.48883056640625, + "loss": 0.4238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24984245002269745, + "rewards/margins": 1.0036503076553345, + "rewards/rejected": -0.7538078427314758, + "step": 14572 + }, + { + "epoch": 0.85, + "learning_rate": 5.93340241137461e-09, + "logits/chosen": -1.8875148296356201, + "logits/rejected": -1.8756413459777832, + "logps/chosen": -221.97390747070312, + "logps/rejected": -551.7528076171875, + "loss": 0.0847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4192138612270355, + "rewards/margins": 2.5923705101013184, + "rewards/rejected": -2.17315673828125, + "step": 14573 + }, + { + "epoch": 0.85, + "learning_rate": 5.92895036530327e-09, + "logits/chosen": -1.8158283233642578, + "logits/rejected": -1.8223724365234375, + "logps/chosen": -65.92153930664062, + "logps/rejected": -199.9718017578125, + "loss": 0.2174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5071678161621094, + "rewards/margins": 2.0234122276306152, + "rewards/rejected": -1.5162445306777954, + "step": 14574 + }, + { + "epoch": 0.85, + "learning_rate": 5.924499884851203e-09, + "logits/chosen": -1.8396432399749756, + "logits/rejected": -1.9418178796768188, + "logps/chosen": -223.56875610351562, + "logps/rejected": -332.35986328125, + "loss": 0.0609, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8905868530273438, + "rewards/margins": 2.2994918823242188, + "rewards/rejected": -0.408905029296875, + "step": 14575 + }, + { + "epoch": 0.85, + "learning_rate": 5.920050970176477e-09, + "logits/chosen": -1.8002821207046509, + "logits/rejected": -1.901658535003662, + "logps/chosen": -193.62937927246094, + "logps/rejected": -151.72879028320312, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.204301595687866, + "rewards/margins": 2.882838487625122, + "rewards/rejected": 0.321463018655777, + "step": 14576 + }, + { + "epoch": 0.85, + "learning_rate": 5.915603621437182e-09, + "logits/chosen": -1.8179142475128174, + "logits/rejected": -1.8254797458648682, + "logps/chosen": -183.1029052734375, + "logps/rejected": -569.2135620117188, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.746272325515747, + "rewards/margins": 6.835390090942383, + "rewards/rejected": -4.089117527008057, + "step": 14577 + }, + { + "epoch": 0.85, + "learning_rate": 5.91115783879127e-09, + "logits/chosen": -1.9982951879501343, + "logits/rejected": -1.9928746223449707, + "logps/chosen": -14.170085906982422, + "logps/rejected": -108.98160552978516, + "loss": 0.3263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009049988351762295, + "rewards/margins": 2.582897186279297, + "rewards/rejected": -2.5738472938537598, + "step": 14578 + }, + { + "epoch": 0.85, + "learning_rate": 5.906713622396703e-09, + "logits/chosen": -1.7541321516036987, + "logits/rejected": -1.751442313194275, + "logps/chosen": -80.7255859375, + "logps/rejected": -155.45013427734375, + "loss": 0.1644, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3190354108810425, + "rewards/margins": 1.8054200410842896, + "rewards/rejected": -0.4863846004009247, + "step": 14579 + }, + { + "epoch": 0.85, + "learning_rate": 5.902270972411339e-09, + "logits/chosen": -1.689721941947937, + "logits/rejected": -1.7702155113220215, + "logps/chosen": -187.58021545410156, + "logps/rejected": -198.49041748046875, + "loss": 0.1421, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0668747425079346, + "rewards/margins": 1.2900314331054688, + "rewards/rejected": 0.776843249797821, + "step": 14580 + }, + { + "epoch": 0.85, + "learning_rate": 5.897829888993028e-09, + "logits/chosen": -1.8593411445617676, + "logits/rejected": -1.867851734161377, + "logps/chosen": -17.563228607177734, + "logps/rejected": -121.5934829711914, + "loss": 0.2382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6661545038223267, + "rewards/margins": 2.3109123706817627, + "rewards/rejected": -1.644757866859436, + "step": 14581 + }, + { + "epoch": 0.85, + "learning_rate": 5.893390372299523e-09, + "logits/chosen": -1.7799025774002075, + "logits/rejected": -1.7797818183898926, + "logps/chosen": -23.58742904663086, + "logps/rejected": -311.004150390625, + "loss": 0.2068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2556629180908203, + "rewards/margins": 4.5745320320129395, + "rewards/rejected": -4.318869113922119, + "step": 14582 + }, + { + "epoch": 0.85, + "learning_rate": 5.888952422488536e-09, + "logits/chosen": -1.8780938386917114, + "logits/rejected": -1.863568663597107, + "logps/chosen": -143.79135131835938, + "logps/rejected": -346.7393798828125, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7628114223480225, + "rewards/margins": 3.6381654739379883, + "rewards/rejected": -0.875353991985321, + "step": 14583 + }, + { + "epoch": 0.85, + "learning_rate": 5.8845160397177296e-09, + "logits/chosen": -1.8458325862884521, + "logits/rejected": -1.8473303318023682, + "logps/chosen": -0.9754669666290283, + "logps/rejected": -57.351768493652344, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08917564153671265, + "rewards/margins": 1.4683516025543213, + "rewards/rejected": -1.5575271844863892, + "step": 14584 + }, + { + "epoch": 0.85, + "learning_rate": 5.880081224144706e-09, + "logits/chosen": -1.808983564376831, + "logits/rejected": -1.8041154146194458, + "logps/chosen": -59.42885971069336, + "logps/rejected": -220.47225952148438, + "loss": 0.1403, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.128691554069519, + "rewards/margins": 2.6153676509857178, + "rewards/rejected": -1.4866760969161987, + "step": 14585 + }, + { + "epoch": 0.85, + "learning_rate": 5.875647975927023e-09, + "logits/chosen": -1.8210476636886597, + "logits/rejected": -1.814092755317688, + "logps/chosen": -25.915504455566406, + "logps/rejected": -89.93586730957031, + "loss": 0.4025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3475685119628906, + "rewards/margins": 0.9923942685127258, + "rewards/rejected": -0.6448257565498352, + "step": 14586 + }, + { + "epoch": 0.85, + "learning_rate": 5.871216295222148e-09, + "logits/chosen": -1.9426928758621216, + "logits/rejected": -1.9186079502105713, + "logps/chosen": -206.93222045898438, + "logps/rejected": -417.0382385253906, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.867694139480591, + "rewards/margins": 3.599810838699341, + "rewards/rejected": 0.26788330078125, + "step": 14587 + }, + { + "epoch": 0.85, + "learning_rate": 5.866786182187528e-09, + "logits/chosen": -1.8145588636398315, + "logits/rejected": -1.7976335287094116, + "logps/chosen": -45.22889709472656, + "logps/rejected": -293.51141357421875, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6399585604667664, + "rewards/margins": 3.180595874786377, + "rewards/rejected": -2.540637254714966, + "step": 14588 + }, + { + "epoch": 0.85, + "learning_rate": 5.862357636980536e-09, + "logits/chosen": -1.9819029569625854, + "logits/rejected": -1.975759506225586, + "logps/chosen": -0.8828957676887512, + "logps/rejected": -110.02897644042969, + "loss": 0.3641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49810582399368286, + "rewards/margins": 1.2051706314086914, + "rewards/rejected": -0.7070648074150085, + "step": 14589 + }, + { + "epoch": 0.85, + "learning_rate": 5.85793065975852e-09, + "logits/chosen": -1.7735466957092285, + "logits/rejected": -1.7973915338516235, + "logps/chosen": -183.8760223388672, + "logps/rejected": -324.06927490234375, + "loss": 0.2428, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8076828718185425, + "rewards/margins": 0.6699936389923096, + "rewards/rejected": 1.137689232826233, + "step": 14590 + }, + { + "epoch": 0.85, + "learning_rate": 5.853505250678714e-09, + "logits/chosen": -1.7928814888000488, + "logits/rejected": -1.7986254692077637, + "logps/chosen": -191.19515991210938, + "logps/rejected": -284.93157958984375, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7090651988983154, + "rewards/margins": 2.2742323875427246, + "rewards/rejected": 0.43483278155326843, + "step": 14591 + }, + { + "epoch": 0.85, + "learning_rate": 5.849081409898349e-09, + "logits/chosen": -1.849999189376831, + "logits/rejected": -1.832654356956482, + "logps/chosen": -176.59275817871094, + "logps/rejected": -353.3828125, + "loss": 0.4845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9619888663291931, + "rewards/margins": 1.5543746948242188, + "rewards/rejected": -2.5163636207580566, + "step": 14592 + }, + { + "epoch": 0.85, + "learning_rate": 5.844659137574576e-09, + "logits/chosen": -1.6563866138458252, + "logits/rejected": -1.6143180131912231, + "logps/chosen": -198.0494384765625, + "logps/rejected": -397.797607421875, + "loss": 0.1114, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4839675426483154, + "rewards/margins": 1.4475477933883667, + "rewards/rejected": 1.0364197492599487, + "step": 14593 + }, + { + "epoch": 0.85, + "learning_rate": 5.840238433864508e-09, + "logits/chosen": -1.827600359916687, + "logits/rejected": -1.8365850448608398, + "logps/chosen": -161.2225341796875, + "logps/rejected": -405.89239501953125, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0722198486328125, + "rewards/margins": 7.575918674468994, + "rewards/rejected": -5.503698825836182, + "step": 14594 + }, + { + "epoch": 0.85, + "learning_rate": 5.835819298925165e-09, + "logits/chosen": -2.0433366298675537, + "logits/rejected": -2.03652024269104, + "logps/chosen": -0.00012635457096621394, + "logps/rejected": -159.04103088378906, + "loss": 0.4033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6951046240574215e-06, + "rewards/margins": 2.0823893547058105, + "rewards/rejected": -2.082392930984497, + "step": 14595 + }, + { + "epoch": 0.85, + "learning_rate": 5.831401732913555e-09, + "logits/chosen": -1.8503546714782715, + "logits/rejected": -1.8396430015563965, + "logps/chosen": -27.954303741455078, + "logps/rejected": -339.5892333984375, + "loss": 0.1991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3939575254917145, + "rewards/margins": 4.125433444976807, + "rewards/rejected": -3.731475830078125, + "step": 14596 + }, + { + "epoch": 0.85, + "learning_rate": 5.826985735986606e-09, + "logits/chosen": -1.6990501880645752, + "logits/rejected": -1.702479362487793, + "logps/chosen": -1.3352415561676025, + "logps/rejected": -170.59866333007812, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1491534262895584, + "rewards/margins": 4.359355449676514, + "rewards/rejected": -4.210202217102051, + "step": 14597 + }, + { + "epoch": 0.85, + "learning_rate": 5.822571308301211e-09, + "logits/chosen": -1.940619707107544, + "logits/rejected": -1.95211923122406, + "logps/chosen": -189.3792266845703, + "logps/rejected": -372.7498779296875, + "loss": 0.0762, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2348954677581787, + "rewards/margins": 2.0404038429260254, + "rewards/rejected": 1.1944916248321533, + "step": 14598 + }, + { + "epoch": 0.85, + "learning_rate": 5.818158450014154e-09, + "logits/chosen": -2.0262818336486816, + "logits/rejected": -2.026172161102295, + "logps/chosen": -31.383651733398438, + "logps/rejected": -81.55058288574219, + "loss": 0.5036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42839908599853516, + "rewards/margins": 2.044780969619751, + "rewards/rejected": -2.473180055618286, + "step": 14599 + }, + { + "epoch": 0.85, + "learning_rate": 5.813747161282251e-09, + "logits/chosen": -1.904862403869629, + "logits/rejected": -1.8903497457504272, + "logps/chosen": -135.2010955810547, + "logps/rejected": -214.75106811523438, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4475570917129517, + "rewards/margins": 0.9907639026641846, + "rewards/rejected": 0.4567932188510895, + "step": 14600 + }, + { + "epoch": 0.85, + "learning_rate": 5.809337442262169e-09, + "logits/chosen": -1.900582194328308, + "logits/rejected": -1.9014041423797607, + "logps/chosen": -1.803310751914978, + "logps/rejected": -143.78602600097656, + "loss": 0.369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010014927946031094, + "rewards/margins": 1.4908788204193115, + "rewards/rejected": -1.4808639287948608, + "step": 14601 + }, + { + "epoch": 0.85, + "learning_rate": 5.8049292931106e-09, + "logits/chosen": -1.9266215562820435, + "logits/rejected": -1.9225902557373047, + "logps/chosen": -52.88914489746094, + "logps/rejected": -255.31719970703125, + "loss": 0.2279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5887901186943054, + "rewards/margins": 4.093993186950684, + "rewards/rejected": -3.5052032470703125, + "step": 14602 + }, + { + "epoch": 0.85, + "learning_rate": 5.8005227139840964e-09, + "logits/chosen": -1.9777990579605103, + "logits/rejected": -1.9516150951385498, + "logps/chosen": -105.05692291259766, + "logps/rejected": -515.541259765625, + "loss": 0.4671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5810753107070923, + "rewards/margins": 8.659811973571777, + "rewards/rejected": -9.240887641906738, + "step": 14603 + }, + { + "epoch": 0.85, + "learning_rate": 5.796117705039244e-09, + "logits/chosen": -1.7027326822280884, + "logits/rejected": -1.715993046760559, + "logps/chosen": -35.622047424316406, + "logps/rejected": -183.94639587402344, + "loss": 0.4678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5092391967773438, + "rewards/margins": 0.701373279094696, + "rewards/rejected": -0.1921340972185135, + "step": 14604 + }, + { + "epoch": 0.85, + "learning_rate": 5.791714266432523e-09, + "logits/chosen": -1.8005788326263428, + "logits/rejected": -1.8542819023132324, + "logps/chosen": -135.3195037841797, + "logps/rejected": -248.1655731201172, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1604782342910767, + "rewards/margins": 1.8639328479766846, + "rewards/rejected": -0.7034546136856079, + "step": 14605 + }, + { + "epoch": 0.85, + "learning_rate": 5.787312398320349e-09, + "logits/chosen": -1.7708094120025635, + "logits/rejected": -1.7791945934295654, + "logps/chosen": -139.1148223876953, + "logps/rejected": -342.1773681640625, + "loss": 0.131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0346558094024658, + "rewards/margins": 3.1576662063598633, + "rewards/rejected": -2.1230103969573975, + "step": 14606 + }, + { + "epoch": 0.85, + "learning_rate": 5.7829121008590995e-09, + "logits/chosen": -2.073124885559082, + "logits/rejected": -2.06891131401062, + "logps/chosen": -39.81739807128906, + "logps/rejected": -249.158935546875, + "loss": 0.0904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4912704229354858, + "rewards/margins": 3.970689296722412, + "rewards/rejected": -2.479418992996216, + "step": 14607 + }, + { + "epoch": 0.85, + "learning_rate": 5.7785133742051075e-09, + "logits/chosen": -1.8480035066604614, + "logits/rejected": -1.835071086883545, + "logps/chosen": -256.1287536621094, + "logps/rejected": -311.8966369628906, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6600005626678467, + "rewards/margins": 0.06771540641784668, + "rewards/rejected": 2.59228515625, + "step": 14608 + }, + { + "epoch": 0.85, + "learning_rate": 5.7741162185146405e-09, + "logits/chosen": -2.0066633224487305, + "logits/rejected": -1.9980412721633911, + "logps/chosen": -149.694580078125, + "logps/rejected": -333.18194580078125, + "loss": 0.2016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1528122425079346, + "rewards/margins": 1.691157579421997, + "rewards/rejected": -0.5383453369140625, + "step": 14609 + }, + { + "epoch": 0.85, + "learning_rate": 5.769720633943892e-09, + "logits/chosen": -1.9181592464447021, + "logits/rejected": -1.9162461757659912, + "logps/chosen": -9.596002928446978e-05, + "logps/rejected": -188.77383422851562, + "loss": 0.3348, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7190782222751295e-06, + "rewards/margins": 5.8452582359313965, + "rewards/rejected": -5.845262050628662, + "step": 14610 + }, + { + "epoch": 0.85, + "learning_rate": 5.7653266206490136e-09, + "logits/chosen": -1.7771483659744263, + "logits/rejected": -1.773385763168335, + "logps/chosen": -11.73556900024414, + "logps/rejected": -145.3221893310547, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3122270703315735, + "rewards/margins": 2.7147867679595947, + "rewards/rejected": -2.402559757232666, + "step": 14611 + }, + { + "epoch": 0.85, + "learning_rate": 5.760934178786109e-09, + "logits/chosen": -1.9508148431777954, + "logits/rejected": -1.9480386972427368, + "logps/chosen": -0.057669252157211304, + "logps/rejected": -183.716796875, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004249539226293564, + "rewards/margins": 4.294106960296631, + "rewards/rejected": -4.289857387542725, + "step": 14612 + }, + { + "epoch": 0.85, + "learning_rate": 5.75654330851123e-09, + "logits/chosen": -2.133244514465332, + "logits/rejected": -2.1265411376953125, + "logps/chosen": -12.52454948425293, + "logps/rejected": -229.26002502441406, + "loss": 0.4177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15079288184642792, + "rewards/margins": 2.510204315185547, + "rewards/rejected": -2.660997152328491, + "step": 14613 + }, + { + "epoch": 0.85, + "learning_rate": 5.752154009980342e-09, + "logits/chosen": -1.7464680671691895, + "logits/rejected": -1.7853032350540161, + "logps/chosen": -178.31088256835938, + "logps/rejected": -322.6765441894531, + "loss": 0.0844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4784882068634033, + "rewards/margins": 2.17270827293396, + "rewards/rejected": -0.6942200064659119, + "step": 14614 + }, + { + "epoch": 0.85, + "learning_rate": 5.7477662833493865e-09, + "logits/chosen": -1.7657274007797241, + "logits/rejected": -1.7563523054122925, + "logps/chosen": -222.736572265625, + "logps/rejected": -421.78900146484375, + "loss": 0.0309, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1491317749023438, + "rewards/margins": 3.8805527687072754, + "rewards/rejected": -1.731420874595642, + "step": 14615 + }, + { + "epoch": 0.85, + "learning_rate": 5.743380128774228e-09, + "logits/chosen": -2.008051633834839, + "logits/rejected": -2.000674247741699, + "logps/chosen": -33.48978042602539, + "logps/rejected": -84.02261352539062, + "loss": 0.5211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03574371337890625, + "rewards/margins": 0.8375992178916931, + "rewards/rejected": -0.8733429312705994, + "step": 14616 + }, + { + "epoch": 0.85, + "learning_rate": 5.738995546410702e-09, + "logits/chosen": -1.7280895709991455, + "logits/rejected": -1.7243609428405762, + "logps/chosen": -44.61696243286133, + "logps/rejected": -174.60792541503906, + "loss": 0.4339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5625514984130859, + "rewards/margins": 0.4890666902065277, + "rewards/rejected": 0.07348480075597763, + "step": 14617 + }, + { + "epoch": 0.85, + "learning_rate": 5.734612536414541e-09, + "logits/chosen": -1.8662041425704956, + "logits/rejected": -1.8445476293563843, + "logps/chosen": -197.70574951171875, + "logps/rejected": -309.0455017089844, + "loss": 0.0959, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8994996547698975, + "rewards/margins": 1.811105489730835, + "rewards/rejected": 1.0883941650390625, + "step": 14618 + }, + { + "epoch": 0.85, + "learning_rate": 5.730231098941485e-09, + "logits/chosen": -1.9583947658538818, + "logits/rejected": -1.9759448766708374, + "logps/chosen": -226.0043487548828, + "logps/rejected": -410.8230285644531, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2037842273712158, + "rewards/margins": 6.2789459228515625, + "rewards/rejected": -5.075161933898926, + "step": 14619 + }, + { + "epoch": 0.85, + "learning_rate": 5.7258512341471555e-09, + "logits/chosen": -2.085193157196045, + "logits/rejected": -2.0833730697631836, + "logps/chosen": -0.0007852257695049047, + "logps/rejected": -243.53204345703125, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.243995161028579e-05, + "rewards/margins": 6.346787929534912, + "rewards/rejected": -6.34682035446167, + "step": 14620 + }, + { + "epoch": 0.85, + "learning_rate": 5.721472942187172e-09, + "logits/chosen": -1.8679770231246948, + "logits/rejected": -1.8762986660003662, + "logps/chosen": -255.48709106445312, + "logps/rejected": -535.2440185546875, + "loss": 0.0683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8189148306846619, + "rewards/margins": 6.3182525634765625, + "rewards/rejected": -5.499337673187256, + "step": 14621 + }, + { + "epoch": 0.85, + "learning_rate": 5.7170962232170325e-09, + "logits/chosen": -1.8744925260543823, + "logits/rejected": -1.8668705224990845, + "logps/chosen": -31.275362014770508, + "logps/rejected": -394.6724853515625, + "loss": 0.2788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3281663954257965, + "rewards/margins": 7.440434455871582, + "rewards/rejected": -7.112267971038818, + "step": 14622 + }, + { + "epoch": 0.85, + "learning_rate": 5.712721077392263e-09, + "logits/chosen": -1.9713366031646729, + "logits/rejected": -1.9622732400894165, + "logps/chosen": -0.071417436003685, + "logps/rejected": -96.80894470214844, + "loss": 0.5921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004218443762511015, + "rewards/margins": 0.3883477747440338, + "rewards/rejected": -0.3841293454170227, + "step": 14623 + }, + { + "epoch": 0.85, + "learning_rate": 5.708347504868283e-09, + "logits/chosen": -2.08042311668396, + "logits/rejected": -2.0776026248931885, + "logps/chosen": -15.080828666687012, + "logps/rejected": -390.64617919921875, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.060648441314697266, + "rewards/margins": 5.994932174682617, + "rewards/rejected": -5.93428373336792, + "step": 14624 + }, + { + "epoch": 0.85, + "learning_rate": 5.70397550580044e-09, + "logits/chosen": -1.8598194122314453, + "logits/rejected": -1.8553787469863892, + "logps/chosen": -61.6842155456543, + "logps/rejected": -250.63893127441406, + "loss": 0.1373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.950778603553772, + "rewards/margins": 3.9328970909118652, + "rewards/rejected": -2.9821183681488037, + "step": 14625 + }, + { + "epoch": 0.85, + "learning_rate": 5.6996050803440665e-09, + "logits/chosen": -1.924848198890686, + "logits/rejected": -1.9266573190689087, + "logps/chosen": -33.881866455078125, + "logps/rejected": -120.38617706298828, + "loss": 0.7485, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.09103470295667648, + "rewards/margins": -0.26685333251953125, + "rewards/rejected": 0.17581863701343536, + "step": 14626 + }, + { + "epoch": 0.85, + "learning_rate": 5.695236228654416e-09, + "logits/chosen": -1.9408382177352905, + "logits/rejected": -1.9491759538650513, + "logps/chosen": -30.81340980529785, + "logps/rejected": -268.6363525390625, + "loss": 0.7743, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.474308580160141, + "rewards/margins": -0.15571725368499756, + "rewards/rejected": -0.31859132647514343, + "step": 14627 + }, + { + "epoch": 0.85, + "learning_rate": 5.690868950886701e-09, + "logits/chosen": -1.4491311311721802, + "logits/rejected": -1.4485480785369873, + "logps/chosen": -4.029244155390188e-05, + "logps/rejected": -113.63636779785156, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.695393502312072e-07, + "rewards/margins": 2.6032230854034424, + "rewards/rejected": -2.6032235622406006, + "step": 14628 + }, + { + "epoch": 0.85, + "learning_rate": 5.686503247196051e-09, + "logits/chosen": -1.812064290046692, + "logits/rejected": -1.7993934154510498, + "logps/chosen": -225.78172302246094, + "logps/rejected": -265.6797790527344, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.560682773590088, + "rewards/margins": 2.949336290359497, + "rewards/rejected": 0.611346423625946, + "step": 14629 + }, + { + "epoch": 0.85, + "learning_rate": 5.682139117737567e-09, + "logits/chosen": -1.910997986793518, + "logits/rejected": -1.9078108072280884, + "logps/chosen": -225.9435272216797, + "logps/rejected": -327.3179931640625, + "loss": 0.4628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43521881103515625, + "rewards/margins": 0.9062362909317017, + "rewards/rejected": -1.341455101966858, + "step": 14630 + }, + { + "epoch": 0.85, + "learning_rate": 5.677776562666281e-09, + "logits/chosen": -1.9559340476989746, + "logits/rejected": -1.9576600790023804, + "logps/chosen": -0.055829524993896484, + "logps/rejected": -156.66256713867188, + "loss": 0.3871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005272680427879095, + "rewards/margins": 2.5000038146972656, + "rewards/rejected": -2.5052764415740967, + "step": 14631 + }, + { + "epoch": 0.85, + "learning_rate": 5.673415582137192e-09, + "logits/chosen": -1.9221012592315674, + "logits/rejected": -1.9227849245071411, + "logps/chosen": -135.1338348388672, + "logps/rejected": -318.34686279296875, + "loss": 0.0753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.463568091392517, + "rewards/margins": 3.6637511253356934, + "rewards/rejected": -2.200183153152466, + "step": 14632 + }, + { + "epoch": 0.85, + "learning_rate": 5.669056176305187e-09, + "logits/chosen": -1.709732174873352, + "logits/rejected": -1.7563867568969727, + "logps/chosen": -195.7750244140625, + "logps/rejected": -353.31890869140625, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1689529418945312, + "rewards/margins": 2.596235752105713, + "rewards/rejected": -0.4272827208042145, + "step": 14633 + }, + { + "epoch": 0.85, + "learning_rate": 5.6646983453251586e-09, + "logits/chosen": -1.847802996635437, + "logits/rejected": -1.9025390148162842, + "logps/chosen": -195.11497497558594, + "logps/rejected": -382.05499267578125, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.590358018875122, + "rewards/margins": 3.215235948562622, + "rewards/rejected": -0.6248779296875, + "step": 14634 + }, + { + "epoch": 0.85, + "learning_rate": 5.6603420893519105e-09, + "logits/chosen": -1.9415959119796753, + "logits/rejected": -1.948358178138733, + "logps/chosen": -228.04989624023438, + "logps/rejected": -390.48968505859375, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.32987380027771, + "rewards/margins": 4.66633939743042, + "rewards/rejected": -2.33646559715271, + "step": 14635 + }, + { + "epoch": 0.85, + "learning_rate": 5.6559874085402134e-09, + "logits/chosen": -1.8767343759536743, + "logits/rejected": -1.8784950971603394, + "logps/chosen": -0.07381618767976761, + "logps/rejected": -111.52340698242188, + "loss": 0.5682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004090914037078619, + "rewards/margins": 0.537261962890625, + "rewards/rejected": -0.5413528680801392, + "step": 14636 + }, + { + "epoch": 0.85, + "learning_rate": 5.651634303044728e-09, + "logits/chosen": -1.9923810958862305, + "logits/rejected": -2.0035359859466553, + "logps/chosen": -148.60604858398438, + "logps/rejected": -346.6144104003906, + "loss": 0.1742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13135376572608948, + "rewards/margins": 3.274472236633301, + "rewards/rejected": -3.143118381500244, + "step": 14637 + }, + { + "epoch": 0.85, + "learning_rate": 5.647282773020145e-09, + "logits/chosen": -1.7786625623703003, + "logits/rejected": -1.8051707744598389, + "logps/chosen": -199.8172607421875, + "logps/rejected": -609.4376220703125, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.061908006668091, + "rewards/margins": 10.600293159484863, + "rewards/rejected": -8.538385391235352, + "step": 14638 + }, + { + "epoch": 0.85, + "learning_rate": 5.6429328186210215e-09, + "logits/chosen": -1.888508915901184, + "logits/rejected": -1.8612513542175293, + "logps/chosen": -181.60665893554688, + "logps/rejected": -372.94964599609375, + "loss": 0.1385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4784119129180908, + "rewards/margins": 2.8474245071411133, + "rewards/rejected": -1.369012475013733, + "step": 14639 + }, + { + "epoch": 0.85, + "learning_rate": 5.638584440001909e-09, + "logits/chosen": -1.9772320985794067, + "logits/rejected": -1.9590283632278442, + "logps/chosen": -14.6265869140625, + "logps/rejected": -195.3597412109375, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23789425194263458, + "rewards/margins": 4.536283016204834, + "rewards/rejected": -4.298388957977295, + "step": 14640 + }, + { + "epoch": 0.85, + "learning_rate": 5.6342376373172514e-09, + "logits/chosen": -2.0080573558807373, + "logits/rejected": -1.9978859424591064, + "logps/chosen": -2.3361551761627197, + "logps/rejected": -394.9264221191406, + "loss": 0.3346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029778648167848587, + "rewards/margins": 7.160192489624023, + "rewards/rejected": -7.130414009094238, + "step": 14641 + }, + { + "epoch": 0.85, + "learning_rate": 5.629892410721509e-09, + "logits/chosen": -1.923294186592102, + "logits/rejected": -1.9075928926467896, + "logps/chosen": -3.7431400414789096e-05, + "logps/rejected": -307.35614013671875, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1443898983998224e-06, + "rewards/margins": 6.59316349029541, + "rewards/rejected": -6.593162536621094, + "step": 14642 + }, + { + "epoch": 0.85, + "learning_rate": 5.62554876036902e-09, + "logits/chosen": -1.72941255569458, + "logits/rejected": -1.7338860034942627, + "logps/chosen": -257.80426025390625, + "logps/rejected": -299.138916015625, + "loss": 0.1095, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.388195753097534, + "rewards/margins": 1.8508177995681763, + "rewards/rejected": 0.5373779535293579, + "step": 14643 + }, + { + "epoch": 0.85, + "learning_rate": 5.621206686414093e-09, + "logits/chosen": -1.9230201244354248, + "logits/rejected": -1.9427058696746826, + "logps/chosen": -182.76075744628906, + "logps/rejected": -253.29519653320312, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8476943969726562, + "rewards/margins": 3.243910312652588, + "rewards/rejected": -0.3962158262729645, + "step": 14644 + }, + { + "epoch": 0.85, + "learning_rate": 5.616866189010988e-09, + "logits/chosen": -1.8857364654541016, + "logits/rejected": -1.8809651136398315, + "logps/chosen": -238.24603271484375, + "logps/rejected": -420.8890380859375, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5565932989120483, + "rewards/margins": 4.0964674949646, + "rewards/rejected": -2.539874315261841, + "step": 14645 + }, + { + "epoch": 0.85, + "learning_rate": 5.612527268313894e-09, + "logits/chosen": -1.9335711002349854, + "logits/rejected": -1.9330625534057617, + "logps/chosen": -49.91110610961914, + "logps/rejected": -190.8324737548828, + "loss": 0.1141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4681462049484253, + "rewards/margins": 2.4578022956848145, + "rewards/rejected": -0.9896560907363892, + "step": 14646 + }, + { + "epoch": 0.85, + "learning_rate": 5.608189924476964e-09, + "logits/chosen": -1.87087881565094, + "logits/rejected": -1.8577919006347656, + "logps/chosen": -35.040069580078125, + "logps/rejected": -189.3655242919922, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.757293701171875, + "rewards/margins": 3.1172332763671875, + "rewards/rejected": -2.3599395751953125, + "step": 14647 + }, + { + "epoch": 0.85, + "learning_rate": 5.6038541576542644e-09, + "logits/chosen": -1.7748336791992188, + "logits/rejected": -1.7617290019989014, + "logps/chosen": -81.68859100341797, + "logps/rejected": -419.15960693359375, + "loss": 0.2163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5424888730049133, + "rewards/margins": 7.316158771514893, + "rewards/rejected": -6.773669719696045, + "step": 14648 + }, + { + "epoch": 0.85, + "learning_rate": 5.599519967999827e-09, + "logits/chosen": -1.9191648960113525, + "logits/rejected": -1.915900468826294, + "logps/chosen": -46.23939895629883, + "logps/rejected": -147.6468048095703, + "loss": 0.6645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6211925745010376, + "rewards/margins": 1.0492069721221924, + "rewards/rejected": -1.67039954662323, + "step": 14649 + }, + { + "epoch": 0.85, + "learning_rate": 5.59518735566763e-09, + "logits/chosen": -1.9806957244873047, + "logits/rejected": -1.982588529586792, + "logps/chosen": -0.00010263785225106403, + "logps/rejected": -170.46377563476562, + "loss": 0.3784, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.362781510280911e-06, + "rewards/margins": 2.720409870147705, + "rewards/rejected": -2.7204055786132812, + "step": 14650 + }, + { + "epoch": 0.85, + "learning_rate": 5.590856320811594e-09, + "logits/chosen": -1.8435733318328857, + "logits/rejected": -1.850419521331787, + "logps/chosen": -4.947238922119141, + "logps/rejected": -78.91694641113281, + "loss": 0.5352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07499952614307404, + "rewards/margins": 0.6342188715934753, + "rewards/rejected": -0.5592193603515625, + "step": 14651 + }, + { + "epoch": 0.85, + "learning_rate": 5.586526863585561e-09, + "logits/chosen": -1.8531949520111084, + "logits/rejected": -1.8516618013381958, + "logps/chosen": -110.04688262939453, + "logps/rejected": -281.8884582519531, + "loss": 0.2614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4471298158168793, + "rewards/margins": 2.4075331687927246, + "rewards/rejected": -1.9604034423828125, + "step": 14652 + }, + { + "epoch": 0.85, + "learning_rate": 5.582198984143338e-09, + "logits/chosen": -2.048830270767212, + "logits/rejected": -2.0496649742126465, + "logps/chosen": -1.2086595296859741, + "logps/rejected": -42.962501525878906, + "loss": 0.4013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05828598886728287, + "rewards/margins": 1.7444616556167603, + "rewards/rejected": -1.6861757040023804, + "step": 14653 + }, + { + "epoch": 0.85, + "learning_rate": 5.5778726826386845e-09, + "logits/chosen": -1.7552238702774048, + "logits/rejected": -1.7517871856689453, + "logps/chosen": -5.329041481018066, + "logps/rejected": -31.930957794189453, + "loss": 0.8243, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3617679178714752, + "rewards/margins": -0.31879955530166626, + "rewards/rejected": -0.04296837002038956, + "step": 14654 + }, + { + "epoch": 0.85, + "learning_rate": 5.5735479592252945e-09, + "logits/chosen": -2.027918577194214, + "logits/rejected": -1.9680263996124268, + "logps/chosen": -105.71530151367188, + "logps/rejected": -315.24139404296875, + "loss": 0.1055, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0087172985076904, + "rewards/margins": 2.0175185203552246, + "rewards/rejected": -0.008801269344985485, + "step": 14655 + }, + { + "epoch": 0.85, + "learning_rate": 5.569224814056783e-09, + "logits/chosen": -1.870723843574524, + "logits/rejected": -1.872005820274353, + "logps/chosen": -10.000473022460938, + "logps/rejected": -54.272979736328125, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07504463195800781, + "rewards/margins": 2.3452155590057373, + "rewards/rejected": -2.2701709270477295, + "step": 14656 + }, + { + "epoch": 0.85, + "learning_rate": 5.5649032472867395e-09, + "logits/chosen": -1.699608325958252, + "logits/rejected": -1.718692660331726, + "logps/chosen": -149.87841796875, + "logps/rejected": -323.6931457519531, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0985275506973267, + "rewards/margins": 2.6449875831604004, + "rewards/rejected": -1.5464600324630737, + "step": 14657 + }, + { + "epoch": 0.85, + "learning_rate": 5.560583259068691e-09, + "logits/chosen": -2.0803706645965576, + "logits/rejected": -2.0604777336120605, + "logps/chosen": -14.505345344543457, + "logps/rejected": -393.2283935546875, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0820918083190918, + "rewards/margins": 4.98775577545166, + "rewards/rejected": -4.905663967132568, + "step": 14658 + }, + { + "epoch": 0.85, + "learning_rate": 5.556264849556108e-09, + "logits/chosen": -1.924227237701416, + "logits/rejected": -1.9191588163375854, + "logps/chosen": -68.50044250488281, + "logps/rejected": -307.0343017578125, + "loss": 0.2318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16421814262866974, + "rewards/margins": 3.8904664516448975, + "rewards/rejected": -3.726248264312744, + "step": 14659 + }, + { + "epoch": 0.85, + "learning_rate": 5.5519480189023805e-09, + "logits/chosen": -1.9209794998168945, + "logits/rejected": -1.933670997619629, + "logps/chosen": -213.9014892578125, + "logps/rejected": -317.3692626953125, + "loss": 0.109, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3844666481018066, + "rewards/margins": 1.5625122785568237, + "rewards/rejected": 0.8219543695449829, + "step": 14660 + }, + { + "epoch": 0.85, + "learning_rate": 5.547632767260896e-09, + "logits/chosen": -2.0354249477386475, + "logits/rejected": -2.0259146690368652, + "logps/chosen": -21.766151428222656, + "logps/rejected": -180.34193420410156, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2236778289079666, + "rewards/margins": 2.5801002979278564, + "rewards/rejected": -2.3564224243164062, + "step": 14661 + }, + { + "epoch": 0.85, + "learning_rate": 5.54331909478492e-09, + "logits/chosen": -1.9448318481445312, + "logits/rejected": -1.937988042831421, + "logps/chosen": -146.44586181640625, + "logps/rejected": -248.1106719970703, + "loss": 0.2692, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2313110828399658, + "rewards/margins": 0.6233108639717102, + "rewards/rejected": 0.6080002188682556, + "step": 14662 + }, + { + "epoch": 0.85, + "learning_rate": 5.539007001627727e-09, + "logits/chosen": -1.8921101093292236, + "logits/rejected": -1.8936727046966553, + "logps/chosen": -21.571718215942383, + "logps/rejected": -55.6512451171875, + "loss": 0.9797, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8190227746963501, + "rewards/margins": -0.18523216247558594, + "rewards/rejected": -0.6337906122207642, + "step": 14663 + }, + { + "epoch": 0.85, + "learning_rate": 5.53469648794247e-09, + "logits/chosen": -1.7345234155654907, + "logits/rejected": -1.7061740159988403, + "logps/chosen": -176.75790405273438, + "logps/rejected": -428.901611328125, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7161194086074829, + "rewards/margins": 6.278692722320557, + "rewards/rejected": -5.562573432922363, + "step": 14664 + }, + { + "epoch": 0.85, + "learning_rate": 5.530387553882304e-09, + "logits/chosen": -1.7848705053329468, + "logits/rejected": -1.7899360656738281, + "logps/chosen": -16.817031860351562, + "logps/rejected": -130.49781799316406, + "loss": 0.4267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0919637680053711, + "rewards/margins": 1.4701818227767944, + "rewards/rejected": -1.3782180547714233, + "step": 14665 + }, + { + "epoch": 0.85, + "learning_rate": 5.526080199600302e-09, + "logits/chosen": -1.792453408241272, + "logits/rejected": -1.7920644283294678, + "logps/chosen": -68.4131088256836, + "logps/rejected": -192.57264709472656, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0791329145431519, + "rewards/margins": 1.139168620109558, + "rewards/rejected": -0.06003570556640625, + "step": 14666 + }, + { + "epoch": 0.85, + "learning_rate": 5.5217744252494704e-09, + "logits/chosen": -2.0657894611358643, + "logits/rejected": -2.053427219390869, + "logps/chosen": -30.327848434448242, + "logps/rejected": -425.7112121582031, + "loss": 0.1263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1843088865280151, + "rewards/margins": 10.562034606933594, + "rewards/rejected": -9.377725601196289, + "step": 14667 + }, + { + "epoch": 0.85, + "learning_rate": 5.5174702309827795e-09, + "logits/chosen": -1.8199548721313477, + "logits/rejected": -1.819401502609253, + "logps/chosen": -14.333087921142578, + "logps/rejected": -132.42889404296875, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01650247536599636, + "rewards/margins": 1.9181504249572754, + "rewards/rejected": -1.901647925376892, + "step": 14668 + }, + { + "epoch": 0.85, + "learning_rate": 5.513167616953135e-09, + "logits/chosen": -1.8553026914596558, + "logits/rejected": -1.8604416847229004, + "logps/chosen": -0.007417378947138786, + "logps/rejected": -112.87528991699219, + "loss": 0.5063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007004487561061978, + "rewards/margins": 0.7973693013191223, + "rewards/rejected": -0.7980697751045227, + "step": 14669 + }, + { + "epoch": 0.85, + "learning_rate": 5.508866583313393e-09, + "logits/chosen": -1.9369853734970093, + "logits/rejected": -1.9372438192367554, + "logps/chosen": -0.04080380126833916, + "logps/rejected": -211.42193603515625, + "loss": 0.3765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017409453867003322, + "rewards/margins": 2.840785264968872, + "rewards/rejected": -2.8425261974334717, + "step": 14670 + }, + { + "epoch": 0.85, + "learning_rate": 5.504567130216325e-09, + "logits/chosen": -1.9308807849884033, + "logits/rejected": -1.926516056060791, + "logps/chosen": -148.43612670898438, + "logps/rejected": -194.98464965820312, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4346023797988892, + "rewards/margins": 0.38117218017578125, + "rewards/rejected": 1.053430199623108, + "step": 14671 + }, + { + "epoch": 0.85, + "learning_rate": 5.500269257814688e-09, + "logits/chosen": -1.9611425399780273, + "logits/rejected": -1.9740575551986694, + "logps/chosen": -200.58395385742188, + "logps/rejected": -279.0713195800781, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8074357509613037, + "rewards/margins": 3.2877609729766846, + "rewards/rejected": 0.5196747183799744, + "step": 14672 + }, + { + "epoch": 0.85, + "learning_rate": 5.495972966261159e-09, + "logits/chosen": -1.7620073556900024, + "logits/rejected": -1.7574045658111572, + "logps/chosen": -41.693302154541016, + "logps/rejected": -179.57943725585938, + "loss": 0.4942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0769500732421875, + "rewards/margins": 0.879833996295929, + "rewards/rejected": -0.9567840695381165, + "step": 14673 + }, + { + "epoch": 0.85, + "learning_rate": 5.4916782557083706e-09, + "logits/chosen": -1.9308719635009766, + "logits/rejected": -1.9321341514587402, + "logps/chosen": -37.609275817871094, + "logps/rejected": -115.38720703125, + "loss": 0.0773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.443724513053894, + "rewards/margins": 3.892484188079834, + "rewards/rejected": -2.4487595558166504, + "step": 14674 + }, + { + "epoch": 0.85, + "learning_rate": 5.48738512630888e-09, + "logits/chosen": -2.0594067573547363, + "logits/rejected": -2.054957866668701, + "logps/chosen": -27.101276397705078, + "logps/rejected": -193.505615234375, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6151172518730164, + "rewards/margins": 1.3946857452392578, + "rewards/rejected": -0.7795684933662415, + "step": 14675 + }, + { + "epoch": 0.85, + "learning_rate": 5.483093578215198e-09, + "logits/chosen": -1.8882931470870972, + "logits/rejected": -1.8785895109176636, + "logps/chosen": -133.19009399414062, + "logps/rejected": -258.0482177734375, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2035187482833862, + "rewards/margins": 3.8534560203552246, + "rewards/rejected": -2.649937391281128, + "step": 14676 + }, + { + "epoch": 0.85, + "learning_rate": 5.478803611579791e-09, + "logits/chosen": -2.0060667991638184, + "logits/rejected": -2.008326768875122, + "logps/chosen": -9.417319961357862e-05, + "logps/rejected": -71.32705688476562, + "loss": 0.567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9205455120973056e-06, + "rewards/margins": 0.5231400728225708, + "rewards/rejected": -0.5231429934501648, + "step": 14677 + }, + { + "epoch": 0.85, + "learning_rate": 5.474515226555066e-09, + "logits/chosen": -1.9290050268173218, + "logits/rejected": -1.8942784070968628, + "logps/chosen": -207.91259765625, + "logps/rejected": -325.2669982910156, + "loss": 0.0809, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3023011684417725, + "rewards/margins": 1.9496736526489258, + "rewards/rejected": 0.35262757539749146, + "step": 14678 + }, + { + "epoch": 0.85, + "learning_rate": 5.470228423293333e-09, + "logits/chosen": -1.7556880712509155, + "logits/rejected": -1.7504419088363647, + "logps/chosen": -17.75295639038086, + "logps/rejected": -171.17578125, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8095283508300781, + "rewards/margins": 3.1364142894744873, + "rewards/rejected": -2.326885938644409, + "step": 14679 + }, + { + "epoch": 0.85, + "learning_rate": 5.465943201946932e-09, + "logits/chosen": -1.8727092742919922, + "logits/rejected": -1.8874518871307373, + "logps/chosen": -231.79672241210938, + "logps/rejected": -478.8956604003906, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1000092029571533, + "rewards/margins": 7.301889419555664, + "rewards/rejected": -4.201879978179932, + "step": 14680 + }, + { + "epoch": 0.85, + "learning_rate": 5.461659562668053e-09, + "logits/chosen": -1.923356056213379, + "logits/rejected": -1.9136308431625366, + "logps/chosen": -61.01456832885742, + "logps/rejected": -282.75006103515625, + "loss": 0.1621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8458843231201172, + "rewards/margins": 6.779658317565918, + "rewards/rejected": -5.933773994445801, + "step": 14681 + }, + { + "epoch": 0.85, + "learning_rate": 5.457377505608901e-09, + "logits/chosen": -1.8458682298660278, + "logits/rejected": -1.8339797258377075, + "logps/chosen": -216.1592254638672, + "logps/rejected": -535.858154296875, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.788700819015503, + "rewards/margins": 3.1278364658355713, + "rewards/rejected": -0.3391357362270355, + "step": 14682 + }, + { + "epoch": 0.85, + "learning_rate": 5.453097030921561e-09, + "logits/chosen": -1.8909356594085693, + "logits/rejected": -1.8896052837371826, + "logps/chosen": -36.36260223388672, + "logps/rejected": -181.57029724121094, + "loss": 0.6173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3685581386089325, + "rewards/margins": 0.9029033184051514, + "rewards/rejected": -1.2714614868164062, + "step": 14683 + }, + { + "epoch": 0.85, + "learning_rate": 5.448818138758127e-09, + "logits/chosen": -1.9693208932876587, + "logits/rejected": -1.9656392335891724, + "logps/chosen": -94.45885467529297, + "logps/rejected": -305.7304992675781, + "loss": 0.1854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9848777651786804, + "rewards/margins": 2.5337822437286377, + "rewards/rejected": -1.5489044189453125, + "step": 14684 + }, + { + "epoch": 0.85, + "learning_rate": 5.444540829270605e-09, + "logits/chosen": -1.7896469831466675, + "logits/rejected": -1.7827917337417603, + "logps/chosen": -44.502601623535156, + "logps/rejected": -330.395263671875, + "loss": 0.2914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27439767122268677, + "rewards/margins": 9.185896873474121, + "rewards/rejected": -8.9114990234375, + "step": 14685 + }, + { + "epoch": 0.85, + "learning_rate": 5.440265102610925e-09, + "logits/chosen": -1.9173173904418945, + "logits/rejected": -1.9447294473648071, + "logps/chosen": -383.6121826171875, + "logps/rejected": -646.45263671875, + "loss": 0.0936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42897340655326843, + "rewards/margins": 5.097039699554443, + "rewards/rejected": -4.668066501617432, + "step": 14686 + }, + { + "epoch": 0.85, + "learning_rate": 5.435990958930997e-09, + "logits/chosen": -1.8356196880340576, + "logits/rejected": -1.8459941148757935, + "logps/chosen": -329.19244384765625, + "logps/rejected": -418.7436218261719, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.320349097251892, + "rewards/margins": 5.323343276977539, + "rewards/rejected": -4.002994060516357, + "step": 14687 + }, + { + "epoch": 0.85, + "learning_rate": 5.431718398382651e-09, + "logits/chosen": -1.9387439489364624, + "logits/rejected": -1.939851999282837, + "logps/chosen": -0.00017749435210134834, + "logps/rejected": -203.74295043945312, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0165920432191342e-05, + "rewards/margins": 3.5440053939819336, + "rewards/rejected": -3.54398512840271, + "step": 14688 + }, + { + "epoch": 0.85, + "learning_rate": 5.427447421117693e-09, + "logits/chosen": -1.9488621950149536, + "logits/rejected": -1.9486638307571411, + "logps/chosen": -8.92479133605957, + "logps/rejected": -201.12152099609375, + "loss": 0.2238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40473899245262146, + "rewards/margins": 4.104232311248779, + "rewards/rejected": -3.699493408203125, + "step": 14689 + }, + { + "epoch": 0.85, + "learning_rate": 5.423178027287811e-09, + "logits/chosen": -1.7933326959609985, + "logits/rejected": -1.7871671915054321, + "logps/chosen": -26.831085205078125, + "logps/rejected": -145.18447875976562, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35110875964164734, + "rewards/margins": 2.0712473392486572, + "rewards/rejected": -1.7201385498046875, + "step": 14690 + }, + { + "epoch": 0.85, + "learning_rate": 5.418910217044703e-09, + "logits/chosen": -1.871195673942566, + "logits/rejected": -1.8548719882965088, + "logps/chosen": -269.5565185546875, + "logps/rejected": -651.7894287109375, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3008911609649658, + "rewards/margins": 5.273486137390137, + "rewards/rejected": -3.97259521484375, + "step": 14691 + }, + { + "epoch": 0.85, + "learning_rate": 5.414643990539969e-09, + "logits/chosen": -1.8110524415969849, + "logits/rejected": -1.7937284708023071, + "logps/chosen": -186.18612670898438, + "logps/rejected": -328.84368896484375, + "loss": 0.1392, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4561920166015625, + "rewards/margins": 1.3679839372634888, + "rewards/rejected": 1.0882080793380737, + "step": 14692 + }, + { + "epoch": 0.86, + "learning_rate": 5.4103793479251815e-09, + "logits/chosen": -2.0347228050231934, + "logits/rejected": -2.0246570110321045, + "logps/chosen": -0.00016593144391663373, + "logps/rejected": -144.225830078125, + "loss": 0.4879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.181151583296014e-05, + "rewards/margins": 1.1096217632293701, + "rewards/rejected": -1.1096099615097046, + "step": 14693 + }, + { + "epoch": 0.86, + "learning_rate": 5.406116289351819e-09, + "logits/chosen": -1.9885410070419312, + "logits/rejected": -1.9831781387329102, + "logps/chosen": -2.532548427581787, + "logps/rejected": -115.40840148925781, + "loss": 0.3959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.057833027094602585, + "rewards/margins": 1.6524055004119873, + "rewards/rejected": -1.5945724248886108, + "step": 14694 + }, + { + "epoch": 0.86, + "learning_rate": 5.4018548149713464e-09, + "logits/chosen": -1.9219906330108643, + "logits/rejected": -1.9045110940933228, + "logps/chosen": -226.05194091796875, + "logps/rejected": -404.0331726074219, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6637481451034546, + "rewards/margins": 4.577392578125, + "rewards/rejected": -2.913644552230835, + "step": 14695 + }, + { + "epoch": 0.86, + "learning_rate": 5.3975949249351334e-09, + "logits/chosen": -1.9612915515899658, + "logits/rejected": -1.9555267095565796, + "logps/chosen": -26.77756118774414, + "logps/rejected": -73.6279525756836, + "loss": 0.5878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4218465983867645, + "rewards/margins": 0.8556958436965942, + "rewards/rejected": -1.2775424718856812, + "step": 14696 + }, + { + "epoch": 0.86, + "learning_rate": 5.3933366193945395e-09, + "logits/chosen": -2.142534017562866, + "logits/rejected": -2.140392303466797, + "logps/chosen": -0.0001162263288279064, + "logps/rejected": -176.05401611328125, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.411682195699541e-06, + "rewards/margins": 3.3606791496276855, + "rewards/rejected": -3.360673666000366, + "step": 14697 + }, + { + "epoch": 0.86, + "learning_rate": 5.389079898500809e-09, + "logits/chosen": -1.810878872871399, + "logits/rejected": -1.8064643144607544, + "logps/chosen": -6.675594340777025e-05, + "logps/rejected": -213.84097290039062, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.384091288831769e-07, + "rewards/margins": 3.8228330612182617, + "rewards/rejected": -3.822833299636841, + "step": 14698 + }, + { + "epoch": 0.86, + "learning_rate": 5.3848247624051824e-09, + "logits/chosen": -1.8332852125167847, + "logits/rejected": -1.80316162109375, + "logps/chosen": -203.8709716796875, + "logps/rejected": -383.4937438964844, + "loss": 0.182, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0857880115509033, + "rewards/margins": 1.20159912109375, + "rewards/rejected": 0.8841888308525085, + "step": 14699 + }, + { + "epoch": 0.86, + "learning_rate": 5.380571211258811e-09, + "logits/chosen": -2.001107692718506, + "logits/rejected": -2.0008928775787354, + "logps/chosen": -29.669021606445312, + "logps/rejected": -199.1541748046875, + "loss": 0.1655, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1093510389328003, + "rewards/margins": 2.459953784942627, + "rewards/rejected": -1.3506027460098267, + "step": 14700 + }, + { + "epoch": 0.86, + "learning_rate": 5.3763192452128184e-09, + "logits/chosen": -1.8598493337631226, + "logits/rejected": -1.866448163986206, + "logps/chosen": -64.51689147949219, + "logps/rejected": -268.1881103515625, + "loss": 0.1588, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1290565729141235, + "rewards/margins": 2.6138086318969727, + "rewards/rejected": -1.4847519397735596, + "step": 14701 + }, + { + "epoch": 0.86, + "learning_rate": 5.372068864418228e-09, + "logits/chosen": -1.926344633102417, + "logits/rejected": -1.9271807670593262, + "logps/chosen": -308.7372131347656, + "logps/rejected": -337.212646484375, + "loss": 0.4906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21494446694850922, + "rewards/margins": 0.06132812798023224, + "rewards/rejected": -0.27627259492874146, + "step": 14702 + }, + { + "epoch": 0.86, + "learning_rate": 5.367820069026063e-09, + "logits/chosen": -1.9686120748519897, + "logits/rejected": -1.9384876489639282, + "logps/chosen": -90.23126220703125, + "logps/rejected": -270.5871276855469, + "loss": 0.2174, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7371872067451477, + "rewards/margins": 2.0950303077697754, + "rewards/rejected": -1.357843041419983, + "step": 14703 + }, + { + "epoch": 0.86, + "learning_rate": 5.363572859187249e-09, + "logits/chosen": -1.805724859237671, + "logits/rejected": -1.7953561544418335, + "logps/chosen": -10.203865051269531, + "logps/rejected": -218.70513916015625, + "loss": 0.1622, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1927502155303955, + "rewards/margins": 4.211434364318848, + "rewards/rejected": -3.0186843872070312, + "step": 14704 + }, + { + "epoch": 0.86, + "learning_rate": 5.359327235052657e-09, + "logits/chosen": -2.0732581615448, + "logits/rejected": -2.0740807056427, + "logps/chosen": -0.8113999366760254, + "logps/rejected": -176.8737030029297, + "loss": 0.4105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06661583483219147, + "rewards/margins": 3.3828699588775635, + "rewards/rejected": -3.4494857788085938, + "step": 14705 + }, + { + "epoch": 0.86, + "learning_rate": 5.35508319677313e-09, + "logits/chosen": -1.9771158695220947, + "logits/rejected": -1.979156494140625, + "logps/chosen": -42.09203338623047, + "logps/rejected": -269.20880126953125, + "loss": 0.2542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2297103852033615, + "rewards/margins": 2.4002456665039062, + "rewards/rejected": -2.1705353260040283, + "step": 14706 + }, + { + "epoch": 0.86, + "learning_rate": 5.350840744499424e-09, + "logits/chosen": -1.6912970542907715, + "logits/rejected": -1.6911530494689941, + "logps/chosen": -35.32404327392578, + "logps/rejected": -276.06201171875, + "loss": 0.141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9566757082939148, + "rewards/margins": 5.332864284515381, + "rewards/rejected": -4.3761887550354, + "step": 14707 + }, + { + "epoch": 0.86, + "learning_rate": 5.346599878382274e-09, + "logits/chosen": -2.1029787063598633, + "logits/rejected": -2.0920164585113525, + "logps/chosen": -4.41088342666626, + "logps/rejected": -159.5789031982422, + "loss": 0.3834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09642479568719864, + "rewards/margins": 3.8951172828674316, + "rewards/rejected": -3.991542100906372, + "step": 14708 + }, + { + "epoch": 0.86, + "learning_rate": 5.342360598572304e-09, + "logits/chosen": -2.0077390670776367, + "logits/rejected": -1.9925522804260254, + "logps/chosen": -46.04894256591797, + "logps/rejected": -183.3095703125, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44165346026420593, + "rewards/margins": 1.7216888666152954, + "rewards/rejected": -1.280035376548767, + "step": 14709 + }, + { + "epoch": 0.86, + "learning_rate": 5.338122905220132e-09, + "logits/chosen": -1.7766554355621338, + "logits/rejected": -1.779126763343811, + "logps/chosen": -1.7498788833618164, + "logps/rejected": -127.24591827392578, + "loss": 0.402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1708461344242096, + "rewards/margins": 1.3601665496826172, + "rewards/rejected": -1.18932044506073, + "step": 14710 + }, + { + "epoch": 0.86, + "learning_rate": 5.3338867984763005e-09, + "logits/chosen": -1.8600178956985474, + "logits/rejected": -1.867077112197876, + "logps/chosen": -9.953208923339844, + "logps/rejected": -167.25926208496094, + "loss": 0.2433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.333818256855011, + "rewards/margins": 4.4201130867004395, + "rewards/rejected": -4.086294651031494, + "step": 14711 + }, + { + "epoch": 0.86, + "learning_rate": 5.3296522784913035e-09, + "logits/chosen": -2.181046724319458, + "logits/rejected": -2.167997121810913, + "logps/chosen": -9.324620246887207, + "logps/rejected": -349.1962585449219, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.045540906488895416, + "rewards/margins": 7.053887367248535, + "rewards/rejected": -7.0083465576171875, + "step": 14712 + }, + { + "epoch": 0.86, + "learning_rate": 5.325419345415555e-09, + "logits/chosen": -1.8231078386306763, + "logits/rejected": -1.7895435094833374, + "logps/chosen": -168.85147094726562, + "logps/rejected": -231.07476806640625, + "loss": 0.6011, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.0468018054962158, + "rewards/margins": -0.5182067155838013, + "rewards/rejected": 1.565008521080017, + "step": 14713 + }, + { + "epoch": 0.86, + "learning_rate": 5.321187999399434e-09, + "logits/chosen": -1.823117733001709, + "logits/rejected": -1.8293383121490479, + "logps/chosen": -40.053524017333984, + "logps/rejected": -310.23712158203125, + "loss": 0.125, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0748767852783203, + "rewards/margins": 4.8178606033325195, + "rewards/rejected": -3.7429840564727783, + "step": 14714 + }, + { + "epoch": 0.86, + "learning_rate": 5.316958240593272e-09, + "logits/chosen": -2.0243148803710938, + "logits/rejected": -2.0218772888183594, + "logps/chosen": -31.885562896728516, + "logps/rejected": -217.19558715820312, + "loss": 0.244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3819439113140106, + "rewards/margins": 2.6103649139404297, + "rewards/rejected": -2.2284209728240967, + "step": 14715 + }, + { + "epoch": 0.86, + "learning_rate": 5.3127300691473255e-09, + "logits/chosen": -1.832844853401184, + "logits/rejected": -1.8314634561538696, + "logps/chosen": -219.8782196044922, + "logps/rejected": -253.58694458007812, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4094727039337158, + "rewards/margins": 2.0163896083831787, + "rewards/rejected": -0.6069168448448181, + "step": 14716 + }, + { + "epoch": 0.86, + "learning_rate": 5.308503485211785e-09, + "logits/chosen": -2.074388265609741, + "logits/rejected": -2.0844907760620117, + "logps/chosen": -139.76539611816406, + "logps/rejected": -189.65977478027344, + "loss": 0.4848, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7796890139579773, + "rewards/margins": -0.2839340567588806, + "rewards/rejected": 1.063623070716858, + "step": 14717 + }, + { + "epoch": 0.86, + "learning_rate": 5.304278488936814e-09, + "logits/chosen": -1.8699637651443481, + "logits/rejected": -1.8635475635528564, + "logps/chosen": -18.893173217773438, + "logps/rejected": -234.5789337158203, + "loss": 0.1711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8445337414741516, + "rewards/margins": 2.838918447494507, + "rewards/rejected": -1.994384765625, + "step": 14718 + }, + { + "epoch": 0.86, + "learning_rate": 5.300055080472499e-09, + "logits/chosen": -1.9659008979797363, + "logits/rejected": -1.9718632698059082, + "logps/chosen": -0.4163759648799896, + "logps/rejected": -58.7889404296875, + "loss": 0.4415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14224611222743988, + "rewards/margins": 1.161000370979309, + "rewards/rejected": -1.018754243850708, + "step": 14719 + }, + { + "epoch": 0.86, + "learning_rate": 5.29583325996889e-09, + "logits/chosen": -1.8310414552688599, + "logits/rejected": -1.8331695795059204, + "logps/chosen": -171.31878662109375, + "logps/rejected": -273.1904296875, + "loss": 0.0605, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.015484571456909, + "rewards/margins": 2.4272398948669434, + "rewards/rejected": 0.588244616985321, + "step": 14720 + }, + { + "epoch": 0.86, + "learning_rate": 5.291613027575936e-09, + "logits/chosen": -1.8074878454208374, + "logits/rejected": -1.8055649995803833, + "logps/chosen": -216.90313720703125, + "logps/rejected": -375.0262756347656, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3526337146759033, + "rewards/margins": 2.8321290016174316, + "rewards/rejected": -0.47949525713920593, + "step": 14721 + }, + { + "epoch": 0.86, + "learning_rate": 5.287394383443594e-09, + "logits/chosen": -1.977242350578308, + "logits/rejected": -1.9741376638412476, + "logps/chosen": -90.33319854736328, + "logps/rejected": -226.11050415039062, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2302558869123459, + "rewards/margins": 2.3900198936462402, + "rewards/rejected": -2.159764051437378, + "step": 14722 + }, + { + "epoch": 0.86, + "learning_rate": 5.283177327721716e-09, + "logits/chosen": -1.8832974433898926, + "logits/rejected": -1.9397926330566406, + "logps/chosen": -178.33180236816406, + "logps/rejected": -246.4634246826172, + "loss": 0.1018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7056869268417358, + "rewards/margins": 1.7542632818222046, + "rewards/rejected": -0.04857635498046875, + "step": 14723 + }, + { + "epoch": 0.86, + "learning_rate": 5.278961860560116e-09, + "logits/chosen": -1.778228998184204, + "logits/rejected": -1.7639225721359253, + "logps/chosen": -45.820682525634766, + "logps/rejected": -191.23602294921875, + "loss": 0.1772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1623848676681519, + "rewards/margins": 1.8180184364318848, + "rewards/rejected": -0.6556335687637329, + "step": 14724 + }, + { + "epoch": 0.86, + "learning_rate": 5.274747982108524e-09, + "logits/chosen": -1.939518928527832, + "logits/rejected": -1.9370933771133423, + "logps/chosen": -8.658651351928711, + "logps/rejected": -137.7061309814453, + "loss": 0.4688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061410047113895416, + "rewards/margins": 1.5754834413528442, + "rewards/rejected": -1.6368935108184814, + "step": 14725 + }, + { + "epoch": 0.86, + "learning_rate": 5.2705356925166754e-09, + "logits/chosen": -2.0830719470977783, + "logits/rejected": -2.0833544731140137, + "logps/chosen": -13.328444480895996, + "logps/rejected": -277.981689453125, + "loss": 0.1862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6117369532585144, + "rewards/margins": 3.8778700828552246, + "rewards/rejected": -3.2661330699920654, + "step": 14726 + }, + { + "epoch": 0.86, + "learning_rate": 5.2663249919342016e-09, + "logits/chosen": -1.833696722984314, + "logits/rejected": -1.8474845886230469, + "logps/chosen": -222.7460479736328, + "logps/rejected": -309.6640625, + "loss": 0.0667, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3410125970840454, + "rewards/margins": 3.0006256103515625, + "rewards/rejected": -1.659613013267517, + "step": 14727 + }, + { + "epoch": 0.86, + "learning_rate": 5.262115880510664e-09, + "logits/chosen": -1.9413520097732544, + "logits/rejected": -1.9342752695083618, + "logps/chosen": -84.26614379882812, + "logps/rejected": -154.57101440429688, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9127799868583679, + "rewards/margins": 1.0512046813964844, + "rewards/rejected": -0.13842467963695526, + "step": 14728 + }, + { + "epoch": 0.86, + "learning_rate": 5.257908358395613e-09, + "logits/chosen": -2.0116007328033447, + "logits/rejected": -2.009094476699829, + "logps/chosen": -49.734153747558594, + "logps/rejected": -312.5473327636719, + "loss": 0.2578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18665389716625214, + "rewards/margins": 4.993111610412598, + "rewards/rejected": -4.80645751953125, + "step": 14729 + }, + { + "epoch": 0.86, + "learning_rate": 5.253702425738515e-09, + "logits/chosen": -1.8935385942459106, + "logits/rejected": -1.8937166929244995, + "logps/chosen": -185.9138641357422, + "logps/rejected": -316.9355163574219, + "loss": 0.084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9504791498184204, + "rewards/margins": 2.463470458984375, + "rewards/rejected": -1.5129913091659546, + "step": 14730 + }, + { + "epoch": 0.86, + "learning_rate": 5.249498082688791e-09, + "logits/chosen": -1.7247401475906372, + "logits/rejected": -1.7240396738052368, + "logps/chosen": -0.0005985072930343449, + "logps/rejected": -248.3678436279297, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.178159229515586e-05, + "rewards/margins": 8.31775951385498, + "rewards/rejected": -8.317781448364258, + "step": 14731 + }, + { + "epoch": 0.86, + "learning_rate": 5.245295329395788e-09, + "logits/chosen": -1.9703038930892944, + "logits/rejected": -1.8964271545410156, + "logps/chosen": -211.58102416992188, + "logps/rejected": -413.8326416015625, + "loss": 0.0918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7970367670059204, + "rewards/margins": 2.902606248855591, + "rewards/rejected": -1.1055694818496704, + "step": 14732 + }, + { + "epoch": 0.86, + "learning_rate": 5.241094166008808e-09, + "logits/chosen": -1.9513154029846191, + "logits/rejected": -1.953501582145691, + "logps/chosen": -0.007930855266749859, + "logps/rejected": -86.59233093261719, + "loss": 0.518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010346967028453946, + "rewards/margins": 0.8003190755844116, + "rewards/rejected": -0.7992843985557556, + "step": 14733 + }, + { + "epoch": 0.86, + "learning_rate": 5.23689459267711e-09, + "logits/chosen": -1.9279547929763794, + "logits/rejected": -1.929359793663025, + "logps/chosen": -1.3756978511810303, + "logps/rejected": -77.97596740722656, + "loss": 0.7132, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02089240588247776, + "rewards/margins": -0.2058531939983368, + "rewards/rejected": 0.22674560546875, + "step": 14734 + }, + { + "epoch": 0.86, + "learning_rate": 5.23269660954988e-09, + "logits/chosen": -1.7409065961837769, + "logits/rejected": -1.7614763975143433, + "logps/chosen": -250.3966064453125, + "logps/rejected": -253.418212890625, + "loss": 0.1459, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0852296352386475, + "rewards/margins": 1.1804535388946533, + "rewards/rejected": 0.9047760367393494, + "step": 14735 + }, + { + "epoch": 0.86, + "learning_rate": 5.228500216776238e-09, + "logits/chosen": -1.8074525594711304, + "logits/rejected": -1.8126044273376465, + "logps/chosen": -231.71722412109375, + "logps/rejected": -329.9865417480469, + "loss": 0.0743, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.467459201812744, + "rewards/margins": 1.9923768043518066, + "rewards/rejected": 1.4750823974609375, + "step": 14736 + }, + { + "epoch": 0.86, + "learning_rate": 5.22430541450527e-09, + "logits/chosen": -1.918848991394043, + "logits/rejected": -1.915216326713562, + "logps/chosen": -2.081271171569824, + "logps/rejected": -163.26206970214844, + "loss": 0.5542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017981363460421562, + "rewards/margins": 0.7967884540557861, + "rewards/rejected": -0.7788071036338806, + "step": 14737 + }, + { + "epoch": 0.86, + "learning_rate": 5.220112202885996e-09, + "logits/chosen": -1.8650410175323486, + "logits/rejected": -1.8549138307571411, + "logps/chosen": -132.83004760742188, + "logps/rejected": -279.3715515136719, + "loss": 0.0729, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4685349464416504, + "rewards/margins": 2.5181045532226562, + "rewards/rejected": -0.04956970363855362, + "step": 14738 + }, + { + "epoch": 0.86, + "learning_rate": 5.21592058206739e-09, + "logits/chosen": -2.0531466007232666, + "logits/rejected": -2.0508203506469727, + "logps/chosen": -3.542081356048584, + "logps/rejected": -104.00032806396484, + "loss": 0.4246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02234664000570774, + "rewards/margins": 1.2718240022659302, + "rewards/rejected": -1.2494773864746094, + "step": 14739 + }, + { + "epoch": 0.86, + "learning_rate": 5.211730552198323e-09, + "logits/chosen": -1.8700132369995117, + "logits/rejected": -1.8324581384658813, + "logps/chosen": -178.02838134765625, + "logps/rejected": -310.3026123046875, + "loss": 0.0783, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.225045919418335, + "rewards/margins": 1.9753022193908691, + "rewards/rejected": 1.2497437000274658, + "step": 14740 + }, + { + "epoch": 0.86, + "learning_rate": 5.207542113427693e-09, + "logits/chosen": -1.7552721500396729, + "logits/rejected": -1.759136438369751, + "logps/chosen": -3.8952560424804688, + "logps/rejected": -107.22808837890625, + "loss": 0.4543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09466361999511719, + "rewards/margins": 1.2328014373779297, + "rewards/rejected": -1.1381378173828125, + "step": 14741 + }, + { + "epoch": 0.86, + "learning_rate": 5.203355265904263e-09, + "logits/chosen": -1.8530563116073608, + "logits/rejected": -1.8485466241836548, + "logps/chosen": -0.5982152223587036, + "logps/rejected": -39.84051513671875, + "loss": 0.7303, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03932187706232071, + "rewards/margins": -0.3042488396167755, + "rewards/rejected": 0.3435707092285156, + "step": 14742 + }, + { + "epoch": 0.86, + "learning_rate": 5.199170009776782e-09, + "logits/chosen": -1.8942610025405884, + "logits/rejected": -1.8839764595031738, + "logps/chosen": -0.00042911150376312435, + "logps/rejected": -100.22706604003906, + "loss": 0.4847, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.788831185782328e-05, + "rewards/margins": 1.0922012329101562, + "rewards/rejected": -1.0921432971954346, + "step": 14743 + }, + { + "epoch": 0.86, + "learning_rate": 5.194986345193914e-09, + "logits/chosen": -1.6425083875656128, + "logits/rejected": -1.7169023752212524, + "logps/chosen": -145.70455932617188, + "logps/rejected": -386.3542785644531, + "loss": 0.0687, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1601440906524658, + "rewards/margins": 2.955514430999756, + "rewards/rejected": -1.7953704595565796, + "step": 14744 + }, + { + "epoch": 0.86, + "learning_rate": 5.1908042723043e-09, + "logits/chosen": -1.7872707843780518, + "logits/rejected": -1.783968448638916, + "logps/chosen": -4.4891533851623535, + "logps/rejected": -91.75200653076172, + "loss": 0.6055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06832652539014816, + "rewards/margins": 0.34196171164512634, + "rewards/rejected": -0.4102882444858551, + "step": 14745 + }, + { + "epoch": 0.86, + "learning_rate": 5.186623791256517e-09, + "logits/chosen": -1.9227319955825806, + "logits/rejected": -1.9224904775619507, + "logps/chosen": -25.225862503051758, + "logps/rejected": -214.83767700195312, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4797739088535309, + "rewards/margins": 2.0926127433776855, + "rewards/rejected": -1.6128387451171875, + "step": 14746 + }, + { + "epoch": 0.86, + "learning_rate": 5.1824449021990514e-09, + "logits/chosen": -1.918266773223877, + "logits/rejected": -1.9033843278884888, + "logps/chosen": -28.242237091064453, + "logps/rejected": -181.3760528564453, + "loss": 0.4173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2936159074306488, + "rewards/margins": 0.8455644845962524, + "rewards/rejected": -0.5519485473632812, + "step": 14747 + }, + { + "epoch": 0.86, + "learning_rate": 5.178267605280367e-09, + "logits/chosen": -1.9384329319000244, + "logits/rejected": -1.9282761812210083, + "logps/chosen": -0.4880427122116089, + "logps/rejected": -113.71219635009766, + "loss": 0.4951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06566369533538818, + "rewards/margins": 0.9554471969604492, + "rewards/rejected": -0.889783501625061, + "step": 14748 + }, + { + "epoch": 0.86, + "learning_rate": 5.1740919006488695e-09, + "logits/chosen": -1.9776380062103271, + "logits/rejected": -1.9728244543075562, + "logps/chosen": -58.594783782958984, + "logps/rejected": -264.867919921875, + "loss": 0.197, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8004894256591797, + "rewards/margins": 1.8780971765518188, + "rewards/rejected": -1.0776077508926392, + "step": 14749 + }, + { + "epoch": 0.86, + "learning_rate": 5.169917788452904e-09, + "logits/chosen": -1.8466805219650269, + "logits/rejected": -1.8372735977172852, + "logps/chosen": -10.773847579956055, + "logps/rejected": -222.24734497070312, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0861627608537674, + "rewards/margins": 3.9659602642059326, + "rewards/rejected": -3.8797974586486816, + "step": 14750 + }, + { + "epoch": 0.86, + "learning_rate": 5.165745268840732e-09, + "logits/chosen": -1.7194894552230835, + "logits/rejected": -1.7225338220596313, + "logps/chosen": -170.9425048828125, + "logps/rejected": -356.86175537109375, + "loss": 0.0455, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9692718982696533, + "rewards/margins": 2.9486329555511475, + "rewards/rejected": 0.02063903771340847, + "step": 14751 + }, + { + "epoch": 0.86, + "learning_rate": 5.1615743419606e-09, + "logits/chosen": -1.9736257791519165, + "logits/rejected": -1.9705699682235718, + "logps/chosen": -59.052337646484375, + "logps/rejected": -203.93902587890625, + "loss": 0.1072, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.41344153881073, + "rewards/margins": 3.747663974761963, + "rewards/rejected": -2.3342225551605225, + "step": 14752 + }, + { + "epoch": 0.86, + "learning_rate": 5.157405007960675e-09, + "logits/chosen": -1.876183271408081, + "logits/rejected": -1.8815783262252808, + "logps/chosen": -1.4804068803787231, + "logps/rejected": -54.832679748535156, + "loss": 0.509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16781191527843475, + "rewards/margins": 0.49672389030456543, + "rewards/rejected": -0.32891198992729187, + "step": 14753 + }, + { + "epoch": 0.86, + "learning_rate": 5.15323726698908e-09, + "logits/chosen": -1.8298330307006836, + "logits/rejected": -1.809182047843933, + "logps/chosen": -192.7548065185547, + "logps/rejected": -299.2867431640625, + "loss": 0.2031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.309645175933838, + "rewards/margins": 0.9228928089141846, + "rewards/rejected": 1.3867523670196533, + "step": 14754 + }, + { + "epoch": 0.86, + "learning_rate": 5.14907111919386e-09, + "logits/chosen": -1.9814189672470093, + "logits/rejected": -1.9786486625671387, + "logps/chosen": -0.15641656517982483, + "logps/rejected": -81.58232116699219, + "loss": 0.5499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008137522265315056, + "rewards/margins": 0.6643005609512329, + "rewards/rejected": -0.6724380850791931, + "step": 14755 + }, + { + "epoch": 0.86, + "learning_rate": 5.144906564723022e-09, + "logits/chosen": -1.992279052734375, + "logits/rejected": -2.0469553470611572, + "logps/chosen": -169.06539916992188, + "logps/rejected": -373.69549560546875, + "loss": 0.186, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.425866723060608, + "rewards/margins": 1.101434350013733, + "rewards/rejected": 0.324432373046875, + "step": 14756 + }, + { + "epoch": 0.86, + "learning_rate": 5.140743603724512e-09, + "logits/chosen": -1.9504660367965698, + "logits/rejected": -1.973053216934204, + "logps/chosen": -207.89866638183594, + "logps/rejected": -241.89068603515625, + "loss": 0.2314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2493804693222046, + "rewards/margins": 1.0119597911834717, + "rewards/rejected": 0.23742066323757172, + "step": 14757 + }, + { + "epoch": 0.86, + "learning_rate": 5.136582236346232e-09, + "logits/chosen": -1.883272409439087, + "logits/rejected": -1.8684111833572388, + "logps/chosen": -22.758766174316406, + "logps/rejected": -153.39456176757812, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13547249138355255, + "rewards/margins": 1.7740559577941895, + "rewards/rejected": -1.6385834217071533, + "step": 14758 + }, + { + "epoch": 0.86, + "learning_rate": 5.1324224627359914e-09, + "logits/chosen": -1.8300254344940186, + "logits/rejected": -1.832865834236145, + "logps/chosen": -185.3887939453125, + "logps/rejected": -330.19586181640625, + "loss": 0.1601, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2529847621917725, + "rewards/margins": 1.167190670967102, + "rewards/rejected": 1.0857940912246704, + "step": 14759 + }, + { + "epoch": 0.86, + "learning_rate": 5.128264283041572e-09, + "logits/chosen": -1.7937567234039307, + "logits/rejected": -1.7922881841659546, + "logps/chosen": -164.75477600097656, + "logps/rejected": -271.9975280761719, + "loss": 0.0466, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7225570678710938, + "rewards/margins": 2.879765272140503, + "rewards/rejected": -0.15720824897289276, + "step": 14760 + }, + { + "epoch": 0.86, + "learning_rate": 5.124107697410701e-09, + "logits/chosen": -1.9809739589691162, + "logits/rejected": -1.9741613864898682, + "logps/chosen": -0.7455396056175232, + "logps/rejected": -127.73690795898438, + "loss": 0.4971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18150591850280762, + "rewards/margins": 0.7387554049491882, + "rewards/rejected": -0.5572494864463806, + "step": 14761 + }, + { + "epoch": 0.86, + "learning_rate": 5.119952705991043e-09, + "logits/chosen": -1.9400514364242554, + "logits/rejected": -1.9447335004806519, + "logps/chosen": -68.78802490234375, + "logps/rejected": -231.36032104492188, + "loss": 0.135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9899353384971619, + "rewards/margins": 6.055394172668457, + "rewards/rejected": -5.06545877456665, + "step": 14762 + }, + { + "epoch": 0.86, + "learning_rate": 5.11579930893018e-09, + "logits/chosen": -1.9798877239227295, + "logits/rejected": -1.9793740510940552, + "logps/chosen": -81.97956085205078, + "logps/rejected": -231.94155883789062, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7880287170410156, + "rewards/margins": 4.06002140045166, + "rewards/rejected": -2.2719924449920654, + "step": 14763 + }, + { + "epoch": 0.86, + "learning_rate": 5.111647506375694e-09, + "logits/chosen": -2.0991275310516357, + "logits/rejected": -2.094601631164551, + "logps/chosen": -30.488338470458984, + "logps/rejected": -178.99472045898438, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42191505432128906, + "rewards/margins": 1.1445469856262207, + "rewards/rejected": -0.7226318717002869, + "step": 14764 + }, + { + "epoch": 0.86, + "learning_rate": 5.107497298475056e-09, + "logits/chosen": -2.1477479934692383, + "logits/rejected": -2.1430389881134033, + "logps/chosen": -39.57343292236328, + "logps/rejected": -186.73748779296875, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02460479736328125, + "rewards/margins": 3.990475654602051, + "rewards/rejected": -4.015080451965332, + "step": 14765 + }, + { + "epoch": 0.86, + "learning_rate": 5.103348685375702e-09, + "logits/chosen": -1.8862106800079346, + "logits/rejected": -1.7855232954025269, + "logps/chosen": -216.79714965820312, + "logps/rejected": -571.7525634765625, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7355377674102783, + "rewards/margins": 3.588528633117676, + "rewards/rejected": -0.8529907464981079, + "step": 14766 + }, + { + "epoch": 0.86, + "learning_rate": 5.099201667225023e-09, + "logits/chosen": -2.0583128929138184, + "logits/rejected": -2.0566587448120117, + "logps/chosen": -0.00027153262635692954, + "logps/rejected": -20.126535415649414, + "loss": 0.7544, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00046294817002490163, + "rewards/margins": -0.27420860528945923, + "rewards/rejected": 0.2746715545654297, + "step": 14767 + }, + { + "epoch": 0.86, + "learning_rate": 5.095056244170331e-09, + "logits/chosen": -1.7512019872665405, + "logits/rejected": -1.752923607826233, + "logps/chosen": -26.260364532470703, + "logps/rejected": -54.33726501464844, + "loss": 0.5236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3334835171699524, + "rewards/margins": 0.24942323565483093, + "rewards/rejected": 0.08406028896570206, + "step": 14768 + }, + { + "epoch": 0.86, + "learning_rate": 5.0909124163589004e-09, + "logits/chosen": -1.9034379720687866, + "logits/rejected": -1.896990418434143, + "logps/chosen": -17.95311164855957, + "logps/rejected": -208.82666015625, + "loss": 0.1878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7810811996459961, + "rewards/margins": 4.451921463012695, + "rewards/rejected": -3.6708405017852783, + "step": 14769 + }, + { + "epoch": 0.86, + "learning_rate": 5.0867701839379275e-09, + "logits/chosen": -1.905264973640442, + "logits/rejected": -1.8963892459869385, + "logps/chosen": -0.0026810811832547188, + "logps/rejected": -121.42586517333984, + "loss": 0.422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008699097670614719, + "rewards/margins": 1.866165280342102, + "rewards/rejected": -1.86529541015625, + "step": 14770 + }, + { + "epoch": 0.86, + "learning_rate": 5.082629547054573e-09, + "logits/chosen": -1.8651480674743652, + "logits/rejected": -1.8503042459487915, + "logps/chosen": -257.7004699707031, + "logps/rejected": -305.85198974609375, + "loss": 0.0973, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.94736647605896, + "rewards/margins": 1.5876679420471191, + "rewards/rejected": 1.3596985340118408, + "step": 14771 + }, + { + "epoch": 0.86, + "learning_rate": 5.078490505855937e-09, + "logits/chosen": -1.7632203102111816, + "logits/rejected": -1.764685869216919, + "logps/chosen": -0.06432359665632248, + "logps/rejected": -16.53579330444336, + "loss": 0.6797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006469023879617453, + "rewards/margins": 0.021415390074253082, + "rewards/rejected": -0.014946365728974342, + "step": 14772 + }, + { + "epoch": 0.86, + "learning_rate": 5.074353060489056e-09, + "logits/chosen": -1.9808411598205566, + "logits/rejected": -1.9701522588729858, + "logps/chosen": -8.04358959197998, + "logps/rejected": -225.90003967285156, + "loss": 0.3067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2238926887512207, + "rewards/margins": 4.7252631187438965, + "rewards/rejected": -4.501370429992676, + "step": 14773 + }, + { + "epoch": 0.86, + "learning_rate": 5.070217211100903e-09, + "logits/chosen": -1.9603477716445923, + "logits/rejected": -1.9501079320907593, + "logps/chosen": -28.307575225830078, + "logps/rejected": -254.80242919921875, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1477949172258377, + "rewards/margins": 6.913547992706299, + "rewards/rejected": -6.765753269195557, + "step": 14774 + }, + { + "epoch": 0.86, + "learning_rate": 5.066082957838408e-09, + "logits/chosen": -2.0203003883361816, + "logits/rejected": -1.9879227876663208, + "logps/chosen": -67.6259536743164, + "logps/rejected": -472.2913818359375, + "loss": 0.2854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1694633513689041, + "rewards/margins": 8.77897834777832, + "rewards/rejected": -8.609515190124512, + "step": 14775 + }, + { + "epoch": 0.86, + "learning_rate": 5.061950300848439e-09, + "logits/chosen": -1.736928105354309, + "logits/rejected": -1.7310295104980469, + "logps/chosen": -8.542826652526855, + "logps/rejected": -204.21298217773438, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03552589565515518, + "rewards/margins": 2.0105350017547607, + "rewards/rejected": -1.9750092029571533, + "step": 14776 + }, + { + "epoch": 0.86, + "learning_rate": 5.057819240277822e-09, + "logits/chosen": -1.7596620321273804, + "logits/rejected": -1.7701942920684814, + "logps/chosen": -228.99774169921875, + "logps/rejected": -351.3978271484375, + "loss": 0.0999, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3964385986328125, + "rewards/margins": 2.0580811500549316, + "rewards/rejected": 0.338357537984848, + "step": 14777 + }, + { + "epoch": 0.86, + "learning_rate": 5.053689776273296e-09, + "logits/chosen": -1.9653046131134033, + "logits/rejected": -1.9740920066833496, + "logps/chosen": -169.75579833984375, + "logps/rejected": -465.8369445800781, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2821946144104004, + "rewards/margins": 6.520753860473633, + "rewards/rejected": -4.238559246063232, + "step": 14778 + }, + { + "epoch": 0.86, + "learning_rate": 5.049561908981564e-09, + "logits/chosen": -1.7629690170288086, + "logits/rejected": -1.7031512260437012, + "logps/chosen": -298.1905517578125, + "logps/rejected": -584.845703125, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0346009731292725, + "rewards/margins": 2.104828119277954, + "rewards/rejected": -0.07022704929113388, + "step": 14779 + }, + { + "epoch": 0.86, + "learning_rate": 5.0454356385492666e-09, + "logits/chosen": -1.6193994283676147, + "logits/rejected": -1.616388201713562, + "logps/chosen": -0.0012916552368551493, + "logps/rejected": -108.68492126464844, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.159534627338871e-05, + "rewards/margins": 0.9486473798751831, + "rewards/rejected": -0.9487289786338806, + "step": 14780 + }, + { + "epoch": 0.86, + "learning_rate": 5.0413109651230025e-09, + "logits/chosen": -1.8632830381393433, + "logits/rejected": -1.8705418109893799, + "logps/chosen": -220.53598022460938, + "logps/rejected": -277.10968017578125, + "loss": 0.2923, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9546524286270142, + "rewards/margins": 0.33835291862487793, + "rewards/rejected": 1.6162995100021362, + "step": 14781 + }, + { + "epoch": 0.86, + "learning_rate": 5.0371878888492705e-09, + "logits/chosen": -1.8931235074996948, + "logits/rejected": -1.88469398021698, + "logps/chosen": -66.25300598144531, + "logps/rejected": -286.06011962890625, + "loss": 0.1881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7808868288993835, + "rewards/margins": 5.077059745788574, + "rewards/rejected": -4.296173095703125, + "step": 14782 + }, + { + "epoch": 0.86, + "learning_rate": 5.033066409874581e-09, + "logits/chosen": -2.0107452869415283, + "logits/rejected": -2.0099596977233887, + "logps/chosen": -2.4719085693359375, + "logps/rejected": -172.0758056640625, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16712389886379242, + "rewards/margins": 4.5803961753845215, + "rewards/rejected": -4.413272380828857, + "step": 14783 + }, + { + "epoch": 0.86, + "learning_rate": 5.028946528345323e-09, + "logits/chosen": -1.954824447631836, + "logits/rejected": -1.9303468465805054, + "logps/chosen": -190.384033203125, + "logps/rejected": -422.43695068359375, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.670239210128784, + "rewards/margins": 4.9606475830078125, + "rewards/rejected": -1.2904083728790283, + "step": 14784 + }, + { + "epoch": 0.86, + "learning_rate": 5.02482824440787e-09, + "logits/chosen": -1.8777533769607544, + "logits/rejected": -1.8754098415374756, + "logps/chosen": -26.571208953857422, + "logps/rejected": -267.4268798828125, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02384033240377903, + "rewards/margins": 5.802838325500488, + "rewards/rejected": -5.778997898101807, + "step": 14785 + }, + { + "epoch": 0.86, + "learning_rate": 5.020711558208496e-09, + "logits/chosen": -1.558211326599121, + "logits/rejected": -1.5320990085601807, + "logps/chosen": -300.60321044921875, + "logps/rejected": -519.7860107421875, + "loss": 0.1032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7685730457305908, + "rewards/margins": 2.7698822021484375, + "rewards/rejected": -1.0013092756271362, + "step": 14786 + }, + { + "epoch": 0.86, + "learning_rate": 5.0165964698934695e-09, + "logits/chosen": -1.8715888261795044, + "logits/rejected": -1.8301697969436646, + "logps/chosen": -153.97955322265625, + "logps/rejected": -401.4256896972656, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9948211908340454, + "rewards/margins": 5.726315498352051, + "rewards/rejected": -3.731494188308716, + "step": 14787 + }, + { + "epoch": 0.86, + "learning_rate": 5.012482979608989e-09, + "logits/chosen": -1.970795750617981, + "logits/rejected": -1.9291213750839233, + "logps/chosen": -176.423828125, + "logps/rejected": -360.1157531738281, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.136500597000122, + "rewards/margins": 2.506514072418213, + "rewards/rejected": -0.37001344561576843, + "step": 14788 + }, + { + "epoch": 0.86, + "learning_rate": 5.008371087501156e-09, + "logits/chosen": -1.9372926950454712, + "logits/rejected": -1.9899486303329468, + "logps/chosen": -184.1735076904297, + "logps/rejected": -498.5835266113281, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.146070957183838, + "rewards/margins": 5.472190856933594, + "rewards/rejected": -2.326120138168335, + "step": 14789 + }, + { + "epoch": 0.86, + "learning_rate": 5.0042607937160645e-09, + "logits/chosen": -1.7263922691345215, + "logits/rejected": -1.72287917137146, + "logps/chosen": -9.030413627624512, + "logps/rejected": -105.85535430908203, + "loss": 0.8053, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01363077200949192, + "rewards/margins": -0.3838006258010864, + "rewards/rejected": 0.37016984820365906, + "step": 14790 + }, + { + "epoch": 0.86, + "learning_rate": 5.0001520983997296e-09, + "logits/chosen": -1.9070181846618652, + "logits/rejected": -1.901314616203308, + "logps/chosen": -12.165335655212402, + "logps/rejected": -135.53530883789062, + "loss": 0.4229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06059684976935387, + "rewards/margins": 1.9019695520401, + "rewards/rejected": -1.8413727283477783, + "step": 14791 + }, + { + "epoch": 0.86, + "learning_rate": 4.9960450016981124e-09, + "logits/chosen": -1.8723214864730835, + "logits/rejected": -1.873185396194458, + "logps/chosen": -36.70793914794922, + "logps/rejected": -201.27984619140625, + "loss": 0.1455, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2199233770370483, + "rewards/margins": 3.1543242931365967, + "rewards/rejected": -1.9344009160995483, + "step": 14792 + }, + { + "epoch": 0.86, + "learning_rate": 4.991939503757109e-09, + "logits/chosen": -2.0630946159362793, + "logits/rejected": -2.0600297451019287, + "logps/chosen": -17.264616012573242, + "logps/rejected": -194.20358276367188, + "loss": 0.5969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5745865106582642, + "rewards/margins": 1.387475609779358, + "rewards/rejected": -1.962062120437622, + "step": 14793 + }, + { + "epoch": 0.86, + "learning_rate": 4.987835604722574e-09, + "logits/chosen": -1.4983842372894287, + "logits/rejected": -1.5076459646224976, + "logps/chosen": -0.0358385369181633, + "logps/rejected": -166.9937744140625, + "loss": 0.3725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0027377598453313112, + "rewards/margins": 2.9864535331726074, + "rewards/rejected": -2.983715772628784, + "step": 14794 + }, + { + "epoch": 0.86, + "learning_rate": 4.983733304740295e-09, + "logits/chosen": -2.075045585632324, + "logits/rejected": -2.053057909011841, + "logps/chosen": -171.48287963867188, + "logps/rejected": -272.0074462890625, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7237517833709717, + "rewards/margins": 2.021273612976074, + "rewards/rejected": 0.7024780511856079, + "step": 14795 + }, + { + "epoch": 0.86, + "learning_rate": 4.979632603956019e-09, + "logits/chosen": -1.8909789323806763, + "logits/rejected": -1.8842178583145142, + "logps/chosen": -5.268762588500977, + "logps/rejected": -110.14876556396484, + "loss": 0.6298, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19704404473304749, + "rewards/margins": -0.005141779780387878, + "rewards/rejected": 0.20218582451343536, + "step": 14796 + }, + { + "epoch": 0.86, + "learning_rate": 4.9755335025153995e-09, + "logits/chosen": -1.9290390014648438, + "logits/rejected": -1.921898603439331, + "logps/chosen": -0.0033740245271474123, + "logps/rejected": -207.87936401367188, + "loss": 0.3659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00018049849313683808, + "rewards/margins": 3.2000279426574707, + "rewards/rejected": -3.199847459793091, + "step": 14797 + }, + { + "epoch": 0.86, + "learning_rate": 4.971436000564072e-09, + "logits/chosen": -1.9026129245758057, + "logits/rejected": -1.896086573600769, + "logps/chosen": -18.09955596923828, + "logps/rejected": -144.12234497070312, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1686851531267166, + "rewards/margins": 3.4851951599121094, + "rewards/rejected": -3.316509962081909, + "step": 14798 + }, + { + "epoch": 0.86, + "learning_rate": 4.967340098247591e-09, + "logits/chosen": -1.8170057535171509, + "logits/rejected": -1.804187536239624, + "logps/chosen": -121.18500518798828, + "logps/rejected": -344.2152099609375, + "loss": 0.2134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6089805364608765, + "rewards/margins": 0.9539024233818054, + "rewards/rejected": 0.655078113079071, + "step": 14799 + }, + { + "epoch": 0.86, + "learning_rate": 4.963245795711479e-09, + "logits/chosen": -1.7849267721176147, + "logits/rejected": -1.7826554775238037, + "logps/chosen": -6.446469783782959, + "logps/rejected": -108.68392181396484, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31801629066467285, + "rewards/margins": 2.14693021774292, + "rewards/rejected": -1.828913927078247, + "step": 14800 + }, + { + "epoch": 0.86, + "learning_rate": 4.9591530931011705e-09, + "logits/chosen": -1.7647995948791504, + "logits/rejected": -1.7627201080322266, + "logps/chosen": -159.87161254882812, + "logps/rejected": -482.8392333984375, + "loss": 0.0744, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0091736316680908, + "rewards/margins": 5.297586441040039, + "rewards/rejected": -4.288412570953369, + "step": 14801 + }, + { + "epoch": 0.86, + "learning_rate": 4.95506199056206e-09, + "logits/chosen": -1.9687830209732056, + "logits/rejected": -1.973426103591919, + "logps/chosen": -78.8510971069336, + "logps/rejected": -184.3155517578125, + "loss": 0.9119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4285004138946533, + "rewards/margins": 0.9571900367736816, + "rewards/rejected": -2.385690450668335, + "step": 14802 + }, + { + "epoch": 0.86, + "learning_rate": 4.9509724882394855e-09, + "logits/chosen": -1.6874208450317383, + "logits/rejected": -1.6642976999282837, + "logps/chosen": -252.19073486328125, + "logps/rejected": -472.50628662109375, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.11476469039917, + "rewards/margins": 5.754279136657715, + "rewards/rejected": -1.6395142078399658, + "step": 14803 + }, + { + "epoch": 0.86, + "learning_rate": 4.946884586278732e-09, + "logits/chosen": -2.1077213287353516, + "logits/rejected": -2.107017993927002, + "logps/chosen": -24.644493103027344, + "logps/rejected": -144.01304626464844, + "loss": 0.205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6596264243125916, + "rewards/margins": 1.991379976272583, + "rewards/rejected": -1.3317536115646362, + "step": 14804 + }, + { + "epoch": 0.86, + "learning_rate": 4.9427982848250005e-09, + "logits/chosen": -1.9115691184997559, + "logits/rejected": -1.9205646514892578, + "logps/chosen": -186.47903442382812, + "logps/rejected": -480.10931396484375, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.44647216796875, + "rewards/margins": 5.644952774047852, + "rewards/rejected": -3.1984803676605225, + "step": 14805 + }, + { + "epoch": 0.86, + "learning_rate": 4.938713584023485e-09, + "logits/chosen": -2.0155868530273438, + "logits/rejected": -2.0067942142486572, + "logps/chosen": -11.250861167907715, + "logps/rejected": -184.42381286621094, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1820935308933258, + "rewards/margins": 2.030468463897705, + "rewards/rejected": -1.8483749628067017, + "step": 14806 + }, + { + "epoch": 0.86, + "learning_rate": 4.934630484019292e-09, + "logits/chosen": -1.7993179559707642, + "logits/rejected": -1.8054512739181519, + "logps/chosen": -52.93572998046875, + "logps/rejected": -161.95907592773438, + "loss": 0.2041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.002111792564392, + "rewards/margins": 1.715063452720642, + "rewards/rejected": -0.71295166015625, + "step": 14807 + }, + { + "epoch": 0.86, + "learning_rate": 4.93054898495745e-09, + "logits/chosen": -1.8124868869781494, + "logits/rejected": -1.8131437301635742, + "logps/chosen": -30.207962036132812, + "logps/rejected": -160.9169921875, + "loss": 0.8767, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4048074781894684, + "rewards/margins": -0.30248507857322693, + "rewards/rejected": -0.10232239216566086, + "step": 14808 + }, + { + "epoch": 0.86, + "learning_rate": 4.926469086982965e-09, + "logits/chosen": -2.0714848041534424, + "logits/rejected": -2.0692498683929443, + "logps/chosen": -17.98810577392578, + "logps/rejected": -175.45306396484375, + "loss": 0.3809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09117908775806427, + "rewards/margins": 3.197460651397705, + "rewards/rejected": -3.288639783859253, + "step": 14809 + }, + { + "epoch": 0.86, + "learning_rate": 4.9223907902407766e-09, + "logits/chosen": -2.082486391067505, + "logits/rejected": -2.087580680847168, + "logps/chosen": -8.320654887938872e-05, + "logps/rejected": -281.71258544921875, + "loss": 0.3574, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.635766915977001e-06, + "rewards/margins": 4.864849090576172, + "rewards/rejected": -4.8648529052734375, + "step": 14810 + }, + { + "epoch": 0.86, + "learning_rate": 4.9183140948757805e-09, + "logits/chosen": -1.9412367343902588, + "logits/rejected": -1.9355155229568481, + "logps/chosen": -1.5123549699783325, + "logps/rejected": -187.2169647216797, + "loss": 0.4491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16938523948192596, + "rewards/margins": 0.8823857307434082, + "rewards/rejected": -0.713000476360321, + "step": 14811 + }, + { + "epoch": 0.86, + "learning_rate": 4.914239001032772e-09, + "logits/chosen": -2.020463705062866, + "logits/rejected": -2.014660596847534, + "logps/chosen": -7.3247833251953125, + "logps/rejected": -211.83963012695312, + "loss": 0.4751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17622852325439453, + "rewards/margins": 0.908159077167511, + "rewards/rejected": -0.7319305539131165, + "step": 14812 + }, + { + "epoch": 0.86, + "learning_rate": 4.910165508856534e-09, + "logits/chosen": -1.9851616621017456, + "logits/rejected": -1.970218300819397, + "logps/chosen": -1.1371712684631348, + "logps/rejected": -344.52227783203125, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14419539272785187, + "rewards/margins": 5.461331367492676, + "rewards/rejected": -5.317135810852051, + "step": 14813 + }, + { + "epoch": 0.86, + "learning_rate": 4.906093618491774e-09, + "logits/chosen": -1.8460956811904907, + "logits/rejected": -1.8562129735946655, + "logps/chosen": -109.47203063964844, + "logps/rejected": -209.5308837890625, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.365853875875473, + "rewards/margins": 2.076824903488159, + "rewards/rejected": -1.7109711170196533, + "step": 14814 + }, + { + "epoch": 0.86, + "learning_rate": 4.902023330083161e-09, + "logits/chosen": -1.6169993877410889, + "logits/rejected": -1.6255862712860107, + "logps/chosen": -10.155110359191895, + "logps/rejected": -83.65679168701172, + "loss": 0.9326, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.16644679009914398, + "rewards/margins": -0.7997155785560608, + "rewards/rejected": 0.6332687735557556, + "step": 14815 + }, + { + "epoch": 0.86, + "learning_rate": 4.89795464377526e-09, + "logits/chosen": -1.889672040939331, + "logits/rejected": -1.8743798732757568, + "logps/chosen": -28.62735939025879, + "logps/rejected": -212.96087646484375, + "loss": 0.4208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2073751538991928, + "rewards/margins": 3.6495249271392822, + "rewards/rejected": -3.8568999767303467, + "step": 14816 + }, + { + "epoch": 0.86, + "learning_rate": 4.8938875597126336e-09, + "logits/chosen": -2.0276503562927246, + "logits/rejected": -2.0102133750915527, + "logps/chosen": -67.2218017578125, + "logps/rejected": -393.521240234375, + "loss": 0.1762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26882097125053406, + "rewards/margins": 7.842893123626709, + "rewards/rejected": -7.574072360992432, + "step": 14817 + }, + { + "epoch": 0.86, + "learning_rate": 4.889822078039757e-09, + "logits/chosen": -1.8551944494247437, + "logits/rejected": -1.8587719202041626, + "logps/chosen": -79.8003921508789, + "logps/rejected": -127.33757019042969, + "loss": 0.3659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4726479053497314, + "rewards/margins": 0.23216474056243896, + "rewards/rejected": 1.2404831647872925, + "step": 14818 + }, + { + "epoch": 0.86, + "learning_rate": 4.885758198901069e-09, + "logits/chosen": -1.7620049715042114, + "logits/rejected": -1.765446662902832, + "logps/chosen": -1.0062580108642578, + "logps/rejected": -75.59381866455078, + "loss": 0.4556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03293338045477867, + "rewards/margins": 0.9601336717605591, + "rewards/rejected": -0.9272003173828125, + "step": 14819 + }, + { + "epoch": 0.86, + "learning_rate": 4.881695922440915e-09, + "logits/chosen": -1.8778828382492065, + "logits/rejected": -1.866497278213501, + "logps/chosen": -191.11380004882812, + "logps/rejected": -547.192138671875, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0631836652755737, + "rewards/margins": 6.936587810516357, + "rewards/rejected": -5.873404026031494, + "step": 14820 + }, + { + "epoch": 0.86, + "learning_rate": 4.877635248803624e-09, + "logits/chosen": -1.7291399240493774, + "logits/rejected": -1.731919527053833, + "logps/chosen": -56.50250244140625, + "logps/rejected": -249.33364868164062, + "loss": 0.2511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42031556367874146, + "rewards/margins": 4.708993911743164, + "rewards/rejected": -4.288678169250488, + "step": 14821 + }, + { + "epoch": 0.86, + "learning_rate": 4.8735761781334425e-09, + "logits/chosen": -1.3676713705062866, + "logits/rejected": -1.3756266832351685, + "logps/chosen": -211.00546264648438, + "logps/rejected": -478.88458251953125, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.00091552734375, + "rewards/margins": 8.767261505126953, + "rewards/rejected": -7.766345500946045, + "step": 14822 + }, + { + "epoch": 0.86, + "learning_rate": 4.869518710574583e-09, + "logits/chosen": -1.9354373216629028, + "logits/rejected": -1.9300798177719116, + "logps/chosen": -49.43433380126953, + "logps/rejected": -128.4623565673828, + "loss": 0.4112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06483306735754013, + "rewards/margins": 1.5607528686523438, + "rewards/rejected": -1.625585913658142, + "step": 14823 + }, + { + "epoch": 0.86, + "learning_rate": 4.865462846271157e-09, + "logits/chosen": -1.8367396593093872, + "logits/rejected": -1.8256973028182983, + "logps/chosen": -197.77769470214844, + "logps/rejected": -321.7495422363281, + "loss": 0.305, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2239913940429688, + "rewards/margins": 0.22276759147644043, + "rewards/rejected": 2.0012238025665283, + "step": 14824 + }, + { + "epoch": 0.86, + "learning_rate": 4.861408585367288e-09, + "logits/chosen": -1.8693455457687378, + "logits/rejected": -1.8948501348495483, + "logps/chosen": -188.93280029296875, + "logps/rejected": -330.8652648925781, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8150527477264404, + "rewards/margins": 4.68838357925415, + "rewards/rejected": -1.8733307123184204, + "step": 14825 + }, + { + "epoch": 0.86, + "learning_rate": 4.857355928006973e-09, + "logits/chosen": -1.9357950687408447, + "logits/rejected": -1.9161226749420166, + "logps/chosen": -243.6417999267578, + "logps/rejected": -502.53271484375, + "loss": 0.0582, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1300324201583862, + "rewards/margins": 4.524117946624756, + "rewards/rejected": -3.394085645675659, + "step": 14826 + }, + { + "epoch": 0.86, + "learning_rate": 4.8533048743341855e-09, + "logits/chosen": -1.935693621635437, + "logits/rejected": -1.9314734935760498, + "logps/chosen": -0.10839715600013733, + "logps/rejected": -152.9696044921875, + "loss": 0.3537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0062674167566001415, + "rewards/margins": 4.615849494934082, + "rewards/rejected": -4.622117042541504, + "step": 14827 + }, + { + "epoch": 0.86, + "learning_rate": 4.849255424492854e-09, + "logits/chosen": -1.8027757406234741, + "logits/rejected": -1.8181159496307373, + "logps/chosen": -14.878634452819824, + "logps/rejected": -120.43587493896484, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6499724388122559, + "rewards/margins": 3.828721523284912, + "rewards/rejected": -3.1787490844726562, + "step": 14828 + }, + { + "epoch": 0.86, + "learning_rate": 4.84520757862682e-09, + "logits/chosen": -1.9111167192459106, + "logits/rejected": -1.9970711469650269, + "logps/chosen": -192.1370849609375, + "logps/rejected": -336.1936950683594, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6226136684417725, + "rewards/margins": 3.70383620262146, + "rewards/rejected": -1.0812225341796875, + "step": 14829 + }, + { + "epoch": 0.86, + "learning_rate": 4.841161336879901e-09, + "logits/chosen": -1.9597091674804688, + "logits/rejected": -1.9443260431289673, + "logps/chosen": -0.00015353788330685347, + "logps/rejected": -132.78343200683594, + "loss": 0.5584, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4398037567152642e-05, + "rewards/margins": 0.624529242515564, + "rewards/rejected": -0.6245048642158508, + "step": 14830 + }, + { + "epoch": 0.86, + "learning_rate": 4.8371166993958154e-09, + "logits/chosen": -1.9886138439178467, + "logits/rejected": -1.9986355304718018, + "logps/chosen": -33.84989929199219, + "logps/rejected": -211.8544921875, + "loss": 0.3932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0823570266366005, + "rewards/margins": 1.9238227605819702, + "rewards/rejected": -2.0061798095703125, + "step": 14831 + }, + { + "epoch": 0.86, + "learning_rate": 4.8330736663182656e-09, + "logits/chosen": -2.0151097774505615, + "logits/rejected": -2.006932497024536, + "logps/chosen": -183.9939727783203, + "logps/rejected": -351.66436767578125, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6294387578964233, + "rewards/margins": 3.445826768875122, + "rewards/rejected": -1.8163880109786987, + "step": 14832 + }, + { + "epoch": 0.86, + "learning_rate": 4.82903223779087e-09, + "logits/chosen": -1.8413954973220825, + "logits/rejected": -1.8379637002944946, + "logps/chosen": -140.7569580078125, + "logps/rejected": -208.7981719970703, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5157928466796875, + "rewards/margins": 2.6716690063476562, + "rewards/rejected": 0.8441238403320312, + "step": 14833 + }, + { + "epoch": 0.86, + "learning_rate": 4.824992413957213e-09, + "logits/chosen": -1.949018120765686, + "logits/rejected": -1.9376823902130127, + "logps/chosen": -0.0005114282248541713, + "logps/rejected": -175.72166442871094, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0005491111078299582, + "rewards/margins": 3.1220717430114746, + "rewards/rejected": -3.1215226650238037, + "step": 14834 + }, + { + "epoch": 0.86, + "learning_rate": 4.820954194960797e-09, + "logits/chosen": -1.9623124599456787, + "logits/rejected": -1.890065312385559, + "logps/chosen": -209.78086853027344, + "logps/rejected": -465.9264221191406, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.18939208984375, + "rewards/margins": 3.7002196311950684, + "rewards/rejected": -0.5108276605606079, + "step": 14835 + }, + { + "epoch": 0.86, + "learning_rate": 4.816917580945074e-09, + "logits/chosen": -1.8643567562103271, + "logits/rejected": -1.8702696561813354, + "logps/chosen": -2.7431814670562744, + "logps/rejected": -224.091796875, + "loss": 0.3714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0399777889251709, + "rewards/margins": 2.67600679397583, + "rewards/rejected": -2.636029005050659, + "step": 14836 + }, + { + "epoch": 0.86, + "learning_rate": 4.812882572053462e-09, + "logits/chosen": -1.952282428741455, + "logits/rejected": -1.948527455329895, + "logps/chosen": -88.69915771484375, + "logps/rejected": -211.64791870117188, + "loss": 0.173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7972480654716492, + "rewards/margins": 2.8138787746429443, + "rewards/rejected": -2.0166306495666504, + "step": 14837 + }, + { + "epoch": 0.86, + "learning_rate": 4.808849168429296e-09, + "logits/chosen": -1.9270228147506714, + "logits/rejected": -1.9546953439712524, + "logps/chosen": -165.44888305664062, + "logps/rejected": -348.3759765625, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0992400646209717, + "rewards/margins": 2.8326873779296875, + "rewards/rejected": -0.733447253704071, + "step": 14838 + }, + { + "epoch": 0.86, + "learning_rate": 4.804817370215858e-09, + "logits/chosen": -1.9640263319015503, + "logits/rejected": -1.96494460105896, + "logps/chosen": -0.14289529621601105, + "logps/rejected": -70.95878601074219, + "loss": 0.6655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01130677293986082, + "rewards/margins": 0.044006336480379105, + "rewards/rejected": -0.0553131103515625, + "step": 14839 + }, + { + "epoch": 0.86, + "learning_rate": 4.800787177556376e-09, + "logits/chosen": -2.002716541290283, + "logits/rejected": -1.9903854131698608, + "logps/chosen": -46.802433013916016, + "logps/rejected": -217.2833251953125, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24881744384765625, + "rewards/margins": 1.5791854858398438, + "rewards/rejected": -1.3303680419921875, + "step": 14840 + }, + { + "epoch": 0.86, + "learning_rate": 4.79675859059403e-09, + "logits/chosen": -1.907097339630127, + "logits/rejected": -1.903921365737915, + "logps/chosen": -37.09709548950195, + "logps/rejected": -405.0425109863281, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.419377565383911, + "rewards/margins": 7.779097557067871, + "rewards/rejected": -5.359719753265381, + "step": 14841 + }, + { + "epoch": 0.86, + "learning_rate": 4.792731609471939e-09, + "logits/chosen": -1.9153039455413818, + "logits/rejected": -1.9392569065093994, + "logps/chosen": -133.41517639160156, + "logps/rejected": -467.61553955078125, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4887100458145142, + "rewards/margins": 6.0245041847229, + "rewards/rejected": -4.535794258117676, + "step": 14842 + }, + { + "epoch": 0.86, + "learning_rate": 4.788706234333134e-09, + "logits/chosen": -1.8778749704360962, + "logits/rejected": -1.875546932220459, + "logps/chosen": -0.0003274336049798876, + "logps/rejected": -51.98796463012695, + "loss": 0.7807, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.9190016246284358e-05, + "rewards/margins": -0.39463475346565247, + "rewards/rejected": 0.3946155607700348, + "step": 14843 + }, + { + "epoch": 0.86, + "learning_rate": 4.7846824653206605e-09, + "logits/chosen": -1.9652303457260132, + "logits/rejected": -1.9673259258270264, + "logps/chosen": -1.69215726852417, + "logps/rejected": -26.595195770263672, + "loss": 0.6994, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05087242275476456, + "rewards/margins": -0.19645613431930542, + "rewards/rejected": 0.24732856452465057, + "step": 14844 + }, + { + "epoch": 0.86, + "learning_rate": 4.780660302577422e-09, + "logits/chosen": -1.5600184202194214, + "logits/rejected": -1.552435278892517, + "logps/chosen": -5.465551376342773, + "logps/rejected": -58.20148468017578, + "loss": 0.5556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14297471940517426, + "rewards/margins": 0.33175110816955566, + "rewards/rejected": -0.1887764036655426, + "step": 14845 + }, + { + "epoch": 0.86, + "learning_rate": 4.776639746246336e-09, + "logits/chosen": -1.8650261163711548, + "logits/rejected": -1.8623871803283691, + "logps/chosen": -2.6308071613311768, + "logps/rejected": -219.20455932617188, + "loss": 0.3606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0069526913575828075, + "rewards/margins": 4.649947166442871, + "rewards/rejected": -4.656899929046631, + "step": 14846 + }, + { + "epoch": 0.86, + "learning_rate": 4.772620796470195e-09, + "logits/chosen": -1.8742985725402832, + "logits/rejected": -1.8737081289291382, + "logps/chosen": -207.20477294921875, + "logps/rejected": -266.69415283203125, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.223397970199585, + "rewards/margins": 3.692678928375244, + "rewards/rejected": -0.46928101778030396, + "step": 14847 + }, + { + "epoch": 0.86, + "learning_rate": 4.768603453391807e-09, + "logits/chosen": -1.955004334449768, + "logits/rejected": -1.9568490982055664, + "logps/chosen": -5.466099739074707, + "logps/rejected": -201.1653289794922, + "loss": 0.339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17747803032398224, + "rewards/margins": 3.194715976715088, + "rewards/rejected": -3.017237901687622, + "step": 14848 + }, + { + "epoch": 0.86, + "learning_rate": 4.764587717153878e-09, + "logits/chosen": -1.7051175832748413, + "logits/rejected": -1.7368085384368896, + "logps/chosen": -198.72808837890625, + "logps/rejected": -279.7044372558594, + "loss": 0.1105, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6922760009765625, + "rewards/margins": 2.4976439476013184, + "rewards/rejected": -0.8053680658340454, + "step": 14849 + }, + { + "epoch": 0.86, + "learning_rate": 4.760573587899058e-09, + "logits/chosen": -1.9164091348648071, + "logits/rejected": -1.9064079523086548, + "logps/chosen": -62.10445022583008, + "logps/rejected": -424.5538330078125, + "loss": 0.171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8167400360107422, + "rewards/margins": 8.602195739746094, + "rewards/rejected": -7.785455226898193, + "step": 14850 + }, + { + "epoch": 0.86, + "learning_rate": 4.756561065769959e-09, + "logits/chosen": -2.0522210597991943, + "logits/rejected": -2.047506093978882, + "logps/chosen": -49.024681091308594, + "logps/rejected": -146.74208068847656, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24265137314796448, + "rewards/margins": 1.4106658697128296, + "rewards/rejected": -1.1680145263671875, + "step": 14851 + }, + { + "epoch": 0.86, + "learning_rate": 4.752550150909113e-09, + "logits/chosen": -2.014207363128662, + "logits/rejected": -2.0057668685913086, + "logps/chosen": -0.016687724739313126, + "logps/rejected": -125.17926025390625, + "loss": 0.62, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005420099012553692, + "rewards/margins": 0.23427599668502808, + "rewards/rejected": -0.2288558930158615, + "step": 14852 + }, + { + "epoch": 0.86, + "learning_rate": 4.748540843459031e-09, + "logits/chosen": -1.8484827280044556, + "logits/rejected": -1.8347479104995728, + "logps/chosen": -181.84666442871094, + "logps/rejected": -260.89898681640625, + "loss": 0.4139, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7968063354492188, + "rewards/margins": -0.13345181941986084, + "rewards/rejected": 1.9302581548690796, + "step": 14853 + }, + { + "epoch": 0.86, + "learning_rate": 4.7445331435621126e-09, + "logits/chosen": -1.8091645240783691, + "logits/rejected": -1.796809434890747, + "logps/chosen": -297.4385986328125, + "logps/rejected": -436.24755859375, + "loss": 0.0561, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7356202602386475, + "rewards/margins": 2.630310297012329, + "rewards/rejected": 0.10531006008386612, + "step": 14854 + }, + { + "epoch": 0.86, + "learning_rate": 4.740527051360748e-09, + "logits/chosen": -1.9919792413711548, + "logits/rejected": -1.9912405014038086, + "logps/chosen": -35.43744659423828, + "logps/rejected": -234.44161987304688, + "loss": 0.1482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.954937756061554, + "rewards/margins": 4.560143947601318, + "rewards/rejected": -3.605206251144409, + "step": 14855 + }, + { + "epoch": 0.86, + "learning_rate": 4.736522566997248e-09, + "logits/chosen": -1.8171405792236328, + "logits/rejected": -1.857476830482483, + "logps/chosen": -250.51734924316406, + "logps/rejected": -455.2908935546875, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3454697132110596, + "rewards/margins": 8.425895690917969, + "rewards/rejected": -7.080426216125488, + "step": 14856 + }, + { + "epoch": 0.86, + "learning_rate": 4.732519690613884e-09, + "logits/chosen": -2.0004332065582275, + "logits/rejected": -1.9885433912277222, + "logps/chosen": -62.4776611328125, + "logps/rejected": -393.959228515625, + "loss": 0.2598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16725540161132812, + "rewards/margins": 7.769806861877441, + "rewards/rejected": -7.602551460266113, + "step": 14857 + }, + { + "epoch": 0.86, + "learning_rate": 4.72851842235284e-09, + "logits/chosen": -1.7248797416687012, + "logits/rejected": -1.717691421508789, + "logps/chosen": -72.41061401367188, + "logps/rejected": -194.25442504882812, + "loss": 0.5819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5034393668174744, + "rewards/margins": 1.1089491844177246, + "rewards/rejected": -1.6123886108398438, + "step": 14858 + }, + { + "epoch": 0.86, + "learning_rate": 4.7245187623562666e-09, + "logits/chosen": -1.9570242166519165, + "logits/rejected": -1.954768419265747, + "logps/chosen": -24.605274200439453, + "logps/rejected": -89.92310333251953, + "loss": 0.4408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16191387176513672, + "rewards/margins": 1.2262868881225586, + "rewards/rejected": -1.0643730163574219, + "step": 14859 + }, + { + "epoch": 0.86, + "learning_rate": 4.720520710766257e-09, + "logits/chosen": -1.873633623123169, + "logits/rejected": -1.8690662384033203, + "logps/chosen": -0.062084078788757324, + "logps/rejected": -82.65382385253906, + "loss": 0.5058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0016910814447328448, + "rewards/margins": 0.9575695395469666, + "rewards/rejected": -0.9558784365653992, + "step": 14860 + }, + { + "epoch": 0.86, + "learning_rate": 4.716524267724842e-09, + "logits/chosen": -1.861236572265625, + "logits/rejected": -1.8615697622299194, + "logps/chosen": -64.95746612548828, + "logps/rejected": -287.45098876953125, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7519584894180298, + "rewards/margins": 3.1832056045532227, + "rewards/rejected": -2.4312469959259033, + "step": 14861 + }, + { + "epoch": 0.86, + "learning_rate": 4.712529433373985e-09, + "logits/chosen": -1.8997116088867188, + "logits/rejected": -1.9415003061294556, + "logps/chosen": -181.3806915283203, + "logps/rejected": -383.4698791503906, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2268874645233154, + "rewards/margins": 5.641493320465088, + "rewards/rejected": -2.4146058559417725, + "step": 14862 + }, + { + "epoch": 0.86, + "learning_rate": 4.7085362078556015e-09, + "logits/chosen": -1.9573529958724976, + "logits/rejected": -1.957004427909851, + "logps/chosen": -72.9461898803711, + "logps/rejected": -216.25506591796875, + "loss": 0.1917, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8522056937217712, + "rewards/margins": 2.4284050464630127, + "rewards/rejected": -1.5761994123458862, + "step": 14863 + }, + { + "epoch": 0.86, + "learning_rate": 4.70454459131156e-09, + "logits/chosen": -1.8977774381637573, + "logits/rejected": -1.8136197328567505, + "logps/chosen": -227.5802001953125, + "logps/rejected": -396.10101318359375, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.572222888469696, + "rewards/margins": 2.459033250808716, + "rewards/rejected": -1.886810302734375, + "step": 14864 + }, + { + "epoch": 0.87, + "learning_rate": 4.70055458388367e-09, + "logits/chosen": -2.0056800842285156, + "logits/rejected": -2.008901596069336, + "logps/chosen": -0.0001304101460846141, + "logps/rejected": -127.42913818359375, + "loss": 0.4522, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.940566087607294e-06, + "rewards/margins": 1.374590277671814, + "rewards/rejected": -1.3745803833007812, + "step": 14865 + }, + { + "epoch": 0.87, + "learning_rate": 4.696566185713647e-09, + "logits/chosen": -2.0188045501708984, + "logits/rejected": -2.0201542377471924, + "logps/chosen": -182.3526611328125, + "logps/rejected": -405.39642333984375, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4725677967071533, + "rewards/margins": 4.472958564758301, + "rewards/rejected": -2.0003907680511475, + "step": 14866 + }, + { + "epoch": 0.87, + "learning_rate": 4.692579396943202e-09, + "logits/chosen": -1.7525579929351807, + "logits/rejected": -1.7504030466079712, + "logps/chosen": -53.30576705932617, + "logps/rejected": -348.96380615234375, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.737308144569397, + "rewards/margins": 0.484470009803772, + "rewards/rejected": 1.252838134765625, + "step": 14867 + }, + { + "epoch": 0.87, + "learning_rate": 4.688594217713971e-09, + "logits/chosen": -1.868241786956787, + "logits/rejected": -1.864985704421997, + "logps/chosen": -0.00040312224882654846, + "logps/rejected": -243.98646545410156, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4637093954661395e-05, + "rewards/margins": 5.5612874031066895, + "rewards/rejected": -5.561302185058594, + "step": 14868 + }, + { + "epoch": 0.87, + "learning_rate": 4.684610648167503e-09, + "logits/chosen": -1.9974868297576904, + "logits/rejected": -2.003476858139038, + "logps/chosen": -3.488744020462036, + "logps/rejected": -103.14254760742188, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037061359733343124, + "rewards/margins": 0.9970703125, + "rewards/rejected": -1.034131646156311, + "step": 14869 + }, + { + "epoch": 0.87, + "learning_rate": 4.680628688445326e-09, + "logits/chosen": -1.985960602760315, + "logits/rejected": -1.981513261795044, + "logps/chosen": -2.8128559589385986, + "logps/rejected": -193.47329711914062, + "loss": 0.2805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28165891766548157, + "rewards/margins": 2.9986205101013184, + "rewards/rejected": -2.716961622238159, + "step": 14870 + }, + { + "epoch": 0.87, + "learning_rate": 4.676648338688905e-09, + "logits/chosen": -2.005251169204712, + "logits/rejected": -2.0080485343933105, + "logps/chosen": -2.23840594291687, + "logps/rejected": -131.1508331298828, + "loss": 0.4032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08225269615650177, + "rewards/margins": 1.378332257270813, + "rewards/rejected": -1.4605849981307983, + "step": 14871 + }, + { + "epoch": 0.87, + "learning_rate": 4.672669599039641e-09, + "logits/chosen": -1.8722823858261108, + "logits/rejected": -1.869462013244629, + "logps/chosen": -101.95134735107422, + "logps/rejected": -317.2559814453125, + "loss": 0.8977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6583664417266846, + "rewards/margins": 4.667951583862305, + "rewards/rejected": -6.326318264007568, + "step": 14872 + }, + { + "epoch": 0.87, + "learning_rate": 4.668692469638863e-09, + "logits/chosen": -1.7863866090774536, + "logits/rejected": -1.789422869682312, + "logps/chosen": -66.88316345214844, + "logps/rejected": -336.4117431640625, + "loss": 0.0955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8736785650253296, + "rewards/margins": 2.5800018310546875, + "rewards/rejected": -0.7063232660293579, + "step": 14873 + }, + { + "epoch": 0.87, + "learning_rate": 4.664716950627867e-09, + "logits/chosen": -2.0539748668670654, + "logits/rejected": -2.052276134490967, + "logps/chosen": -32.35623550415039, + "logps/rejected": -141.02105712890625, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.566580593585968, + "rewards/margins": 0.8544635772705078, + "rewards/rejected": -0.2878830134868622, + "step": 14874 + }, + { + "epoch": 0.87, + "learning_rate": 4.660743042147886e-09, + "logits/chosen": -2.0207748413085938, + "logits/rejected": -2.021284580230713, + "logps/chosen": -155.7437744140625, + "logps/rejected": -158.6934814453125, + "loss": 0.4013, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.17905592918396, + "rewards/margins": -0.08737778663635254, + "rewards/rejected": 2.2664337158203125, + "step": 14875 + }, + { + "epoch": 0.87, + "learning_rate": 4.656770744340099e-09, + "logits/chosen": -1.8425049781799316, + "logits/rejected": -1.8293719291687012, + "logps/chosen": -60.57289123535156, + "logps/rejected": -448.3077392578125, + "loss": 0.2351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2514766752719879, + "rewards/margins": 10.549367904663086, + "rewards/rejected": -10.297891616821289, + "step": 14876 + }, + { + "epoch": 0.87, + "learning_rate": 4.6528000573456016e-09, + "logits/chosen": -1.9551682472229004, + "logits/rejected": -1.9538397789001465, + "logps/chosen": -34.921661376953125, + "logps/rejected": -211.63839721679688, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00476341275498271, + "rewards/margins": 2.0747859477996826, + "rewards/rejected": -2.0700225830078125, + "step": 14877 + }, + { + "epoch": 0.87, + "learning_rate": 4.6488309813054624e-09, + "logits/chosen": -1.845497727394104, + "logits/rejected": -1.8291069269180298, + "logps/chosen": -15.939129829406738, + "logps/rejected": -161.83700561523438, + "loss": 0.3606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1531367301940918, + "rewards/margins": 1.6303385496139526, + "rewards/rejected": -1.4772018194198608, + "step": 14878 + }, + { + "epoch": 0.87, + "learning_rate": 4.644863516360686e-09, + "logits/chosen": -1.7946223020553589, + "logits/rejected": -1.7876367568969727, + "logps/chosen": -265.14984130859375, + "logps/rejected": -361.5021057128906, + "loss": 0.0699, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8298492431640625, + "rewards/margins": 2.218728542327881, + "rewards/rejected": 0.6111206412315369, + "step": 14879 + }, + { + "epoch": 0.87, + "learning_rate": 4.640897662652227e-09, + "logits/chosen": -1.9112515449523926, + "logits/rejected": -1.906432032585144, + "logps/chosen": -46.849815368652344, + "logps/rejected": -132.74002075195312, + "loss": 0.6754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21262474358081818, + "rewards/margins": 0.294742226600647, + "rewards/rejected": -0.507366955280304, + "step": 14880 + }, + { + "epoch": 0.87, + "learning_rate": 4.636933420320943e-09, + "logits/chosen": -1.959091305732727, + "logits/rejected": -1.9506229162216187, + "logps/chosen": -61.7906608581543, + "logps/rejected": -265.4190673828125, + "loss": 0.1369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4116092920303345, + "rewards/margins": 2.3651461601257324, + "rewards/rejected": -0.9535369873046875, + "step": 14881 + }, + { + "epoch": 0.87, + "learning_rate": 4.632970789507679e-09, + "logits/chosen": -1.7474615573883057, + "logits/rejected": -1.7413733005523682, + "logps/chosen": -13.74295711517334, + "logps/rejected": -169.12924194335938, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23726855218410492, + "rewards/margins": 3.6220905780792236, + "rewards/rejected": -3.384822130203247, + "step": 14882 + }, + { + "epoch": 0.87, + "learning_rate": 4.629009770353209e-09, + "logits/chosen": -1.8426992893218994, + "logits/rejected": -1.8308738470077515, + "logps/chosen": -0.004201484844088554, + "logps/rejected": -142.5937042236328, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020321305782999843, + "rewards/margins": 2.5578794479370117, + "rewards/rejected": -2.5580825805664062, + "step": 14883 + }, + { + "epoch": 0.87, + "learning_rate": 4.625050362998256e-09, + "logits/chosen": -1.8684355020523071, + "logits/rejected": -1.8124797344207764, + "logps/chosen": -252.11972045898438, + "logps/rejected": -404.1890563964844, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0138275623321533, + "rewards/margins": 0.6089050769805908, + "rewards/rejected": 1.4049224853515625, + "step": 14884 + }, + { + "epoch": 0.87, + "learning_rate": 4.6210925675834455e-09, + "logits/chosen": -1.619677186012268, + "logits/rejected": -1.6110048294067383, + "logps/chosen": -60.66199493408203, + "logps/rejected": -247.52377319335938, + "loss": 0.231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32301101088523865, + "rewards/margins": 4.397884368896484, + "rewards/rejected": -4.074873447418213, + "step": 14885 + }, + { + "epoch": 0.87, + "learning_rate": 4.617136384249415e-09, + "logits/chosen": -2.0565643310546875, + "logits/rejected": -2.056868314743042, + "logps/chosen": -46.029396057128906, + "logps/rejected": -185.4830780029297, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8895736932754517, + "rewards/margins": 2.5021042823791504, + "rewards/rejected": -0.612530529499054, + "step": 14886 + }, + { + "epoch": 0.87, + "learning_rate": 4.613181813136691e-09, + "logits/chosen": -2.0383245944976807, + "logits/rejected": -2.037447929382324, + "logps/chosen": -41.71388626098633, + "logps/rejected": -118.45843505859375, + "loss": 0.7067, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03972587734460831, + "rewards/margins": -0.18108177185058594, + "rewards/rejected": 0.14135590195655823, + "step": 14887 + }, + { + "epoch": 0.87, + "learning_rate": 4.609228854385749e-09, + "logits/chosen": -1.8840025663375854, + "logits/rejected": -1.8689161539077759, + "logps/chosen": -179.10287475585938, + "logps/rejected": -330.1287536621094, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4728119373321533, + "rewards/margins": 2.83465576171875, + "rewards/rejected": -0.36184388399124146, + "step": 14888 + }, + { + "epoch": 0.87, + "learning_rate": 4.605277508137034e-09, + "logits/chosen": -1.8041651248931885, + "logits/rejected": -1.8169891834259033, + "logps/chosen": -138.89866638183594, + "logps/rejected": -270.4961853027344, + "loss": 0.1742, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.542341709136963, + "rewards/margins": 1.0776718854904175, + "rewards/rejected": 1.4646698236465454, + "step": 14889 + }, + { + "epoch": 0.87, + "learning_rate": 4.601327774530917e-09, + "logits/chosen": -1.9587537050247192, + "logits/rejected": -1.9373029470443726, + "logps/chosen": -173.1422119140625, + "logps/rejected": -357.0662841796875, + "loss": 0.3138, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.807460069656372, + "rewards/margins": 0.28059232234954834, + "rewards/rejected": 1.5268677473068237, + "step": 14890 + }, + { + "epoch": 0.87, + "learning_rate": 4.5973796537077056e-09, + "logits/chosen": -1.9312838315963745, + "logits/rejected": -1.922330379486084, + "logps/chosen": -20.687210083007812, + "logps/rejected": -138.82717895507812, + "loss": 0.147, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.223896861076355, + "rewards/margins": 2.3206703662872314, + "rewards/rejected": -1.0967735052108765, + "step": 14891 + }, + { + "epoch": 0.87, + "learning_rate": 4.593433145807657e-09, + "logits/chosen": -1.8549343347549438, + "logits/rejected": -1.8563528060913086, + "logps/chosen": -10.687909126281738, + "logps/rejected": -74.2700424194336, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30178767442703247, + "rewards/margins": 1.0764329433441162, + "rewards/rejected": -0.7746452689170837, + "step": 14892 + }, + { + "epoch": 0.87, + "learning_rate": 4.5894882509709635e-09, + "logits/chosen": -2.0091938972473145, + "logits/rejected": -2.01060152053833, + "logps/chosen": -54.308372497558594, + "logps/rejected": -139.62245178222656, + "loss": 0.8079, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3891708552837372, + "rewards/margins": -0.8511810302734375, + "rewards/rejected": 1.240351915359497, + "step": 14893 + }, + { + "epoch": 0.87, + "learning_rate": 4.585544969337779e-09, + "logits/chosen": -1.815940499305725, + "logits/rejected": -1.8170051574707031, + "logps/chosen": -199.08676147460938, + "logps/rejected": -376.1417541503906, + "loss": 0.0526, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6643997430801392, + "rewards/margins": 3.3277053833007812, + "rewards/rejected": -1.663305640220642, + "step": 14894 + }, + { + "epoch": 0.87, + "learning_rate": 4.581603301048187e-09, + "logits/chosen": -1.9711161851882935, + "logits/rejected": -1.9625511169433594, + "logps/chosen": -51.512481689453125, + "logps/rejected": -147.6735382080078, + "loss": 0.5544, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6853668093681335, + "rewards/margins": -0.22415924072265625, + "rewards/rejected": 0.9095260500907898, + "step": 14895 + }, + { + "epoch": 0.87, + "learning_rate": 4.5776632462422085e-09, + "logits/chosen": -1.8570722341537476, + "logits/rejected": -1.8547019958496094, + "logps/chosen": -45.38892364501953, + "logps/rejected": -176.29019165039062, + "loss": 0.5452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2562999725341797, + "rewards/margins": 1.0676593780517578, + "rewards/rejected": -1.3239593505859375, + "step": 14896 + }, + { + "epoch": 0.87, + "learning_rate": 4.5737248050598085e-09, + "logits/chosen": -1.9856395721435547, + "logits/rejected": -1.9907888174057007, + "logps/chosen": -177.42889404296875, + "logps/rejected": -457.0248107910156, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8355804681777954, + "rewards/margins": 6.624392509460449, + "rewards/rejected": -4.788812160491943, + "step": 14897 + }, + { + "epoch": 0.87, + "learning_rate": 4.569787977640915e-09, + "logits/chosen": -1.8072274923324585, + "logits/rejected": -1.8133145570755005, + "logps/chosen": -0.0589909665286541, + "logps/rejected": -118.67813873291016, + "loss": 0.5248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00553626473993063, + "rewards/margins": 0.847629189491272, + "rewards/rejected": -0.8420929312705994, + "step": 14898 + }, + { + "epoch": 0.87, + "learning_rate": 4.565852764125383e-09, + "logits/chosen": -1.9401755332946777, + "logits/rejected": -1.9483025074005127, + "logps/chosen": -31.18782615661621, + "logps/rejected": -250.29354858398438, + "loss": 0.3094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10096301883459091, + "rewards/margins": 3.3192293643951416, + "rewards/rejected": -3.218266248703003, + "step": 14899 + }, + { + "epoch": 0.87, + "learning_rate": 4.561919164652989e-09, + "logits/chosen": -1.6627947092056274, + "logits/rejected": -1.669881820678711, + "logps/chosen": -28.89938735961914, + "logps/rejected": -265.8529968261719, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28487855195999146, + "rewards/margins": 2.463958740234375, + "rewards/rejected": -2.1790802478790283, + "step": 14900 + }, + { + "epoch": 0.87, + "learning_rate": 4.5579871793634925e-09, + "logits/chosen": -1.5897339582443237, + "logits/rejected": -1.584189534187317, + "logps/chosen": -63.325111389160156, + "logps/rejected": -345.09735107421875, + "loss": 0.5409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7832477688789368, + "rewards/margins": 4.751310348510742, + "rewards/rejected": -5.534558296203613, + "step": 14901 + }, + { + "epoch": 0.87, + "learning_rate": 4.554056808396573e-09, + "logits/chosen": -1.9098418951034546, + "logits/rejected": -1.906198501586914, + "logps/chosen": -0.33011889457702637, + "logps/rejected": -116.94890594482422, + "loss": 0.4708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05012378841638565, + "rewards/margins": 1.1554621458053589, + "rewards/rejected": -1.1053383350372314, + "step": 14902 + }, + { + "epoch": 0.87, + "learning_rate": 4.550128051891866e-09, + "logits/chosen": -1.8245540857315063, + "logits/rejected": -1.820947527885437, + "logps/chosen": -16.885143280029297, + "logps/rejected": -221.47976684570312, + "loss": 0.2816, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20037288963794708, + "rewards/margins": 3.1647253036499023, + "rewards/rejected": -2.9643523693084717, + "step": 14903 + }, + { + "epoch": 0.87, + "learning_rate": 4.5462009099889174e-09, + "logits/chosen": -1.9103949069976807, + "logits/rejected": -1.9412834644317627, + "logps/chosen": -195.3638458251953, + "logps/rejected": -392.08966064453125, + "loss": 0.1134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.133148193359375, + "rewards/margins": 2.788662910461426, + "rewards/rejected": -1.6555145978927612, + "step": 14904 + }, + { + "epoch": 0.87, + "learning_rate": 4.5422753828272486e-09, + "logits/chosen": -1.766394019126892, + "logits/rejected": -1.7669882774353027, + "logps/chosen": -264.5711669921875, + "logps/rejected": -416.53759765625, + "loss": 0.1067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5687042474746704, + "rewards/margins": 2.8552794456481934, + "rewards/rejected": -2.2865753173828125, + "step": 14905 + }, + { + "epoch": 0.87, + "learning_rate": 4.5383514705463196e-09, + "logits/chosen": -1.9477369785308838, + "logits/rejected": -1.9711871147155762, + "logps/chosen": -272.3260803222656, + "logps/rejected": -621.3955688476562, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7286163568496704, + "rewards/margins": 12.232889175415039, + "rewards/rejected": -10.5042724609375, + "step": 14906 + }, + { + "epoch": 0.87, + "learning_rate": 4.534429173285531e-09, + "logits/chosen": -2.130967378616333, + "logits/rejected": -2.128162145614624, + "logps/chosen": -7.760459266137332e-05, + "logps/rejected": -299.14996337890625, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.60289493023447e-07, + "rewards/margins": 5.769654273986816, + "rewards/rejected": -5.769654750823975, + "step": 14907 + }, + { + "epoch": 0.87, + "learning_rate": 4.530508491184198e-09, + "logits/chosen": -1.7903242111206055, + "logits/rejected": -1.7637253999710083, + "logps/chosen": -62.74635696411133, + "logps/rejected": -337.1595458984375, + "loss": 0.1385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.136227011680603, + "rewards/margins": 3.687624931335449, + "rewards/rejected": -2.5513978004455566, + "step": 14908 + }, + { + "epoch": 0.87, + "learning_rate": 4.526589424381627e-09, + "logits/chosen": -2.1239969730377197, + "logits/rejected": -2.119800567626953, + "logps/chosen": -2.8063900470733643, + "logps/rejected": -179.2048797607422, + "loss": 0.3747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022714852821081877, + "rewards/margins": 2.64035701751709, + "rewards/rejected": -2.6426284313201904, + "step": 14909 + }, + { + "epoch": 0.87, + "learning_rate": 4.522671973017045e-09, + "logits/chosen": -1.8901665210723877, + "logits/rejected": -1.891851782798767, + "logps/chosen": -82.90016174316406, + "logps/rejected": -220.92904663085938, + "loss": 0.2591, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0307457447052002, + "rewards/margins": 1.3241204023361206, + "rewards/rejected": -0.293374627828598, + "step": 14910 + }, + { + "epoch": 0.87, + "learning_rate": 4.518756137229596e-09, + "logits/chosen": -2.090794801712036, + "logits/rejected": -2.0709705352783203, + "logps/chosen": -0.00011920621909666806, + "logps/rejected": -274.0503234863281, + "loss": 0.3449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00011582236038520932, + "rewards/margins": 5.126808166503906, + "rewards/rejected": -5.126692295074463, + "step": 14911 + }, + { + "epoch": 0.87, + "learning_rate": 4.514841917158402e-09, + "logits/chosen": -2.0394606590270996, + "logits/rejected": -2.0326709747314453, + "logps/chosen": -290.34222412109375, + "logps/rejected": -438.9507751464844, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.908679246902466, + "rewards/margins": 4.512243747711182, + "rewards/rejected": -1.6035645008087158, + "step": 14912 + }, + { + "epoch": 0.87, + "learning_rate": 4.510929312942518e-09, + "logits/chosen": -1.989046335220337, + "logits/rejected": -1.997721791267395, + "logps/chosen": -119.4090347290039, + "logps/rejected": -262.8707580566406, + "loss": 0.199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.206897735595703, + "rewards/margins": 0.8640937805175781, + "rewards/rejected": 1.342803955078125, + "step": 14913 + }, + { + "epoch": 0.87, + "learning_rate": 4.5070183247209456e-09, + "logits/chosen": -1.8526450395584106, + "logits/rejected": -1.8324371576309204, + "logps/chosen": -7.2102861404418945, + "logps/rejected": -165.92703247070312, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3687836229801178, + "rewards/margins": 3.596452236175537, + "rewards/rejected": -3.965235948562622, + "step": 14914 + }, + { + "epoch": 0.87, + "learning_rate": 4.5031089526326006e-09, + "logits/chosen": -1.9549018144607544, + "logits/rejected": -1.9525420665740967, + "logps/chosen": -2.9822146892547607, + "logps/rejected": -125.47337341308594, + "loss": 0.3596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07167177647352219, + "rewards/margins": 2.440704107284546, + "rewards/rejected": -2.369032382965088, + "step": 14915 + }, + { + "epoch": 0.87, + "learning_rate": 4.499201196816377e-09, + "logits/chosen": -1.7881357669830322, + "logits/rejected": -1.8007584810256958, + "logps/chosen": -216.7866668701172, + "logps/rejected": -279.7231140136719, + "loss": 0.0513, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.364640951156616, + "rewards/margins": 2.4970903396606445, + "rewards/rejected": 0.8675506711006165, + "step": 14916 + }, + { + "epoch": 0.87, + "learning_rate": 4.495295057411091e-09, + "logits/chosen": -1.6369991302490234, + "logits/rejected": -1.6650397777557373, + "logps/chosen": -268.93389892578125, + "logps/rejected": -494.78228759765625, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.749706983566284, + "rewards/margins": 2.821118116378784, + "rewards/rejected": -0.0714111328125, + "step": 14917 + }, + { + "epoch": 0.87, + "learning_rate": 4.491390534555523e-09, + "logits/chosen": -1.926262617111206, + "logits/rejected": -1.9004920721054077, + "logps/chosen": -32.37567138671875, + "logps/rejected": -344.4800720214844, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09410858154296875, + "rewards/margins": 6.909181118011475, + "rewards/rejected": -6.815072536468506, + "step": 14918 + }, + { + "epoch": 0.87, + "learning_rate": 4.4874876283883645e-09, + "logits/chosen": -1.9951162338256836, + "logits/rejected": -1.9845516681671143, + "logps/chosen": -170.54721069335938, + "logps/rejected": -333.9501953125, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6609466671943665, + "rewards/margins": 1.4002501964569092, + "rewards/rejected": -0.7393035888671875, + "step": 14919 + }, + { + "epoch": 0.87, + "learning_rate": 4.483586339048268e-09, + "logits/chosen": -1.9870710372924805, + "logits/rejected": -1.9866334199905396, + "logps/chosen": -13.362350463867188, + "logps/rejected": -215.38748168945312, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11976327747106552, + "rewards/margins": 3.0506930351257324, + "rewards/rejected": -2.930929660797119, + "step": 14920 + }, + { + "epoch": 0.87, + "learning_rate": 4.479686666673827e-09, + "logits/chosen": -1.844273328781128, + "logits/rejected": -1.8317350149154663, + "logps/chosen": -248.91746520996094, + "logps/rejected": -393.73944091796875, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2287185192108154, + "rewards/margins": 4.034934997558594, + "rewards/rejected": -1.8062164783477783, + "step": 14921 + }, + { + "epoch": 0.87, + "learning_rate": 4.475788611403592e-09, + "logits/chosen": -1.6814528703689575, + "logits/rejected": -1.6907048225402832, + "logps/chosen": -272.9646911621094, + "logps/rejected": -390.86358642578125, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4335999488830566, + "rewards/margins": 3.5188112258911133, + "rewards/rejected": -1.085211157798767, + "step": 14922 + }, + { + "epoch": 0.87, + "learning_rate": 4.471892173376018e-09, + "logits/chosen": -1.7947355508804321, + "logits/rejected": -1.7835756540298462, + "logps/chosen": -0.4813089072704315, + "logps/rejected": -84.79216003417969, + "loss": 0.5933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007275879615917802, + "rewards/margins": 0.3892011046409607, + "rewards/rejected": -0.3884735107421875, + "step": 14923 + }, + { + "epoch": 0.87, + "learning_rate": 4.467997352729541e-09, + "logits/chosen": -1.7789973020553589, + "logits/rejected": -1.8065720796585083, + "logps/chosen": -190.3802947998047, + "logps/rejected": -495.517822265625, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2402969598770142, + "rewards/margins": 4.603294372558594, + "rewards/rejected": -3.362997531890869, + "step": 14924 + }, + { + "epoch": 0.87, + "learning_rate": 4.464104149602515e-09, + "logits/chosen": -1.8445242643356323, + "logits/rejected": -1.8452348709106445, + "logps/chosen": -182.70265197753906, + "logps/rejected": -530.9664306640625, + "loss": 0.0557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2365951538085938, + "rewards/margins": 5.684532165527344, + "rewards/rejected": -4.44793701171875, + "step": 14925 + }, + { + "epoch": 0.87, + "learning_rate": 4.460212564133259e-09, + "logits/chosen": -1.846455693244934, + "logits/rejected": -1.816849708557129, + "logps/chosen": -93.38056945800781, + "logps/rejected": -650.6275024414062, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11966323852539062, + "rewards/margins": 14.576767921447754, + "rewards/rejected": -14.457104682922363, + "step": 14926 + }, + { + "epoch": 0.87, + "learning_rate": 4.4563225964599935e-09, + "logits/chosen": -1.771888017654419, + "logits/rejected": -1.749910593032837, + "logps/chosen": -264.21905517578125, + "logps/rejected": -434.2437744140625, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.970953345298767, + "rewards/margins": 3.2351136207580566, + "rewards/rejected": -1.26416015625, + "step": 14927 + }, + { + "epoch": 0.87, + "learning_rate": 4.452434246720937e-09, + "logits/chosen": -1.917510986328125, + "logits/rejected": -1.910724401473999, + "logps/chosen": -4.6578264236450195, + "logps/rejected": -204.67916870117188, + "loss": 0.2696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5311266183853149, + "rewards/margins": 1.8841447830200195, + "rewards/rejected": -1.3530181646347046, + "step": 14928 + }, + { + "epoch": 0.87, + "learning_rate": 4.448547515054224e-09, + "logits/chosen": -1.7098886966705322, + "logits/rejected": -1.7155057191848755, + "logps/chosen": -0.00024878152180463076, + "logps/rejected": -239.34884643554688, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.9876529121538624e-05, + "rewards/margins": 1.8260669708251953, + "rewards/rejected": -1.8260071277618408, + "step": 14929 + }, + { + "epoch": 0.87, + "learning_rate": 4.444662401597904e-09, + "logits/chosen": -1.7433855533599854, + "logits/rejected": -1.7483421564102173, + "logps/chosen": -278.63311767578125, + "logps/rejected": -384.7769775390625, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4668030738830566, + "rewards/margins": 3.1118409633636475, + "rewards/rejected": -0.645037829875946, + "step": 14930 + }, + { + "epoch": 0.87, + "learning_rate": 4.440778906490017e-09, + "logits/chosen": -2.087989091873169, + "logits/rejected": -2.095686197280884, + "logps/chosen": -20.48367691040039, + "logps/rejected": -308.2015075683594, + "loss": 0.152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9434106945991516, + "rewards/margins": 3.8313653469085693, + "rewards/rejected": -2.8879547119140625, + "step": 14931 + }, + { + "epoch": 0.87, + "learning_rate": 4.43689702986852e-09, + "logits/chosen": -2.0580315589904785, + "logits/rejected": -2.042717933654785, + "logps/chosen": -33.319190979003906, + "logps/rejected": -271.5821838378906, + "loss": 0.2012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5545951724052429, + "rewards/margins": 4.7961859703063965, + "rewards/rejected": -4.241590976715088, + "step": 14932 + }, + { + "epoch": 0.87, + "learning_rate": 4.433016771871317e-09, + "logits/chosen": -1.9910346269607544, + "logits/rejected": -1.9869624376296997, + "logps/chosen": -2.3841670554247685e-05, + "logps/rejected": -197.79489135742188, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.861017662780796e-07, + "rewards/margins": 3.136653423309326, + "rewards/rejected": -3.136653184890747, + "step": 14933 + }, + { + "epoch": 0.87, + "learning_rate": 4.4291381326362395e-09, + "logits/chosen": -1.879248857498169, + "logits/rejected": -1.8806942701339722, + "logps/chosen": -1.0685399770736694, + "logps/rejected": -31.000261306762695, + "loss": 0.504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05767296627163887, + "rewards/margins": 0.8747744560241699, + "rewards/rejected": -0.9324474334716797, + "step": 14934 + }, + { + "epoch": 0.87, + "learning_rate": 4.425261112301093e-09, + "logits/chosen": -2.00228214263916, + "logits/rejected": -1.9998594522476196, + "logps/chosen": -107.99211120605469, + "logps/rejected": -253.88839721679688, + "loss": 0.0898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.404492974281311, + "rewards/margins": 3.6746301651000977, + "rewards/rejected": -2.270137071609497, + "step": 14935 + }, + { + "epoch": 0.87, + "learning_rate": 4.4213857110036e-09, + "logits/chosen": -1.7986373901367188, + "logits/rejected": -1.7916265726089478, + "logps/chosen": -3.8854076862335205, + "logps/rejected": -191.05615234375, + "loss": 0.2208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40861138701438904, + "rewards/margins": 4.797744274139404, + "rewards/rejected": -4.389132976531982, + "step": 14936 + }, + { + "epoch": 0.87, + "learning_rate": 4.4175119288814455e-09, + "logits/chosen": -1.9689916372299194, + "logits/rejected": -1.953359842300415, + "logps/chosen": -65.66825866699219, + "logps/rejected": -160.86390686035156, + "loss": 0.3989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17086945474147797, + "rewards/margins": 1.1935501098632812, + "rewards/rejected": -1.022680640220642, + "step": 14937 + }, + { + "epoch": 0.87, + "learning_rate": 4.413639766072224e-09, + "logits/chosen": -1.8694263696670532, + "logits/rejected": -1.851487159729004, + "logps/chosen": -178.68614196777344, + "logps/rejected": -328.9798278808594, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8623368740081787, + "rewards/margins": 3.494188070297241, + "rewards/rejected": -0.6318511962890625, + "step": 14938 + }, + { + "epoch": 0.87, + "learning_rate": 4.409769222713506e-09, + "logits/chosen": -1.9516631364822388, + "logits/rejected": -1.9434230327606201, + "logps/chosen": -174.69151306152344, + "logps/rejected": -235.1467742919922, + "loss": 0.3117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3000900745391846, + "rewards/margins": 0.20023798942565918, + "rewards/rejected": 2.0998520851135254, + "step": 14939 + }, + { + "epoch": 0.87, + "learning_rate": 4.405900298942788e-09, + "logits/chosen": -1.9291353225708008, + "logits/rejected": -1.915122389793396, + "logps/chosen": -1.338070273399353, + "logps/rejected": -195.29727172851562, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48828205466270447, + "rewards/margins": 0.7074196338653564, + "rewards/rejected": -0.21913757920265198, + "step": 14940 + }, + { + "epoch": 0.87, + "learning_rate": 4.402032994897531e-09, + "logits/chosen": -1.800958514213562, + "logits/rejected": -1.774351716041565, + "logps/chosen": -210.43850708007812, + "logps/rejected": -378.9181213378906, + "loss": 0.0485, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6127350330352783, + "rewards/margins": 3.5656983852386475, + "rewards/rejected": -0.9529632925987244, + "step": 14941 + }, + { + "epoch": 0.87, + "learning_rate": 4.398167310715095e-09, + "logits/chosen": -1.9071025848388672, + "logits/rejected": -1.9049022197723389, + "logps/chosen": -42.98630142211914, + "logps/rejected": -202.18702697753906, + "loss": 0.1849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7904720306396484, + "rewards/margins": 2.636805534362793, + "rewards/rejected": -1.846333384513855, + "step": 14942 + }, + { + "epoch": 0.87, + "learning_rate": 4.394303246532816e-09, + "logits/chosen": -1.8616511821746826, + "logits/rejected": -1.8630813360214233, + "logps/chosen": -19.065582275390625, + "logps/rejected": -56.59723663330078, + "loss": 0.5788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19047795236110687, + "rewards/margins": 0.16000157594680786, + "rewards/rejected": 0.03047638013958931, + "step": 14943 + }, + { + "epoch": 0.87, + "learning_rate": 4.390440802487971e-09, + "logits/chosen": -2.054119825363159, + "logits/rejected": -2.0442209243774414, + "logps/chosen": -47.65189743041992, + "logps/rejected": -119.98495483398438, + "loss": 0.4073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7768318057060242, + "rewards/margins": 0.33901211619377136, + "rewards/rejected": 0.4378196895122528, + "step": 14944 + }, + { + "epoch": 0.87, + "learning_rate": 4.386579978717775e-09, + "logits/chosen": -1.8272944688796997, + "logits/rejected": -1.8183947801589966, + "logps/chosen": -181.34170532226562, + "logps/rejected": -376.69183349609375, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9173431396484375, + "rewards/margins": 4.4448699951171875, + "rewards/rejected": -1.52752685546875, + "step": 14945 + }, + { + "epoch": 0.87, + "learning_rate": 4.3827207753593605e-09, + "logits/chosen": -1.7368413209915161, + "logits/rejected": -1.7356832027435303, + "logps/chosen": -29.837675094604492, + "logps/rejected": -131.3883056640625, + "loss": 0.2612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20569248497486115, + "rewards/margins": 3.9163522720336914, + "rewards/rejected": -3.7106597423553467, + "step": 14946 + }, + { + "epoch": 0.87, + "learning_rate": 4.378863192549859e-09, + "logits/chosen": -1.8479653596878052, + "logits/rejected": -1.8200323581695557, + "logps/chosen": -251.387939453125, + "logps/rejected": -687.413818359375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0430755615234375, + "rewards/margins": 6.8325653076171875, + "rewards/rejected": -2.78948974609375, + "step": 14947 + }, + { + "epoch": 0.87, + "learning_rate": 4.375007230426286e-09, + "logits/chosen": -1.7136696577072144, + "logits/rejected": -1.702864408493042, + "logps/chosen": -194.4507293701172, + "logps/rejected": -399.2030029296875, + "loss": 0.1348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9682022333145142, + "rewards/margins": 1.4001877307891846, + "rewards/rejected": 0.5680145621299744, + "step": 14948 + }, + { + "epoch": 0.87, + "learning_rate": 4.3711528891256345e-09, + "logits/chosen": -1.7086378335952759, + "logits/rejected": -1.6658183336257935, + "logps/chosen": -157.857421875, + "logps/rejected": -299.4164733886719, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2587921619415283, + "rewards/margins": 3.125750780105591, + "rewards/rejected": 0.1330413818359375, + "step": 14949 + }, + { + "epoch": 0.87, + "learning_rate": 4.367300168784827e-09, + "logits/chosen": -1.7382621765136719, + "logits/rejected": -1.7526253461837769, + "logps/chosen": -150.19187927246094, + "logps/rejected": -357.62420654296875, + "loss": 0.0765, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3684371709823608, + "rewards/margins": 2.639512538909912, + "rewards/rejected": -1.2710754871368408, + "step": 14950 + }, + { + "epoch": 0.87, + "learning_rate": 4.363449069540726e-09, + "logits/chosen": -2.024552822113037, + "logits/rejected": -1.997016429901123, + "logps/chosen": -163.20297241210938, + "logps/rejected": -390.8179931640625, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.263354539871216, + "rewards/margins": 3.647387742996216, + "rewards/rejected": -1.384033203125, + "step": 14951 + }, + { + "epoch": 0.87, + "learning_rate": 4.359599591530161e-09, + "logits/chosen": -2.099128007888794, + "logits/rejected": -2.0924768447875977, + "logps/chosen": -25.284748077392578, + "logps/rejected": -181.7494659423828, + "loss": 0.1619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7242938876152039, + "rewards/margins": 3.8420262336730957, + "rewards/rejected": -3.117732286453247, + "step": 14952 + }, + { + "epoch": 0.87, + "learning_rate": 4.355751734889857e-09, + "logits/chosen": -1.915107250213623, + "logits/rejected": -1.9140830039978027, + "logps/chosen": -15.079896926879883, + "logps/rejected": -97.09046936035156, + "loss": 0.6003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04153585433959961, + "rewards/margins": 0.1892668753862381, + "rewards/rejected": -0.1477310210466385, + "step": 14953 + }, + { + "epoch": 0.87, + "learning_rate": 4.3519054997565255e-09, + "logits/chosen": -1.6441209316253662, + "logits/rejected": -1.7336393594741821, + "logps/chosen": -325.7851867675781, + "logps/rejected": -397.09100341796875, + "loss": 0.0842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8577789068222046, + "rewards/margins": 2.300546169281006, + "rewards/rejected": -0.44276735186576843, + "step": 14954 + }, + { + "epoch": 0.87, + "learning_rate": 4.348060886266797e-09, + "logits/chosen": -1.8079041242599487, + "logits/rejected": -1.7993348836898804, + "logps/chosen": -252.3510284423828, + "logps/rejected": -444.23272705078125, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.596226453781128, + "rewards/margins": 3.7638840675354004, + "rewards/rejected": -1.167657494544983, + "step": 14955 + }, + { + "epoch": 0.87, + "learning_rate": 4.3442178945572614e-09, + "logits/chosen": -1.7418168783187866, + "logits/rejected": -1.7445436716079712, + "logps/chosen": -251.84463500976562, + "logps/rejected": -438.794189453125, + "loss": 0.1708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.030426025390625, + "rewards/margins": 2.6366701126098633, + "rewards/rejected": -1.6062439680099487, + "step": 14956 + }, + { + "epoch": 0.87, + "learning_rate": 4.340376524764427e-09, + "logits/chosen": -2.032134771347046, + "logits/rejected": -2.0202746391296387, + "logps/chosen": -45.752681732177734, + "logps/rejected": -237.02272033691406, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33613815903663635, + "rewards/margins": 4.74184513092041, + "rewards/rejected": -5.077983379364014, + "step": 14957 + }, + { + "epoch": 0.87, + "learning_rate": 4.336536777024763e-09, + "logits/chosen": -2.013110876083374, + "logits/rejected": -2.013909339904785, + "logps/chosen": -9.469340324401855, + "logps/rejected": -35.949134826660156, + "loss": 0.6156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09649258106946945, + "rewards/margins": 0.1826261579990387, + "rewards/rejected": -0.08613357692956924, + "step": 14958 + }, + { + "epoch": 0.87, + "learning_rate": 4.332698651474676e-09, + "logits/chosen": -1.9128540754318237, + "logits/rejected": -1.9147995710372925, + "logps/chosen": -27.45318603515625, + "logps/rejected": -248.2928466796875, + "loss": 0.282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8600425720214844, + "rewards/margins": 0.8686546087265015, + "rewards/rejected": -0.00861206091940403, + "step": 14959 + }, + { + "epoch": 0.87, + "learning_rate": 4.328862148250523e-09, + "logits/chosen": -2.123967409133911, + "logits/rejected": -2.124680519104004, + "logps/chosen": -9.747872352600098, + "logps/rejected": -123.54586029052734, + "loss": 0.6097, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.34085455536842346, + "rewards/margins": -0.12444162368774414, + "rewards/rejected": 0.4652961790561676, + "step": 14960 + }, + { + "epoch": 0.87, + "learning_rate": 4.3250272674885845e-09, + "logits/chosen": -1.823668360710144, + "logits/rejected": -1.820898413658142, + "logps/chosen": -207.79461669921875, + "logps/rejected": -240.31048583984375, + "loss": 0.4567, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.165026903152466, + "rewards/margins": -0.353057861328125, + "rewards/rejected": 2.518084764480591, + "step": 14961 + }, + { + "epoch": 0.87, + "learning_rate": 4.321194009325091e-09, + "logits/chosen": -2.044879198074341, + "logits/rejected": -2.0508623123168945, + "logps/chosen": -41.89882278442383, + "logps/rejected": -252.39584350585938, + "loss": 0.154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5778061151504517, + "rewards/margins": 4.1192779541015625, + "rewards/rejected": -3.5414719581604004, + "step": 14962 + }, + { + "epoch": 0.87, + "learning_rate": 4.3173623738962325e-09, + "logits/chosen": -1.7361154556274414, + "logits/rejected": -1.6935676336288452, + "logps/chosen": -227.8629913330078, + "logps/rejected": -414.11090087890625, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.386042833328247, + "rewards/margins": 3.387782335281372, + "rewards/rejected": -1.001739501953125, + "step": 14963 + }, + { + "epoch": 0.87, + "learning_rate": 4.313532361338124e-09, + "logits/chosen": -1.9329023361206055, + "logits/rejected": -1.9248195886611938, + "logps/chosen": -35.902854919433594, + "logps/rejected": -147.12916564941406, + "loss": 0.1691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7059772610664368, + "rewards/margins": 2.9980976581573486, + "rewards/rejected": -2.2921204566955566, + "step": 14964 + }, + { + "epoch": 0.87, + "learning_rate": 4.309703971786816e-09, + "logits/chosen": -1.8699737787246704, + "logits/rejected": -1.9225748777389526, + "logps/chosen": -195.13345336914062, + "logps/rejected": -293.11932373046875, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5904908180236816, + "rewards/margins": 2.972827196121216, + "rewards/rejected": -0.38233643770217896, + "step": 14965 + }, + { + "epoch": 0.87, + "learning_rate": 4.305877205378316e-09, + "logits/chosen": -1.78300940990448, + "logits/rejected": -1.7786856889724731, + "logps/chosen": -38.12330627441406, + "logps/rejected": -53.05839538574219, + "loss": 0.9009, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6927284598350525, + "rewards/margins": -0.027847707271575928, + "rewards/rejected": -0.6648807525634766, + "step": 14966 + }, + { + "epoch": 0.87, + "learning_rate": 4.302052062248573e-09, + "logits/chosen": -1.9358550310134888, + "logits/rejected": -1.9438966512680054, + "logps/chosen": -29.910297393798828, + "logps/rejected": -160.06918334960938, + "loss": 0.618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1385040283203125, + "rewards/margins": 0.3487258851528168, + "rewards/rejected": -0.4872299134731293, + "step": 14967 + }, + { + "epoch": 0.87, + "learning_rate": 4.298228542533489e-09, + "logits/chosen": -1.9425089359283447, + "logits/rejected": -1.9317377805709839, + "logps/chosen": -22.149076461791992, + "logps/rejected": -138.2548828125, + "loss": 0.2348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8985626101493835, + "rewards/margins": 1.5052521228790283, + "rewards/rejected": -0.606689453125, + "step": 14968 + }, + { + "epoch": 0.87, + "learning_rate": 4.294406646368853e-09, + "logits/chosen": -1.9683332443237305, + "logits/rejected": -1.9708958864212036, + "logps/chosen": -92.54517364501953, + "logps/rejected": -359.7496337890625, + "loss": 0.1571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.941114068031311, + "rewards/margins": 3.653430938720703, + "rewards/rejected": -2.7123169898986816, + "step": 14969 + }, + { + "epoch": 0.87, + "learning_rate": 4.290586373890476e-09, + "logits/chosen": -1.7704609632492065, + "logits/rejected": -1.76810622215271, + "logps/chosen": -13.9649019241333, + "logps/rejected": -178.80172729492188, + "loss": 0.4155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030688095837831497, + "rewards/margins": 1.9877538681030273, + "rewards/rejected": -2.018441915512085, + "step": 14970 + }, + { + "epoch": 0.87, + "learning_rate": 4.286767725234064e-09, + "logits/chosen": -2.036271572113037, + "logits/rejected": -2.0282139778137207, + "logps/chosen": -187.2520751953125, + "logps/rejected": -311.3496398925781, + "loss": 0.2436, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3671692609786987, + "rewards/margins": 0.6746704578399658, + "rewards/rejected": 0.6924988031387329, + "step": 14971 + }, + { + "epoch": 0.87, + "learning_rate": 4.282950700535265e-09, + "logits/chosen": -1.8310298919677734, + "logits/rejected": -1.83480966091156, + "logps/chosen": -247.39126586914062, + "logps/rejected": -261.636474609375, + "loss": 0.3739, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9089173078536987, + "rewards/margins": -0.0004851818084716797, + "rewards/rejected": 1.9094024896621704, + "step": 14972 + }, + { + "epoch": 0.87, + "learning_rate": 4.279135299929681e-09, + "logits/chosen": -1.7261366844177246, + "logits/rejected": -1.729825735092163, + "logps/chosen": -173.73219299316406, + "logps/rejected": -283.3390197753906, + "loss": 0.1435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5752731561660767, + "rewards/margins": 1.5112320184707642, + "rewards/rejected": 0.0640411376953125, + "step": 14973 + }, + { + "epoch": 0.87, + "learning_rate": 4.2753215235528605e-09, + "logits/chosen": -1.8920129537582397, + "logits/rejected": -1.8713641166687012, + "logps/chosen": -36.16984176635742, + "logps/rejected": -351.2891845703125, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3592887818813324, + "rewards/margins": 4.820436954498291, + "rewards/rejected": -4.461148262023926, + "step": 14974 + }, + { + "epoch": 0.87, + "learning_rate": 4.271509371540289e-09, + "logits/chosen": -1.7283438444137573, + "logits/rejected": -1.7319416999816895, + "logps/chosen": -22.44464874267578, + "logps/rejected": -164.53163146972656, + "loss": 0.1996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7689846158027649, + "rewards/margins": 2.6679515838623047, + "rewards/rejected": -1.8989670276641846, + "step": 14975 + }, + { + "epoch": 0.87, + "learning_rate": 4.2676988440273735e-09, + "logits/chosen": -2.0857725143432617, + "logits/rejected": -2.0862128734588623, + "logps/chosen": -42.889564514160156, + "logps/rejected": -213.68479919433594, + "loss": 0.2565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24149703979492188, + "rewards/margins": 3.4221901893615723, + "rewards/rejected": -3.1806931495666504, + "step": 14976 + }, + { + "epoch": 0.87, + "learning_rate": 4.2638899411495015e-09, + "logits/chosen": -1.9565552473068237, + "logits/rejected": -1.9580309391021729, + "logps/chosen": -179.43997192382812, + "logps/rejected": -250.2314453125, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5314972400665283, + "rewards/margins": 0.40186774730682373, + "rewards/rejected": 1.1296294927597046, + "step": 14977 + }, + { + "epoch": 0.87, + "learning_rate": 4.260082663041981e-09, + "logits/chosen": -1.8883064985275269, + "logits/rejected": -1.8737202882766724, + "logps/chosen": -49.95435333251953, + "logps/rejected": -187.06661987304688, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.058346938341856, + "rewards/margins": 3.398566246032715, + "rewards/rejected": -3.340219259262085, + "step": 14978 + }, + { + "epoch": 0.87, + "learning_rate": 4.2562770098400644e-09, + "logits/chosen": -1.838316798210144, + "logits/rejected": -1.8905798196792603, + "logps/chosen": -193.85757446289062, + "logps/rejected": -375.93804931640625, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4763200283050537, + "rewards/margins": 2.8879778385162354, + "rewards/rejected": -0.4116577208042145, + "step": 14979 + }, + { + "epoch": 0.87, + "learning_rate": 4.25247298167894e-09, + "logits/chosen": -2.028454065322876, + "logits/rejected": -2.0276787281036377, + "logps/chosen": -118.83861541748047, + "logps/rejected": -283.9625244140625, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6551414728164673, + "rewards/margins": 2.8732872009277344, + "rewards/rejected": -1.218145728111267, + "step": 14980 + }, + { + "epoch": 0.87, + "learning_rate": 4.248670578693747e-09, + "logits/chosen": -1.913787603378296, + "logits/rejected": -1.9130157232284546, + "logps/chosen": -18.182737350463867, + "logps/rejected": -106.90625, + "loss": 0.3237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3519960343837738, + "rewards/margins": 1.7771135568618774, + "rewards/rejected": -1.4251174926757812, + "step": 14981 + }, + { + "epoch": 0.87, + "learning_rate": 4.244869801019574e-09, + "logits/chosen": -1.9452884197235107, + "logits/rejected": -1.944847583770752, + "logps/chosen": -0.00037429254734888673, + "logps/rejected": -103.4729995727539, + "loss": 0.6856, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.0008496802765876e-05, + "rewards/margins": 0.016466259956359863, + "rewards/rejected": -0.01640625111758709, + "step": 14982 + }, + { + "epoch": 0.87, + "learning_rate": 4.241070648791445e-09, + "logits/chosen": -1.633428931236267, + "logits/rejected": -1.6196024417877197, + "logps/chosen": -67.62960815429688, + "logps/rejected": -261.5509338378906, + "loss": 0.0813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5392471551895142, + "rewards/margins": 3.659994602203369, + "rewards/rejected": -2.1207473278045654, + "step": 14983 + }, + { + "epoch": 0.87, + "learning_rate": 4.237273122144308e-09, + "logits/chosen": -1.7614420652389526, + "logits/rejected": -1.7485053539276123, + "logps/chosen": -168.9678497314453, + "logps/rejected": -282.9774169921875, + "loss": 0.2549, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3826324939727783, + "rewards/margins": 0.46029365062713623, + "rewards/rejected": 1.922338843345642, + "step": 14984 + }, + { + "epoch": 0.87, + "learning_rate": 4.233477221213083e-09, + "logits/chosen": -1.9714534282684326, + "logits/rejected": -1.9503071308135986, + "logps/chosen": -246.08834838867188, + "logps/rejected": -440.0738830566406, + "loss": 0.1621, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0412659645080566, + "rewards/margins": 1.1987245082855225, + "rewards/rejected": 0.842541515827179, + "step": 14985 + }, + { + "epoch": 0.87, + "learning_rate": 4.2296829461326125e-09, + "logits/chosen": -1.5750795602798462, + "logits/rejected": -1.6314277648925781, + "logps/chosen": -224.59117126464844, + "logps/rejected": -478.2616882324219, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2387282848358154, + "rewards/margins": 6.9451093673706055, + "rewards/rejected": -3.706381320953369, + "step": 14986 + }, + { + "epoch": 0.87, + "learning_rate": 4.225890297037699e-09, + "logits/chosen": -1.7675338983535767, + "logits/rejected": -1.8330457210540771, + "logps/chosen": -189.1729736328125, + "logps/rejected": -502.7822265625, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8498718738555908, + "rewards/margins": 7.763312339782715, + "rewards/rejected": -5.913440227508545, + "step": 14987 + }, + { + "epoch": 0.87, + "learning_rate": 4.222099274063051e-09, + "logits/chosen": -1.8668560981750488, + "logits/rejected": -1.8861565589904785, + "logps/chosen": -173.60964965820312, + "logps/rejected": -329.2767639160156, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5676422119140625, + "rewards/margins": 5.335009574890137, + "rewards/rejected": -3.7673676013946533, + "step": 14988 + }, + { + "epoch": 0.87, + "learning_rate": 4.218309877343368e-09, + "logits/chosen": -1.7894248962402344, + "logits/rejected": -1.7941277027130127, + "logps/chosen": -15.863255500793457, + "logps/rejected": -165.44688415527344, + "loss": 0.2247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5949856638908386, + "rewards/margins": 3.7295682430267334, + "rewards/rejected": -3.13458251953125, + "step": 14989 + }, + { + "epoch": 0.87, + "learning_rate": 4.214522107013269e-09, + "logits/chosen": -2.029494047164917, + "logits/rejected": -2.016815185546875, + "logps/chosen": -0.0018154099816456437, + "logps/rejected": -228.75991821289062, + "loss": 0.4003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015033490490168333, + "rewards/margins": 2.489208221435547, + "rewards/rejected": -2.48935866355896, + "step": 14990 + }, + { + "epoch": 0.87, + "learning_rate": 4.2107359632073015e-09, + "logits/chosen": -1.8504093885421753, + "logits/rejected": -1.8502377271652222, + "logps/chosen": -0.00014781320351175964, + "logps/rejected": -128.78939819335938, + "loss": 0.5306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00020274925918783993, + "rewards/margins": 0.798800528049469, + "rewards/rejected": -0.7985977530479431, + "step": 14991 + }, + { + "epoch": 0.87, + "learning_rate": 4.2069514460599645e-09, + "logits/chosen": -1.9043972492218018, + "logits/rejected": -1.9041210412979126, + "logps/chosen": -56.13743591308594, + "logps/rejected": -245.800537109375, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2248912900686264, + "rewards/margins": 3.28189754486084, + "rewards/rejected": -3.506788730621338, + "step": 14992 + }, + { + "epoch": 0.87, + "learning_rate": 4.203168555705716e-09, + "logits/chosen": -1.9797488451004028, + "logits/rejected": -1.972931146621704, + "logps/chosen": -19.582548141479492, + "logps/rejected": -64.77601623535156, + "loss": 0.6068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1950342208147049, + "rewards/margins": 0.3875913619995117, + "rewards/rejected": -0.5826255679130554, + "step": 14993 + }, + { + "epoch": 0.87, + "learning_rate": 4.199387292278944e-09, + "logits/chosen": -1.8513312339782715, + "logits/rejected": -1.8470202684402466, + "logps/chosen": -56.37827682495117, + "logps/rejected": -220.4292449951172, + "loss": 0.1636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8507580161094666, + "rewards/margins": 3.316807270050049, + "rewards/rejected": -2.4660491943359375, + "step": 14994 + }, + { + "epoch": 0.87, + "learning_rate": 4.195607655913963e-09, + "logits/chosen": -1.792061686515808, + "logits/rejected": -1.7812981605529785, + "logps/chosen": -19.381816864013672, + "logps/rejected": -263.794921875, + "loss": 0.4472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5622312426567078, + "rewards/margins": 2.9219210147857666, + "rewards/rejected": -3.484152317047119, + "step": 14995 + }, + { + "epoch": 0.87, + "learning_rate": 4.191829646745048e-09, + "logits/chosen": -1.7654534578323364, + "logits/rejected": -1.768552303314209, + "logps/chosen": -31.544448852539062, + "logps/rejected": -230.26022338867188, + "loss": 0.232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.880166232585907, + "rewards/margins": 1.8182125091552734, + "rewards/rejected": -0.9380462765693665, + "step": 14996 + }, + { + "epoch": 0.87, + "learning_rate": 4.188053264906421e-09, + "logits/chosen": -1.9428296089172363, + "logits/rejected": -1.9337146282196045, + "logps/chosen": -35.51411437988281, + "logps/rejected": -105.79141235351562, + "loss": 0.4327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2417854368686676, + "rewards/margins": 1.0964813232421875, + "rewards/rejected": -0.8546959161758423, + "step": 14997 + }, + { + "epoch": 0.87, + "learning_rate": 4.184278510532235e-09, + "logits/chosen": -1.7629605531692505, + "logits/rejected": -1.7577037811279297, + "logps/chosen": -50.630157470703125, + "logps/rejected": -181.9158477783203, + "loss": 0.5824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21862450242042542, + "rewards/margins": 0.5125248432159424, + "rewards/rejected": -0.7311493158340454, + "step": 14998 + }, + { + "epoch": 0.87, + "learning_rate": 4.180505383756583e-09, + "logits/chosen": -1.8939412832260132, + "logits/rejected": -1.8884447813034058, + "logps/chosen": -9.48878878261894e-05, + "logps/rejected": -205.31277465820312, + "loss": 0.3596, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3614066978771007e-06, + "rewards/margins": 3.28523588180542, + "rewards/rejected": -3.2852325439453125, + "step": 14999 + }, + { + "epoch": 0.87, + "learning_rate": 4.176733884713502e-09, + "logits/chosen": -1.9814165830612183, + "logits/rejected": -1.978990077972412, + "logps/chosen": -35.913795471191406, + "logps/rejected": -255.9397735595703, + "loss": 0.2451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3617301881313324, + "rewards/margins": 6.622015953063965, + "rewards/rejected": -6.2602858543396, + "step": 15000 + }, + { + "epoch": 0.87, + "learning_rate": 4.172964013536984e-09, + "logits/chosen": -1.7881091833114624, + "logits/rejected": -1.790609359741211, + "logps/chosen": -214.96725463867188, + "logps/rejected": -468.0681457519531, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4091737270355225, + "rewards/margins": 7.307431221008301, + "rewards/rejected": -4.898257732391357, + "step": 15001 + }, + { + "epoch": 0.87, + "learning_rate": 4.169195770360956e-09, + "logits/chosen": -1.8374736309051514, + "logits/rejected": -1.8361423015594482, + "logps/chosen": -0.024481939151883125, + "logps/rejected": -148.83309936523438, + "loss": 0.3697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002249564742669463, + "rewards/margins": 3.3347744941711426, + "rewards/rejected": -3.337023973464966, + "step": 15002 + }, + { + "epoch": 0.87, + "learning_rate": 4.165429155319267e-09, + "logits/chosen": -1.9442895650863647, + "logits/rejected": -1.9486377239227295, + "logps/chosen": -37.36796188354492, + "logps/rejected": -142.41184997558594, + "loss": 1.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2522385120391846, + "rewards/margins": 0.18876945972442627, + "rewards/rejected": -1.4410079717636108, + "step": 15003 + }, + { + "epoch": 0.87, + "learning_rate": 4.161664168545731e-09, + "logits/chosen": -1.7340935468673706, + "logits/rejected": -1.7392569780349731, + "logps/chosen": -207.94488525390625, + "logps/rejected": -357.92095947265625, + "loss": 0.0667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7644745111465454, + "rewards/margins": 3.6043124198913574, + "rewards/rejected": -2.8398377895355225, + "step": 15004 + }, + { + "epoch": 0.87, + "learning_rate": 4.157900810174109e-09, + "logits/chosen": -2.00583553314209, + "logits/rejected": -2.0106825828552246, + "logps/chosen": -30.31768035888672, + "logps/rejected": -148.16505432128906, + "loss": 0.2071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7124210596084595, + "rewards/margins": 3.7201666831970215, + "rewards/rejected": -3.0077455043792725, + "step": 15005 + }, + { + "epoch": 0.87, + "learning_rate": 4.154139080338098e-09, + "logits/chosen": -1.8689403533935547, + "logits/rejected": -1.8473873138427734, + "logps/chosen": -160.90963745117188, + "logps/rejected": -276.35394287109375, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26762086153030396, + "rewards/margins": 6.644045829772949, + "rewards/rejected": -6.9116668701171875, + "step": 15006 + }, + { + "epoch": 0.87, + "learning_rate": 4.150378979171309e-09, + "logits/chosen": -2.00235652923584, + "logits/rejected": -1.995082139968872, + "logps/chosen": -20.727994918823242, + "logps/rejected": -98.34521484375, + "loss": 0.4099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.076441191136837, + "rewards/margins": 1.7678123712539673, + "rewards/rejected": -1.8442535400390625, + "step": 15007 + }, + { + "epoch": 0.87, + "learning_rate": 4.1466205068073354e-09, + "logits/chosen": -1.9521957635879517, + "logits/rejected": -1.9573495388031006, + "logps/chosen": -3.5715138912200928, + "logps/rejected": -116.97634887695312, + "loss": 0.5046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1060248613357544, + "rewards/margins": 0.8394355177879333, + "rewards/rejected": -0.733410656452179, + "step": 15008 + }, + { + "epoch": 0.87, + "learning_rate": 4.1428636633796975e-09, + "logits/chosen": -1.8150060176849365, + "logits/rejected": -1.8056145906448364, + "logps/chosen": -73.14673614501953, + "logps/rejected": -186.11904907226562, + "loss": 0.1992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.405901312828064, + "rewards/margins": 1.3367118835449219, + "rewards/rejected": 0.06918945163488388, + "step": 15009 + }, + { + "epoch": 0.87, + "learning_rate": 4.1391084490218495e-09, + "logits/chosen": -1.874185562133789, + "logits/rejected": -1.871803641319275, + "logps/chosen": -1.9492096900939941, + "logps/rejected": -268.3828125, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.051415182650089264, + "rewards/margins": 2.79923939704895, + "rewards/rejected": -2.747824192047119, + "step": 15010 + }, + { + "epoch": 0.87, + "learning_rate": 4.135354863867202e-09, + "logits/chosen": -1.9786063432693481, + "logits/rejected": -1.975874662399292, + "logps/chosen": -11.73104476928711, + "logps/rejected": -145.0074462890625, + "loss": 0.3513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2733360230922699, + "rewards/margins": 2.4451842308044434, + "rewards/rejected": -2.1718482971191406, + "step": 15011 + }, + { + "epoch": 0.87, + "learning_rate": 4.131602908049098e-09, + "logits/chosen": -1.8353139162063599, + "logits/rejected": -1.838232398033142, + "logps/chosen": -0.008642864413559437, + "logps/rejected": -116.27609252929688, + "loss": 0.4147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003435869875829667, + "rewards/margins": 1.5975161790847778, + "rewards/rejected": -1.5971726179122925, + "step": 15012 + }, + { + "epoch": 0.87, + "learning_rate": 4.127852581700836e-09, + "logits/chosen": -2.0727105140686035, + "logits/rejected": -2.0556089878082275, + "logps/chosen": -23.27894401550293, + "logps/rejected": -437.85174560546875, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0779409408569336, + "rewards/margins": 11.804610252380371, + "rewards/rejected": -11.726669311523438, + "step": 15013 + }, + { + "epoch": 0.87, + "learning_rate": 4.124103884955621e-09, + "logits/chosen": -1.6125495433807373, + "logits/rejected": -1.6149451732635498, + "logps/chosen": -182.391845703125, + "logps/rejected": -243.68490600585938, + "loss": 0.2154, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4349000453948975, + "rewards/margins": 0.735244870185852, + "rewards/rejected": 1.6996551752090454, + "step": 15014 + }, + { + "epoch": 0.87, + "learning_rate": 4.1203568179466465e-09, + "logits/chosen": -1.80424964427948, + "logits/rejected": -1.80406653881073, + "logps/chosen": -26.725536346435547, + "logps/rejected": -180.58396911621094, + "loss": 0.6917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5560903549194336, + "rewards/margins": 0.6905802488327026, + "rewards/rejected": -1.2466706037521362, + "step": 15015 + }, + { + "epoch": 0.87, + "learning_rate": 4.116611380807011e-09, + "logits/chosen": -1.898152470588684, + "logits/rejected": -1.892878532409668, + "logps/chosen": -159.336669921875, + "logps/rejected": -353.2846374511719, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1471664905548096, + "rewards/margins": 3.1863603591918945, + "rewards/rejected": -1.0391937494277954, + "step": 15016 + }, + { + "epoch": 0.87, + "learning_rate": 4.112867573669792e-09, + "logits/chosen": -1.9286937713623047, + "logits/rejected": -1.9474644660949707, + "logps/chosen": -255.91644287109375, + "logps/rejected": -502.11212158203125, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.699621558189392, + "rewards/margins": 3.3775086402893066, + "rewards/rejected": -1.677886962890625, + "step": 15017 + }, + { + "epoch": 0.87, + "learning_rate": 4.109125396667968e-09, + "logits/chosen": -1.736186146736145, + "logits/rejected": -1.738615870475769, + "logps/chosen": -238.24349975585938, + "logps/rejected": -406.43145751953125, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2812042236328125, + "rewards/margins": 3.741555690765381, + "rewards/rejected": -1.460351586341858, + "step": 15018 + }, + { + "epoch": 0.87, + "learning_rate": 4.1053848499344844e-09, + "logits/chosen": -2.0976929664611816, + "logits/rejected": -2.0708353519439697, + "logps/chosen": -2.309842348098755, + "logps/rejected": -223.73715209960938, + "loss": 0.3797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011717534624040127, + "rewards/margins": 3.7370100021362305, + "rewards/rejected": -3.748727560043335, + "step": 15019 + }, + { + "epoch": 0.87, + "learning_rate": 4.101645933602227e-09, + "logits/chosen": -1.9794832468032837, + "logits/rejected": -1.9736140966415405, + "logps/chosen": -57.153358459472656, + "logps/rejected": -247.16574096679688, + "loss": 0.0893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.509966254234314, + "rewards/margins": 5.807973861694336, + "rewards/rejected": -4.298007488250732, + "step": 15020 + }, + { + "epoch": 0.87, + "learning_rate": 4.097908647804027e-09, + "logits/chosen": -1.943182110786438, + "logits/rejected": -1.8462581634521484, + "logps/chosen": -25.724958419799805, + "logps/rejected": -800.7644653320312, + "loss": 0.2142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4981922209262848, + "rewards/margins": 10.179850578308105, + "rewards/rejected": -9.681658744812012, + "step": 15021 + }, + { + "epoch": 0.87, + "learning_rate": 4.094172992672635e-09, + "logits/chosen": -1.921823263168335, + "logits/rejected": -1.9265981912612915, + "logps/chosen": -215.4559326171875, + "logps/rejected": -324.5118713378906, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.357919454574585, + "rewards/margins": 3.08803129196167, + "rewards/rejected": -0.7301117181777954, + "step": 15022 + }, + { + "epoch": 0.87, + "learning_rate": 4.0904389683407646e-09, + "logits/chosen": -1.962302803993225, + "logits/rejected": -1.9635099172592163, + "logps/chosen": -15.668050765991211, + "logps/rejected": -83.62298583984375, + "loss": 0.3937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07254085689783096, + "rewards/margins": 1.7850912809371948, + "rewards/rejected": -1.712550401687622, + "step": 15023 + }, + { + "epoch": 0.87, + "learning_rate": 4.086706574941073e-09, + "logits/chosen": -1.9090299606323242, + "logits/rejected": -1.9235117435455322, + "logps/chosen": -142.83187866210938, + "logps/rejected": -210.1003875732422, + "loss": 0.0754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3536666631698608, + "rewards/margins": 2.4317963123321533, + "rewards/rejected": -1.0781296491622925, + "step": 15024 + }, + { + "epoch": 0.87, + "learning_rate": 4.0829758126061595e-09, + "logits/chosen": -2.032423734664917, + "logits/rejected": -2.0269064903259277, + "logps/chosen": -0.00200429605320096, + "logps/rejected": -145.6695556640625, + "loss": 0.5218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0008478820673190057, + "rewards/margins": 0.851574182510376, + "rewards/rejected": -0.850726306438446, + "step": 15025 + }, + { + "epoch": 0.87, + "learning_rate": 4.07924668146854e-09, + "logits/chosen": -2.1047351360321045, + "logits/rejected": -2.108898639678955, + "logps/chosen": -124.09485626220703, + "logps/rejected": -377.5605163574219, + "loss": 0.0688, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.558724284172058, + "rewards/margins": 3.362868547439575, + "rewards/rejected": -1.804144263267517, + "step": 15026 + }, + { + "epoch": 0.87, + "learning_rate": 4.075519181660703e-09, + "logits/chosen": -1.7299846410751343, + "logits/rejected": -1.7371810674667358, + "logps/chosen": -245.79869079589844, + "logps/rejected": -344.7069091796875, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.80974280834198, + "rewards/margins": 0.5922714471817017, + "rewards/rejected": 1.2174713611602783, + "step": 15027 + }, + { + "epoch": 0.87, + "learning_rate": 4.071793313315064e-09, + "logits/chosen": -2.053246259689331, + "logits/rejected": -2.049643039703369, + "logps/chosen": -4.3511005060281605e-05, + "logps/rejected": -176.8769989013672, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.78813579054804e-07, + "rewards/margins": 3.8288087844848633, + "rewards/rejected": -3.828808546066284, + "step": 15028 + }, + { + "epoch": 0.87, + "learning_rate": 4.0680690765639956e-09, + "logits/chosen": -1.9747551679611206, + "logits/rejected": -1.9629324674606323, + "logps/chosen": -35.99458694458008, + "logps/rejected": -273.9586181640625, + "loss": 0.2289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6071300506591797, + "rewards/margins": 2.0666608810424805, + "rewards/rejected": -1.4595307111740112, + "step": 15029 + }, + { + "epoch": 0.87, + "learning_rate": 4.06434647153977e-09, + "logits/chosen": -1.8313409090042114, + "logits/rejected": -1.8342281579971313, + "logps/chosen": -5.876880823052488e-05, + "logps/rejected": -72.64217376708984, + "loss": 0.3872, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.649486129437719e-07, + "rewards/margins": 2.3756515979766846, + "rewards/rejected": -2.3756511211395264, + "step": 15030 + }, + { + "epoch": 0.87, + "learning_rate": 4.060625498374664e-09, + "logits/chosen": -1.8557685613632202, + "logits/rejected": -1.8405907154083252, + "logps/chosen": -174.83042907714844, + "logps/rejected": -273.067626953125, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.1155104637146, + "rewards/margins": 3.786698818206787, + "rewards/rejected": 0.3288116455078125, + "step": 15031 + }, + { + "epoch": 0.87, + "learning_rate": 4.056906157200862e-09, + "logits/chosen": -1.8997819423675537, + "logits/rejected": -1.9000561237335205, + "logps/chosen": -249.2576904296875, + "logps/rejected": -469.15850830078125, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.479174852371216, + "rewards/margins": 5.609707832336426, + "rewards/rejected": -3.13053297996521, + "step": 15032 + }, + { + "epoch": 0.87, + "learning_rate": 4.05318844815048e-09, + "logits/chosen": -2.0259768962860107, + "logits/rejected": -2.029871702194214, + "logps/chosen": -0.00010716666292864829, + "logps/rejected": -119.82231903076172, + "loss": 0.3574, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.933417509775609e-05, + "rewards/margins": 3.065629005432129, + "rewards/rejected": -3.065549612045288, + "step": 15033 + }, + { + "epoch": 0.87, + "learning_rate": 4.049472371355589e-09, + "logits/chosen": -1.697287917137146, + "logits/rejected": -1.6904990673065186, + "logps/chosen": -59.07142639160156, + "logps/rejected": -303.9357604980469, + "loss": 0.1162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4128998517990112, + "rewards/margins": 4.507800579071045, + "rewards/rejected": -3.094900608062744, + "step": 15034 + }, + { + "epoch": 0.87, + "learning_rate": 4.045757926948213e-09, + "logits/chosen": -1.8433027267456055, + "logits/rejected": -1.8836250305175781, + "logps/chosen": -180.8099822998047, + "logps/rejected": -313.01068115234375, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4645493030548096, + "rewards/margins": 2.9312057495117188, + "rewards/rejected": -1.4666565656661987, + "step": 15035 + }, + { + "epoch": 0.88, + "learning_rate": 4.0420451150603056e-09, + "logits/chosen": -1.763887643814087, + "logits/rejected": -1.7812998294830322, + "logps/chosen": -218.49493408203125, + "logps/rejected": -455.702880859375, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.286390781402588, + "rewards/margins": 7.283900737762451, + "rewards/rejected": -4.997509956359863, + "step": 15036 + }, + { + "epoch": 0.88, + "learning_rate": 4.038333935823756e-09, + "logits/chosen": -1.951101303100586, + "logits/rejected": -1.9539488554000854, + "logps/chosen": -40.222137451171875, + "logps/rejected": -192.38375854492188, + "loss": 0.1713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7185878753662109, + "rewards/margins": 2.710261344909668, + "rewards/rejected": -1.9916733503341675, + "step": 15037 + }, + { + "epoch": 0.88, + "learning_rate": 4.034624389370405e-09, + "logits/chosen": -1.9219005107879639, + "logits/rejected": -1.9292436838150024, + "logps/chosen": -33.47501754760742, + "logps/rejected": -175.58206176757812, + "loss": 0.2543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6665016412734985, + "rewards/margins": 1.6923271417617798, + "rewards/rejected": -1.0258255004882812, + "step": 15038 + }, + { + "epoch": 0.88, + "learning_rate": 4.0309164758320354e-09, + "logits/chosen": -1.8802108764648438, + "logits/rejected": -1.8648185729980469, + "logps/chosen": -56.830631256103516, + "logps/rejected": -203.61355590820312, + "loss": 0.4643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5844475030899048, + "rewards/margins": 0.5259437561035156, + "rewards/rejected": 0.05850372463464737, + "step": 15039 + }, + { + "epoch": 0.88, + "learning_rate": 4.027210195340386e-09, + "logits/chosen": -1.9592294692993164, + "logits/rejected": -1.965572476387024, + "logps/chosen": -0.03926747292280197, + "logps/rejected": -52.850914001464844, + "loss": 0.7032, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0023079917300492525, + "rewards/margins": -0.1277981847524643, + "rewards/rejected": 0.1254901885986328, + "step": 15040 + }, + { + "epoch": 0.88, + "learning_rate": 4.023505548027095e-09, + "logits/chosen": -1.7651242017745972, + "logits/rejected": -1.7664469480514526, + "logps/chosen": -24.04816436767578, + "logps/rejected": -90.99075317382812, + "loss": 0.5451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14954929053783417, + "rewards/margins": 0.9883100390434265, + "rewards/rejected": -1.1378593444824219, + "step": 15041 + }, + { + "epoch": 0.88, + "learning_rate": 4.01980253402378e-09, + "logits/chosen": -1.9215935468673706, + "logits/rejected": -1.9194709062576294, + "logps/chosen": -43.49256896972656, + "logps/rejected": -110.51226806640625, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2476402372121811, + "rewards/margins": 2.415756940841675, + "rewards/rejected": -2.168116807937622, + "step": 15042 + }, + { + "epoch": 0.88, + "learning_rate": 4.016101153461998e-09, + "logits/chosen": -1.8440473079681396, + "logits/rejected": -1.8508182764053345, + "logps/chosen": -33.44706726074219, + "logps/rejected": -301.6014709472656, + "loss": 0.1991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5332657098770142, + "rewards/margins": 4.8005170822143555, + "rewards/rejected": -4.267251491546631, + "step": 15043 + }, + { + "epoch": 0.88, + "learning_rate": 4.012401406473243e-09, + "logits/chosen": -1.793000340461731, + "logits/rejected": -1.874384880065918, + "logps/chosen": -256.7639465332031, + "logps/rejected": -340.4946594238281, + "loss": 0.0858, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2764739990234375, + "rewards/margins": 1.7518737316131592, + "rewards/rejected": 1.5246002674102783, + "step": 15044 + }, + { + "epoch": 0.88, + "learning_rate": 4.008703293188926e-09, + "logits/chosen": -1.8455026149749756, + "logits/rejected": -1.8379274606704712, + "logps/chosen": -50.251895904541016, + "logps/rejected": -162.62149047851562, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16085854172706604, + "rewards/margins": 2.944995641708374, + "rewards/rejected": -2.784137010574341, + "step": 15045 + }, + { + "epoch": 0.88, + "learning_rate": 4.005006813740436e-09, + "logits/chosen": -2.1169095039367676, + "logits/rejected": -2.12379789352417, + "logps/chosen": -12.919830322265625, + "logps/rejected": -200.2752227783203, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1439562886953354, + "rewards/margins": 2.8864290714263916, + "rewards/rejected": -2.7424728870391846, + "step": 15046 + }, + { + "epoch": 0.88, + "learning_rate": 4.001311968259097e-09, + "logits/chosen": -1.8306825160980225, + "logits/rejected": -1.8040292263031006, + "logps/chosen": -0.3930104076862335, + "logps/rejected": -256.9770202636719, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16276387870311737, + "rewards/margins": 6.116293430328369, + "rewards/rejected": -5.953529357910156, + "step": 15047 + }, + { + "epoch": 0.88, + "learning_rate": 3.997618756876164e-09, + "logits/chosen": -1.9983664751052856, + "logits/rejected": -1.9693866968154907, + "logps/chosen": -0.049720559269189835, + "logps/rejected": -440.4541931152344, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004548768978565931, + "rewards/margins": 11.049824714660645, + "rewards/rejected": -11.054373741149902, + "step": 15048 + }, + { + "epoch": 0.88, + "learning_rate": 3.993927179722817e-09, + "logits/chosen": -1.9787415266036987, + "logits/rejected": -1.980279564857483, + "logps/chosen": -155.98114013671875, + "logps/rejected": -395.7279052734375, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0438019037246704, + "rewards/margins": 3.963101387023926, + "rewards/rejected": -2.919299364089966, + "step": 15049 + }, + { + "epoch": 0.88, + "learning_rate": 3.990237236930227e-09, + "logits/chosen": -1.9969687461853027, + "logits/rejected": -1.993177890777588, + "logps/chosen": -0.0015874517848715186, + "logps/rejected": -78.82688903808594, + "loss": 0.6117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7111604140372947e-05, + "rewards/margins": 0.2899998128414154, + "rewards/rejected": -0.2899726927280426, + "step": 15050 + }, + { + "epoch": 0.88, + "learning_rate": 3.986548928629474e-09, + "logits/chosen": -2.0116119384765625, + "logits/rejected": -2.0041470527648926, + "logps/chosen": -0.00010144361294806004, + "logps/rejected": -84.1613540649414, + "loss": 0.6794, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.083122677111533e-06, + "rewards/margins": 0.002632831921800971, + "rewards/rejected": -0.0026237487327307463, + "step": 15051 + }, + { + "epoch": 0.88, + "learning_rate": 3.982862254951574e-09, + "logits/chosen": -2.0058116912841797, + "logits/rejected": -1.988097071647644, + "logps/chosen": -49.27051544189453, + "logps/rejected": -230.62857055664062, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.149738311767578, + "rewards/margins": 5.021280765533447, + "rewards/rejected": -2.871542453765869, + "step": 15052 + }, + { + "epoch": 0.88, + "learning_rate": 3.979177216027502e-09, + "logits/chosen": -1.9270553588867188, + "logits/rejected": -1.9261586666107178, + "logps/chosen": -320.5960388183594, + "logps/rejected": -356.580322265625, + "loss": 0.4264, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.3075714111328125, + "rewards/margins": -0.16090703010559082, + "rewards/rejected": 1.4684784412384033, + "step": 15053 + }, + { + "epoch": 0.88, + "learning_rate": 3.975493811988162e-09, + "logits/chosen": -1.6558219194412231, + "logits/rejected": -1.6837189197540283, + "logps/chosen": -178.44552612304688, + "logps/rejected": -277.1689453125, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5011260509490967, + "rewards/margins": 2.53743577003479, + "rewards/rejected": -0.03630981594324112, + "step": 15054 + }, + { + "epoch": 0.88, + "learning_rate": 3.971812042964423e-09, + "logits/chosen": -1.9315531253814697, + "logits/rejected": -1.9043338298797607, + "logps/chosen": -192.57298278808594, + "logps/rejected": -309.30804443359375, + "loss": 0.1408, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8842666149139404, + "rewards/margins": 1.1623672246932983, + "rewards/rejected": 1.721899390220642, + "step": 15055 + }, + { + "epoch": 0.88, + "learning_rate": 3.968131909087063e-09, + "logits/chosen": -1.9077786207199097, + "logits/rejected": -1.9617136716842651, + "logps/chosen": -259.06170654296875, + "logps/rejected": -250.7838592529297, + "loss": 0.0836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.787237524986267, + "rewards/margins": 1.8893004655838013, + "rewards/rejected": -0.10206299275159836, + "step": 15056 + }, + { + "epoch": 0.88, + "learning_rate": 3.9644534104868235e-09, + "logits/chosen": -2.013155221939087, + "logits/rejected": -1.9896039962768555, + "logps/chosen": -155.29501342773438, + "logps/rejected": -301.0897216796875, + "loss": 0.2238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9384247064590454, + "rewards/margins": 1.028466820716858, + "rewards/rejected": -0.0900421142578125, + "step": 15057 + }, + { + "epoch": 0.88, + "learning_rate": 3.960776547294381e-09, + "logits/chosen": -2.0056707859039307, + "logits/rejected": -2.0059876441955566, + "logps/chosen": -15.979165077209473, + "logps/rejected": -101.42509460449219, + "loss": 0.5034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32635411620140076, + "rewards/margins": 1.804648756980896, + "rewards/rejected": -2.131002902984619, + "step": 15058 + }, + { + "epoch": 0.88, + "learning_rate": 3.957101319640366e-09, + "logits/chosen": -1.7570286989212036, + "logits/rejected": -1.7532864809036255, + "logps/chosen": -4.071108341217041, + "logps/rejected": -163.17550659179688, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08863478153944016, + "rewards/margins": 2.4861323833465576, + "rewards/rejected": -2.3974976539611816, + "step": 15059 + }, + { + "epoch": 0.88, + "learning_rate": 3.953427727655323e-09, + "logits/chosen": -1.8664519786834717, + "logits/rejected": -1.8614588975906372, + "logps/chosen": -7.39001989364624, + "logps/rejected": -87.40876770019531, + "loss": 0.5923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07890095561742783, + "rewards/margins": 0.3546997606754303, + "rewards/rejected": -0.2757987976074219, + "step": 15060 + }, + { + "epoch": 0.88, + "learning_rate": 3.949755771469765e-09, + "logits/chosen": -1.7907674312591553, + "logits/rejected": -1.7634549140930176, + "logps/chosen": -239.9856414794922, + "logps/rejected": -341.11334228515625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.517890930175781, + "rewards/margins": 5.341804504394531, + "rewards/rejected": -0.82391357421875, + "step": 15061 + }, + { + "epoch": 0.88, + "learning_rate": 3.9460854512141375e-09, + "logits/chosen": -1.7778226137161255, + "logits/rejected": -1.7028917074203491, + "logps/chosen": -298.7014465332031, + "logps/rejected": -622.9830932617188, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.981512427330017, + "rewards/margins": 3.899218797683716, + "rewards/rejected": -1.9177063703536987, + "step": 15062 + }, + { + "epoch": 0.88, + "learning_rate": 3.942416767018841e-09, + "logits/chosen": -1.8995428085327148, + "logits/rejected": -1.9018303155899048, + "logps/chosen": -119.26747131347656, + "logps/rejected": -172.7843475341797, + "loss": 0.1029, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.241223096847534, + "rewards/margins": 1.620835781097412, + "rewards/rejected": 1.620387315750122, + "step": 15063 + }, + { + "epoch": 0.88, + "learning_rate": 3.938749719014184e-09, + "logits/chosen": -1.601960301399231, + "logits/rejected": -1.6072430610656738, + "logps/chosen": -253.46841430664062, + "logps/rejected": -365.3304443359375, + "loss": 0.0561, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5244476795196533, + "rewards/margins": 2.8818817138671875, + "rewards/rejected": -1.3574341535568237, + "step": 15064 + }, + { + "epoch": 0.88, + "learning_rate": 3.935084307330444e-09, + "logits/chosen": -2.007483959197998, + "logits/rejected": -2.001901865005493, + "logps/chosen": -101.23299407958984, + "logps/rejected": -359.3097229003906, + "loss": 0.2133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2475532591342926, + "rewards/margins": 4.341886520385742, + "rewards/rejected": -4.094333171844482, + "step": 15065 + }, + { + "epoch": 0.88, + "learning_rate": 3.93142053209784e-09, + "logits/chosen": -1.6631642580032349, + "logits/rejected": -1.6692458391189575, + "logps/chosen": -8.416031050728634e-05, + "logps/rejected": -207.05377197265625, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7165417602882371e-06, + "rewards/margins": 3.803907871246338, + "rewards/rejected": -3.803906202316284, + "step": 15066 + }, + { + "epoch": 0.88, + "learning_rate": 3.927758393446534e-09, + "logits/chosen": -2.0885872840881348, + "logits/rejected": -2.0898141860961914, + "logps/chosen": -5.633758544921875, + "logps/rejected": -140.61331176757812, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2997874319553375, + "rewards/margins": 2.6578280925750732, + "rewards/rejected": -2.3580405712127686, + "step": 15067 + }, + { + "epoch": 0.88, + "learning_rate": 3.9240978915066e-09, + "logits/chosen": -1.8453377485275269, + "logits/rejected": -1.834193468093872, + "logps/chosen": -35.47765350341797, + "logps/rejected": -213.22052001953125, + "loss": 0.3636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19629745185375214, + "rewards/margins": 1.1528602838516235, + "rewards/rejected": -0.9565628170967102, + "step": 15068 + }, + { + "epoch": 0.88, + "learning_rate": 3.920439026408096e-09, + "logits/chosen": -1.8942421674728394, + "logits/rejected": -1.8935487270355225, + "logps/chosen": -1.8628262281417847, + "logps/rejected": -179.05474853515625, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12071473896503448, + "rewards/margins": 4.73482608795166, + "rewards/rejected": -4.855540752410889, + "step": 15069 + }, + { + "epoch": 0.88, + "learning_rate": 3.916781798280993e-09, + "logits/chosen": -2.011063575744629, + "logits/rejected": -2.0051047801971436, + "logps/chosen": -0.0002889433817472309, + "logps/rejected": -235.1590576171875, + "loss": 0.3217, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.430214408377651e-05, + "rewards/margins": 5.645907402038574, + "rewards/rejected": -5.645883083343506, + "step": 15070 + }, + { + "epoch": 0.88, + "learning_rate": 3.913126207255218e-09, + "logits/chosen": -2.0961625576019287, + "logits/rejected": -2.1079983711242676, + "logps/chosen": -20.993614196777344, + "logps/rejected": -85.89154052734375, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6505958437919617, + "rewards/margins": 1.196356177330017, + "rewards/rejected": -0.5457603335380554, + "step": 15071 + }, + { + "epoch": 0.88, + "learning_rate": 3.909472253460639e-09, + "logits/chosen": -1.8252291679382324, + "logits/rejected": -1.832118272781372, + "logps/chosen": -22.07666015625, + "logps/rejected": -339.86083984375, + "loss": 0.2596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2680280804634094, + "rewards/margins": 5.625529766082764, + "rewards/rejected": -5.35750150680542, + "step": 15072 + }, + { + "epoch": 0.88, + "learning_rate": 3.905819937027055e-09, + "logits/chosen": -1.7163946628570557, + "logits/rejected": -1.690873146057129, + "logps/chosen": -209.23440551757812, + "logps/rejected": -268.2410888671875, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0980255603790283, + "rewards/margins": 0.9358459711074829, + "rewards/rejected": 0.16217957437038422, + "step": 15073 + }, + { + "epoch": 0.88, + "learning_rate": 3.9021692580842315e-09, + "logits/chosen": -1.9582037925720215, + "logits/rejected": -1.9555156230926514, + "logps/chosen": -12.38602352142334, + "logps/rejected": -112.17957305908203, + "loss": 0.4265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14058561623096466, + "rewards/margins": 1.1407161951065063, + "rewards/rejected": -1.000130534172058, + "step": 15074 + }, + { + "epoch": 0.88, + "learning_rate": 3.89852021676183e-09, + "logits/chosen": -1.8929914236068726, + "logits/rejected": -1.888911247253418, + "logps/chosen": -31.760644912719727, + "logps/rejected": -331.802001953125, + "loss": 0.2807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11536770313978195, + "rewards/margins": 6.9756951332092285, + "rewards/rejected": -6.860327243804932, + "step": 15075 + }, + { + "epoch": 0.88, + "learning_rate": 3.894872813189498e-09, + "logits/chosen": -1.952646255493164, + "logits/rejected": -1.9238696098327637, + "logps/chosen": -8.247220993041992, + "logps/rejected": -296.99798583984375, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09426593780517578, + "rewards/margins": 5.476260662078857, + "rewards/rejected": -5.381994724273682, + "step": 15076 + }, + { + "epoch": 0.88, + "learning_rate": 3.8912270474968145e-09, + "logits/chosen": -2.078195333480835, + "logits/rejected": -2.0735504627227783, + "logps/chosen": -12.429579734802246, + "logps/rejected": -74.31367492675781, + "loss": 0.4209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7108386158943176, + "rewards/margins": 0.46677273511886597, + "rewards/rejected": 0.24406586587429047, + "step": 15077 + }, + { + "epoch": 0.88, + "learning_rate": 3.887582919813293e-09, + "logits/chosen": -1.9251699447631836, + "logits/rejected": -1.9466536045074463, + "logps/chosen": -173.16015625, + "logps/rejected": -379.3878173828125, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8822083473205566, + "rewards/margins": 3.1298952102661133, + "rewards/rejected": -0.24768677353858948, + "step": 15078 + }, + { + "epoch": 0.88, + "learning_rate": 3.883940430268373e-09, + "logits/chosen": -2.1596834659576416, + "logits/rejected": -2.1595520973205566, + "logps/chosen": -2.0468084812164307, + "logps/rejected": -149.05126953125, + "loss": 0.3699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08974087238311768, + "rewards/margins": 2.1388559341430664, + "rewards/rejected": -2.049114942550659, + "step": 15079 + }, + { + "epoch": 0.88, + "learning_rate": 3.880299578991475e-09, + "logits/chosen": -1.976057529449463, + "logits/rejected": -1.9867719411849976, + "logps/chosen": -127.8109359741211, + "logps/rejected": -522.4758911132812, + "loss": 0.3541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01698761060833931, + "rewards/margins": 7.027207851409912, + "rewards/rejected": -7.044195652008057, + "step": 15080 + }, + { + "epoch": 0.88, + "learning_rate": 3.876660366111928e-09, + "logits/chosen": -1.9005261659622192, + "logits/rejected": -1.9166572093963623, + "logps/chosen": -181.38604736328125, + "logps/rejected": -260.55389404296875, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.854878306388855, + "rewards/margins": 2.3675355911254883, + "rewards/rejected": -0.5126571655273438, + "step": 15081 + }, + { + "epoch": 0.88, + "learning_rate": 3.873022791759029e-09, + "logits/chosen": -1.7887598276138306, + "logits/rejected": -1.7880048751831055, + "logps/chosen": -3.182864384143613e-05, + "logps/rejected": -67.28777313232422, + "loss": 0.4202, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079612262510636e-07, + "rewards/margins": 1.8119043111801147, + "rewards/rejected": -1.8119049072265625, + "step": 15082 + }, + { + "epoch": 0.88, + "learning_rate": 3.869386856061979e-09, + "logits/chosen": -1.7173362970352173, + "logits/rejected": -1.7255336046218872, + "logps/chosen": -12.85173511505127, + "logps/rejected": -143.16079711914062, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1677994728088379, + "rewards/margins": 3.4449708461761475, + "rewards/rejected": -3.2771713733673096, + "step": 15083 + }, + { + "epoch": 0.88, + "learning_rate": 3.8657525591499595e-09, + "logits/chosen": -1.9588909149169922, + "logits/rejected": -1.9587608575820923, + "logps/chosen": -19.78522300720215, + "logps/rejected": -67.71833038330078, + "loss": 0.5611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05518150329589844, + "rewards/margins": 0.5323967337608337, + "rewards/rejected": -0.5875782370567322, + "step": 15084 + }, + { + "epoch": 0.88, + "learning_rate": 3.8621199011520776e-09, + "logits/chosen": -1.9152864217758179, + "logits/rejected": -1.9200515747070312, + "logps/chosen": -37.31499099731445, + "logps/rejected": -189.51907348632812, + "loss": 0.1768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.506857693195343, + "rewards/margins": 3.5427799224853516, + "rewards/rejected": -3.0359222888946533, + "step": 15085 + }, + { + "epoch": 0.88, + "learning_rate": 3.858488882197386e-09, + "logits/chosen": -2.0000460147857666, + "logits/rejected": -2.0008487701416016, + "logps/chosen": -9.269423484802246, + "logps/rejected": -173.17124938964844, + "loss": 0.3947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05856657028198242, + "rewards/margins": 1.3445910215377808, + "rewards/rejected": -1.2860244512557983, + "step": 15086 + }, + { + "epoch": 0.88, + "learning_rate": 3.85485950241487e-09, + "logits/chosen": -1.9190616607666016, + "logits/rejected": -1.932243824005127, + "logps/chosen": -8.075189590454102, + "logps/rejected": -228.96282958984375, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2502727508544922, + "rewards/margins": 3.6417462825775146, + "rewards/rejected": -3.3914735317230225, + "step": 15087 + }, + { + "epoch": 0.88, + "learning_rate": 3.85123176193346e-09, + "logits/chosen": -1.6931233406066895, + "logits/rejected": -1.7058955430984497, + "logps/chosen": -53.79951477050781, + "logps/rejected": -361.85357666015625, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10696525871753693, + "rewards/margins": 4.438750267028809, + "rewards/rejected": -4.54571533203125, + "step": 15088 + }, + { + "epoch": 0.88, + "learning_rate": 3.8476056608820366e-09, + "logits/chosen": -2.139437198638916, + "logits/rejected": -2.1336045265197754, + "logps/chosen": -0.012544644996523857, + "logps/rejected": -110.27116394042969, + "loss": 0.5129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011708360398188233, + "rewards/margins": 0.8385783433914185, + "rewards/rejected": -0.8397491574287415, + "step": 15089 + }, + { + "epoch": 0.88, + "learning_rate": 3.8439811993894234e-09, + "logits/chosen": -1.9007325172424316, + "logits/rejected": -1.9074788093566895, + "logps/chosen": -170.56295776367188, + "logps/rejected": -324.9856872558594, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.776902914047241, + "rewards/margins": 2.935020685195923, + "rewards/rejected": -0.15811768174171448, + "step": 15090 + }, + { + "epoch": 0.88, + "learning_rate": 3.8403583775843576e-09, + "logits/chosen": -1.6373436450958252, + "logits/rejected": -1.635122537612915, + "logps/chosen": -13.123137474060059, + "logps/rejected": -226.22653198242188, + "loss": 0.2964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3104891777038574, + "rewards/margins": 3.926454544067383, + "rewards/rejected": -3.6159653663635254, + "step": 15091 + }, + { + "epoch": 0.88, + "learning_rate": 3.836737195595557e-09, + "logits/chosen": -1.8109112977981567, + "logits/rejected": -1.8240700960159302, + "logps/chosen": -239.40338134765625, + "logps/rejected": -437.39251708984375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.436724901199341, + "rewards/margins": 5.783621311187744, + "rewards/rejected": -2.3468964099884033, + "step": 15092 + }, + { + "epoch": 0.88, + "learning_rate": 3.833117653551676e-09, + "logits/chosen": -1.8512248992919922, + "logits/rejected": -1.8514668941497803, + "logps/chosen": -164.8286895751953, + "logps/rejected": -186.74966430664062, + "loss": 0.3577, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.775593638420105, + "rewards/margins": 0.07630622386932373, + "rewards/rejected": 1.6992874145507812, + "step": 15093 + }, + { + "epoch": 0.88, + "learning_rate": 3.829499751581267e-09, + "logits/chosen": -1.9166134595870972, + "logits/rejected": -1.9572882652282715, + "logps/chosen": -175.99867248535156, + "logps/rejected": -313.83245849609375, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2839813232421875, + "rewards/margins": 1.789025902748108, + "rewards/rejected": -0.5050445795059204, + "step": 15094 + }, + { + "epoch": 0.88, + "learning_rate": 3.825883489812875e-09, + "logits/chosen": -1.7298626899719238, + "logits/rejected": -1.7567157745361328, + "logps/chosen": -203.35618591308594, + "logps/rejected": -420.0518798828125, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7424286603927612, + "rewards/margins": 2.852801561355591, + "rewards/rejected": -1.1103729009628296, + "step": 15095 + }, + { + "epoch": 0.88, + "learning_rate": 3.822268868374962e-09, + "logits/chosen": -1.9008166790008545, + "logits/rejected": -1.8914307355880737, + "logps/chosen": -62.553672790527344, + "logps/rejected": -294.6153564453125, + "loss": 0.0683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5866165161132812, + "rewards/margins": 4.557887077331543, + "rewards/rejected": -2.971270799636841, + "step": 15096 + }, + { + "epoch": 0.88, + "learning_rate": 3.818655887395949e-09, + "logits/chosen": -1.9651986360549927, + "logits/rejected": -1.9564276933670044, + "logps/chosen": -60.215091705322266, + "logps/rejected": -258.30096435546875, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0383403301239014, + "rewards/margins": 5.586262226104736, + "rewards/rejected": -3.547921895980835, + "step": 15097 + }, + { + "epoch": 0.88, + "learning_rate": 3.815044547004165e-09, + "logits/chosen": -1.832931399345398, + "logits/rejected": -1.8410696983337402, + "logps/chosen": -141.98448181152344, + "logps/rejected": -245.14764404296875, + "loss": 0.2622, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.31059730052948, + "rewards/margins": 0.8189316391944885, + "rewards/rejected": 0.49166566133499146, + "step": 15098 + }, + { + "epoch": 0.88, + "learning_rate": 3.811434847327915e-09, + "logits/chosen": -2.0019893646240234, + "logits/rejected": -1.9142961502075195, + "logps/chosen": -128.47019958496094, + "logps/rejected": -573.83349609375, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8809998035430908, + "rewards/margins": 5.953210830688477, + "rewards/rejected": -4.072210788726807, + "step": 15099 + }, + { + "epoch": 0.88, + "learning_rate": 3.8078267884954325e-09, + "logits/chosen": -1.7993279695510864, + "logits/rejected": -1.803033471107483, + "logps/chosen": -171.73658752441406, + "logps/rejected": -297.31549072265625, + "loss": 0.073, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8501038551330566, + "rewards/margins": 2.037747383117676, + "rewards/rejected": 0.8123565912246704, + "step": 15100 + }, + { + "epoch": 0.88, + "learning_rate": 3.804220370634898e-09, + "logits/chosen": -1.7057762145996094, + "logits/rejected": -1.7066712379455566, + "logps/chosen": -9.25278091430664, + "logps/rejected": -243.37904357910156, + "loss": 0.3754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13741183280944824, + "rewards/margins": 5.185288429260254, + "rewards/rejected": -5.322700500488281, + "step": 15101 + }, + { + "epoch": 0.88, + "learning_rate": 3.800615593874418e-09, + "logits/chosen": -1.711178183555603, + "logits/rejected": -1.7098149061203003, + "logps/chosen": -0.37709489464759827, + "logps/rejected": -45.59666442871094, + "loss": 0.4122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010851663537323475, + "rewards/margins": 1.3916559219360352, + "rewards/rejected": -1.3808043003082275, + "step": 15102 + }, + { + "epoch": 0.88, + "learning_rate": 3.7970124583420584e-09, + "logits/chosen": -1.9314988851547241, + "logits/rejected": -1.8682442903518677, + "logps/chosen": -205.10838317871094, + "logps/rejected": -548.072509765625, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.428720235824585, + "rewards/margins": 4.772745132446289, + "rewards/rejected": -2.344024658203125, + "step": 15103 + }, + { + "epoch": 0.88, + "learning_rate": 3.793410964165822e-09, + "logits/chosen": -1.924863576889038, + "logits/rejected": -1.931501030921936, + "logps/chosen": -156.47177124023438, + "logps/rejected": -393.2652893066406, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.556158423423767, + "rewards/margins": 4.843869209289551, + "rewards/rejected": -3.287710666656494, + "step": 15104 + }, + { + "epoch": 0.88, + "learning_rate": 3.789811111473651e-09, + "logits/chosen": -1.795226812362671, + "logits/rejected": -1.7845511436462402, + "logps/chosen": -2.458116292953491, + "logps/rejected": -93.80725860595703, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12315862625837326, + "rewards/margins": 1.3558833599090576, + "rewards/rejected": -1.2327247858047485, + "step": 15105 + }, + { + "epoch": 0.88, + "learning_rate": 3.786212900393426e-09, + "logits/chosen": -1.9965757131576538, + "logits/rejected": -1.9316226243972778, + "logps/chosen": -191.24082946777344, + "logps/rejected": -269.9475402832031, + "loss": 0.127, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4945390224456787, + "rewards/margins": 1.585270881652832, + "rewards/rejected": 0.9092682003974915, + "step": 15106 + }, + { + "epoch": 0.88, + "learning_rate": 3.782616331052968e-09, + "logits/chosen": -1.8579521179199219, + "logits/rejected": -1.849044680595398, + "logps/chosen": -39.55506134033203, + "logps/rejected": -224.6575927734375, + "loss": 0.1162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.625056505203247, + "rewards/margins": 2.4676315784454346, + "rewards/rejected": -0.8425750732421875, + "step": 15107 + }, + { + "epoch": 0.88, + "learning_rate": 3.779021403580057e-09, + "logits/chosen": -1.696936845779419, + "logits/rejected": -1.687307357788086, + "logps/chosen": -22.580841064453125, + "logps/rejected": -110.4947509765625, + "loss": 0.6819, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.041608430445194244, + "rewards/margins": -0.03553009033203125, + "rewards/rejected": 0.0771385207772255, + "step": 15108 + }, + { + "epoch": 0.88, + "learning_rate": 3.7754281181024026e-09, + "logits/chosen": -1.8429367542266846, + "logits/rejected": -1.8385804891586304, + "logps/chosen": -0.0003124056966044009, + "logps/rejected": -222.94273376464844, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7278624682148802e-06, + "rewards/margins": 4.8251776695251465, + "rewards/rejected": -4.825175762176514, + "step": 15109 + }, + { + "epoch": 0.88, + "learning_rate": 3.7718364747476415e-09, + "logits/chosen": -2.066689968109131, + "logits/rejected": -2.0749282836914062, + "logps/chosen": -26.455615997314453, + "logps/rejected": -216.35028076171875, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49145910143852234, + "rewards/margins": 1.782244324684143, + "rewards/rejected": -1.2907851934432983, + "step": 15110 + }, + { + "epoch": 0.88, + "learning_rate": 3.7682464736433715e-09, + "logits/chosen": -1.8568792343139648, + "logits/rejected": -1.8562829494476318, + "logps/chosen": -186.91278076171875, + "logps/rejected": -463.0219421386719, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2482025623321533, + "rewards/margins": 6.324847221374512, + "rewards/rejected": -4.0766448974609375, + "step": 15111 + }, + { + "epoch": 0.88, + "learning_rate": 3.7646581149171406e-09, + "logits/chosen": -2.051175355911255, + "logits/rejected": -2.0425872802734375, + "logps/chosen": -0.0010412659030407667, + "logps/rejected": -146.38323974609375, + "loss": 0.3932, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.799836359685287e-05, + "rewards/margins": 2.263969898223877, + "rewards/rejected": -2.2640578746795654, + "step": 15112 + }, + { + "epoch": 0.88, + "learning_rate": 3.761071398696414e-09, + "logits/chosen": -1.9384506940841675, + "logits/rejected": -1.9344178438186646, + "logps/chosen": -254.3355712890625, + "logps/rejected": -644.5859375, + "loss": 0.1865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1392669677734375, + "rewards/margins": 11.803214073181152, + "rewards/rejected": -11.94248104095459, + "step": 15113 + }, + { + "epoch": 0.88, + "learning_rate": 3.757486325108606e-09, + "logits/chosen": -1.893249273300171, + "logits/rejected": -1.8814374208450317, + "logps/chosen": -249.075439453125, + "logps/rejected": -376.3819885253906, + "loss": 0.1437, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.214498996734619, + "rewards/margins": 1.4978089332580566, + "rewards/rejected": 0.7166900634765625, + "step": 15114 + }, + { + "epoch": 0.88, + "learning_rate": 3.7539028942810806e-09, + "logits/chosen": -2.049828290939331, + "logits/rejected": -2.0462989807128906, + "logps/chosen": -1.768577218055725, + "logps/rejected": -168.20335388183594, + "loss": 0.4175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1267804205417633, + "rewards/margins": 2.69223952293396, + "rewards/rejected": -2.8190200328826904, + "step": 15115 + }, + { + "epoch": 0.88, + "learning_rate": 3.750321106341148e-09, + "logits/chosen": -1.860419511795044, + "logits/rejected": -1.8517999649047852, + "logps/chosen": -23.008686065673828, + "logps/rejected": -128.55120849609375, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8302740454673767, + "rewards/margins": 1.2277617454528809, + "rewards/rejected": -0.3974876403808594, + "step": 15116 + }, + { + "epoch": 0.88, + "learning_rate": 3.7467409614160285e-09, + "logits/chosen": -1.9802303314208984, + "logits/rejected": -1.977725625038147, + "logps/chosen": -50.519901275634766, + "logps/rejected": -205.76272583007812, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40683022141456604, + "rewards/margins": 2.0494847297668457, + "rewards/rejected": -1.6426544189453125, + "step": 15117 + }, + { + "epoch": 0.88, + "learning_rate": 3.743162459632926e-09, + "logits/chosen": -2.024871826171875, + "logits/rejected": -2.040494680404663, + "logps/chosen": -287.7779541015625, + "logps/rejected": -482.50225830078125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.953875780105591, + "rewards/margins": 5.0196533203125, + "rewards/rejected": -1.0657776594161987, + "step": 15118 + }, + { + "epoch": 0.88, + "learning_rate": 3.7395856011189555e-09, + "logits/chosen": -1.9768670797348022, + "logits/rejected": -1.9817187786102295, + "logps/chosen": -1.8662879467010498, + "logps/rejected": -86.39793395996094, + "loss": 0.4105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029986048117280006, + "rewards/margins": 1.5497698783874512, + "rewards/rejected": -1.519783854484558, + "step": 15119 + }, + { + "epoch": 0.88, + "learning_rate": 3.7360103860011985e-09, + "logits/chosen": -2.0450143814086914, + "logits/rejected": -1.9941720962524414, + "logps/chosen": -7.727271556854248, + "logps/rejected": -379.8082580566406, + "loss": 0.271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3284558951854706, + "rewards/margins": 5.181144714355469, + "rewards/rejected": -4.852688789367676, + "step": 15120 + }, + { + "epoch": 0.88, + "learning_rate": 3.732436814406642e-09, + "logits/chosen": -1.7483044862747192, + "logits/rejected": -1.7395367622375488, + "logps/chosen": -275.24212646484375, + "logps/rejected": -399.1405944824219, + "loss": 0.1536, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6994354724884033, + "rewards/margins": 1.4508819580078125, + "rewards/rejected": 0.24855346977710724, + "step": 15121 + }, + { + "epoch": 0.88, + "learning_rate": 3.728864886462252e-09, + "logits/chosen": -1.687450885772705, + "logits/rejected": -1.6735683679580688, + "logps/chosen": -200.79818725585938, + "logps/rejected": -450.9134521484375, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1175445318222046, + "rewards/margins": 7.7117462158203125, + "rewards/rejected": -6.594201564788818, + "step": 15122 + }, + { + "epoch": 0.88, + "learning_rate": 3.725294602294921e-09, + "logits/chosen": -1.857527256011963, + "logits/rejected": -1.8537354469299316, + "logps/chosen": -18.788850784301758, + "logps/rejected": -195.2158203125, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01709442213177681, + "rewards/margins": 3.3465638160705566, + "rewards/rejected": -3.3636581897735596, + "step": 15123 + }, + { + "epoch": 0.88, + "learning_rate": 3.7217259620314802e-09, + "logits/chosen": -2.133446455001831, + "logits/rejected": -2.1404330730438232, + "logps/chosen": -0.8378294706344604, + "logps/rejected": -189.884033203125, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05214538797736168, + "rewards/margins": 3.5174896717071533, + "rewards/rejected": -3.5696351528167725, + "step": 15124 + }, + { + "epoch": 0.88, + "learning_rate": 3.718158965798701e-09, + "logits/chosen": -1.7826776504516602, + "logits/rejected": -1.776749610900879, + "logps/chosen": -195.7822265625, + "logps/rejected": -399.741943359375, + "loss": 0.1666, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.828851342201233, + "rewards/margins": 1.5022796392440796, + "rewards/rejected": 0.32657167315483093, + "step": 15125 + }, + { + "epoch": 0.88, + "learning_rate": 3.714593613723305e-09, + "logits/chosen": -1.922507882118225, + "logits/rejected": -1.9158039093017578, + "logps/chosen": -51.408958435058594, + "logps/rejected": -290.66986083984375, + "loss": 0.1318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0098145008087158, + "rewards/margins": 6.213754653930664, + "rewards/rejected": -5.203939914703369, + "step": 15126 + }, + { + "epoch": 0.88, + "learning_rate": 3.7110299059319505e-09, + "logits/chosen": -1.8798065185546875, + "logits/rejected": -1.8688160181045532, + "logps/chosen": -19.679283142089844, + "logps/rejected": -245.62844848632812, + "loss": 0.4654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1594318449497223, + "rewards/margins": 1.832133173942566, + "rewards/rejected": -1.9915649890899658, + "step": 15127 + }, + { + "epoch": 0.88, + "learning_rate": 3.7074678425512485e-09, + "logits/chosen": -1.8897411823272705, + "logits/rejected": -1.902809739112854, + "logps/chosen": -248.16000366210938, + "logps/rejected": -271.06475830078125, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0249297618865967, + "rewards/margins": 4.401211738586426, + "rewards/rejected": -2.37628173828125, + "step": 15128 + }, + { + "epoch": 0.88, + "learning_rate": 3.703907423707714e-09, + "logits/chosen": -1.9024235010147095, + "logits/rejected": -1.8970118761062622, + "logps/chosen": -9.021611213684082, + "logps/rejected": -166.26483154296875, + "loss": 0.366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04493207857012749, + "rewards/margins": 2.9832966327667236, + "rewards/rejected": -3.028228759765625, + "step": 15129 + }, + { + "epoch": 0.88, + "learning_rate": 3.7003486495278514e-09, + "logits/chosen": -1.963564395904541, + "logits/rejected": -1.962312936782837, + "logps/chosen": -120.86734771728516, + "logps/rejected": -349.6263427734375, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1760551929473877, + "rewards/margins": 4.323711395263672, + "rewards/rejected": -2.147656202316284, + "step": 15130 + }, + { + "epoch": 0.88, + "learning_rate": 3.696791520138082e-09, + "logits/chosen": -1.8911192417144775, + "logits/rejected": -1.8910068273544312, + "logps/chosen": -16.564516067504883, + "logps/rejected": -241.50375366210938, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25951918959617615, + "rewards/margins": 1.6116600036621094, + "rewards/rejected": -1.3521407842636108, + "step": 15131 + }, + { + "epoch": 0.88, + "learning_rate": 3.693236035664771e-09, + "logits/chosen": -1.9377230405807495, + "logits/rejected": -1.9340721368789673, + "logps/chosen": -0.9837338328361511, + "logps/rejected": -150.07379150390625, + "loss": 0.3307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06825746595859528, + "rewards/margins": 3.171630382537842, + "rewards/rejected": -3.2398879528045654, + "step": 15132 + }, + { + "epoch": 0.88, + "learning_rate": 3.689682196234223e-09, + "logits/chosen": -1.929729700088501, + "logits/rejected": -1.9219169616699219, + "logps/chosen": -11.297423362731934, + "logps/rejected": -63.044395446777344, + "loss": 0.5656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.304358571767807, + "rewards/margins": 0.18759983777999878, + "rewards/rejected": 0.11675872653722763, + "step": 15133 + }, + { + "epoch": 0.88, + "learning_rate": 3.6861300019726926e-09, + "logits/chosen": -1.9484119415283203, + "logits/rejected": -1.9711947441101074, + "logps/chosen": -250.67721557617188, + "logps/rejected": -389.8774719238281, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0226471424102783, + "rewards/margins": 4.768310546875, + "rewards/rejected": -2.7456634044647217, + "step": 15134 + }, + { + "epoch": 0.88, + "learning_rate": 3.682579453006379e-09, + "logits/chosen": -1.7957440614700317, + "logits/rejected": -1.7932361364364624, + "logps/chosen": -47.713050842285156, + "logps/rejected": -163.84364318847656, + "loss": 0.571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16603699326515198, + "rewards/margins": 0.4342330992221832, + "rewards/rejected": -0.6002700924873352, + "step": 15135 + }, + { + "epoch": 0.88, + "learning_rate": 3.6790305494614037e-09, + "logits/chosen": -1.8619076013565063, + "logits/rejected": -1.856489896774292, + "logps/chosen": -6.962599277496338, + "logps/rejected": -130.74398803710938, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4887731075286865, + "rewards/margins": 1.623059630393982, + "rewards/rejected": -1.1342865228652954, + "step": 15136 + }, + { + "epoch": 0.88, + "learning_rate": 3.6754832914638377e-09, + "logits/chosen": -1.62090265750885, + "logits/rejected": -1.6108213663101196, + "logps/chosen": -194.00311279296875, + "logps/rejected": -365.32659912109375, + "loss": 0.0644, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2045685052871704, + "rewards/margins": 3.544095039367676, + "rewards/rejected": -2.339526414871216, + "step": 15137 + }, + { + "epoch": 0.88, + "learning_rate": 3.671937679139703e-09, + "logits/chosen": -1.818828821182251, + "logits/rejected": -1.8233838081359863, + "logps/chosen": -32.1338996887207, + "logps/rejected": -105.12986755371094, + "loss": 0.5345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39664536714553833, + "rewards/margins": 0.15456314384937286, + "rewards/rejected": 0.24208222329616547, + "step": 15138 + }, + { + "epoch": 0.88, + "learning_rate": 3.6683937126149646e-09, + "logits/chosen": -1.9634960889816284, + "logits/rejected": -1.9692449569702148, + "logps/chosen": -14.183900833129883, + "logps/rejected": -159.2382354736328, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08916149288415909, + "rewards/margins": 2.0677030086517334, + "rewards/rejected": -1.978541612625122, + "step": 15139 + }, + { + "epoch": 0.88, + "learning_rate": 3.664851392015511e-09, + "logits/chosen": -1.5660263299942017, + "logits/rejected": -1.601309895515442, + "logps/chosen": -223.94390869140625, + "logps/rejected": -408.4652099609375, + "loss": 0.0893, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8681609630584717, + "rewards/margins": 1.6747527122497559, + "rewards/rejected": 2.193408250808716, + "step": 15140 + }, + { + "epoch": 0.88, + "learning_rate": 3.6613107174671808e-09, + "logits/chosen": -1.8136897087097168, + "logits/rejected": -1.8082869052886963, + "logps/chosen": -32.8107795715332, + "logps/rejected": -167.8351287841797, + "loss": 0.4861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5076671838760376, + "rewards/margins": 0.2782772183418274, + "rewards/rejected": 0.229389950633049, + "step": 15141 + }, + { + "epoch": 0.88, + "learning_rate": 3.6577716890957623e-09, + "logits/chosen": -1.9295040369033813, + "logits/rejected": -1.8888624906539917, + "logps/chosen": -153.50418090820312, + "logps/rejected": -284.85595703125, + "loss": 0.0672, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3066086769104004, + "rewards/margins": 2.1268019676208496, + "rewards/rejected": 1.1798065900802612, + "step": 15142 + }, + { + "epoch": 0.88, + "learning_rate": 3.6542343070269823e-09, + "logits/chosen": -1.7038277387619019, + "logits/rejected": -1.7126926183700562, + "logps/chosen": -263.3914794921875, + "logps/rejected": -334.52313232421875, + "loss": 0.133, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.536578416824341, + "rewards/margins": 1.478430151939392, + "rewards/rejected": 1.0581482648849487, + "step": 15143 + }, + { + "epoch": 0.88, + "learning_rate": 3.6506985713864956e-09, + "logits/chosen": -1.9034799337387085, + "logits/rejected": -1.873836636543274, + "logps/chosen": -190.20697021484375, + "logps/rejected": -355.16815185546875, + "loss": 0.061, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.552288770675659, + "rewards/margins": 2.608184814453125, + "rewards/rejected": -0.05589599534869194, + "step": 15144 + }, + { + "epoch": 0.88, + "learning_rate": 3.647164482299919e-09, + "logits/chosen": -1.9090583324432373, + "logits/rejected": -1.9093142747879028, + "logps/chosen": -14.067282676696777, + "logps/rejected": -196.7561798095703, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08880167454481125, + "rewards/margins": 2.391775608062744, + "rewards/rejected": -2.302973985671997, + "step": 15145 + }, + { + "epoch": 0.88, + "learning_rate": 3.643632039892791e-09, + "logits/chosen": -1.7104430198669434, + "logits/rejected": -1.7164123058319092, + "logps/chosen": -207.41453552246094, + "logps/rejected": -353.7120361328125, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.833988904953003, + "rewards/margins": 3.9028549194335938, + "rewards/rejected": -1.0688660144805908, + "step": 15146 + }, + { + "epoch": 0.88, + "learning_rate": 3.6401012442906165e-09, + "logits/chosen": -2.0176522731781006, + "logits/rejected": -2.0497937202453613, + "logps/chosen": -157.31292724609375, + "logps/rejected": -495.0757141113281, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1858887672424316, + "rewards/margins": 7.687429904937744, + "rewards/rejected": -5.5015411376953125, + "step": 15147 + }, + { + "epoch": 0.88, + "learning_rate": 3.6365720956188117e-09, + "logits/chosen": -2.041064739227295, + "logits/rejected": -2.0472893714904785, + "logps/chosen": -18.110458374023438, + "logps/rejected": -138.5041046142578, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22212791442871094, + "rewards/margins": 2.098167896270752, + "rewards/rejected": -1.8760398626327515, + "step": 15148 + }, + { + "epoch": 0.88, + "learning_rate": 3.6330445940027486e-09, + "logits/chosen": -1.8524357080459595, + "logits/rejected": -1.8437223434448242, + "logps/chosen": -74.72647857666016, + "logps/rejected": -391.1863708496094, + "loss": 0.075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7657479047775269, + "rewards/margins": 6.8651041984558105, + "rewards/rejected": -5.099356174468994, + "step": 15149 + }, + { + "epoch": 0.88, + "learning_rate": 3.6295187395677496e-09, + "logits/chosen": -2.0057573318481445, + "logits/rejected": -2.0117719173431396, + "logps/chosen": -236.52505493164062, + "logps/rejected": -404.5108642578125, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5746643543243408, + "rewards/margins": 4.893420219421387, + "rewards/rejected": -3.318756103515625, + "step": 15150 + }, + { + "epoch": 0.88, + "learning_rate": 3.625994532439075e-09, + "logits/chosen": -1.9320756196975708, + "logits/rejected": -1.9264311790466309, + "logps/chosen": -212.54344177246094, + "logps/rejected": -242.840087890625, + "loss": 0.2423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.939723253250122, + "rewards/margins": 0.5869400501251221, + "rewards/rejected": 1.352783203125, + "step": 15151 + }, + { + "epoch": 0.88, + "learning_rate": 3.622471972741903e-09, + "logits/chosen": -1.7674758434295654, + "logits/rejected": -1.8758606910705566, + "logps/chosen": -359.637451171875, + "logps/rejected": -240.4318084716797, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3023712635040283, + "rewards/margins": 4.2299485206604, + "rewards/rejected": -0.9275771975517273, + "step": 15152 + }, + { + "epoch": 0.88, + "learning_rate": 3.618951060601383e-09, + "logits/chosen": -1.8792256116867065, + "logits/rejected": -1.8808391094207764, + "logps/chosen": -0.5611112117767334, + "logps/rejected": -195.25619506835938, + "loss": 0.4634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04604073986411095, + "rewards/margins": 1.4595897197723389, + "rewards/rejected": -1.5056304931640625, + "step": 15153 + }, + { + "epoch": 0.88, + "learning_rate": 3.615431796142604e-09, + "logits/chosen": -1.8385957479476929, + "logits/rejected": -1.8747390508651733, + "logps/chosen": -196.57540893554688, + "logps/rejected": -413.95965576171875, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.532757520675659, + "rewards/margins": 5.2381591796875, + "rewards/rejected": -2.705401659011841, + "step": 15154 + }, + { + "epoch": 0.88, + "learning_rate": 3.6119141794905773e-09, + "logits/chosen": -2.095435380935669, + "logits/rejected": -2.0755109786987305, + "logps/chosen": -46.001502990722656, + "logps/rejected": -344.1292724609375, + "loss": 0.1591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9074516296386719, + "rewards/margins": 5.313964366912842, + "rewards/rejected": -4.40651273727417, + "step": 15155 + }, + { + "epoch": 0.88, + "learning_rate": 3.6083982107702637e-09, + "logits/chosen": -1.7848244905471802, + "logits/rejected": -1.8013094663619995, + "logps/chosen": -238.68548583984375, + "logps/rejected": -397.0646667480469, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4713196754455566, + "rewards/margins": 3.918173313140869, + "rewards/rejected": -1.4468536376953125, + "step": 15156 + }, + { + "epoch": 0.88, + "learning_rate": 3.6048838901065692e-09, + "logits/chosen": -1.9263616800308228, + "logits/rejected": -1.9210922718048096, + "logps/chosen": -58.485870361328125, + "logps/rejected": -182.07479858398438, + "loss": 0.5862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7170242667198181, + "rewards/margins": 1.2271971702575684, + "rewards/rejected": -1.9442214965820312, + "step": 15157 + }, + { + "epoch": 0.88, + "learning_rate": 3.601371217624355e-09, + "logits/chosen": -1.9419121742248535, + "logits/rejected": -1.933776617050171, + "logps/chosen": -17.992305755615234, + "logps/rejected": -38.9654655456543, + "loss": 0.5515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3617822825908661, + "rewards/margins": 0.21251584589481354, + "rewards/rejected": 0.14926643669605255, + "step": 15158 + }, + { + "epoch": 0.88, + "learning_rate": 3.597860193448388e-09, + "logits/chosen": -1.9715832471847534, + "logits/rejected": -1.9426567554473877, + "logps/chosen": -246.708984375, + "logps/rejected": -445.6372985839844, + "loss": 0.1588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.952862501144409, + "rewards/margins": 1.036849856376648, + "rewards/rejected": 1.9160126447677612, + "step": 15159 + }, + { + "epoch": 0.88, + "learning_rate": 3.594350817703401e-09, + "logits/chosen": -1.8218505382537842, + "logits/rejected": -1.7334349155426025, + "logps/chosen": -220.9293975830078, + "logps/rejected": -429.3471374511719, + "loss": 0.1834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3706772327423096, + "rewards/margins": 1.6781997680664062, + "rewards/rejected": -0.30752259492874146, + "step": 15160 + }, + { + "epoch": 0.88, + "learning_rate": 3.5908430905140674e-09, + "logits/chosen": -1.9465757608413696, + "logits/rejected": -1.9498038291931152, + "logps/chosen": -64.06056213378906, + "logps/rejected": -164.53038024902344, + "loss": 0.1907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3308686017990112, + "rewards/margins": 1.2700989246368408, + "rewards/rejected": 0.06076965481042862, + "step": 15161 + }, + { + "epoch": 0.88, + "learning_rate": 3.5873370120050096e-09, + "logits/chosen": -1.8766582012176514, + "logits/rejected": -1.918328881263733, + "logps/chosen": -168.31922912597656, + "logps/rejected": -373.69189453125, + "loss": 0.0408, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6455917358398438, + "rewards/margins": 3.7421250343322754, + "rewards/rejected": -2.0965332984924316, + "step": 15162 + }, + { + "epoch": 0.88, + "learning_rate": 3.583832582300761e-09, + "logits/chosen": -1.6843268871307373, + "logits/rejected": -1.664724588394165, + "logps/chosen": -195.2593994140625, + "logps/rejected": -384.9437561035156, + "loss": 0.1356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.80657958984375, + "rewards/margins": 1.4380218982696533, + "rewards/rejected": 0.36855775117874146, + "step": 15163 + }, + { + "epoch": 0.88, + "learning_rate": 3.580329801525822e-09, + "logits/chosen": -1.979709267616272, + "logits/rejected": -1.956789255142212, + "logps/chosen": -197.65509033203125, + "logps/rejected": -627.4266357421875, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3604187965393066, + "rewards/margins": 6.544659614562988, + "rewards/rejected": -4.184240818023682, + "step": 15164 + }, + { + "epoch": 0.88, + "learning_rate": 3.5768286698046324e-09, + "logits/chosen": -2.003251552581787, + "logits/rejected": -1.9933485984802246, + "logps/chosen": -47.40319061279297, + "logps/rejected": -164.21783447265625, + "loss": 0.1924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6358131766319275, + "rewards/margins": 2.7825891971588135, + "rewards/rejected": -2.146775960922241, + "step": 15165 + }, + { + "epoch": 0.88, + "learning_rate": 3.5733291872615814e-09, + "logits/chosen": -2.002713203430176, + "logits/rejected": -2.002570867538452, + "logps/chosen": -16.272628784179688, + "logps/rejected": -245.7457275390625, + "loss": 0.1994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7371851205825806, + "rewards/margins": 5.210525035858154, + "rewards/rejected": -4.473340034484863, + "step": 15166 + }, + { + "epoch": 0.88, + "learning_rate": 3.5698313540209633e-09, + "logits/chosen": -1.9110561609268188, + "logits/rejected": -1.9060451984405518, + "logps/chosen": -46.70656967163086, + "logps/rejected": -142.2914276123047, + "loss": 0.1847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7787281274795532, + "rewards/margins": 1.247429370880127, + "rewards/rejected": 0.531298816204071, + "step": 15167 + }, + { + "epoch": 0.88, + "learning_rate": 3.566335170207052e-09, + "logits/chosen": -1.7190862894058228, + "logits/rejected": -1.7216192483901978, + "logps/chosen": -1.5057052373886108, + "logps/rejected": -79.2428970336914, + "loss": 0.5736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10767673701047897, + "rewards/margins": 0.41204991936683655, + "rewards/rejected": -0.304373174905777, + "step": 15168 + }, + { + "epoch": 0.88, + "learning_rate": 3.5628406359440423e-09, + "logits/chosen": -1.8949503898620605, + "logits/rejected": -1.8506959676742554, + "logps/chosen": -246.7156219482422, + "logps/rejected": -481.38922119140625, + "loss": 0.0972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6206222772598267, + "rewards/margins": 2.680180549621582, + "rewards/rejected": -1.0595581531524658, + "step": 15169 + }, + { + "epoch": 0.88, + "learning_rate": 3.5593477513560954e-09, + "logits/chosen": -2.0720105171203613, + "logits/rejected": -2.071974277496338, + "logps/chosen": -53.50804901123047, + "logps/rejected": -140.17977905273438, + "loss": 0.3779, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6726928949356079, + "rewards/margins": 0.6319763660430908, + "rewards/rejected": 0.04071655496954918, + "step": 15170 + }, + { + "epoch": 0.88, + "learning_rate": 3.5558565165672736e-09, + "logits/chosen": -2.0305166244506836, + "logits/rejected": -2.045271873474121, + "logps/chosen": -106.99413299560547, + "logps/rejected": -304.8193664550781, + "loss": 0.079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5043175220489502, + "rewards/margins": 5.778277397155762, + "rewards/rejected": -4.273959636688232, + "step": 15171 + }, + { + "epoch": 0.88, + "learning_rate": 3.5523669317016002e-09, + "logits/chosen": -1.7722373008728027, + "logits/rejected": -1.768904685974121, + "logps/chosen": -73.12071228027344, + "logps/rejected": -144.14903259277344, + "loss": 0.4065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25746461749076843, + "rewards/margins": 1.2414764165878296, + "rewards/rejected": -0.9840118288993835, + "step": 15172 + }, + { + "epoch": 0.88, + "learning_rate": 3.54887899688307e-09, + "logits/chosen": -1.9075355529785156, + "logits/rejected": -1.9061709642410278, + "logps/chosen": -0.3995276689529419, + "logps/rejected": -48.85010528564453, + "loss": 0.6243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013573474250733852, + "rewards/margins": 0.19129373133182526, + "rewards/rejected": -0.17772026360034943, + "step": 15173 + }, + { + "epoch": 0.88, + "learning_rate": 3.5453927122355675e-09, + "logits/chosen": -1.887069821357727, + "logits/rejected": -1.8826864957809448, + "logps/chosen": -1.059782862663269, + "logps/rejected": -125.91914367675781, + "loss": 0.4074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03219380974769592, + "rewards/margins": 1.0701408386230469, + "rewards/rejected": -1.1023346185684204, + "step": 15174 + }, + { + "epoch": 0.88, + "learning_rate": 3.541908077882955e-09, + "logits/chosen": -1.8353701829910278, + "logits/rejected": -1.8346699476242065, + "logps/chosen": -25.354759216308594, + "logps/rejected": -276.6514587402344, + "loss": 0.1986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7814430594444275, + "rewards/margins": 4.954535961151123, + "rewards/rejected": -4.173092842102051, + "step": 15175 + }, + { + "epoch": 0.88, + "learning_rate": 3.5384250939490103e-09, + "logits/chosen": -1.6085400581359863, + "logits/rejected": -1.5988575220108032, + "logps/chosen": -44.4185791015625, + "logps/rejected": -324.9091491699219, + "loss": 0.0893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4143184423446655, + "rewards/margins": 3.45211124420166, + "rewards/rejected": -2.037792921066284, + "step": 15176 + }, + { + "epoch": 0.88, + "learning_rate": 3.534943760557485e-09, + "logits/chosen": -1.8460925817489624, + "logits/rejected": -1.8110308647155762, + "logps/chosen": -221.1597900390625, + "logps/rejected": -405.7852478027344, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2055206298828125, + "rewards/margins": 0.8288421630859375, + "rewards/rejected": 0.376678466796875, + "step": 15177 + }, + { + "epoch": 0.88, + "learning_rate": 3.531464077832036e-09, + "logits/chosen": -1.9699492454528809, + "logits/rejected": -1.9690483808517456, + "logps/chosen": -31.56451988220215, + "logps/rejected": -212.80325317382812, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21328869462013245, + "rewards/margins": 1.5814924240112305, + "rewards/rejected": -1.3682037591934204, + "step": 15178 + }, + { + "epoch": 0.88, + "learning_rate": 3.527986045896286e-09, + "logits/chosen": -2.004182815551758, + "logits/rejected": -2.004802942276001, + "logps/chosen": -5.015204906463623, + "logps/rejected": -171.90420532226562, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18715643882751465, + "rewards/margins": 4.2744951248168945, + "rewards/rejected": -4.087338447570801, + "step": 15179 + }, + { + "epoch": 0.88, + "learning_rate": 3.524509664873787e-09, + "logits/chosen": -1.7350205183029175, + "logits/rejected": -1.7185786962509155, + "logps/chosen": -211.0177459716797, + "logps/rejected": -428.37548828125, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1781280040740967, + "rewards/margins": 4.7916717529296875, + "rewards/rejected": -2.613543748855591, + "step": 15180 + }, + { + "epoch": 0.88, + "learning_rate": 3.521034934888051e-09, + "logits/chosen": -1.920600414276123, + "logits/rejected": -1.9153826236724854, + "logps/chosen": -0.00024304771795868874, + "logps/rejected": -227.28775024414062, + "loss": 0.3263, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.819015081622638e-06, + "rewards/margins": 4.310743808746338, + "rewards/rejected": -4.310751438140869, + "step": 15181 + }, + { + "epoch": 0.88, + "learning_rate": 3.5175618560625008e-09, + "logits/chosen": -1.7625173330307007, + "logits/rejected": -1.7599924802780151, + "logps/chosen": -188.99412536621094, + "logps/rejected": -286.23162841796875, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.614671468734741, + "rewards/margins": 2.520653009414673, + "rewards/rejected": 0.09401855617761612, + "step": 15182 + }, + { + "epoch": 0.88, + "learning_rate": 3.514090428520522e-09, + "logits/chosen": -2.1267809867858887, + "logits/rejected": -2.1277103424072266, + "logps/chosen": -61.53706359863281, + "logps/rejected": -197.6826934814453, + "loss": 0.215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5461570620536804, + "rewards/margins": 2.2335410118103027, + "rewards/rejected": -1.687384009361267, + "step": 15183 + }, + { + "epoch": 0.88, + "learning_rate": 3.510620652385443e-09, + "logits/chosen": -2.019622802734375, + "logits/rejected": -2.0191919803619385, + "logps/chosen": -91.99276733398438, + "logps/rejected": -343.75341796875, + "loss": 0.2333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3850906491279602, + "rewards/margins": 6.763197422027588, + "rewards/rejected": -6.378106594085693, + "step": 15184 + }, + { + "epoch": 0.88, + "learning_rate": 3.5071525277805215e-09, + "logits/chosen": -1.8138436079025269, + "logits/rejected": -1.8118091821670532, + "logps/chosen": -101.80168151855469, + "logps/rejected": -217.20025634765625, + "loss": 0.3479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08894119411706924, + "rewards/margins": 3.017737627029419, + "rewards/rejected": -2.9287965297698975, + "step": 15185 + }, + { + "epoch": 0.88, + "learning_rate": 3.5036860548289637e-09, + "logits/chosen": -1.755133032798767, + "logits/rejected": -1.7419884204864502, + "logps/chosen": -323.1420593261719, + "logps/rejected": -501.353271484375, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.595388889312744, + "rewards/margins": 3.14935302734375, + "rewards/rejected": -0.5539642572402954, + "step": 15186 + }, + { + "epoch": 0.88, + "learning_rate": 3.5002212336539103e-09, + "logits/chosen": -1.7822797298431396, + "logits/rejected": -1.7760103940963745, + "logps/chosen": -2.564988613128662, + "logps/rejected": -71.590576171875, + "loss": 0.5803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1747160404920578, + "rewards/margins": 0.30011123418807983, + "rewards/rejected": -0.12539520859718323, + "step": 15187 + }, + { + "epoch": 0.88, + "learning_rate": 3.496758064378458e-09, + "logits/chosen": -1.9950215816497803, + "logits/rejected": -2.06925106048584, + "logps/chosen": -200.78021240234375, + "logps/rejected": -503.95159912109375, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8634384870529175, + "rewards/margins": 6.565486431121826, + "rewards/rejected": -4.702047824859619, + "step": 15188 + }, + { + "epoch": 0.88, + "learning_rate": 3.4932965471256347e-09, + "logits/chosen": -1.7797974348068237, + "logits/rejected": -1.812975525856018, + "logps/chosen": -216.25180053710938, + "logps/rejected": -376.38116455078125, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.53916335105896, + "rewards/margins": 2.6747376918792725, + "rewards/rejected": -0.1355743408203125, + "step": 15189 + }, + { + "epoch": 0.88, + "learning_rate": 3.489836682018399e-09, + "logits/chosen": -1.8954211473464966, + "logits/rejected": -1.8884464502334595, + "logps/chosen": -196.15687561035156, + "logps/rejected": -272.6015930175781, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.573970079421997, + "rewards/margins": 0.9758316278457642, + "rewards/rejected": 0.5981384515762329, + "step": 15190 + }, + { + "epoch": 0.88, + "learning_rate": 3.486378469179674e-09, + "logits/chosen": -2.0856781005859375, + "logits/rejected": -2.0789897441864014, + "logps/chosen": -64.31583404541016, + "logps/rejected": -241.1292724609375, + "loss": 0.1307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8424354791641235, + "rewards/margins": 2.4740915298461914, + "rewards/rejected": -1.6316559314727783, + "step": 15191 + }, + { + "epoch": 0.88, + "learning_rate": 3.482921908732306e-09, + "logits/chosen": -1.74177086353302, + "logits/rejected": -1.7598876953125, + "logps/chosen": -146.53607177734375, + "logps/rejected": -266.7241516113281, + "loss": 0.2224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6302505731582642, + "rewards/margins": 0.7955490350723267, + "rewards/rejected": 0.8347015380859375, + "step": 15192 + }, + { + "epoch": 0.88, + "learning_rate": 3.4794670007990913e-09, + "logits/chosen": -1.964918851852417, + "logits/rejected": -1.9461555480957031, + "logps/chosen": -220.31500244140625, + "logps/rejected": -396.13665771484375, + "loss": 0.1367, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3936554193496704, + "rewards/margins": 1.6962463855743408, + "rewards/rejected": -0.302590936422348, + "step": 15193 + }, + { + "epoch": 0.88, + "learning_rate": 3.476013745502765e-09, + "logits/chosen": -1.9470865726470947, + "logits/rejected": -1.9558924436569214, + "logps/chosen": -113.97151184082031, + "logps/rejected": -370.47503662109375, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5133103132247925, + "rewards/margins": 4.9832444190979, + "rewards/rejected": -3.4699342250823975, + "step": 15194 + }, + { + "epoch": 0.88, + "learning_rate": 3.472562142966007e-09, + "logits/chosen": -1.8467117547988892, + "logits/rejected": -1.853053092956543, + "logps/chosen": -277.4892272949219, + "logps/rejected": -500.27685546875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.735482931137085, + "rewards/margins": 7.26202392578125, + "rewards/rejected": -4.526541233062744, + "step": 15195 + }, + { + "epoch": 0.88, + "learning_rate": 3.4691121933114355e-09, + "logits/chosen": -1.978173017501831, + "logits/rejected": -1.9640257358551025, + "logps/chosen": -207.25177001953125, + "logps/rejected": -308.95294189453125, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8546676635742188, + "rewards/margins": 4.528450012207031, + "rewards/rejected": -1.6737823486328125, + "step": 15196 + }, + { + "epoch": 0.88, + "learning_rate": 3.465663896661597e-09, + "logits/chosen": -1.9276739358901978, + "logits/rejected": -1.92106032371521, + "logps/chosen": -0.03930652141571045, + "logps/rejected": -86.20375061035156, + "loss": 0.3987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010751599911600351, + "rewards/margins": 1.9168829917907715, + "rewards/rejected": -1.91795814037323, + "step": 15197 + }, + { + "epoch": 0.88, + "learning_rate": 3.462217253139005e-09, + "logits/chosen": -1.9041218757629395, + "logits/rejected": -1.9167238473892212, + "logps/chosen": -199.1295166015625, + "logps/rejected": -310.45697021484375, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8743133544921875, + "rewards/margins": 2.347088575363159, + "rewards/rejected": -0.47277528047561646, + "step": 15198 + }, + { + "epoch": 0.88, + "learning_rate": 3.4587722628661e-09, + "logits/chosen": -1.7494782209396362, + "logits/rejected": -1.7329981327056885, + "logps/chosen": -221.76821899414062, + "logps/rejected": -345.03460693359375, + "loss": 0.2483, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7597978115081787, + "rewards/margins": 0.47842884063720703, + "rewards/rejected": 2.2813689708709717, + "step": 15199 + }, + { + "epoch": 0.88, + "learning_rate": 3.455328925965262e-09, + "logits/chosen": -1.8311347961425781, + "logits/rejected": -1.8199117183685303, + "logps/chosen": -29.25128936767578, + "logps/rejected": -190.86651611328125, + "loss": 0.3924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11133499443531036, + "rewards/margins": 2.5647635459899902, + "rewards/rejected": -2.676098585128784, + "step": 15200 + }, + { + "epoch": 0.88, + "learning_rate": 3.45188724255881e-09, + "logits/chosen": -1.9664137363433838, + "logits/rejected": -1.9441871643066406, + "logps/chosen": -99.01637268066406, + "logps/rejected": -236.86380004882812, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34483107924461365, + "rewards/margins": 1.6336007118225098, + "rewards/rejected": -1.2887696027755737, + "step": 15201 + }, + { + "epoch": 0.88, + "learning_rate": 3.448447212769018e-09, + "logits/chosen": -1.964916467666626, + "logits/rejected": -1.9700392484664917, + "logps/chosen": -143.45458984375, + "logps/rejected": -265.3054504394531, + "loss": 0.2198, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7527602910995483, + "rewards/margins": 0.8047225475311279, + "rewards/rejected": 0.9480377435684204, + "step": 15202 + }, + { + "epoch": 0.88, + "learning_rate": 3.4450088367180886e-09, + "logits/chosen": -1.5154682397842407, + "logits/rejected": -1.5166436433792114, + "logps/chosen": -105.65470123291016, + "logps/rejected": -119.21723175048828, + "loss": 0.2248, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.165088653564453, + "rewards/margins": 0.7706954479217529, + "rewards/rejected": 1.3943932056427002, + "step": 15203 + }, + { + "epoch": 0.88, + "learning_rate": 3.441572114528174e-09, + "logits/chosen": -2.1030445098876953, + "logits/rejected": -2.0987162590026855, + "logps/chosen": -0.00011944420111831278, + "logps/rejected": -223.32330322265625, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.4617987441597506e-05, + "rewards/margins": 3.8141651153564453, + "rewards/rejected": -3.8141205310821533, + "step": 15204 + }, + { + "epoch": 0.88, + "learning_rate": 3.4381370463213543e-09, + "logits/chosen": -1.9633476734161377, + "logits/rejected": -1.862560510635376, + "logps/chosen": -168.2749481201172, + "logps/rejected": -393.21160888671875, + "loss": 0.2271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6936920881271362, + "rewards/margins": 0.9679962992668152, + "rewards/rejected": 0.725695788860321, + "step": 15205 + }, + { + "epoch": 0.88, + "learning_rate": 3.4347036322196655e-09, + "logits/chosen": -1.8124804496765137, + "logits/rejected": -1.8134260177612305, + "logps/chosen": -18.673364639282227, + "logps/rejected": -187.0869140625, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2251829206943512, + "rewards/margins": 2.3237762451171875, + "rewards/rejected": -2.098593235015869, + "step": 15206 + }, + { + "epoch": 0.88, + "learning_rate": 3.4312718723450816e-09, + "logits/chosen": -1.9172428846359253, + "logits/rejected": -1.9133695363998413, + "logps/chosen": -51.29416275024414, + "logps/rejected": -124.32676696777344, + "loss": 0.3724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.691253662109375, + "rewards/margins": 0.544506847858429, + "rewards/rejected": 0.14674682915210724, + "step": 15207 + }, + { + "epoch": 0.89, + "learning_rate": 3.427841766819517e-09, + "logits/chosen": -1.8681085109710693, + "logits/rejected": -1.8705716133117676, + "logps/chosen": -225.76734924316406, + "logps/rejected": -408.4085693359375, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6441116333007812, + "rewards/margins": 7.843391418457031, + "rewards/rejected": -4.19927978515625, + "step": 15208 + }, + { + "epoch": 0.89, + "learning_rate": 3.424413315764818e-09, + "logits/chosen": -2.059826135635376, + "logits/rejected": -2.0597193241119385, + "logps/chosen": -0.12809889018535614, + "logps/rejected": -160.76144409179688, + "loss": 0.4224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008033937774598598, + "rewards/margins": 1.4511502981185913, + "rewards/rejected": -1.4591842889785767, + "step": 15209 + }, + { + "epoch": 0.89, + "learning_rate": 3.420986519302782e-09, + "logits/chosen": -2.043727159500122, + "logits/rejected": -2.0580246448516846, + "logps/chosen": -177.98098754882812, + "logps/rejected": -436.2698669433594, + "loss": 0.1322, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.222494602203369, + "rewards/margins": 1.3978760242462158, + "rewards/rejected": 0.8246185183525085, + "step": 15210 + }, + { + "epoch": 0.89, + "learning_rate": 3.4175613775551447e-09, + "logits/chosen": -1.8870376348495483, + "logits/rejected": -1.88874351978302, + "logps/chosen": -84.87226867675781, + "logps/rejected": -154.4169921875, + "loss": 0.3687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7099044919013977, + "rewards/margins": 0.8905365467071533, + "rewards/rejected": -0.18063202500343323, + "step": 15211 + }, + { + "epoch": 0.89, + "learning_rate": 3.414137890643598e-09, + "logits/chosen": -1.8922702074050903, + "logits/rejected": -1.8677866458892822, + "logps/chosen": -233.39219665527344, + "logps/rejected": -522.2156372070312, + "loss": 0.0627, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2164154052734375, + "rewards/margins": 2.2158477306365967, + "rewards/rejected": 0.0005676269647665322, + "step": 15212 + }, + { + "epoch": 0.89, + "learning_rate": 3.4107160586897387e-09, + "logits/chosen": -1.90133798122406, + "logits/rejected": -1.8930896520614624, + "logps/chosen": -145.39198303222656, + "logps/rejected": -289.52496337890625, + "loss": 0.2238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9910568594932556, + "rewards/margins": 1.5134323835372925, + "rewards/rejected": -0.5223755240440369, + "step": 15213 + }, + { + "epoch": 0.89, + "learning_rate": 3.407295881815131e-09, + "logits/chosen": -1.9408373832702637, + "logits/rejected": -1.9982916116714478, + "logps/chosen": -277.4919738769531, + "logps/rejected": -459.4031066894531, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.652304172515869, + "rewards/margins": 4.205957412719727, + "rewards/rejected": -1.5536530017852783, + "step": 15214 + }, + { + "epoch": 0.89, + "learning_rate": 3.4038773601412997e-09, + "logits/chosen": -1.9283937215805054, + "logits/rejected": -1.9146654605865479, + "logps/chosen": -183.44544982910156, + "logps/rejected": -389.9502868652344, + "loss": 0.0548, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6068588495254517, + "rewards/margins": 4.179060459136963, + "rewards/rejected": -2.5722014904022217, + "step": 15215 + }, + { + "epoch": 0.89, + "learning_rate": 3.4004604937896643e-09, + "logits/chosen": -2.0382449626922607, + "logits/rejected": -2.0355777740478516, + "logps/chosen": -4.646847248077393, + "logps/rejected": -88.14985656738281, + "loss": 0.6077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035340309143066406, + "rewards/margins": 0.2993345260620117, + "rewards/rejected": -0.3346748352050781, + "step": 15216 + }, + { + "epoch": 0.89, + "learning_rate": 3.397045282881611e-09, + "logits/chosen": -1.8237217664718628, + "logits/rejected": -1.7957532405853271, + "logps/chosen": -157.62376403808594, + "logps/rejected": -319.44439697265625, + "loss": 0.2393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1735321283340454, + "rewards/margins": 1.291900634765625, + "rewards/rejected": -0.11836852878332138, + "step": 15217 + }, + { + "epoch": 0.89, + "learning_rate": 3.3936317275384764e-09, + "logits/chosen": -1.8426553010940552, + "logits/rejected": -1.843052625656128, + "logps/chosen": -111.5357666015625, + "logps/rejected": -483.2860412597656, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19351501762866974, + "rewards/margins": 2.8403139114379883, + "rewards/rejected": -2.646798849105835, + "step": 15218 + }, + { + "epoch": 0.89, + "learning_rate": 3.3902198278815187e-09, + "logits/chosen": -1.8210493326187134, + "logits/rejected": -1.8458930253982544, + "logps/chosen": -146.30203247070312, + "logps/rejected": -354.060546875, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9787734746932983, + "rewards/margins": 2.706355333328247, + "rewards/rejected": -0.727581799030304, + "step": 15219 + }, + { + "epoch": 0.89, + "learning_rate": 3.386809584031941e-09, + "logits/chosen": -1.947774887084961, + "logits/rejected": -1.9448697566986084, + "logps/chosen": -1.074514389038086, + "logps/rejected": -112.1827163696289, + "loss": 0.4311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09273896366357803, + "rewards/margins": 1.043656826019287, + "rewards/rejected": -0.9509178400039673, + "step": 15220 + }, + { + "epoch": 0.89, + "learning_rate": 3.3834009961108966e-09, + "logits/chosen": -1.7696810960769653, + "logits/rejected": -1.7591195106506348, + "logps/chosen": -91.2970962524414, + "logps/rejected": -223.99749755859375, + "loss": 0.109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5395179986953735, + "rewards/margins": 3.556708812713623, + "rewards/rejected": -2.01719069480896, + "step": 15221 + }, + { + "epoch": 0.89, + "learning_rate": 3.3799940642394776e-09, + "logits/chosen": -1.8824362754821777, + "logits/rejected": -1.887221097946167, + "logps/chosen": -31.426727294921875, + "logps/rejected": -254.5443572998047, + "loss": 0.3963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18070068955421448, + "rewards/margins": 4.2141008377075195, + "rewards/rejected": -4.394801616668701, + "step": 15222 + }, + { + "epoch": 0.89, + "learning_rate": 3.37658878853872e-09, + "logits/chosen": -1.7702189683914185, + "logits/rejected": -1.7789597511291504, + "logps/chosen": -61.51902770996094, + "logps/rejected": -194.89077758789062, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9419647455215454, + "rewards/margins": 0.9850037097930908, + "rewards/rejected": -0.04303894191980362, + "step": 15223 + }, + { + "epoch": 0.89, + "learning_rate": 3.3731851691295775e-09, + "logits/chosen": -1.7612855434417725, + "logits/rejected": -1.759108304977417, + "logps/chosen": -337.4722900390625, + "logps/rejected": -507.657958984375, + "loss": 0.0783, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5069000720977783, + "rewards/margins": 2.2540862560272217, + "rewards/rejected": 0.2528137266635895, + "step": 15224 + }, + { + "epoch": 0.89, + "learning_rate": 3.3697832061329757e-09, + "logits/chosen": -2.0399773120880127, + "logits/rejected": -2.0443484783172607, + "logps/chosen": -4.5680952072143555, + "logps/rejected": -56.43724060058594, + "loss": 0.5516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09787359088659286, + "rewards/margins": 0.6033732295036316, + "rewards/rejected": -0.5054996609687805, + "step": 15225 + }, + { + "epoch": 0.89, + "learning_rate": 3.3663828996697674e-09, + "logits/chosen": -2.1136035919189453, + "logits/rejected": -2.1176774501800537, + "logps/chosen": -38.030120849609375, + "logps/rejected": -300.1686096191406, + "loss": 0.215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5645279288291931, + "rewards/margins": 4.153355598449707, + "rewards/rejected": -3.588827610015869, + "step": 15226 + }, + { + "epoch": 0.89, + "learning_rate": 3.362984249860756e-09, + "logits/chosen": -1.7977147102355957, + "logits/rejected": -1.7940953969955444, + "logps/chosen": -44.640228271484375, + "logps/rejected": -190.76368713378906, + "loss": 0.2047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8249664306640625, + "rewards/margins": 2.1891374588012695, + "rewards/rejected": -1.3641709089279175, + "step": 15227 + }, + { + "epoch": 0.89, + "learning_rate": 3.3595872568266624e-09, + "logits/chosen": -1.9573922157287598, + "logits/rejected": -1.956482172012329, + "logps/chosen": -4.758676528930664, + "logps/rejected": -136.44561767578125, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0689944252371788, + "rewards/margins": 1.9839648008346558, + "rewards/rejected": -1.9149703979492188, + "step": 15228 + }, + { + "epoch": 0.89, + "learning_rate": 3.356191920688173e-09, + "logits/chosen": -1.7534232139587402, + "logits/rejected": -1.6700201034545898, + "logps/chosen": -264.1931457519531, + "logps/rejected": -477.2417907714844, + "loss": 0.1139, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5891205072402954, + "rewards/margins": 2.092974901199341, + "rewards/rejected": -0.5038543939590454, + "step": 15229 + }, + { + "epoch": 0.89, + "learning_rate": 3.3527982415659073e-09, + "logits/chosen": -2.0636954307556152, + "logits/rejected": -2.053570032119751, + "logps/chosen": -71.66342163085938, + "logps/rejected": -266.21533203125, + "loss": 0.1068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9754379391670227, + "rewards/margins": 7.035294055938721, + "rewards/rejected": -6.059855937957764, + "step": 15230 + }, + { + "epoch": 0.89, + "learning_rate": 3.349406219580431e-09, + "logits/chosen": -1.875159740447998, + "logits/rejected": -1.928343653678894, + "logps/chosen": -246.33714294433594, + "logps/rejected": -395.2889404296875, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9649826288223267, + "rewards/margins": 4.744361877441406, + "rewards/rejected": -2.779379367828369, + "step": 15231 + }, + { + "epoch": 0.89, + "learning_rate": 3.34601585485223e-09, + "logits/chosen": -1.847766637802124, + "logits/rejected": -1.832937240600586, + "logps/chosen": -36.41918182373047, + "logps/rejected": -196.91526794433594, + "loss": 0.2445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43023911118507385, + "rewards/margins": 4.303536415100098, + "rewards/rejected": -3.8732972145080566, + "step": 15232 + }, + { + "epoch": 0.89, + "learning_rate": 3.3426271475017474e-09, + "logits/chosen": -1.8884758949279785, + "logits/rejected": -1.8938887119293213, + "logps/chosen": -9.560195922851562, + "logps/rejected": -130.42572021484375, + "loss": 0.3948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07404174655675888, + "rewards/margins": 1.6824569702148438, + "rewards/rejected": -1.6084152460098267, + "step": 15233 + }, + { + "epoch": 0.89, + "learning_rate": 3.3392400976493874e-09, + "logits/chosen": -1.8574331998825073, + "logits/rejected": -1.864399790763855, + "logps/chosen": -212.61984252929688, + "logps/rejected": -395.8046875, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3885163068771362, + "rewards/margins": 2.7612245082855225, + "rewards/rejected": -1.3727082014083862, + "step": 15234 + }, + { + "epoch": 0.89, + "learning_rate": 3.3358547054154585e-09, + "logits/chosen": -2.018996238708496, + "logits/rejected": -2.000784158706665, + "logps/chosen": -17.37378692626953, + "logps/rejected": -338.750244140625, + "loss": 0.2547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3884601593017578, + "rewards/margins": 4.702836990356445, + "rewards/rejected": -4.3143768310546875, + "step": 15235 + }, + { + "epoch": 0.89, + "learning_rate": 3.3324709709202258e-09, + "logits/chosen": -1.9051471948623657, + "logits/rejected": -1.9038300514221191, + "logps/chosen": -60.38526916503906, + "logps/rejected": -206.2570037841797, + "loss": 0.2528, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2123756408691406, + "rewards/margins": 0.9385032653808594, + "rewards/rejected": 0.27387237548828125, + "step": 15236 + }, + { + "epoch": 0.89, + "learning_rate": 3.3290888942839046e-09, + "logits/chosen": -1.6664652824401855, + "logits/rejected": -1.6629060506820679, + "logps/chosen": -6.977241516113281, + "logps/rejected": -108.69215393066406, + "loss": 0.559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3103334605693817, + "rewards/margins": 1.0028318166732788, + "rewards/rejected": -1.313165307044983, + "step": 15237 + }, + { + "epoch": 0.89, + "learning_rate": 3.325708475626643e-09, + "logits/chosen": -1.9679499864578247, + "logits/rejected": -1.9631805419921875, + "logps/chosen": -6.790112495422363, + "logps/rejected": -351.61328125, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1355910301208496, + "rewards/margins": 6.079084873199463, + "rewards/rejected": -5.943493843078613, + "step": 15238 + }, + { + "epoch": 0.89, + "learning_rate": 3.322329715068517e-09, + "logits/chosen": -1.893587350845337, + "logits/rejected": -1.9056733846664429, + "logps/chosen": -177.4525146484375, + "logps/rejected": -447.1116943359375, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1266496181488037, + "rewards/margins": 5.024858474731445, + "rewards/rejected": -2.8982086181640625, + "step": 15239 + }, + { + "epoch": 0.89, + "learning_rate": 3.3189526127295643e-09, + "logits/chosen": -1.999915599822998, + "logits/rejected": -1.9683541059494019, + "logps/chosen": -93.59574890136719, + "logps/rejected": -298.16864013671875, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.510511040687561, + "rewards/margins": 5.371073246002197, + "rewards/rejected": -3.8605620861053467, + "step": 15240 + }, + { + "epoch": 0.89, + "learning_rate": 3.3155771687297552e-09, + "logits/chosen": -1.8361353874206543, + "logits/rejected": -1.8446290493011475, + "logps/chosen": -246.4743194580078, + "logps/rejected": -375.12646484375, + "loss": 0.1254, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0973007678985596, + "rewards/margins": 1.5943466424942017, + "rewards/rejected": 0.5029541254043579, + "step": 15241 + }, + { + "epoch": 0.89, + "learning_rate": 3.3122033831890108e-09, + "logits/chosen": -1.957244873046875, + "logits/rejected": -1.961513638496399, + "logps/chosen": -0.18547342717647552, + "logps/rejected": -216.8309326171875, + "loss": 0.3651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019834444392472506, + "rewards/margins": 4.780670166015625, + "rewards/rejected": -4.7786865234375, + "step": 15242 + }, + { + "epoch": 0.89, + "learning_rate": 3.3088312562271737e-09, + "logits/chosen": -1.9024230241775513, + "logits/rejected": -1.8975179195404053, + "logps/chosen": -4.164454460144043, + "logps/rejected": -187.382080078125, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3148467242717743, + "rewards/margins": 3.140577793121338, + "rewards/rejected": -2.825731039047241, + "step": 15243 + }, + { + "epoch": 0.89, + "learning_rate": 3.305460787964037e-09, + "logits/chosen": -1.8073712587356567, + "logits/rejected": -1.7684417963027954, + "logps/chosen": -363.7239074707031, + "logps/rejected": -631.076416015625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.778784155845642, + "rewards/margins": 9.128857612609863, + "rewards/rejected": -7.350073337554932, + "step": 15244 + }, + { + "epoch": 0.89, + "learning_rate": 3.3020919785193445e-09, + "logits/chosen": -1.8998384475708008, + "logits/rejected": -1.8913843631744385, + "logps/chosen": -74.07048797607422, + "logps/rejected": -424.7484130859375, + "loss": 0.2699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01667327992618084, + "rewards/margins": 10.344602584838867, + "rewards/rejected": -10.327929496765137, + "step": 15245 + }, + { + "epoch": 0.89, + "learning_rate": 3.2987248280127724e-09, + "logits/chosen": -2.1591765880584717, + "logits/rejected": -2.1569676399230957, + "logps/chosen": -4.828232765197754, + "logps/rejected": -143.7918701171875, + "loss": 0.5725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031523656100034714, + "rewards/margins": 0.5195546746253967, + "rewards/rejected": -0.4880309998989105, + "step": 15246 + }, + { + "epoch": 0.89, + "learning_rate": 3.29535933656393e-09, + "logits/chosen": -1.7704921960830688, + "logits/rejected": -1.7726194858551025, + "logps/chosen": -45.768775939941406, + "logps/rejected": -109.59504699707031, + "loss": 0.3432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8362289667129517, + "rewards/margins": 0.8100006580352783, + "rewards/rejected": 0.02622833289206028, + "step": 15247 + }, + { + "epoch": 0.89, + "learning_rate": 3.291995504292383e-09, + "logits/chosen": -1.946941614151001, + "logits/rejected": -2.020116090774536, + "logps/chosen": -258.7557067871094, + "logps/rejected": -470.94525146484375, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0607025623321533, + "rewards/margins": 4.950930595397949, + "rewards/rejected": -2.890228271484375, + "step": 15248 + }, + { + "epoch": 0.89, + "learning_rate": 3.2886333313176253e-09, + "logits/chosen": -1.8385703563690186, + "logits/rejected": -1.8515886068344116, + "logps/chosen": -187.86471557617188, + "logps/rejected": -332.95697021484375, + "loss": 0.0593, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.032785177230835, + "rewards/margins": 2.4598724842071533, + "rewards/rejected": 0.5729126334190369, + "step": 15249 + }, + { + "epoch": 0.89, + "learning_rate": 3.2852728177591106e-09, + "logits/chosen": -1.7758795022964478, + "logits/rejected": -1.7650364637374878, + "logps/chosen": -49.11225891113281, + "logps/rejected": -122.85565185546875, + "loss": 0.1865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.03758704662323, + "rewards/margins": 1.74078369140625, + "rewards/rejected": -0.7031967043876648, + "step": 15250 + }, + { + "epoch": 0.89, + "learning_rate": 3.2819139637362046e-09, + "logits/chosen": -2.009948968887329, + "logits/rejected": -2.013829469680786, + "logps/chosen": -15.19312858581543, + "logps/rejected": -298.3663635253906, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5808576941490173, + "rewards/margins": 3.4869062900543213, + "rewards/rejected": -2.906048536300659, + "step": 15251 + }, + { + "epoch": 0.89, + "learning_rate": 3.27855676936824e-09, + "logits/chosen": -1.9875251054763794, + "logits/rejected": -1.9906628131866455, + "logps/chosen": -61.07563400268555, + "logps/rejected": -115.82548522949219, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39196282625198364, + "rewards/margins": 1.3640964031219482, + "rewards/rejected": -0.9721336364746094, + "step": 15252 + }, + { + "epoch": 0.89, + "learning_rate": 3.275201234774472e-09, + "logits/chosen": -1.9580774307250977, + "logits/rejected": -1.94895339012146, + "logps/chosen": -76.67958068847656, + "logps/rejected": -388.77703857421875, + "loss": 0.0529, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8917893171310425, + "rewards/margins": 8.554466247558594, + "rewards/rejected": -6.66267728805542, + "step": 15253 + }, + { + "epoch": 0.89, + "learning_rate": 3.271847360074115e-09, + "logits/chosen": -1.8342323303222656, + "logits/rejected": -1.831106424331665, + "logps/chosen": -42.90264892578125, + "logps/rejected": -256.9154052734375, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2140743732452393, + "rewards/margins": 5.4730072021484375, + "rewards/rejected": -4.258932590484619, + "step": 15254 + }, + { + "epoch": 0.89, + "learning_rate": 3.268495145386324e-09, + "logits/chosen": -1.706599473953247, + "logits/rejected": -1.7043830156326294, + "logps/chosen": -262.2312316894531, + "logps/rejected": -531.4290771484375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.197985887527466, + "rewards/margins": 9.953140258789062, + "rewards/rejected": -7.755154609680176, + "step": 15255 + }, + { + "epoch": 0.89, + "learning_rate": 3.2651445908301545e-09, + "logits/chosen": -1.8948887586593628, + "logits/rejected": -1.8949337005615234, + "logps/chosen": -191.25509643554688, + "logps/rejected": -385.039794921875, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3419723510742188, + "rewards/margins": 3.5670762062072754, + "rewards/rejected": -2.2251038551330566, + "step": 15256 + }, + { + "epoch": 0.89, + "learning_rate": 3.2617956965246663e-09, + "logits/chosen": -1.8016846179962158, + "logits/rejected": -1.8034003973007202, + "logps/chosen": -10.17772388458252, + "logps/rejected": -275.5449523925781, + "loss": 0.3739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2307451218366623, + "rewards/margins": 2.609785318374634, + "rewards/rejected": -2.8405303955078125, + "step": 15257 + }, + { + "epoch": 0.89, + "learning_rate": 3.258448462588814e-09, + "logits/chosen": -1.8880561590194702, + "logits/rejected": -1.8851498365402222, + "logps/chosen": -9.269486427307129, + "logps/rejected": -69.65454864501953, + "loss": 0.2919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3470132052898407, + "rewards/margins": 1.787054181098938, + "rewards/rejected": -1.440040946006775, + "step": 15258 + }, + { + "epoch": 0.89, + "learning_rate": 3.255102889141509e-09, + "logits/chosen": -1.7537552118301392, + "logits/rejected": -1.7500922679901123, + "logps/chosen": -39.94718551635742, + "logps/rejected": -139.54147338867188, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4277057647705078, + "rewards/margins": 0.17777900397777557, + "rewards/rejected": 0.24992676079273224, + "step": 15259 + }, + { + "epoch": 0.89, + "learning_rate": 3.2517589763015997e-09, + "logits/chosen": -1.8952511548995972, + "logits/rejected": -1.8975658416748047, + "logps/chosen": -26.898996353149414, + "logps/rejected": -234.86935424804688, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32970523834228516, + "rewards/margins": 3.7548930644989014, + "rewards/rejected": -3.425187826156616, + "step": 15260 + }, + { + "epoch": 0.89, + "learning_rate": 3.248416724187891e-09, + "logits/chosen": -1.9576150178909302, + "logits/rejected": -1.9579553604125977, + "logps/chosen": -49.89474105834961, + "logps/rejected": -185.34170532226562, + "loss": 0.719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6880707144737244, + "rewards/margins": 0.32450252771377563, + "rewards/rejected": -1.0125732421875, + "step": 15261 + }, + { + "epoch": 0.89, + "learning_rate": 3.2450761329191e-09, + "logits/chosen": -2.0545907020568848, + "logits/rejected": -2.020219087600708, + "logps/chosen": -34.484439849853516, + "logps/rejected": -307.2410583496094, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2393798828125, + "rewards/margins": 3.7933197021484375, + "rewards/rejected": -3.5539398193359375, + "step": 15262 + }, + { + "epoch": 0.89, + "learning_rate": 3.241737202613909e-09, + "logits/chosen": -2.086193561553955, + "logits/rejected": -2.117412805557251, + "logps/chosen": -196.21588134765625, + "logps/rejected": -312.32550048828125, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.277355909347534, + "rewards/margins": 2.3684051036834717, + "rewards/rejected": 0.9089508056640625, + "step": 15263 + }, + { + "epoch": 0.89, + "learning_rate": 3.238399933390934e-09, + "logits/chosen": -1.8770110607147217, + "logits/rejected": -1.8706824779510498, + "logps/chosen": -24.736623764038086, + "logps/rejected": -277.3221740722656, + "loss": 0.2465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3764503598213196, + "rewards/margins": 5.899265766143799, + "rewards/rejected": -5.522815227508545, + "step": 15264 + }, + { + "epoch": 0.89, + "learning_rate": 3.2350643253687306e-09, + "logits/chosen": -1.8100281953811646, + "logits/rejected": -1.8020004034042358, + "logps/chosen": -134.8090362548828, + "logps/rejected": -225.57305908203125, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.820123314857483, + "rewards/margins": 0.6045287847518921, + "rewards/rejected": 1.2155945301055908, + "step": 15265 + }, + { + "epoch": 0.89, + "learning_rate": 3.2317303786657924e-09, + "logits/chosen": -2.0487747192382812, + "logits/rejected": -2.033109188079834, + "logps/chosen": -8.750219345092773, + "logps/rejected": -212.19720458984375, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16249684989452362, + "rewards/margins": 3.6405317783355713, + "rewards/rejected": -3.4780349731445312, + "step": 15266 + }, + { + "epoch": 0.89, + "learning_rate": 3.2283980934005582e-09, + "logits/chosen": -1.9499748945236206, + "logits/rejected": -1.9271708726882935, + "logps/chosen": -255.7056884765625, + "logps/rejected": -304.0406188964844, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.573385715484619, + "rewards/margins": 0.165130615234375, + "rewards/rejected": 3.408255100250244, + "step": 15267 + }, + { + "epoch": 0.89, + "learning_rate": 3.225067469691406e-09, + "logits/chosen": -1.860748291015625, + "logits/rejected": -1.856082558631897, + "logps/chosen": -10.281502723693848, + "logps/rejected": -227.31704711914062, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05238685756921768, + "rewards/margins": 2.9253599643707275, + "rewards/rejected": -2.9777467250823975, + "step": 15268 + }, + { + "epoch": 0.89, + "learning_rate": 3.2217385076566683e-09, + "logits/chosen": -1.8996127843856812, + "logits/rejected": -1.8987051248550415, + "logps/chosen": -1.901039958000183, + "logps/rejected": -211.981689453125, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022245585918426514, + "rewards/margins": 4.053709030151367, + "rewards/rejected": -4.031463623046875, + "step": 15269 + }, + { + "epoch": 0.89, + "learning_rate": 3.218411207414584e-09, + "logits/chosen": -1.752485990524292, + "logits/rejected": -1.7509270906448364, + "logps/chosen": -214.50753784179688, + "logps/rejected": -348.1718444824219, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6805572509765625, + "rewards/margins": 2.661639451980591, + "rewards/rejected": 0.01891784742474556, + "step": 15270 + }, + { + "epoch": 0.89, + "learning_rate": 3.21508556908337e-09, + "logits/chosen": -1.792864441871643, + "logits/rejected": -1.774687647819519, + "logps/chosen": -37.51693344116211, + "logps/rejected": -389.6739501953125, + "loss": 0.091, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2613261938095093, + "rewards/margins": 4.341700553894043, + "rewards/rejected": -3.080374240875244, + "step": 15271 + }, + { + "epoch": 0.89, + "learning_rate": 3.21176159278117e-09, + "logits/chosen": -1.666275143623352, + "logits/rejected": -1.7170169353485107, + "logps/chosen": -306.37481689453125, + "logps/rejected": -482.381103515625, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.08188796043396, + "rewards/margins": 6.688974380493164, + "rewards/rejected": -4.607086181640625, + "step": 15272 + }, + { + "epoch": 0.89, + "learning_rate": 3.208439278626068e-09, + "logits/chosen": -1.914867877960205, + "logits/rejected": -1.9077162742614746, + "logps/chosen": -89.5355453491211, + "logps/rejected": -161.86923217773438, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035474397242069244, + "rewards/margins": 2.732381582260132, + "rewards/rejected": -2.7678558826446533, + "step": 15273 + }, + { + "epoch": 0.89, + "learning_rate": 3.2051186267360753e-09, + "logits/chosen": -2.004193067550659, + "logits/rejected": -1.9957406520843506, + "logps/chosen": -14.880950927734375, + "logps/rejected": -101.89506530761719, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07465248554944992, + "rewards/margins": 3.0670008659362793, + "rewards/rejected": -2.9923484325408936, + "step": 15274 + }, + { + "epoch": 0.89, + "learning_rate": 3.2017996372291632e-09, + "logits/chosen": -1.7579022645950317, + "logits/rejected": -1.8241270780563354, + "logps/chosen": -271.1265869140625, + "logps/rejected": -358.110595703125, + "loss": 0.0262, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.781176805496216, + "rewards/margins": 2.994253635406494, + "rewards/rejected": 0.7869232296943665, + "step": 15275 + }, + { + "epoch": 0.89, + "learning_rate": 3.1984823102232607e-09, + "logits/chosen": -1.842552900314331, + "logits/rejected": -1.8495593070983887, + "logps/chosen": -146.70098876953125, + "logps/rejected": -351.1364440917969, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4823060035705566, + "rewards/margins": 2.752041816711426, + "rewards/rejected": -0.269735723733902, + "step": 15276 + }, + { + "epoch": 0.89, + "learning_rate": 3.1951666458361837e-09, + "logits/chosen": -1.7700145244598389, + "logits/rejected": -1.772383689880371, + "logps/chosen": -11.55238151550293, + "logps/rejected": -203.99993896484375, + "loss": 0.2849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1700243055820465, + "rewards/margins": 3.293060541152954, + "rewards/rejected": -3.1230361461639404, + "step": 15277 + }, + { + "epoch": 0.89, + "learning_rate": 3.1918526441857387e-09, + "logits/chosen": -1.8061686754226685, + "logits/rejected": -1.8101959228515625, + "logps/chosen": -8.983072280883789, + "logps/rejected": -75.76032257080078, + "loss": 0.5026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06760215759277344, + "rewards/margins": 0.9922458529472351, + "rewards/rejected": -0.9246436953544617, + "step": 15278 + }, + { + "epoch": 0.89, + "learning_rate": 3.1885403053896474e-09, + "logits/chosen": -1.8912811279296875, + "logits/rejected": -1.881129264831543, + "logps/chosen": -0.02114984393119812, + "logps/rejected": -186.16912841796875, + "loss": 0.3723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03307900205254555, + "rewards/margins": 2.6461968421936035, + "rewards/rejected": -2.6131179332733154, + "step": 15279 + }, + { + "epoch": 0.89, + "learning_rate": 3.1852296295655945e-09, + "logits/chosen": -1.7618619203567505, + "logits/rejected": -1.6522676944732666, + "logps/chosen": -259.4054260253906, + "logps/rejected": -589.8533935546875, + "loss": 0.0953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7391875982284546, + "rewards/margins": 2.928579807281494, + "rewards/rejected": -1.18939208984375, + "step": 15280 + }, + { + "epoch": 0.89, + "learning_rate": 3.181920616831174e-09, + "logits/chosen": -1.999903917312622, + "logits/rejected": -1.9931038618087769, + "logps/chosen": -118.40680694580078, + "logps/rejected": -237.46133422851562, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6985198855400085, + "rewards/margins": 0.5316451787948608, + "rewards/rejected": 0.1668746918439865, + "step": 15281 + }, + { + "epoch": 0.89, + "learning_rate": 3.1786132673039425e-09, + "logits/chosen": -1.894432783126831, + "logits/rejected": -1.899657130241394, + "logps/chosen": -0.6473633050918579, + "logps/rejected": -135.45457458496094, + "loss": 0.4873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03731600567698479, + "rewards/margins": 1.074938416481018, + "rewards/rejected": -1.1122543811798096, + "step": 15282 + }, + { + "epoch": 0.89, + "learning_rate": 3.1753075811013995e-09, + "logits/chosen": -1.7726012468338013, + "logits/rejected": -1.766568899154663, + "logps/chosen": -0.030441999435424805, + "logps/rejected": -59.81250762939453, + "loss": 0.6395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002268827287480235, + "rewards/margins": 0.22555863857269287, + "rewards/rejected": -0.22782745957374573, + "step": 15283 + }, + { + "epoch": 0.89, + "learning_rate": 3.17200355834098e-09, + "logits/chosen": -1.9872782230377197, + "logits/rejected": -1.9822274446487427, + "logps/chosen": -0.0008208329090848565, + "logps/rejected": -180.50222778320312, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.920910527696833e-05, + "rewards/margins": 2.7258541584014893, + "rewards/rejected": -2.7259232997894287, + "step": 15284 + }, + { + "epoch": 0.89, + "learning_rate": 3.1687011991400502e-09, + "logits/chosen": -1.809706449508667, + "logits/rejected": -1.8033429384231567, + "logps/chosen": -94.99150085449219, + "logps/rejected": -332.14190673828125, + "loss": 0.0866, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3346222639083862, + "rewards/margins": 3.426877021789551, + "rewards/rejected": -2.092254638671875, + "step": 15285 + }, + { + "epoch": 0.89, + "learning_rate": 3.1654005036159282e-09, + "logits/chosen": -2.030154228210449, + "logits/rejected": -2.018589496612549, + "logps/chosen": -37.26111602783203, + "logps/rejected": -155.145751953125, + "loss": 0.4757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.062084197998046875, + "rewards/margins": 0.7918205261230469, + "rewards/rejected": -0.729736328125, + "step": 15286 + }, + { + "epoch": 0.89, + "learning_rate": 3.1621014718858752e-09, + "logits/chosen": -2.0275771617889404, + "logits/rejected": -2.030853271484375, + "logps/chosen": -2.166261911392212, + "logps/rejected": -59.32110595703125, + "loss": 0.458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2839288115501404, + "rewards/margins": 0.6362824440002441, + "rewards/rejected": -0.35235366225242615, + "step": 15287 + }, + { + "epoch": 0.89, + "learning_rate": 3.158804104067092e-09, + "logits/chosen": -2.026538133621216, + "logits/rejected": -2.016512393951416, + "logps/chosen": -68.86371612548828, + "logps/rejected": -223.32305908203125, + "loss": 0.1132, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4012947082519531, + "rewards/margins": 3.048023223876953, + "rewards/rejected": -1.646728515625, + "step": 15288 + }, + { + "epoch": 0.89, + "learning_rate": 3.1555084002767017e-09, + "logits/chosen": -1.8354930877685547, + "logits/rejected": -1.834599256515503, + "logps/chosen": -70.7585678100586, + "logps/rejected": -245.0529022216797, + "loss": 0.2662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2960670590400696, + "rewards/margins": 2.538881778717041, + "rewards/rejected": -2.242814779281616, + "step": 15289 + }, + { + "epoch": 0.89, + "learning_rate": 3.1522143606317995e-09, + "logits/chosen": -1.8910843133926392, + "logits/rejected": -1.8697534799575806, + "logps/chosen": -239.69227600097656, + "logps/rejected": -309.53973388671875, + "loss": 0.1894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6763687133789062, + "rewards/margins": 1.3874709606170654, + "rewards/rejected": 0.28889772295951843, + "step": 15290 + }, + { + "epoch": 0.89, + "learning_rate": 3.1489219852493974e-09, + "logits/chosen": -2.0108633041381836, + "logits/rejected": -2.0045642852783203, + "logps/chosen": -48.859169006347656, + "logps/rejected": -175.21270751953125, + "loss": 0.4154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42633286118507385, + "rewards/margins": 3.075244903564453, + "rewards/rejected": -3.501577854156494, + "step": 15291 + }, + { + "epoch": 0.89, + "learning_rate": 3.1456312742464687e-09, + "logits/chosen": -1.8659038543701172, + "logits/rejected": -1.8527988195419312, + "logps/chosen": -27.159461975097656, + "logps/rejected": -371.1537170410156, + "loss": 0.121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9159149527549744, + "rewards/margins": 8.300561904907227, + "rewards/rejected": -7.384646892547607, + "step": 15292 + }, + { + "epoch": 0.89, + "learning_rate": 3.142342227739897e-09, + "logits/chosen": -1.82984459400177, + "logits/rejected": -1.8317803144454956, + "logps/chosen": -2.257321357727051, + "logps/rejected": -80.44036865234375, + "loss": 0.5434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008280945010483265, + "rewards/margins": 1.0036262273788452, + "rewards/rejected": -1.0119072198867798, + "step": 15293 + }, + { + "epoch": 0.89, + "learning_rate": 3.139054845846534e-09, + "logits/chosen": -2.0581507682800293, + "logits/rejected": -2.058669090270996, + "logps/chosen": -39.82371139526367, + "logps/rejected": -159.41079711914062, + "loss": 0.1962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9147807955741882, + "rewards/margins": 2.081174850463867, + "rewards/rejected": -1.1663941144943237, + "step": 15294 + }, + { + "epoch": 0.89, + "learning_rate": 3.1357691286831747e-09, + "logits/chosen": -1.6430749893188477, + "logits/rejected": -1.6013586521148682, + "logps/chosen": -129.8645477294922, + "logps/rejected": -309.7328186035156, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8133514523506165, + "rewards/margins": 1.5658355951309204, + "rewards/rejected": -0.752484142780304, + "step": 15295 + }, + { + "epoch": 0.89, + "learning_rate": 3.1324850763665254e-09, + "logits/chosen": -1.8160017728805542, + "logits/rejected": -1.8209407329559326, + "logps/chosen": -189.24769592285156, + "logps/rejected": -226.55068969726562, + "loss": 0.6024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12706910073757172, + "rewards/margins": 0.07888182997703552, + "rewards/rejected": -0.20595093071460724, + "step": 15296 + }, + { + "epoch": 0.89, + "learning_rate": 3.1292026890132605e-09, + "logits/chosen": -1.8434851169586182, + "logits/rejected": -1.8466311693191528, + "logps/chosen": -248.2306671142578, + "logps/rejected": -424.4449462890625, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9529770612716675, + "rewards/margins": 4.709608554840088, + "rewards/rejected": -2.75663161277771, + "step": 15297 + }, + { + "epoch": 0.89, + "learning_rate": 3.1259219667399863e-09, + "logits/chosen": -2.1141724586486816, + "logits/rejected": -2.116534471511841, + "logps/chosen": -7.158558368682861, + "logps/rejected": -50.130435943603516, + "loss": 0.6389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12960000336170197, + "rewards/margins": 0.07912049442529678, + "rewards/rejected": 0.05047950893640518, + "step": 15298 + }, + { + "epoch": 0.89, + "learning_rate": 3.122642909663259e-09, + "logits/chosen": -1.8273770809173584, + "logits/rejected": -1.8430861234664917, + "logps/chosen": -201.9168701171875, + "logps/rejected": -323.2381286621094, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4590394496917725, + "rewards/margins": 3.3940460681915283, + "rewards/rejected": 0.06499328464269638, + "step": 15299 + }, + { + "epoch": 0.89, + "learning_rate": 3.119365517899547e-09, + "logits/chosen": -1.9532264471054077, + "logits/rejected": -1.940030574798584, + "logps/chosen": -0.13365131616592407, + "logps/rejected": -202.43771362304688, + "loss": 0.3625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011930723674595356, + "rewards/margins": 3.5376787185668945, + "rewards/rejected": -3.549609422683716, + "step": 15300 + }, + { + "epoch": 0.89, + "learning_rate": 3.1160897915652907e-09, + "logits/chosen": -1.8028912544250488, + "logits/rejected": -1.8585222959518433, + "logps/chosen": -137.69058227539062, + "logps/rejected": -488.89410400390625, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3507431745529175, + "rewards/margins": 6.2864670753479, + "rewards/rejected": -4.935723781585693, + "step": 15301 + }, + { + "epoch": 0.89, + "learning_rate": 3.1128157307768576e-09, + "logits/chosen": -1.989005446434021, + "logits/rejected": -1.9820168018341064, + "logps/chosen": -9.009954452514648, + "logps/rejected": -146.9024658203125, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16023731231689453, + "rewards/margins": 1.913914680480957, + "rewards/rejected": -1.7536773681640625, + "step": 15302 + }, + { + "epoch": 0.89, + "learning_rate": 3.109543335650572e-09, + "logits/chosen": -1.940016746520996, + "logits/rejected": -1.9408226013183594, + "logps/chosen": -27.18490219116211, + "logps/rejected": -153.3540496826172, + "loss": 0.4001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12684135138988495, + "rewards/margins": 1.7927616834640503, + "rewards/rejected": -1.9196029901504517, + "step": 15303 + }, + { + "epoch": 0.89, + "learning_rate": 3.1062726063026624e-09, + "logits/chosen": -1.984169840812683, + "logits/rejected": -2.042717218399048, + "logps/chosen": -245.68727111816406, + "logps/rejected": -343.3643798828125, + "loss": 0.0827, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2014999389648438, + "rewards/margins": 2.0038251876831055, + "rewards/rejected": 1.1976746320724487, + "step": 15304 + }, + { + "epoch": 0.89, + "learning_rate": 3.1030035428493305e-09, + "logits/chosen": -1.9821925163269043, + "logits/rejected": -1.9615411758422852, + "logps/chosen": -201.4648895263672, + "logps/rejected": -313.9742736816406, + "loss": 0.1174, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.356776475906372, + "rewards/margins": 1.7453904151916504, + "rewards/rejected": 0.6113861203193665, + "step": 15305 + }, + { + "epoch": 0.89, + "learning_rate": 3.099736145406712e-09, + "logits/chosen": -1.815555214881897, + "logits/rejected": -1.7940796613693237, + "logps/chosen": -148.42465209960938, + "logps/rejected": -436.129638671875, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0370757579803467, + "rewards/margins": 5.174050807952881, + "rewards/rejected": -3.136975049972534, + "step": 15306 + }, + { + "epoch": 0.89, + "learning_rate": 3.0964704140908905e-09, + "logits/chosen": -1.838114857673645, + "logits/rejected": -1.8334702253341675, + "logps/chosen": -35.009437561035156, + "logps/rejected": -159.14683532714844, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3639366626739502, + "rewards/margins": 2.2819528579711914, + "rewards/rejected": -0.9180160760879517, + "step": 15307 + }, + { + "epoch": 0.89, + "learning_rate": 3.0932063490178574e-09, + "logits/chosen": -1.6313356161117554, + "logits/rejected": -1.637472152709961, + "logps/chosen": -5.395169258117676, + "logps/rejected": -77.03622436523438, + "loss": 0.6922, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2554275691509247, + "rewards/margins": -0.25293728709220886, + "rewards/rejected": 0.5083648562431335, + "step": 15308 + }, + { + "epoch": 0.89, + "learning_rate": 3.089943950303581e-09, + "logits/chosen": -1.9292570352554321, + "logits/rejected": -1.9592853784561157, + "logps/chosen": -231.337158203125, + "logps/rejected": -264.59979248046875, + "loss": 0.1339, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9631898403167725, + "rewards/margins": 1.3240723609924316, + "rewards/rejected": 1.6391174793243408, + "step": 15309 + }, + { + "epoch": 0.89, + "learning_rate": 3.0866832180639625e-09, + "logits/chosen": -1.9703760147094727, + "logits/rejected": -2.0340094566345215, + "logps/chosen": -217.21490478515625, + "logps/rejected": -458.5771789550781, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.044122338294983, + "rewards/margins": 4.636670112609863, + "rewards/rejected": -3.592547655105591, + "step": 15310 + }, + { + "epoch": 0.89, + "learning_rate": 3.0834241524148373e-09, + "logits/chosen": -1.7046443223953247, + "logits/rejected": -1.676413655281067, + "logps/chosen": -223.57534790039062, + "logps/rejected": -388.12176513671875, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.66571044921875, + "rewards/margins": 3.97161865234375, + "rewards/rejected": -1.305908203125, + "step": 15311 + }, + { + "epoch": 0.89, + "learning_rate": 3.080166753471969e-09, + "logits/chosen": -2.055392265319824, + "logits/rejected": -2.05010986328125, + "logps/chosen": -28.29859161376953, + "logps/rejected": -252.30377197265625, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08356552571058273, + "rewards/margins": 3.8909385204315186, + "rewards/rejected": -3.807373046875, + "step": 15312 + }, + { + "epoch": 0.89, + "learning_rate": 3.076911021351092e-09, + "logits/chosen": -1.9416050910949707, + "logits/rejected": -1.9343558549880981, + "logps/chosen": -275.86181640625, + "logps/rejected": -442.123046875, + "loss": 0.1656, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.786029100418091, + "rewards/margins": 1.1027618646621704, + "rewards/rejected": 1.6832672357559204, + "step": 15313 + }, + { + "epoch": 0.89, + "learning_rate": 3.0736569561678637e-09, + "logits/chosen": -1.91171395778656, + "logits/rejected": -1.9219657182693481, + "logps/chosen": -227.6536407470703, + "logps/rejected": -333.7213439941406, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6131194829940796, + "rewards/margins": 2.883431911468506, + "rewards/rejected": -1.2703125476837158, + "step": 15314 + }, + { + "epoch": 0.89, + "learning_rate": 3.0704045580378756e-09, + "logits/chosen": -1.834237813949585, + "logits/rejected": -1.8330166339874268, + "logps/chosen": -44.732357025146484, + "logps/rejected": -164.54249572753906, + "loss": 0.5754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04911689832806587, + "rewards/margins": 0.386614590883255, + "rewards/rejected": -0.4357315003871918, + "step": 15315 + }, + { + "epoch": 0.89, + "learning_rate": 3.067153827076685e-09, + "logits/chosen": -1.8981510400772095, + "logits/rejected": -1.8891197443008423, + "logps/chosen": -20.707332611083984, + "logps/rejected": -219.65658569335938, + "loss": 0.2085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3808044493198395, + "rewards/margins": 4.164837837219238, + "rewards/rejected": -3.7840332984924316, + "step": 15316 + }, + { + "epoch": 0.89, + "learning_rate": 3.0639047633997494e-09, + "logits/chosen": -1.817822813987732, + "logits/rejected": -1.820072054862976, + "logps/chosen": -66.48768615722656, + "logps/rejected": -263.60552978515625, + "loss": 0.1058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.473772406578064, + "rewards/margins": 2.6571877002716064, + "rewards/rejected": -1.1834152936935425, + "step": 15317 + }, + { + "epoch": 0.89, + "learning_rate": 3.0606573671225213e-09, + "logits/chosen": -1.917392373085022, + "logits/rejected": -1.944395661354065, + "logps/chosen": -191.94589233398438, + "logps/rejected": -306.03839111328125, + "loss": 0.0854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.463507056236267, + "rewards/margins": 2.177874803543091, + "rewards/rejected": -0.714367687702179, + "step": 15318 + }, + { + "epoch": 0.89, + "learning_rate": 3.057411638360341e-09, + "logits/chosen": -1.9996498823165894, + "logits/rejected": -1.993757963180542, + "logps/chosen": -90.82547760009766, + "logps/rejected": -270.0693359375, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1129051223397255, + "rewards/margins": 3.3293662071228027, + "rewards/rejected": -3.216461181640625, + "step": 15319 + }, + { + "epoch": 0.89, + "learning_rate": 3.0541675772285227e-09, + "logits/chosen": -1.923310399055481, + "logits/rejected": -1.9093213081359863, + "logps/chosen": -5.698103268514387e-05, + "logps/rejected": -208.68136596679688, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4543551287715673e-06, + "rewards/margins": 3.588536024093628, + "rewards/rejected": -3.5885345935821533, + "step": 15320 + }, + { + "epoch": 0.89, + "learning_rate": 3.050925183842307e-09, + "logits/chosen": -1.8897954225540161, + "logits/rejected": -1.8776345252990723, + "logps/chosen": -69.67494201660156, + "logps/rejected": -363.3465881347656, + "loss": 0.1325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9046974182128906, + "rewards/margins": 8.221977233886719, + "rewards/rejected": -7.31727933883667, + "step": 15321 + }, + { + "epoch": 0.89, + "learning_rate": 3.0476844583168905e-09, + "logits/chosen": -1.8552504777908325, + "logits/rejected": -1.866653323173523, + "logps/chosen": -202.42247009277344, + "logps/rejected": -417.0335388183594, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2966384887695312, + "rewards/margins": 4.033546447753906, + "rewards/rejected": -2.736907958984375, + "step": 15322 + }, + { + "epoch": 0.89, + "learning_rate": 3.044445400767376e-09, + "logits/chosen": -1.9472838640213013, + "logits/rejected": -1.936410903930664, + "logps/chosen": -20.834514617919922, + "logps/rejected": -218.17196655273438, + "loss": 0.1978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6401840448379517, + "rewards/margins": 4.669233798980713, + "rewards/rejected": -4.029049873352051, + "step": 15323 + }, + { + "epoch": 0.89, + "learning_rate": 3.0412080113088546e-09, + "logits/chosen": -1.8915200233459473, + "logits/rejected": -1.9102253913879395, + "logps/chosen": -194.57925415039062, + "logps/rejected": -343.374267578125, + "loss": 0.0495, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.239605665206909, + "rewards/margins": 2.575360059738159, + "rewards/rejected": -0.33575439453125, + "step": 15324 + }, + { + "epoch": 0.89, + "learning_rate": 3.0379722900563174e-09, + "logits/chosen": -1.8913050889968872, + "logits/rejected": -1.885219931602478, + "logps/chosen": -23.872154235839844, + "logps/rejected": -240.42015075683594, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19810600578784943, + "rewards/margins": 2.406031608581543, + "rewards/rejected": -2.20792555809021, + "step": 15325 + }, + { + "epoch": 0.89, + "learning_rate": 3.0347382371247345e-09, + "logits/chosen": -1.9548662900924683, + "logits/rejected": -1.965384840965271, + "logps/chosen": -157.95114135742188, + "logps/rejected": -475.52813720703125, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3134247064590454, + "rewards/margins": 6.493252754211426, + "rewards/rejected": -5.17982816696167, + "step": 15326 + }, + { + "epoch": 0.89, + "learning_rate": 3.031505852628968e-09, + "logits/chosen": -1.9967200756072998, + "logits/rejected": -1.9965853691101074, + "logps/chosen": -0.6570326685905457, + "logps/rejected": -184.63833618164062, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011328160762786865, + "rewards/margins": 3.0136170387268066, + "rewards/rejected": -3.002288818359375, + "step": 15327 + }, + { + "epoch": 0.89, + "learning_rate": 3.0282751366838665e-09, + "logits/chosen": -1.8750262260437012, + "logits/rejected": -1.8346725702285767, + "logps/chosen": -250.7657470703125, + "logps/rejected": -394.1635437011719, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.860858201980591, + "rewards/margins": 4.2143402099609375, + "rewards/rejected": -1.3534821271896362, + "step": 15328 + }, + { + "epoch": 0.89, + "learning_rate": 3.0250460894041927e-09, + "logits/chosen": -1.8499835729599, + "logits/rejected": -1.8550113439559937, + "logps/chosen": -257.4242858886719, + "logps/rejected": -610.238525390625, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3074158430099487, + "rewards/margins": 7.623712539672852, + "rewards/rejected": -6.316296577453613, + "step": 15329 + }, + { + "epoch": 0.89, + "learning_rate": 3.021818710904667e-09, + "logits/chosen": -1.8533711433410645, + "logits/rejected": -1.852108359336853, + "logps/chosen": -162.21810913085938, + "logps/rejected": -264.088623046875, + "loss": 0.0878, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0604889392852783, + "rewards/margins": 2.613888740539551, + "rewards/rejected": -0.5533996820449829, + "step": 15330 + }, + { + "epoch": 0.89, + "learning_rate": 3.018593001299935e-09, + "logits/chosen": -1.838716745376587, + "logits/rejected": -1.8347094058990479, + "logps/chosen": -0.8037573099136353, + "logps/rejected": -28.856237411499023, + "loss": 0.6596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.066852867603302, + "rewards/margins": 0.10393990576267242, + "rewards/rejected": -0.17079277336597443, + "step": 15331 + }, + { + "epoch": 0.89, + "learning_rate": 3.015368960704584e-09, + "logits/chosen": -1.9510879516601562, + "logits/rejected": -1.9562567472457886, + "logps/chosen": -0.00022529606940224767, + "logps/rejected": -200.51910400390625, + "loss": 0.4083, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.784051376802381e-06, + "rewards/margins": 1.550981044769287, + "rewards/rejected": -1.550988793373108, + "step": 15332 + }, + { + "epoch": 0.89, + "learning_rate": 3.0121465892331553e-09, + "logits/chosen": -1.8737064599990845, + "logits/rejected": -1.8702939748764038, + "logps/chosen": -5.312723636627197, + "logps/rejected": -47.739715576171875, + "loss": 0.7346, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0647641196846962, + "rewards/margins": -0.20440760254859924, + "rewards/rejected": 0.13964347541332245, + "step": 15333 + }, + { + "epoch": 0.89, + "learning_rate": 3.00892588700013e-09, + "logits/chosen": -1.6771910190582275, + "logits/rejected": -1.7334685325622559, + "logps/chosen": -278.1356201171875, + "logps/rejected": -337.6211853027344, + "loss": 0.0584, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7777557373046875, + "rewards/margins": 2.810849189758301, + "rewards/rejected": -1.0330933332443237, + "step": 15334 + }, + { + "epoch": 0.89, + "learning_rate": 3.0057068541199105e-09, + "logits/chosen": -1.9749119281768799, + "logits/rejected": -1.9818294048309326, + "logps/chosen": -38.74104690551758, + "logps/rejected": -142.50839233398438, + "loss": 0.4424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4191654324531555, + "rewards/margins": 0.671817421913147, + "rewards/rejected": -0.25265198945999146, + "step": 15335 + }, + { + "epoch": 0.89, + "learning_rate": 3.0024894907068445e-09, + "logits/chosen": -1.867644190788269, + "logits/rejected": -1.8670965433120728, + "logps/chosen": -94.96757507324219, + "logps/rejected": -353.14691162109375, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.19059157371521, + "rewards/margins": 4.118411540985107, + "rewards/rejected": -1.927819848060608, + "step": 15336 + }, + { + "epoch": 0.89, + "learning_rate": 2.9992737968752625e-09, + "logits/chosen": -1.7562824487686157, + "logits/rejected": -1.7343379259109497, + "logps/chosen": -225.60345458984375, + "logps/rejected": -385.0676574707031, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1106277704238892, + "rewards/margins": 0.5171188712120056, + "rewards/rejected": 0.5935088992118835, + "step": 15337 + }, + { + "epoch": 0.89, + "learning_rate": 2.9960597727393733e-09, + "logits/chosen": -1.885379672050476, + "logits/rejected": -1.8712306022644043, + "logps/chosen": -0.009111394174396992, + "logps/rejected": -200.9404754638672, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005152401863597333, + "rewards/margins": 6.854257583618164, + "rewards/rejected": -6.854773044586182, + "step": 15338 + }, + { + "epoch": 0.89, + "learning_rate": 2.9928474184133577e-09, + "logits/chosen": -2.0126380920410156, + "logits/rejected": -2.004815101623535, + "logps/chosen": -1.9707242250442505, + "logps/rejected": -140.54881286621094, + "loss": 0.5484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006551027181558311, + "rewards/margins": 0.7464274168014526, + "rewards/rejected": -0.747082531452179, + "step": 15339 + }, + { + "epoch": 0.89, + "learning_rate": 2.9896367340113414e-09, + "logits/chosen": -1.946504831314087, + "logits/rejected": -1.939399242401123, + "logps/chosen": -127.1833724975586, + "logps/rejected": -320.91693115234375, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.487485647201538, + "rewards/margins": 6.026172637939453, + "rewards/rejected": -3.538687229156494, + "step": 15340 + }, + { + "epoch": 0.89, + "learning_rate": 2.986427719647383e-09, + "logits/chosen": -1.8291937112808228, + "logits/rejected": -1.8148705959320068, + "logps/chosen": -155.55526733398438, + "logps/rejected": -221.98751831054688, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5522797107696533, + "rewards/margins": 0.11296236515045166, + "rewards/rejected": 1.4393173456192017, + "step": 15341 + }, + { + "epoch": 0.89, + "learning_rate": 2.98322037543548e-09, + "logits/chosen": -1.9570984840393066, + "logits/rejected": -1.9673703908920288, + "logps/chosen": -12.322710037231445, + "logps/rejected": -231.01492309570312, + "loss": 0.2575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3456842601299286, + "rewards/margins": 4.9852423667907715, + "rewards/rejected": -4.6395583152771, + "step": 15342 + }, + { + "epoch": 0.89, + "learning_rate": 2.9800147014895692e-09, + "logits/chosen": -1.8689537048339844, + "logits/rejected": -1.8065253496170044, + "logps/chosen": -317.29766845703125, + "logps/rejected": -523.5562744140625, + "loss": 0.1153, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.399981737136841, + "rewards/margins": 1.473541259765625, + "rewards/rejected": 0.926440417766571, + "step": 15343 + }, + { + "epoch": 0.89, + "learning_rate": 2.9768106979235376e-09, + "logits/chosen": -2.0836403369903564, + "logits/rejected": -2.0718114376068115, + "logps/chosen": -45.547542572021484, + "logps/rejected": -188.87303161621094, + "loss": 0.1535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7618610262870789, + "rewards/margins": 3.0052692890167236, + "rewards/rejected": -2.243408203125, + "step": 15344 + }, + { + "epoch": 0.89, + "learning_rate": 2.97360836485121e-09, + "logits/chosen": -1.8041143417358398, + "logits/rejected": -1.7926793098449707, + "logps/chosen": -167.58990478515625, + "logps/rejected": -315.1891784667969, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4735658168792725, + "rewards/margins": 2.1449739933013916, + "rewards/rejected": 0.328591912984848, + "step": 15345 + }, + { + "epoch": 0.89, + "learning_rate": 2.9704077023863404e-09, + "logits/chosen": -1.8787697553634644, + "logits/rejected": -1.8383865356445312, + "logps/chosen": -197.8448486328125, + "logps/rejected": -351.49725341796875, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9436005353927612, + "rewards/margins": 4.018972873687744, + "rewards/rejected": -2.0753724575042725, + "step": 15346 + }, + { + "epoch": 0.89, + "learning_rate": 2.9672087106426325e-09, + "logits/chosen": -1.6648069620132446, + "logits/rejected": -1.668932557106018, + "logps/chosen": -0.00014221369929146022, + "logps/rejected": -242.41995239257812, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.517854449659353e-06, + "rewards/margins": 3.752851963043213, + "rewards/rejected": -3.752856492996216, + "step": 15347 + }, + { + "epoch": 0.89, + "learning_rate": 2.9640113897337336e-09, + "logits/chosen": -1.7960888147354126, + "logits/rejected": -1.7926028966903687, + "logps/chosen": -7.629284858703613, + "logps/rejected": -110.1238784790039, + "loss": 0.6577, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.17469044029712677, + "rewards/margins": -0.02863597869873047, + "rewards/rejected": 0.20332641899585724, + "step": 15348 + }, + { + "epoch": 0.89, + "learning_rate": 2.9608157397732427e-09, + "logits/chosen": -1.8524584770202637, + "logits/rejected": -1.8607726097106934, + "logps/chosen": -0.04947249963879585, + "logps/rejected": -218.15013122558594, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003021941753104329, + "rewards/margins": 2.9791159629821777, + "rewards/rejected": -2.9760940074920654, + "step": 15349 + }, + { + "epoch": 0.89, + "learning_rate": 2.9576217608746566e-09, + "logits/chosen": -1.878122091293335, + "logits/rejected": -1.8730320930480957, + "logps/chosen": -20.13604736328125, + "logps/rejected": -123.48797607421875, + "loss": 0.3307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5319570899009705, + "rewards/margins": 1.1965346336364746, + "rewards/rejected": -0.6645774841308594, + "step": 15350 + }, + { + "epoch": 0.89, + "learning_rate": 2.9544294531514526e-09, + "logits/chosen": -1.8556870222091675, + "logits/rejected": -1.8326776027679443, + "logps/chosen": -241.84671020507812, + "logps/rejected": -608.8585815429688, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8863617181777954, + "rewards/margins": 9.907217979431152, + "rewards/rejected": -8.020855903625488, + "step": 15351 + }, + { + "epoch": 0.89, + "learning_rate": 2.951238816717044e-09, + "logits/chosen": -2.0191903114318848, + "logits/rejected": -2.021265983581543, + "logps/chosen": -174.3578643798828, + "logps/rejected": -259.2173156738281, + "loss": 0.1184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5763870477676392, + "rewards/margins": 2.3163726329803467, + "rewards/rejected": -0.7399856448173523, + "step": 15352 + }, + { + "epoch": 0.89, + "learning_rate": 2.9480498516847753e-09, + "logits/chosen": -1.8998171091079712, + "logits/rejected": -1.9045157432556152, + "logps/chosen": -0.010430986061692238, + "logps/rejected": -158.63333129882812, + "loss": 0.371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000767712015658617, + "rewards/margins": 2.4775404930114746, + "rewards/rejected": -2.4783082008361816, + "step": 15353 + }, + { + "epoch": 0.89, + "learning_rate": 2.9448625581679264e-09, + "logits/chosen": -1.8317970037460327, + "logits/rejected": -1.8421101570129395, + "logps/chosen": -220.01063537597656, + "logps/rejected": -447.5725402832031, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3723220825195312, + "rewards/margins": 5.1110944747924805, + "rewards/rejected": -2.7387726306915283, + "step": 15354 + }, + { + "epoch": 0.89, + "learning_rate": 2.9416769362797243e-09, + "logits/chosen": -1.8359078168869019, + "logits/rejected": -1.8477857112884521, + "logps/chosen": -143.45884704589844, + "logps/rejected": -366.2882995605469, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.634181261062622, + "rewards/margins": 2.585414171218872, + "rewards/rejected": -0.95123291015625, + "step": 15355 + }, + { + "epoch": 0.89, + "learning_rate": 2.9384929861333617e-09, + "logits/chosen": -1.8129115104675293, + "logits/rejected": -1.772982120513916, + "logps/chosen": -145.65728759765625, + "logps/rejected": -264.57415771484375, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5383682250976562, + "rewards/margins": 0.6273147463798523, + "rewards/rejected": 0.911053478717804, + "step": 15356 + }, + { + "epoch": 0.89, + "learning_rate": 2.9353107078419204e-09, + "logits/chosen": -1.7415531873703003, + "logits/rejected": -1.7244693040847778, + "logps/chosen": -231.53167724609375, + "logps/rejected": -354.6116027832031, + "loss": 0.1074, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3543763160705566, + "rewards/margins": 1.8508210182189941, + "rewards/rejected": 0.5035552978515625, + "step": 15357 + }, + { + "epoch": 0.89, + "learning_rate": 2.9321301015184707e-09, + "logits/chosen": -1.7887351512908936, + "logits/rejected": -1.7939924001693726, + "logps/chosen": -0.033129218965768814, + "logps/rejected": -116.79605102539062, + "loss": 0.5201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004809886682778597, + "rewards/margins": 0.8624240756034851, + "rewards/rejected": -0.8576141595840454, + "step": 15358 + }, + { + "epoch": 0.89, + "learning_rate": 2.928951167275978e-09, + "logits/chosen": -2.0113039016723633, + "logits/rejected": -1.992266297340393, + "logps/chosen": -4.923265441902913e-05, + "logps/rejected": -223.74307250976562, + "loss": 0.3488, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.668785625952296e-05, + "rewards/margins": 5.3082275390625, + "rewards/rejected": -5.308210849761963, + "step": 15359 + }, + { + "epoch": 0.89, + "learning_rate": 2.925773905227402e-09, + "logits/chosen": -1.982173204421997, + "logits/rejected": -1.9859261512756348, + "logps/chosen": -165.5687255859375, + "logps/rejected": -243.81068420410156, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.41058349609375, + "rewards/margins": 0.5577071905136108, + "rewards/rejected": 0.8528763055801392, + "step": 15360 + }, + { + "epoch": 0.89, + "learning_rate": 2.922598315485597e-09, + "logits/chosen": -1.9487110376358032, + "logits/rejected": -1.9430756568908691, + "logps/chosen": -0.005764520727097988, + "logps/rejected": -97.21293640136719, + "loss": 0.7357, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00048022420378401875, + "rewards/margins": -0.2155781388282776, + "rewards/rejected": 0.21605835855007172, + "step": 15361 + }, + { + "epoch": 0.89, + "learning_rate": 2.919424398163378e-09, + "logits/chosen": -1.8219910860061646, + "logits/rejected": -1.8939716815948486, + "logps/chosen": -231.80177307128906, + "logps/rejected": -232.31655883789062, + "loss": 0.077, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.237379550933838, + "rewards/margins": 2.005906820297241, + "rewards/rejected": 0.23147277534008026, + "step": 15362 + }, + { + "epoch": 0.89, + "learning_rate": 2.916252153373505e-09, + "logits/chosen": -1.980608582496643, + "logits/rejected": -1.9779784679412842, + "logps/chosen": -55.382080078125, + "logps/rejected": -183.92164611816406, + "loss": 0.3785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10664939880371094, + "rewards/margins": 2.7207210063934326, + "rewards/rejected": -2.6140716075897217, + "step": 15363 + }, + { + "epoch": 0.89, + "learning_rate": 2.9130815812286714e-09, + "logits/chosen": -1.8072526454925537, + "logits/rejected": -1.677856206893921, + "logps/chosen": -307.5601806640625, + "logps/rejected": -918.8909912109375, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.850311279296875, + "rewards/margins": 9.714252471923828, + "rewards/rejected": -7.863940715789795, + "step": 15364 + }, + { + "epoch": 0.89, + "learning_rate": 2.9099126818414975e-09, + "logits/chosen": -1.6559786796569824, + "logits/rejected": -1.6517865657806396, + "logps/chosen": -57.916648864746094, + "logps/rejected": -223.54994201660156, + "loss": 0.2456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8446937799453735, + "rewards/margins": 1.8421287536621094, + "rewards/rejected": -0.9974350333213806, + "step": 15365 + }, + { + "epoch": 0.89, + "learning_rate": 2.9067454553245665e-09, + "logits/chosen": -1.8198271989822388, + "logits/rejected": -1.8173587322235107, + "logps/chosen": -0.39047771692276, + "logps/rejected": -103.25773620605469, + "loss": 0.6696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020816095173358917, + "rewards/margins": 0.1125091090798378, + "rewards/rejected": -0.13332520425319672, + "step": 15366 + }, + { + "epoch": 0.89, + "learning_rate": 2.9035799017903985e-09, + "logits/chosen": -1.7913086414337158, + "logits/rejected": -1.7934575080871582, + "logps/chosen": -0.015008874237537384, + "logps/rejected": -330.0733642578125, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00131778116337955, + "rewards/margins": 7.1335272789001465, + "rewards/rejected": -7.13484525680542, + "step": 15367 + }, + { + "epoch": 0.89, + "learning_rate": 2.9004160213514537e-09, + "logits/chosen": -1.6469358205795288, + "logits/rejected": -1.639182209968567, + "logps/chosen": -90.91058349609375, + "logps/rejected": -196.61981201171875, + "loss": 0.6017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5425201654434204, + "rewards/margins": 0.9628493785858154, + "rewards/rejected": -1.5053695440292358, + "step": 15368 + }, + { + "epoch": 0.89, + "learning_rate": 2.8972538141201154e-09, + "logits/chosen": -1.9911595582962036, + "logits/rejected": -1.989668607711792, + "logps/chosen": -194.53237915039062, + "logps/rejected": -312.24395751953125, + "loss": 0.1253, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.51080322265625, + "rewards/margins": 1.933355689048767, + "rewards/rejected": -0.4225524961948395, + "step": 15369 + }, + { + "epoch": 0.89, + "learning_rate": 2.8940932802087204e-09, + "logits/chosen": -1.8673335313796997, + "logits/rejected": -1.8640491962432861, + "logps/chosen": -236.27496337890625, + "logps/rejected": -348.2533874511719, + "loss": 0.1524, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.547750949859619, + "rewards/margins": 1.0927735567092896, + "rewards/rejected": 1.4549773931503296, + "step": 15370 + }, + { + "epoch": 0.89, + "learning_rate": 2.890934419729557e-09, + "logits/chosen": -1.960349440574646, + "logits/rejected": -1.9551717042922974, + "logps/chosen": -23.727094650268555, + "logps/rejected": -151.21583557128906, + "loss": 0.3528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048249244689941406, + "rewards/margins": 2.795558452606201, + "rewards/rejected": -2.7473092079162598, + "step": 15371 + }, + { + "epoch": 0.89, + "learning_rate": 2.8877772327948415e-09, + "logits/chosen": -1.8885056972503662, + "logits/rejected": -1.898302674293518, + "logps/chosen": -169.32447814941406, + "logps/rejected": -363.5057373046875, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.802577257156372, + "rewards/margins": 2.283076524734497, + "rewards/rejected": -0.480499267578125, + "step": 15372 + }, + { + "epoch": 0.89, + "learning_rate": 2.8846217195167223e-09, + "logits/chosen": -2.0697975158691406, + "logits/rejected": -2.058671474456787, + "logps/chosen": -45.16843032836914, + "logps/rejected": -205.24148559570312, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6075908541679382, + "rewards/margins": 1.5359783172607422, + "rewards/rejected": -0.928387463092804, + "step": 15373 + }, + { + "epoch": 0.89, + "learning_rate": 2.88146788000731e-09, + "logits/chosen": -1.8843992948532104, + "logits/rejected": -1.8711140155792236, + "logps/chosen": -272.8515625, + "logps/rejected": -364.5956115722656, + "loss": 0.208, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3922821283340454, + "rewards/margins": 0.8620239496231079, + "rewards/rejected": 0.5302581787109375, + "step": 15374 + }, + { + "epoch": 0.89, + "learning_rate": 2.8783157143786373e-09, + "logits/chosen": -2.018409490585327, + "logits/rejected": -2.022939443588257, + "logps/chosen": -116.56403350830078, + "logps/rejected": -300.9133605957031, + "loss": 0.1603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0119980573654175, + "rewards/margins": 2.830915927886963, + "rewards/rejected": -1.8189178705215454, + "step": 15375 + }, + { + "epoch": 0.89, + "learning_rate": 2.8751652227426872e-09, + "logits/chosen": -1.758802890777588, + "logits/rejected": -1.7365479469299316, + "logps/chosen": -271.5537109375, + "logps/rejected": -494.4789123535156, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.125720262527466, + "rewards/margins": 7.791155815124512, + "rewards/rejected": -4.665435791015625, + "step": 15376 + }, + { + "epoch": 0.89, + "learning_rate": 2.8720164052113916e-09, + "logits/chosen": -1.9342091083526611, + "logits/rejected": -1.9261348247528076, + "logps/chosen": -0.016599338501691818, + "logps/rejected": -128.18800354003906, + "loss": 0.4689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02695302478969097, + "rewards/margins": 1.2121962308883667, + "rewards/rejected": -1.1852432489395142, + "step": 15377 + }, + { + "epoch": 0.89, + "learning_rate": 2.8688692618965835e-09, + "logits/chosen": -2.06257700920105, + "logits/rejected": -2.0675294399261475, + "logps/chosen": -108.32898712158203, + "logps/rejected": -192.45277404785156, + "loss": 0.4044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08596573024988174, + "rewards/margins": 1.5731178522109985, + "rewards/rejected": -1.487152099609375, + "step": 15378 + }, + { + "epoch": 0.89, + "learning_rate": 2.8657237929100963e-09, + "logits/chosen": -1.898506760597229, + "logits/rejected": -1.8962488174438477, + "logps/chosen": -88.16858673095703, + "logps/rejected": -188.2232208251953, + "loss": 0.1742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7655044794082642, + "rewards/margins": 1.1857696771621704, + "rewards/rejected": 0.5797348022460938, + "step": 15379 + }, + { + "epoch": 0.9, + "learning_rate": 2.8625799983636513e-09, + "logits/chosen": -1.8069323301315308, + "logits/rejected": -1.8164842128753662, + "logps/chosen": -79.93047332763672, + "logps/rejected": -202.6656951904297, + "loss": 0.6004, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6801078915596008, + "rewards/margins": -0.3532112240791321, + "rewards/rejected": 1.033319115638733, + "step": 15380 + }, + { + "epoch": 0.9, + "learning_rate": 2.859437878368942e-09, + "logits/chosen": -1.6780450344085693, + "logits/rejected": -1.6200730800628662, + "logps/chosen": -295.7192687988281, + "logps/rejected": -785.6417236328125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.494894504547119, + "rewards/margins": 6.043173313140869, + "rewards/rejected": -2.54827880859375, + "step": 15381 + }, + { + "epoch": 0.9, + "learning_rate": 2.8562974330375855e-09, + "logits/chosen": -1.8783010244369507, + "logits/rejected": -1.8493748903274536, + "logps/chosen": -286.9002380371094, + "logps/rejected": -348.3629150390625, + "loss": 0.2127, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.158599853515625, + "rewards/margins": 1.2296569347381592, + "rewards/rejected": -0.07105713337659836, + "step": 15382 + }, + { + "epoch": 0.9, + "learning_rate": 2.8531586624811588e-09, + "logits/chosen": -2.0005767345428467, + "logits/rejected": -1.9944695234298706, + "logps/chosen": -19.68767547607422, + "logps/rejected": -175.89743041992188, + "loss": 0.4551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41089364886283875, + "rewards/margins": 2.3240153789520264, + "rewards/rejected": -2.7349090576171875, + "step": 15383 + }, + { + "epoch": 0.9, + "learning_rate": 2.850021566811145e-09, + "logits/chosen": -1.864166259765625, + "logits/rejected": -1.8611491918563843, + "logps/chosen": -16.848236083984375, + "logps/rejected": -108.78141021728516, + "loss": 0.2496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8259435892105103, + "rewards/margins": 1.3512532711029053, + "rewards/rejected": -0.5253097414970398, + "step": 15384 + }, + { + "epoch": 0.9, + "learning_rate": 2.846886146139005e-09, + "logits/chosen": -2.019170045852661, + "logits/rejected": -2.0201330184936523, + "logps/chosen": -7.899784564971924, + "logps/rejected": -46.155731201171875, + "loss": 0.5036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12451662868261337, + "rewards/margins": 0.7296454906463623, + "rewards/rejected": -0.6051288843154907, + "step": 15385 + }, + { + "epoch": 0.9, + "learning_rate": 2.8437524005761215e-09, + "logits/chosen": -1.9980239868164062, + "logits/rejected": -2.004868507385254, + "logps/chosen": -131.27059936523438, + "logps/rejected": -211.0124053955078, + "loss": 0.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.23471999168396, + "rewards/margins": 1.6595689058303833, + "rewards/rejected": 0.5751510858535767, + "step": 15386 + }, + { + "epoch": 0.9, + "learning_rate": 2.840620330233823e-09, + "logits/chosen": -1.9121589660644531, + "logits/rejected": -1.757785439491272, + "logps/chosen": -194.46621704101562, + "logps/rejected": -768.8375244140625, + "loss": 0.0761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.264227271080017, + "rewards/margins": 6.483136177062988, + "rewards/rejected": -5.218908786773682, + "step": 15387 + }, + { + "epoch": 0.9, + "learning_rate": 2.8374899352233637e-09, + "logits/chosen": -1.9035298824310303, + "logits/rejected": -1.9024444818496704, + "logps/chosen": -103.72146606445312, + "logps/rejected": -161.97052001953125, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1662704944610596, + "rewards/margins": 0.32721561193466187, + "rewards/rejected": 0.8390548825263977, + "step": 15388 + }, + { + "epoch": 0.9, + "learning_rate": 2.834361215655956e-09, + "logits/chosen": -1.857527494430542, + "logits/rejected": -1.8514338731765747, + "logps/chosen": -35.2365837097168, + "logps/rejected": -171.68516540527344, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41783830523490906, + "rewards/margins": 0.9546898603439331, + "rewards/rejected": -0.5368515253067017, + "step": 15389 + }, + { + "epoch": 0.9, + "learning_rate": 2.8312341716427548e-09, + "logits/chosen": -1.8121026754379272, + "logits/rejected": -1.807917594909668, + "logps/chosen": -6.628960609436035, + "logps/rejected": -251.71774291992188, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13763312995433807, + "rewards/margins": 4.8528923988342285, + "rewards/rejected": -4.715259075164795, + "step": 15390 + }, + { + "epoch": 0.9, + "learning_rate": 2.8281088032948487e-09, + "logits/chosen": -2.095303773880005, + "logits/rejected": -2.089510202407837, + "logps/chosen": -0.0006454128306359053, + "logps/rejected": -166.81002807617188, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00011118005932075903, + "rewards/margins": 5.119167327880859, + "rewards/rejected": -5.119056224822998, + "step": 15391 + }, + { + "epoch": 0.9, + "learning_rate": 2.8249851107232546e-09, + "logits/chosen": -1.6728708744049072, + "logits/rejected": -1.6634397506713867, + "logps/chosen": -15.407759666442871, + "logps/rejected": -88.40991973876953, + "loss": 0.4821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23006029427051544, + "rewards/margins": 0.7714802622795105, + "rewards/rejected": -0.5414199829101562, + "step": 15392 + }, + { + "epoch": 0.9, + "learning_rate": 2.8218630940389454e-09, + "logits/chosen": -1.9398947954177856, + "logits/rejected": -1.933500051498413, + "logps/chosen": -41.073036193847656, + "logps/rejected": -315.5652770996094, + "loss": 0.1881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5842140316963196, + "rewards/margins": 5.526185035705566, + "rewards/rejected": -4.9419708251953125, + "step": 15393 + }, + { + "epoch": 0.9, + "learning_rate": 2.8187427533528374e-09, + "logits/chosen": -1.820360541343689, + "logits/rejected": -1.791731357574463, + "logps/chosen": -164.29933166503906, + "logps/rejected": -259.6441650390625, + "loss": 0.2403, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3139100074768066, + "rewards/margins": 0.6175049543380737, + "rewards/rejected": 1.696405053138733, + "step": 15394 + }, + { + "epoch": 0.9, + "learning_rate": 2.8156240887757752e-09, + "logits/chosen": -1.986701488494873, + "logits/rejected": -1.9951163530349731, + "logps/chosen": -0.00019191470346413553, + "logps/rejected": -223.9441680908203, + "loss": 0.3399, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.77391096057545e-07, + "rewards/margins": 4.663910865783691, + "rewards/rejected": -4.663909912109375, + "step": 15395 + }, + { + "epoch": 0.9, + "learning_rate": 2.8125071004185486e-09, + "logits/chosen": -1.8707969188690186, + "logits/rejected": -1.8721555471420288, + "logps/chosen": -0.06469625979661942, + "logps/rejected": -106.18617248535156, + "loss": 0.436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00505216047167778, + "rewards/margins": 1.6420586109161377, + "rewards/rejected": -1.6471108198165894, + "step": 15396 + }, + { + "epoch": 0.9, + "learning_rate": 2.8093917883918793e-09, + "logits/chosen": -1.879510760307312, + "logits/rejected": -1.8832554817199707, + "logps/chosen": -99.3233642578125, + "logps/rejected": -136.38238525390625, + "loss": 0.2003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7003250122070312, + "rewards/margins": 1.629042148590088, + "rewards/rejected": -0.9287170767784119, + "step": 15397 + }, + { + "epoch": 0.9, + "learning_rate": 2.8062781528064573e-09, + "logits/chosen": -2.005948305130005, + "logits/rejected": -2.009514808654785, + "logps/chosen": -9.715343185234815e-05, + "logps/rejected": -167.7718505859375, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.1838734432531055e-06, + "rewards/margins": 3.6008894443511963, + "rewards/rejected": -3.6008851528167725, + "step": 15398 + }, + { + "epoch": 0.9, + "learning_rate": 2.803166193772882e-09, + "logits/chosen": -1.8188354969024658, + "logits/rejected": -1.811940312385559, + "logps/chosen": -220.6103057861328, + "logps/rejected": -364.6116638183594, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.712878465652466, + "rewards/margins": 1.0210754871368408, + "rewards/rejected": 1.691802978515625, + "step": 15399 + }, + { + "epoch": 0.9, + "learning_rate": 2.8000559114017106e-09, + "logits/chosen": -1.9732297658920288, + "logits/rejected": -1.9689501523971558, + "logps/chosen": -12.53176498413086, + "logps/rejected": -178.847900390625, + "loss": 0.2239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6943647265434265, + "rewards/margins": 2.180544853210449, + "rewards/rejected": -1.4861801862716675, + "step": 15400 + }, + { + "epoch": 0.9, + "learning_rate": 2.7969473058034267e-09, + "logits/chosen": -1.7544410228729248, + "logits/rejected": -1.7523587942123413, + "logps/chosen": -4.141100883483887, + "logps/rejected": -70.02240753173828, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23604507744312286, + "rewards/margins": 1.6681348085403442, + "rewards/rejected": -1.4320896863937378, + "step": 15401 + }, + { + "epoch": 0.9, + "learning_rate": 2.79384037708848e-09, + "logits/chosen": -1.7687476873397827, + "logits/rejected": -1.758886456489563, + "logps/chosen": -19.39994239807129, + "logps/rejected": -184.13204956054688, + "loss": 0.5005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35777854919433594, + "rewards/margins": 0.30534934997558594, + "rewards/rejected": 0.05242919921875, + "step": 15402 + }, + { + "epoch": 0.9, + "learning_rate": 2.790735125367227e-09, + "logits/chosen": -2.0148446559906006, + "logits/rejected": -2.0169174671173096, + "logps/chosen": -0.1693606674671173, + "logps/rejected": -202.91470336914062, + "loss": 0.4093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033494026865810156, + "rewards/margins": 2.138026475906372, + "rewards/rejected": -2.141375780105591, + "step": 15403 + }, + { + "epoch": 0.9, + "learning_rate": 2.7876315507499913e-09, + "logits/chosen": -1.8589072227478027, + "logits/rejected": -1.854801893234253, + "logps/chosen": -158.20584106445312, + "logps/rejected": -325.99407958984375, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.642364501953125, + "rewards/margins": 2.496804714202881, + "rewards/rejected": -0.8544403314590454, + "step": 15404 + }, + { + "epoch": 0.9, + "learning_rate": 2.7845296533470176e-09, + "logits/chosen": -1.8354014158248901, + "logits/rejected": -1.8395966291427612, + "logps/chosen": -221.58157348632812, + "logps/rejected": -386.40728759765625, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0106446743011475, + "rewards/margins": 3.0646486282348633, + "rewards/rejected": -1.0540039539337158, + "step": 15405 + }, + { + "epoch": 0.9, + "learning_rate": 2.7814294332685175e-09, + "logits/chosen": -1.9216670989990234, + "logits/rejected": -1.9327573776245117, + "logps/chosen": -202.892578125, + "logps/rejected": -301.9329528808594, + "loss": 0.1685, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1348068714141846, + "rewards/margins": 1.0281509160995483, + "rewards/rejected": 1.1066559553146362, + "step": 15406 + }, + { + "epoch": 0.9, + "learning_rate": 2.7783308906246027e-09, + "logits/chosen": -1.8281407356262207, + "logits/rejected": -1.8263354301452637, + "logps/chosen": -58.6734619140625, + "logps/rejected": -394.1873779296875, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0576171875, + "rewards/margins": 8.202014923095703, + "rewards/rejected": -6.144397258758545, + "step": 15407 + }, + { + "epoch": 0.9, + "learning_rate": 2.7752340255253637e-09, + "logits/chosen": -2.0179264545440674, + "logits/rejected": -2.009721040725708, + "logps/chosen": -45.25431823730469, + "logps/rejected": -291.9830627441406, + "loss": 0.0969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.225429892539978, + "rewards/margins": 3.527273178100586, + "rewards/rejected": -2.3018434047698975, + "step": 15408 + }, + { + "epoch": 0.9, + "learning_rate": 2.7721388380808174e-09, + "logits/chosen": -1.9100205898284912, + "logits/rejected": -1.8861132860183716, + "logps/chosen": -152.6002655029297, + "logps/rejected": -519.50634765625, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3529891967773438, + "rewards/margins": 7.065892219543457, + "rewards/rejected": -4.712903022766113, + "step": 15409 + }, + { + "epoch": 0.9, + "learning_rate": 2.7690453284009153e-09, + "logits/chosen": -1.886231541633606, + "logits/rejected": -1.847892165184021, + "logps/chosen": -106.78478240966797, + "logps/rejected": -285.0437316894531, + "loss": 0.0527, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1710898876190186, + "rewards/margins": 2.5073904991149902, + "rewards/rejected": 0.6636993288993835, + "step": 15410 + }, + { + "epoch": 0.9, + "learning_rate": 2.7659534965955523e-09, + "logits/chosen": -1.802207589149475, + "logits/rejected": -1.7936954498291016, + "logps/chosen": -142.7789764404297, + "logps/rejected": -211.4252471923828, + "loss": 0.1534, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6383484601974487, + "rewards/margins": 1.9986679553985596, + "rewards/rejected": -0.3603195250034332, + "step": 15411 + }, + { + "epoch": 0.9, + "learning_rate": 2.7628633427745686e-09, + "logits/chosen": -1.8713679313659668, + "logits/rejected": -1.858678936958313, + "logps/chosen": -103.03076171875, + "logps/rejected": -301.6856994628906, + "loss": 0.1194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8644149899482727, + "rewards/margins": 3.962357997894287, + "rewards/rejected": -3.097943067550659, + "step": 15412 + }, + { + "epoch": 0.9, + "learning_rate": 2.7597748670477372e-09, + "logits/chosen": -2.038846254348755, + "logits/rejected": -2.0279338359832764, + "logps/chosen": -25.262821197509766, + "logps/rejected": -185.45172119140625, + "loss": 0.1071, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2981773614883423, + "rewards/margins": 4.038510799407959, + "rewards/rejected": -2.7403335571289062, + "step": 15413 + }, + { + "epoch": 0.9, + "learning_rate": 2.756688069524793e-09, + "logits/chosen": -1.9651463031768799, + "logits/rejected": -1.9672898054122925, + "logps/chosen": -0.003521616104990244, + "logps/rejected": -81.91390991210938, + "loss": 0.691, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00028930717962794006, + "rewards/margins": -0.05147949233651161, + "rewards/rejected": 0.05119018629193306, + "step": 15414 + }, + { + "epoch": 0.9, + "learning_rate": 2.7536029503153703e-09, + "logits/chosen": -1.9988420009613037, + "logits/rejected": -2.007626533508301, + "logps/chosen": -20.282007217407227, + "logps/rejected": -185.81796264648438, + "loss": 0.4675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06716670840978622, + "rewards/margins": 1.4190317392349243, + "rewards/rejected": -1.4861984252929688, + "step": 15415 + }, + { + "epoch": 0.9, + "learning_rate": 2.7505195095290644e-09, + "logits/chosen": -1.9414160251617432, + "logits/rejected": -1.9469027519226074, + "logps/chosen": -109.1090087890625, + "logps/rejected": -338.8928527832031, + "loss": 0.1611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8049697875976562, + "rewards/margins": 3.514714002609253, + "rewards/rejected": -2.7097442150115967, + "step": 15416 + }, + { + "epoch": 0.9, + "learning_rate": 2.7474377472754493e-09, + "logits/chosen": -1.5956429243087769, + "logits/rejected": -1.6049951314926147, + "logps/chosen": -26.875587463378906, + "logps/rejected": -119.6352310180664, + "loss": 0.3778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4629676938056946, + "rewards/margins": 1.131178379058838, + "rewards/rejected": -0.6682106256484985, + "step": 15417 + }, + { + "epoch": 0.9, + "learning_rate": 2.7443576636639764e-09, + "logits/chosen": -1.903516411781311, + "logits/rejected": -1.907938003540039, + "logps/chosen": -22.877920150756836, + "logps/rejected": -288.52655029296875, + "loss": 0.1867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.712070107460022, + "rewards/margins": 4.527231216430664, + "rewards/rejected": -3.8151612281799316, + "step": 15418 + }, + { + "epoch": 0.9, + "learning_rate": 2.7412792588040746e-09, + "logits/chosen": -2.0590145587921143, + "logits/rejected": -2.0596182346343994, + "logps/chosen": -3.766976442420855e-05, + "logps/rejected": -186.36471557617188, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.46624768149195e-06, + "rewards/margins": 5.314474582672119, + "rewards/rejected": -5.3144731521606445, + "step": 15419 + }, + { + "epoch": 0.9, + "learning_rate": 2.7382025328050896e-09, + "logits/chosen": -1.8822946548461914, + "logits/rejected": -1.8872926235198975, + "logps/chosen": -102.79365539550781, + "logps/rejected": -194.154296875, + "loss": 0.1911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9990036487579346, + "rewards/margins": 1.0756332874298096, + "rewards/rejected": 0.923370361328125, + "step": 15420 + }, + { + "epoch": 0.9, + "learning_rate": 2.735127485776345e-09, + "logits/chosen": -1.9098328351974487, + "logits/rejected": -1.9103095531463623, + "logps/chosen": -16.1946964263916, + "logps/rejected": -23.089935302734375, + "loss": 0.7069, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3278988003730774, + "rewards/margins": -0.03897610306739807, + "rewards/rejected": -0.2889226973056793, + "step": 15421 + }, + { + "epoch": 0.9, + "learning_rate": 2.7320541178270594e-09, + "logits/chosen": -1.998320460319519, + "logits/rejected": -2.058776617050171, + "logps/chosen": -139.72103881835938, + "logps/rejected": -447.7130126953125, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4729034900665283, + "rewards/margins": 6.880972862243652, + "rewards/rejected": -5.408069133758545, + "step": 15422 + }, + { + "epoch": 0.9, + "learning_rate": 2.7289824290664287e-09, + "logits/chosen": -1.8238520622253418, + "logits/rejected": -1.8065627813339233, + "logps/chosen": -252.73928833007812, + "logps/rejected": -537.8995361328125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7972137928009033, + "rewards/margins": 7.565625190734863, + "rewards/rejected": -4.768411159515381, + "step": 15423 + }, + { + "epoch": 0.9, + "learning_rate": 2.725912419603571e-09, + "logits/chosen": -1.9463821649551392, + "logits/rejected": -1.9488314390182495, + "logps/chosen": -7.32159948348999, + "logps/rejected": -89.13746643066406, + "loss": 0.2248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8853700757026672, + "rewards/margins": 1.680001974105835, + "rewards/rejected": -0.7946319580078125, + "step": 15424 + }, + { + "epoch": 0.9, + "learning_rate": 2.722844089547549e-09, + "logits/chosen": -2.018575668334961, + "logits/rejected": -2.0177383422851562, + "logps/chosen": -5.995594024658203, + "logps/rejected": -167.19798278808594, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1862306147813797, + "rewards/margins": 2.526839017868042, + "rewards/rejected": -2.3406083583831787, + "step": 15425 + }, + { + "epoch": 0.9, + "learning_rate": 2.719777439007359e-09, + "logits/chosen": -1.9323018789291382, + "logits/rejected": -1.9485976696014404, + "logps/chosen": -163.42242431640625, + "logps/rejected": -299.0721130371094, + "loss": 0.0862, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.541632056236267, + "rewards/margins": 2.1993651390075684, + "rewards/rejected": -0.657733142375946, + "step": 15426 + }, + { + "epoch": 0.9, + "learning_rate": 2.7167124680919473e-09, + "logits/chosen": -1.7292853593826294, + "logits/rejected": -1.7253999710083008, + "logps/chosen": -0.576902449131012, + "logps/rejected": -85.9108657836914, + "loss": 0.4959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0448414646089077, + "rewards/margins": 1.1108043193817139, + "rewards/rejected": -1.155645728111267, + "step": 15427 + }, + { + "epoch": 0.9, + "learning_rate": 2.713649176910193e-09, + "logits/chosen": -1.9530620574951172, + "logits/rejected": -1.9415862560272217, + "logps/chosen": -0.0002281577471876517, + "logps/rejected": -287.80206298828125, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5337056841817684e-06, + "rewards/margins": 6.528326511383057, + "rewards/rejected": -6.528334140777588, + "step": 15428 + }, + { + "epoch": 0.9, + "learning_rate": 2.710587565570932e-09, + "logits/chosen": -1.6850192546844482, + "logits/rejected": -1.6508300304412842, + "logps/chosen": -173.94903564453125, + "logps/rejected": -329.27667236328125, + "loss": 0.1098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3476288318634033, + "rewards/margins": 1.8906586170196533, + "rewards/rejected": 0.45697021484375, + "step": 15429 + }, + { + "epoch": 0.9, + "learning_rate": 2.70752763418291e-09, + "logits/chosen": -1.9758398532867432, + "logits/rejected": -1.976273536682129, + "logps/chosen": -12.085010528564453, + "logps/rejected": -281.809814453125, + "loss": 0.1429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8795345425605774, + "rewards/margins": 5.996890068054199, + "rewards/rejected": -5.1173553466796875, + "step": 15430 + }, + { + "epoch": 0.9, + "learning_rate": 2.7044693828548406e-09, + "logits/chosen": -1.8953287601470947, + "logits/rejected": -1.9207725524902344, + "logps/chosen": -229.37217712402344, + "logps/rejected": -307.79736328125, + "loss": 0.0795, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9804840087890625, + "rewards/margins": 2.059957981109619, + "rewards/rejected": -0.07947387546300888, + "step": 15431 + }, + { + "epoch": 0.9, + "learning_rate": 2.7014128116953637e-09, + "logits/chosen": -1.8798809051513672, + "logits/rejected": -1.846016526222229, + "logps/chosen": -261.75262451171875, + "logps/rejected": -422.6187744140625, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.592681884765625, + "rewards/margins": 3.3593413829803467, + "rewards/rejected": -0.7666595578193665, + "step": 15432 + }, + { + "epoch": 0.9, + "learning_rate": 2.6983579208130768e-09, + "logits/chosen": -1.823358178138733, + "logits/rejected": -1.8263025283813477, + "logps/chosen": -22.35729217529297, + "logps/rejected": -99.235595703125, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30607834458351135, + "rewards/margins": 1.3013992309570312, + "rewards/rejected": -0.9953209161758423, + "step": 15433 + }, + { + "epoch": 0.9, + "learning_rate": 2.6953047103164815e-09, + "logits/chosen": -2.016732931137085, + "logits/rejected": -2.005786418914795, + "logps/chosen": -191.64096069335938, + "logps/rejected": -353.11798095703125, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7567825317382812, + "rewards/margins": 3.604661464691162, + "rewards/rejected": 0.15212097764015198, + "step": 15434 + }, + { + "epoch": 0.9, + "learning_rate": 2.692253180314058e-09, + "logits/chosen": -1.8310530185699463, + "logits/rejected": -1.8299119472503662, + "logps/chosen": -5.391932964324951, + "logps/rejected": -407.6517639160156, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19938169419765472, + "rewards/margins": 8.99738883972168, + "rewards/rejected": -8.798007011413574, + "step": 15435 + }, + { + "epoch": 0.9, + "learning_rate": 2.6892033309142136e-09, + "logits/chosen": -1.9125789403915405, + "logits/rejected": -1.955682635307312, + "logps/chosen": -250.65383911132812, + "logps/rejected": -460.36444091796875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4876556396484375, + "rewards/margins": 8.396987915039062, + "rewards/rejected": -4.909332275390625, + "step": 15436 + }, + { + "epoch": 0.9, + "learning_rate": 2.6861551622252843e-09, + "logits/chosen": -1.970586895942688, + "logits/rejected": -1.9787821769714355, + "logps/chosen": -8.939187049865723, + "logps/rejected": -188.74490356445312, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7164139747619629, + "rewards/margins": 1.6391221284866333, + "rewards/rejected": -0.9227081537246704, + "step": 15437 + }, + { + "epoch": 0.9, + "learning_rate": 2.6831086743555666e-09, + "logits/chosen": -1.996027946472168, + "logits/rejected": -1.984803557395935, + "logps/chosen": -1.8763253688812256, + "logps/rejected": -149.74862670898438, + "loss": 0.4982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1443532407283783, + "rewards/margins": 1.241654396057129, + "rewards/rejected": -1.3860076665878296, + "step": 15438 + }, + { + "epoch": 0.9, + "learning_rate": 2.680063867413268e-09, + "logits/chosen": -1.7176218032836914, + "logits/rejected": -1.7313792705535889, + "logps/chosen": -87.73826599121094, + "logps/rejected": -316.5429382324219, + "loss": 0.4052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2402191162109375, + "rewards/margins": 4.702517986297607, + "rewards/rejected": -4.942737102508545, + "step": 15439 + }, + { + "epoch": 0.9, + "learning_rate": 2.6770207415065804e-09, + "logits/chosen": -1.8481942415237427, + "logits/rejected": -1.847175121307373, + "logps/chosen": -102.61567687988281, + "logps/rejected": -112.06904602050781, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6063950061798096, + "rewards/margins": 0.2954139709472656, + "rewards/rejected": 1.310981035232544, + "step": 15440 + }, + { + "epoch": 0.9, + "learning_rate": 2.6739792967435947e-09, + "logits/chosen": -1.8461402654647827, + "logits/rejected": -1.8942866325378418, + "logps/chosen": -178.88665771484375, + "logps/rejected": -347.953125, + "loss": 0.0584, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4015213251113892, + "rewards/margins": 3.0839858055114746, + "rewards/rejected": -1.682464599609375, + "step": 15441 + }, + { + "epoch": 0.9, + "learning_rate": 2.670939533232358e-09, + "logits/chosen": -1.989786148071289, + "logits/rejected": -1.9721932411193848, + "logps/chosen": -2.6702608010964468e-05, + "logps/rejected": -150.83682250976562, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.96055542700924e-08, + "rewards/margins": 4.1564483642578125, + "rewards/rejected": -4.1564483642578125, + "step": 15442 + }, + { + "epoch": 0.9, + "learning_rate": 2.667901451080856e-09, + "logits/chosen": -1.922263503074646, + "logits/rejected": -1.9024336338043213, + "logps/chosen": -44.11420822143555, + "logps/rejected": -248.53741455078125, + "loss": 0.1641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6729980707168579, + "rewards/margins": 3.196951389312744, + "rewards/rejected": -2.5239531993865967, + "step": 15443 + }, + { + "epoch": 0.9, + "learning_rate": 2.664865050397036e-09, + "logits/chosen": -1.9325788021087646, + "logits/rejected": -1.9384350776672363, + "logps/chosen": -30.395536422729492, + "logps/rejected": -100.77552032470703, + "loss": 0.4171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17917309701442719, + "rewards/margins": 1.078066110610962, + "rewards/rejected": -0.8988929986953735, + "step": 15444 + }, + { + "epoch": 0.9, + "learning_rate": 2.6618303312887392e-09, + "logits/chosen": -1.82742178440094, + "logits/rejected": -1.8225444555282593, + "logps/chosen": -209.52854919433594, + "logps/rejected": -308.9091491699219, + "loss": 0.0926, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.025978088378906, + "rewards/margins": 1.6573379039764404, + "rewards/rejected": 2.368640184402466, + "step": 15445 + }, + { + "epoch": 0.9, + "learning_rate": 2.6587972938637847e-09, + "logits/chosen": -2.0410921573638916, + "logits/rejected": -2.029236316680908, + "logps/chosen": -53.8843879699707, + "logps/rejected": -223.31021118164062, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15197335183620453, + "rewards/margins": 3.136314630508423, + "rewards/rejected": -2.9843413829803467, + "step": 15446 + }, + { + "epoch": 0.9, + "learning_rate": 2.6557659382299256e-09, + "logits/chosen": -1.8818587064743042, + "logits/rejected": -1.8731075525283813, + "logps/chosen": -13.540472984313965, + "logps/rejected": -122.80879211425781, + "loss": 0.1849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47856664657592773, + "rewards/margins": 2.7472662925720215, + "rewards/rejected": -2.2686996459960938, + "step": 15447 + }, + { + "epoch": 0.9, + "learning_rate": 2.652736264494848e-09, + "logits/chosen": -1.7877426147460938, + "logits/rejected": -1.7880346775054932, + "logps/chosen": -31.49597930908203, + "logps/rejected": -134.82162475585938, + "loss": 0.7206, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0019674301147460938, + "rewards/margins": -0.26848354935646057, + "rewards/rejected": 0.2665161192417145, + "step": 15448 + }, + { + "epoch": 0.9, + "learning_rate": 2.6497082727661713e-09, + "logits/chosen": -1.9483625888824463, + "logits/rejected": -1.9621447324752808, + "logps/chosen": -174.42642211914062, + "logps/rejected": -544.7178955078125, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1719911098480225, + "rewards/margins": 4.882965087890625, + "rewards/rejected": -1.710974097251892, + "step": 15449 + }, + { + "epoch": 0.9, + "learning_rate": 2.6466819631514757e-09, + "logits/chosen": -1.9184322357177734, + "logits/rejected": -1.9078946113586426, + "logps/chosen": -69.8299331665039, + "logps/rejected": -290.89154052734375, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4105217456817627, + "rewards/margins": 6.791726112365723, + "rewards/rejected": -4.381204128265381, + "step": 15450 + }, + { + "epoch": 0.9, + "learning_rate": 2.6436573357582646e-09, + "logits/chosen": -1.966770887374878, + "logits/rejected": -1.9623808860778809, + "logps/chosen": -51.16699981689453, + "logps/rejected": -126.60945892333984, + "loss": 0.2298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3661689758300781, + "rewards/margins": 1.0204436779022217, + "rewards/rejected": 0.34572526812553406, + "step": 15451 + }, + { + "epoch": 0.9, + "learning_rate": 2.640634390694002e-09, + "logits/chosen": -1.7564090490341187, + "logits/rejected": -1.7618467807769775, + "logps/chosen": -185.88941955566406, + "logps/rejected": -243.2803955078125, + "loss": 0.1774, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0061936378479004, + "rewards/margins": 1.2400436401367188, + "rewards/rejected": 0.7661499381065369, + "step": 15452 + }, + { + "epoch": 0.9, + "learning_rate": 2.6376131280660518e-09, + "logits/chosen": -1.9891688823699951, + "logits/rejected": -2.0194432735443115, + "logps/chosen": -206.4745635986328, + "logps/rejected": -423.6649169921875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.234706163406372, + "rewards/margins": 9.909743309020996, + "rewards/rejected": -6.675036907196045, + "step": 15453 + }, + { + "epoch": 0.9, + "learning_rate": 2.6345935479817616e-09, + "logits/chosen": -1.480034589767456, + "logits/rejected": -1.4881893396377563, + "logps/chosen": -83.51834106445312, + "logps/rejected": -163.9596405029297, + "loss": 0.1459, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9324226379394531, + "rewards/margins": 1.5886894464492798, + "rewards/rejected": 0.3437332212924957, + "step": 15454 + }, + { + "epoch": 0.9, + "learning_rate": 2.6315756505483955e-09, + "logits/chosen": -1.9488123655319214, + "logits/rejected": -1.941979169845581, + "logps/chosen": -37.71243667602539, + "logps/rejected": -199.94061279296875, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1818462461233139, + "rewards/margins": 1.859531044960022, + "rewards/rejected": -2.041377305984497, + "step": 15455 + }, + { + "epoch": 0.9, + "learning_rate": 2.6285594358731734e-09, + "logits/chosen": -1.6481554508209229, + "logits/rejected": -1.6712895631790161, + "logps/chosen": -165.38699340820312, + "logps/rejected": -292.10357666015625, + "loss": 0.0861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2101730108261108, + "rewards/margins": 2.6808700561523438, + "rewards/rejected": -1.470697045326233, + "step": 15456 + }, + { + "epoch": 0.9, + "learning_rate": 2.6255449040632372e-09, + "logits/chosen": -1.7923169136047363, + "logits/rejected": -1.8029624223709106, + "logps/chosen": -257.18743896484375, + "logps/rejected": -409.4029541015625, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3571135997772217, + "rewards/margins": 4.5110015869140625, + "rewards/rejected": -2.153887987136841, + "step": 15457 + }, + { + "epoch": 0.9, + "learning_rate": 2.622532055225668e-09, + "logits/chosen": -1.8481814861297607, + "logits/rejected": -1.8366234302520752, + "logps/chosen": -34.49125289916992, + "logps/rejected": -131.63259887695312, + "loss": 0.6515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6572563052177429, + "rewards/margins": 1.0430145263671875, + "rewards/rejected": -1.7002708911895752, + "step": 15458 + }, + { + "epoch": 0.9, + "learning_rate": 2.619520889467519e-09, + "logits/chosen": -1.9370290040969849, + "logits/rejected": -1.911619782447815, + "logps/chosen": -179.91993713378906, + "logps/rejected": -428.8397521972656, + "loss": 0.178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09412536770105362, + "rewards/margins": 6.126751899719238, + "rewards/rejected": -6.032626628875732, + "step": 15459 + }, + { + "epoch": 0.9, + "learning_rate": 2.6165114068957495e-09, + "logits/chosen": -1.9904917478561401, + "logits/rejected": -1.995762586593628, + "logps/chosen": -95.01056671142578, + "logps/rejected": -208.0640869140625, + "loss": 0.1346, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3426231145858765, + "rewards/margins": 1.9051673412322998, + "rewards/rejected": -0.5625442862510681, + "step": 15460 + }, + { + "epoch": 0.9, + "learning_rate": 2.613503607617279e-09, + "logits/chosen": -1.9630625247955322, + "logits/rejected": -1.9680827856063843, + "logps/chosen": -77.09515380859375, + "logps/rejected": -220.97134399414062, + "loss": 0.5787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5130645632743835, + "rewards/margins": 1.0738098621368408, + "rewards/rejected": -1.5868743658065796, + "step": 15461 + }, + { + "epoch": 0.9, + "learning_rate": 2.6104974917389332e-09, + "logits/chosen": -1.9018827676773071, + "logits/rejected": -1.8732413053512573, + "logps/chosen": -153.5944061279297, + "logps/rejected": -285.49700927734375, + "loss": 0.1688, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7958725690841675, + "rewards/margins": 1.5026779174804688, + "rewards/rejected": 0.29319459199905396, + "step": 15462 + }, + { + "epoch": 0.9, + "learning_rate": 2.6074930593675435e-09, + "logits/chosen": -1.6919323205947876, + "logits/rejected": -1.682189702987671, + "logps/chosen": -150.05572509765625, + "logps/rejected": -554.3165283203125, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7411041259765625, + "rewards/margins": 12.414865493774414, + "rewards/rejected": -9.673761367797852, + "step": 15463 + }, + { + "epoch": 0.9, + "learning_rate": 2.6044903106098136e-09, + "logits/chosen": -1.9188395738601685, + "logits/rejected": -1.9426374435424805, + "logps/chosen": -142.3118133544922, + "logps/rejected": -602.89208984375, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8925964832305908, + "rewards/margins": 6.13031005859375, + "rewards/rejected": -4.237713813781738, + "step": 15464 + }, + { + "epoch": 0.9, + "learning_rate": 2.601489245572425e-09, + "logits/chosen": -1.8845152854919434, + "logits/rejected": -1.9301130771636963, + "logps/chosen": -268.34869384765625, + "logps/rejected": -532.3409423828125, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5381897687911987, + "rewards/margins": 8.721972465515137, + "rewards/rejected": -7.183783054351807, + "step": 15465 + }, + { + "epoch": 0.9, + "learning_rate": 2.5984898643619914e-09, + "logits/chosen": -1.9929051399230957, + "logits/rejected": -1.9897688627243042, + "logps/chosen": -14.022347450256348, + "logps/rejected": -187.94248962402344, + "loss": 0.2888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46951475739479065, + "rewards/margins": 1.9620730876922607, + "rewards/rejected": -1.4925583600997925, + "step": 15466 + }, + { + "epoch": 0.9, + "learning_rate": 2.5954921670850673e-09, + "logits/chosen": -2.0855400562286377, + "logits/rejected": -2.0929691791534424, + "logps/chosen": -48.882423400878906, + "logps/rejected": -237.35626220703125, + "loss": 0.2571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4345382750034332, + "rewards/margins": 2.1526551246643066, + "rewards/rejected": -1.7181167602539062, + "step": 15467 + }, + { + "epoch": 0.9, + "learning_rate": 2.592496153848134e-09, + "logits/chosen": -2.0389645099639893, + "logits/rejected": -2.0351223945617676, + "logps/chosen": -0.3189701437950134, + "logps/rejected": -92.56983184814453, + "loss": 0.387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.051324523985385895, + "rewards/margins": 2.05696439743042, + "rewards/rejected": -2.0056397914886475, + "step": 15468 + }, + { + "epoch": 0.9, + "learning_rate": 2.589501824757634e-09, + "logits/chosen": -1.8976689577102661, + "logits/rejected": -1.895015001296997, + "logps/chosen": -22.89435386657715, + "logps/rejected": -126.44044494628906, + "loss": 0.4648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08451271057128906, + "rewards/margins": 1.0402637720108032, + "rewards/rejected": -0.9557510614395142, + "step": 15469 + }, + { + "epoch": 0.9, + "learning_rate": 2.586509179919938e-09, + "logits/chosen": -1.7191523313522339, + "logits/rejected": -1.7174142599105835, + "logps/chosen": -15.602376937866211, + "logps/rejected": -146.16494750976562, + "loss": 0.6255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10499649494886398, + "rewards/margins": 0.11644364148378372, + "rewards/rejected": -0.011447143740952015, + "step": 15470 + }, + { + "epoch": 0.9, + "learning_rate": 2.583518219441372e-09, + "logits/chosen": -1.6890594959259033, + "logits/rejected": -1.672626256942749, + "logps/chosen": -156.6283721923828, + "logps/rejected": -198.1661376953125, + "loss": 0.1567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8423904180526733, + "rewards/margins": 1.7422943115234375, + "rewards/rejected": 0.10009612888097763, + "step": 15471 + }, + { + "epoch": 0.9, + "learning_rate": 2.5805289434281676e-09, + "logits/chosen": -1.9124088287353516, + "logits/rejected": -1.897047758102417, + "logps/chosen": -197.6033935546875, + "logps/rejected": -319.76336669921875, + "loss": 0.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8967971801757812, + "rewards/margins": 1.5382248163223267, + "rewards/rejected": 1.3585723638534546, + "step": 15472 + }, + { + "epoch": 0.9, + "learning_rate": 2.5775413519865286e-09, + "logits/chosen": -2.039940595626831, + "logits/rejected": -2.0218026638031006, + "logps/chosen": -0.006269845180213451, + "logps/rejected": -177.48489379882812, + "loss": 0.3885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002275913575431332, + "rewards/margins": 2.4383556842803955, + "rewards/rejected": -2.4385833740234375, + "step": 15473 + }, + { + "epoch": 0.9, + "learning_rate": 2.574555445222587e-09, + "logits/chosen": -2.0082764625549316, + "logits/rejected": -1.9802662134170532, + "logps/chosen": -97.86524963378906, + "logps/rejected": -258.2022705078125, + "loss": 0.2238, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6942070722579956, + "rewards/margins": 1.0942314863204956, + "rewards/rejected": 0.5999755859375, + "step": 15474 + }, + { + "epoch": 0.9, + "learning_rate": 2.5715712232424302e-09, + "logits/chosen": -1.807539939880371, + "logits/rejected": -1.7980101108551025, + "logps/chosen": -206.0802001953125, + "logps/rejected": -224.04193115234375, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6566833853721619, + "rewards/margins": 0.6179855465888977, + "rewards/rejected": 0.03869781643152237, + "step": 15475 + }, + { + "epoch": 0.9, + "learning_rate": 2.568588686152051e-09, + "logits/chosen": -1.88704514503479, + "logits/rejected": -1.8727365732192993, + "logps/chosen": -54.08327102661133, + "logps/rejected": -223.60952758789062, + "loss": 0.1553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.821902871131897, + "rewards/margins": 3.05574369430542, + "rewards/rejected": -2.2338409423828125, + "step": 15476 + }, + { + "epoch": 0.9, + "learning_rate": 2.5656078340574093e-09, + "logits/chosen": -1.8329154253005981, + "logits/rejected": -1.8350547552108765, + "logps/chosen": -3.1921920776367188, + "logps/rejected": -15.817217826843262, + "loss": 0.6908, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03357810899615288, + "rewards/margins": -0.1114892065525055, + "rewards/rejected": 0.14506731927394867, + "step": 15477 + }, + { + "epoch": 0.9, + "learning_rate": 2.562628667064415e-09, + "logits/chosen": -1.8568438291549683, + "logits/rejected": -1.7924119234085083, + "logps/chosen": -171.76312255859375, + "logps/rejected": -379.54754638671875, + "loss": 0.1607, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4172638654708862, + "rewards/margins": 1.7535920143127441, + "rewards/rejected": -0.3363281190395355, + "step": 15478 + }, + { + "epoch": 0.9, + "learning_rate": 2.5596511852788882e-09, + "logits/chosen": -1.95267653465271, + "logits/rejected": -1.9638327360153198, + "logps/chosen": -204.0251007080078, + "logps/rejected": -228.2230224609375, + "loss": 0.1543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6202148795127869, + "rewards/margins": 3.302356004714966, + "rewards/rejected": -2.682141065597534, + "step": 15479 + }, + { + "epoch": 0.9, + "learning_rate": 2.556675388806612e-09, + "logits/chosen": -1.7534047365188599, + "logits/rejected": -1.74551260471344, + "logps/chosen": -294.459716796875, + "logps/rejected": -429.24603271484375, + "loss": 0.18, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3153748512268066, + "rewards/margins": 0.8998322486877441, + "rewards/rejected": 1.4155426025390625, + "step": 15480 + }, + { + "epoch": 0.9, + "learning_rate": 2.5537012777532785e-09, + "logits/chosen": -2.028782844543457, + "logits/rejected": -2.0293540954589844, + "logps/chosen": -6.18546724319458, + "logps/rejected": -192.6214599609375, + "loss": 0.4642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037619736045598984, + "rewards/margins": 1.4488747119903564, + "rewards/rejected": -1.4864944219589233, + "step": 15481 + }, + { + "epoch": 0.9, + "learning_rate": 2.550728852224582e-09, + "logits/chosen": -1.9158799648284912, + "logits/rejected": -1.9438740015029907, + "logps/chosen": -192.73553466796875, + "logps/rejected": -408.9077453613281, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7850341796875, + "rewards/margins": 4.116406440734863, + "rewards/rejected": -1.3313721418380737, + "step": 15482 + }, + { + "epoch": 0.9, + "learning_rate": 2.5477581123260815e-09, + "logits/chosen": -1.9188311100006104, + "logits/rejected": -1.9199446439743042, + "logps/chosen": -30.569976806640625, + "logps/rejected": -224.1370849609375, + "loss": 0.1673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5897583365440369, + "rewards/margins": 4.926585674285889, + "rewards/rejected": -4.336827278137207, + "step": 15483 + }, + { + "epoch": 0.9, + "learning_rate": 2.5447890581633325e-09, + "logits/chosen": -1.7262524366378784, + "logits/rejected": -1.7226756811141968, + "logps/chosen": -252.5477752685547, + "logps/rejected": -315.34527587890625, + "loss": 0.1523, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.618367075920105, + "rewards/margins": 1.83885657787323, + "rewards/rejected": -0.220489501953125, + "step": 15484 + }, + { + "epoch": 0.9, + "learning_rate": 2.5418216898418055e-09, + "logits/chosen": -2.0100884437561035, + "logits/rejected": -2.0157289505004883, + "logps/chosen": -170.39505004882812, + "logps/rejected": -317.8702392578125, + "loss": 0.0832, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.176403760910034, + "rewards/margins": 1.9314086437225342, + "rewards/rejected": 0.2449951171875, + "step": 15485 + }, + { + "epoch": 0.9, + "learning_rate": 2.5388560074669164e-09, + "logits/chosen": -1.803634524345398, + "logits/rejected": -1.7924078702926636, + "logps/chosen": -29.45157241821289, + "logps/rejected": -163.24867248535156, + "loss": 0.1924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8085060119628906, + "rewards/margins": 2.9212806224823, + "rewards/rejected": -2.112774610519409, + "step": 15486 + }, + { + "epoch": 0.9, + "learning_rate": 2.53589201114402e-09, + "logits/chosen": -1.8756498098373413, + "logits/rejected": -1.8765006065368652, + "logps/chosen": -9.065462112426758, + "logps/rejected": -116.44711303710938, + "loss": 0.3368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1581886261701584, + "rewards/margins": 2.806459903717041, + "rewards/rejected": -2.648271322250366, + "step": 15487 + }, + { + "epoch": 0.9, + "learning_rate": 2.5329297009784035e-09, + "logits/chosen": -2.117304563522339, + "logits/rejected": -2.117619037628174, + "logps/chosen": -7.626657962799072, + "logps/rejected": -146.71249389648438, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1999155580997467, + "rewards/margins": 1.4850993156433105, + "rewards/rejected": -1.2851837873458862, + "step": 15488 + }, + { + "epoch": 0.9, + "learning_rate": 2.529969077075317e-09, + "logits/chosen": -1.7745475769042969, + "logits/rejected": -1.813136339187622, + "logps/chosen": -230.66390991210938, + "logps/rejected": -350.479248046875, + "loss": 0.0781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1306625604629517, + "rewards/margins": 4.143693447113037, + "rewards/rejected": -3.013031005859375, + "step": 15489 + }, + { + "epoch": 0.9, + "learning_rate": 2.5270101395399368e-09, + "logits/chosen": -2.0014357566833496, + "logits/rejected": -1.9951616525650024, + "logps/chosen": -88.39195251464844, + "logps/rejected": -478.41986083984375, + "loss": 0.0628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4691985845565796, + "rewards/margins": 5.334347724914551, + "rewards/rejected": -3.8651490211486816, + "step": 15490 + }, + { + "epoch": 0.9, + "learning_rate": 2.524052888477357e-09, + "logits/chosen": -1.9546014070510864, + "logits/rejected": -1.954807162284851, + "logps/chosen": -4.872561454772949, + "logps/rejected": -156.45494079589844, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23086166381835938, + "rewards/margins": 4.058480262756348, + "rewards/rejected": -3.827618360519409, + "step": 15491 + }, + { + "epoch": 0.9, + "learning_rate": 2.5210973239926548e-09, + "logits/chosen": -1.8088428974151611, + "logits/rejected": -1.8252902030944824, + "logps/chosen": -8.261028415290639e-05, + "logps/rejected": -171.656982421875, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0979307464585872e-06, + "rewards/margins": 3.690624237060547, + "rewards/rejected": -3.690622091293335, + "step": 15492 + }, + { + "epoch": 0.9, + "learning_rate": 2.5181434461908234e-09, + "logits/chosen": -1.916872501373291, + "logits/rejected": -1.9194166660308838, + "logps/chosen": -21.879173278808594, + "logps/rejected": -219.60809326171875, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31636181473731995, + "rewards/margins": 3.722536087036133, + "rewards/rejected": -4.03889799118042, + "step": 15493 + }, + { + "epoch": 0.9, + "learning_rate": 2.5151912551767964e-09, + "logits/chosen": -1.7546477317810059, + "logits/rejected": -1.7344223260879517, + "logps/chosen": -145.4804229736328, + "logps/rejected": -211.09234619140625, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7099746465682983, + "rewards/margins": 0.43258512020111084, + "rewards/rejected": 1.2773895263671875, + "step": 15494 + }, + { + "epoch": 0.9, + "learning_rate": 2.5122407510554442e-09, + "logits/chosen": -1.9890283346176147, + "logits/rejected": -1.9534279108047485, + "logps/chosen": -206.83721923828125, + "logps/rejected": -297.0081787109375, + "loss": 0.0774, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4465529918670654, + "rewards/margins": 2.410792589187622, + "rewards/rejected": 0.03576049953699112, + "step": 15495 + }, + { + "epoch": 0.9, + "learning_rate": 2.5092919339315897e-09, + "logits/chosen": -1.691845178604126, + "logits/rejected": -1.7024352550506592, + "logps/chosen": -224.14251708984375, + "logps/rejected": -480.1775817871094, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6979522705078125, + "rewards/margins": 5.8404083251953125, + "rewards/rejected": -4.1424560546875, + "step": 15496 + }, + { + "epoch": 0.9, + "learning_rate": 2.5063448039099878e-09, + "logits/chosen": -1.9650499820709229, + "logits/rejected": -1.9706189632415771, + "logps/chosen": -221.725830078125, + "logps/rejected": -551.3949584960938, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.350848436355591, + "rewards/margins": 6.730334281921387, + "rewards/rejected": -3.379486083984375, + "step": 15497 + }, + { + "epoch": 0.9, + "learning_rate": 2.5033993610953318e-09, + "logits/chosen": -1.7872769832611084, + "logits/rejected": -1.8010900020599365, + "logps/chosen": -25.359588623046875, + "logps/rejected": -102.06449890136719, + "loss": 0.3601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.474456787109375, + "rewards/margins": 1.1430160999298096, + "rewards/rejected": -0.6685592532157898, + "step": 15498 + }, + { + "epoch": 0.9, + "learning_rate": 2.500455605592272e-09, + "logits/chosen": -1.7943850755691528, + "logits/rejected": -1.7970149517059326, + "logps/chosen": -138.84425354003906, + "logps/rejected": -369.44158935546875, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1831146478652954, + "rewards/margins": 2.317648410797119, + "rewards/rejected": -1.1345337629318237, + "step": 15499 + }, + { + "epoch": 0.9, + "learning_rate": 2.497513537505358e-09, + "logits/chosen": -1.7592278718948364, + "logits/rejected": -1.7457647323608398, + "logps/chosen": -51.80021667480469, + "logps/rejected": -322.73358154296875, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022388458251953125, + "rewards/margins": 3.577901601791382, + "rewards/rejected": -3.600290060043335, + "step": 15500 + }, + { + "epoch": 0.9, + "learning_rate": 2.4945731569391393e-09, + "logits/chosen": -1.9766600131988525, + "logits/rejected": -1.9760926961898804, + "logps/chosen": -0.32774436473846436, + "logps/rejected": -161.13153076171875, + "loss": 0.3645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018973013386130333, + "rewards/margins": 3.384734869003296, + "rewards/rejected": -3.403707981109619, + "step": 15501 + }, + { + "epoch": 0.9, + "learning_rate": 2.4916344639980493e-09, + "logits/chosen": -2.0007617473602295, + "logits/rejected": -1.980620265007019, + "logps/chosen": -62.71876525878906, + "logps/rejected": -222.80978393554688, + "loss": 0.1189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3960342407226562, + "rewards/margins": 6.188299655914307, + "rewards/rejected": -4.79226541519165, + "step": 15502 + }, + { + "epoch": 0.9, + "learning_rate": 2.4886974587864873e-09, + "logits/chosen": -2.0341317653656006, + "logits/rejected": -2.036459445953369, + "logps/chosen": -38.80186462402344, + "logps/rejected": -151.3329315185547, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2004062682390213, + "rewards/margins": 1.9363194704055786, + "rewards/rejected": -1.7359131574630737, + "step": 15503 + }, + { + "epoch": 0.9, + "learning_rate": 2.485762141408798e-09, + "logits/chosen": -2.05271053314209, + "logits/rejected": -2.0522053241729736, + "logps/chosen": -50.09546661376953, + "logps/rejected": -206.7001190185547, + "loss": 0.2974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4404098987579346, + "rewards/margins": 0.4413238763809204, + "rewards/rejected": 0.9990860223770142, + "step": 15504 + }, + { + "epoch": 0.9, + "learning_rate": 2.4828285119692593e-09, + "logits/chosen": -1.9555420875549316, + "logits/rejected": -1.9516111612319946, + "logps/chosen": -17.599477767944336, + "logps/rejected": -110.303466796875, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17889519035816193, + "rewards/margins": 2.431056022644043, + "rewards/rejected": -2.2521607875823975, + "step": 15505 + }, + { + "epoch": 0.9, + "learning_rate": 2.479896570572082e-09, + "logits/chosen": -2.045982599258423, + "logits/rejected": -2.0404343605041504, + "logps/chosen": -6.895649433135986, + "logps/rejected": -109.0150146484375, + "loss": 0.5293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20018143951892853, + "rewards/margins": 0.3935942053794861, + "rewards/rejected": -0.19341278076171875, + "step": 15506 + }, + { + "epoch": 0.9, + "learning_rate": 2.476966317321422e-09, + "logits/chosen": -1.9287291765213013, + "logits/rejected": -1.9412105083465576, + "logps/chosen": -35.82785415649414, + "logps/rejected": -156.3206329345703, + "loss": 0.6611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22319793701171875, + "rewards/margins": 0.44368135929107666, + "rewards/rejected": -0.6668792963027954, + "step": 15507 + }, + { + "epoch": 0.9, + "learning_rate": 2.4740377523213728e-09, + "logits/chosen": -1.8027374744415283, + "logits/rejected": -1.7938179969787598, + "logps/chosen": -250.80694580078125, + "logps/rejected": -379.3802795410156, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3103578090667725, + "rewards/margins": 4.8347320556640625, + "rewards/rejected": -1.5243743658065796, + "step": 15508 + }, + { + "epoch": 0.9, + "learning_rate": 2.4711108756759915e-09, + "logits/chosen": -2.0293405055999756, + "logits/rejected": -2.029226303100586, + "logps/chosen": -20.544139862060547, + "logps/rejected": -156.97596740722656, + "loss": 0.2722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19054031372070312, + "rewards/margins": 4.792032718658447, + "rewards/rejected": -4.601492404937744, + "step": 15509 + }, + { + "epoch": 0.9, + "learning_rate": 2.4681856874892326e-09, + "logits/chosen": -1.7718006372451782, + "logits/rejected": -1.75501549243927, + "logps/chosen": -166.56417846679688, + "logps/rejected": -588.5364990234375, + "loss": 0.0371, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6241806745529175, + "rewards/margins": 7.102964878082275, + "rewards/rejected": -5.478784084320068, + "step": 15510 + }, + { + "epoch": 0.9, + "learning_rate": 2.4652621878650194e-09, + "logits/chosen": -1.8259599208831787, + "logits/rejected": -1.8095898628234863, + "logps/chosen": -228.26177978515625, + "logps/rejected": -394.6741943359375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.091705322265625, + "rewards/margins": 7.473876953125, + "rewards/rejected": -4.382171630859375, + "step": 15511 + }, + { + "epoch": 0.9, + "learning_rate": 2.4623403769072127e-09, + "logits/chosen": -2.06247615814209, + "logits/rejected": -2.056717872619629, + "logps/chosen": -28.698945999145508, + "logps/rejected": -148.499755859375, + "loss": 0.2627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3386507034301758, + "rewards/margins": 3.0551669597625732, + "rewards/rejected": -2.7165162563323975, + "step": 15512 + }, + { + "epoch": 0.9, + "learning_rate": 2.459420254719613e-09, + "logits/chosen": -1.9242216348648071, + "logits/rejected": -1.9188014268875122, + "logps/chosen": -6.090218544006348, + "logps/rejected": -202.07009887695312, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.230176642537117, + "rewards/margins": 4.786631107330322, + "rewards/rejected": -4.556454658508301, + "step": 15513 + }, + { + "epoch": 0.9, + "learning_rate": 2.456501821405943e-09, + "logits/chosen": -1.885262131690979, + "logits/rejected": -1.8702327013015747, + "logps/chosen": -204.86212158203125, + "logps/rejected": -294.369873046875, + "loss": 0.1229, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.412130832672119, + "rewards/margins": 1.4642333984375, + "rewards/rejected": 0.9478973746299744, + "step": 15514 + }, + { + "epoch": 0.9, + "learning_rate": 2.4535850770698918e-09, + "logits/chosen": -1.7601263523101807, + "logits/rejected": -1.7657661437988281, + "logps/chosen": -15.475781440734863, + "logps/rejected": -195.90484619140625, + "loss": 0.4067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0770290419459343, + "rewards/margins": 3.332162857055664, + "rewards/rejected": -3.409191846847534, + "step": 15515 + }, + { + "epoch": 0.9, + "learning_rate": 2.450670021815071e-09, + "logits/chosen": -1.888100266456604, + "logits/rejected": -1.8785558938980103, + "logps/chosen": -284.90277099609375, + "logps/rejected": -314.9549560546875, + "loss": 0.0457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.860626220703125, + "rewards/margins": 2.798117160797119, + "rewards/rejected": -0.9374908804893494, + "step": 15516 + }, + { + "epoch": 0.9, + "learning_rate": 2.447756655745048e-09, + "logits/chosen": -1.7451963424682617, + "logits/rejected": -1.7213525772094727, + "logps/chosen": -105.63563537597656, + "logps/rejected": -620.109619140625, + "loss": 0.2435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11620712280273438, + "rewards/margins": 9.288100242614746, + "rewards/rejected": -9.171893119812012, + "step": 15517 + }, + { + "epoch": 0.9, + "learning_rate": 2.4448449789633064e-09, + "logits/chosen": -1.8023868799209595, + "logits/rejected": -1.7874935865402222, + "logps/chosen": -255.16876220703125, + "logps/rejected": -417.2518310546875, + "loss": 0.1268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6312927603721619, + "rewards/margins": 2.18784499168396, + "rewards/rejected": -1.5565521717071533, + "step": 15518 + }, + { + "epoch": 0.9, + "learning_rate": 2.44193499157328e-09, + "logits/chosen": -1.7676892280578613, + "logits/rejected": -1.7543742656707764, + "logps/chosen": -2.8811655044555664, + "logps/rejected": -232.91067504882812, + "loss": 0.3204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06985435634851456, + "rewards/margins": 3.3168604373931885, + "rewards/rejected": -3.2470061779022217, + "step": 15519 + }, + { + "epoch": 0.9, + "learning_rate": 2.43902669367837e-09, + "logits/chosen": -1.793431043624878, + "logits/rejected": -1.7663618326187134, + "logps/chosen": -256.5393371582031, + "logps/rejected": -438.11102294921875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.14676833152771, + "rewards/margins": 5.873971939086914, + "rewards/rejected": -2.727203369140625, + "step": 15520 + }, + { + "epoch": 0.9, + "learning_rate": 2.4361200853818707e-09, + "logits/chosen": -2.111582040786743, + "logits/rejected": -2.1027865409851074, + "logps/chosen": -49.7344856262207, + "logps/rejected": -228.58389282226562, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37877580523490906, + "rewards/margins": 1.2429420948028564, + "rewards/rejected": -0.864166259765625, + "step": 15521 + }, + { + "epoch": 0.9, + "learning_rate": 2.4332151667870504e-09, + "logits/chosen": -1.927628755569458, + "logits/rejected": -1.919784426689148, + "logps/chosen": -10.484986305236816, + "logps/rejected": -110.67557525634766, + "loss": 0.4537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18677006661891937, + "rewards/margins": 0.997667133808136, + "rewards/rejected": -0.8108970522880554, + "step": 15522 + }, + { + "epoch": 0.9, + "learning_rate": 2.4303119379970872e-09, + "logits/chosen": -2.0677027702331543, + "logits/rejected": -2.0560648441314697, + "logps/chosen": -13.85801887512207, + "logps/rejected": -308.7572937011719, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4264121949672699, + "rewards/margins": 6.206789493560791, + "rewards/rejected": -5.780377388000488, + "step": 15523 + }, + { + "epoch": 0.9, + "learning_rate": 2.4274103991151485e-09, + "logits/chosen": -2.089186668395996, + "logits/rejected": -2.081204891204834, + "logps/chosen": -41.60005187988281, + "logps/rejected": -254.82546997070312, + "loss": 0.4217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09824676811695099, + "rewards/margins": 4.733862400054932, + "rewards/rejected": -4.832108974456787, + "step": 15524 + }, + { + "epoch": 0.9, + "learning_rate": 2.424510550244291e-09, + "logits/chosen": -1.7535808086395264, + "logits/rejected": -1.7528672218322754, + "logps/chosen": -14.519481658935547, + "logps/rejected": -226.75173950195312, + "loss": 0.2998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20245037972927094, + "rewards/margins": 3.102633476257324, + "rewards/rejected": -2.9001832008361816, + "step": 15525 + }, + { + "epoch": 0.9, + "learning_rate": 2.421612391487532e-09, + "logits/chosen": -1.97568941116333, + "logits/rejected": -1.9796288013458252, + "logps/chosen": -60.962158203125, + "logps/rejected": -171.88040161132812, + "loss": 0.8308, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2419662475585938, + "rewards/margins": 0.6348892450332642, + "rewards/rejected": -1.876855492591858, + "step": 15526 + }, + { + "epoch": 0.9, + "learning_rate": 2.4187159229478336e-09, + "logits/chosen": -1.8932710886001587, + "logits/rejected": -1.9334965944290161, + "logps/chosen": -243.5849609375, + "logps/rejected": -496.50604248046875, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3230957984924316, + "rewards/margins": 7.865762710571289, + "rewards/rejected": -5.542666912078857, + "step": 15527 + }, + { + "epoch": 0.9, + "learning_rate": 2.4158211447280917e-09, + "logits/chosen": -1.8080462217330933, + "logits/rejected": -1.8084083795547485, + "logps/chosen": -328.8204345703125, + "logps/rejected": -406.079833984375, + "loss": 0.0833, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8825929164886475, + "rewards/margins": 1.9340240955352783, + "rewards/rejected": 0.9485687613487244, + "step": 15528 + }, + { + "epoch": 0.9, + "learning_rate": 2.4129280569311404e-09, + "logits/chosen": -1.8161460161209106, + "logits/rejected": -1.8232297897338867, + "logps/chosen": -8.569607734680176, + "logps/rejected": -179.00616455078125, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007935428991913795, + "rewards/margins": 2.0888941287994385, + "rewards/rejected": -2.080958604812622, + "step": 15529 + }, + { + "epoch": 0.9, + "learning_rate": 2.4100366596597588e-09, + "logits/chosen": -1.8418920040130615, + "logits/rejected": -1.8316370248794556, + "logps/chosen": -188.71066284179688, + "logps/rejected": -274.499755859375, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0487793684005737, + "rewards/margins": 0.7374817132949829, + "rewards/rejected": 0.31129762530326843, + "step": 15530 + }, + { + "epoch": 0.9, + "learning_rate": 2.407146953016664e-09, + "logits/chosen": -2.1171493530273438, + "logits/rejected": -2.1347274780273438, + "logps/chosen": -266.37261962890625, + "logps/rejected": -627.0791015625, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4328370094299316, + "rewards/margins": 10.098180770874023, + "rewards/rejected": -7.66534423828125, + "step": 15531 + }, + { + "epoch": 0.9, + "learning_rate": 2.404258937104514e-09, + "logits/chosen": -1.952091932296753, + "logits/rejected": -1.9391257762908936, + "logps/chosen": -13.720974922180176, + "logps/rejected": -170.55081176757812, + "loss": 0.2468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39333564043045044, + "rewards/margins": 2.1387763023376465, + "rewards/rejected": -1.7454407215118408, + "step": 15532 + }, + { + "epoch": 0.9, + "learning_rate": 2.4013726120258983e-09, + "logits/chosen": -1.8044607639312744, + "logits/rejected": -1.8037267923355103, + "logps/chosen": -237.92344665527344, + "logps/rejected": -384.456787109375, + "loss": 0.0611, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3008804321289062, + "rewards/margins": 2.5271286964416504, + "rewards/rejected": -0.22624817490577698, + "step": 15533 + }, + { + "epoch": 0.9, + "learning_rate": 2.3984879778833566e-09, + "logits/chosen": -2.0954031944274902, + "logits/rejected": -2.094857931137085, + "logps/chosen": -0.14102968573570251, + "logps/rejected": -157.96405029296875, + "loss": 0.4041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035827599931508303, + "rewards/margins": 1.8906677961349487, + "rewards/rejected": -1.894250512123108, + "step": 15534 + }, + { + "epoch": 0.9, + "learning_rate": 2.3956050347793687e-09, + "logits/chosen": -1.7707633972167969, + "logits/rejected": -1.7626374959945679, + "logps/chosen": -0.00615563103929162, + "logps/rejected": -277.85638427734375, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003350919287186116, + "rewards/margins": 5.229947566986084, + "rewards/rejected": -5.230282783508301, + "step": 15535 + }, + { + "epoch": 0.9, + "learning_rate": 2.3927237828163526e-09, + "logits/chosen": -2.0329742431640625, + "logits/rejected": -2.0385782718658447, + "logps/chosen": -177.32948303222656, + "logps/rejected": -274.4164733886719, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23299561440944672, + "rewards/margins": 1.2811768054962158, + "rewards/rejected": -1.048181176185608, + "step": 15536 + }, + { + "epoch": 0.9, + "learning_rate": 2.3898442220966488e-09, + "logits/chosen": -1.8909332752227783, + "logits/rejected": -1.900682806968689, + "logps/chosen": -193.8187255859375, + "logps/rejected": -429.7776184082031, + "loss": 0.0593, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.032135009765625, + "rewards/margins": 2.504159450531006, + "rewards/rejected": -0.472024530172348, + "step": 15537 + }, + { + "epoch": 0.9, + "learning_rate": 2.386966352722564e-09, + "logits/chosen": -2.023714303970337, + "logits/rejected": -2.0244216918945312, + "logps/chosen": -0.0004312282253522426, + "logps/rejected": -77.48375701904297, + "loss": 0.6023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3352575933822664e-06, + "rewards/margins": 0.21185436844825745, + "rewards/rejected": -0.21185302734375, + "step": 15538 + }, + { + "epoch": 0.9, + "learning_rate": 2.3840901747963492e-09, + "logits/chosen": -1.9121782779693604, + "logits/rejected": -1.879693865776062, + "logps/chosen": -210.14398193359375, + "logps/rejected": -381.3587646484375, + "loss": 0.0823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9673172235488892, + "rewards/margins": 2.5555801391601562, + "rewards/rejected": -1.588262915611267, + "step": 15539 + }, + { + "epoch": 0.9, + "learning_rate": 2.381215688420152e-09, + "logits/chosen": -1.9220720529556274, + "logits/rejected": -1.9199022054672241, + "logps/chosen": -0.0029506233986467123, + "logps/rejected": -143.92108154296875, + "loss": 0.4167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002039301907643676, + "rewards/margins": 1.8482567071914673, + "rewards/rejected": -1.846217393875122, + "step": 15540 + }, + { + "epoch": 0.9, + "learning_rate": 2.378342893696117e-09, + "logits/chosen": -1.8234854936599731, + "logits/rejected": -1.8219945430755615, + "logps/chosen": -18.971576690673828, + "logps/rejected": -43.489810943603516, + "loss": 0.5247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4624035060405731, + "rewards/margins": 0.3086002469062805, + "rewards/rejected": 0.1538032591342926, + "step": 15541 + }, + { + "epoch": 0.9, + "learning_rate": 2.375471790726269e-09, + "logits/chosen": -1.8885682821273804, + "logits/rejected": -1.8879293203353882, + "logps/chosen": -39.4660530090332, + "logps/rejected": -178.75770568847656, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05384368821978569, + "rewards/margins": 4.002894401550293, + "rewards/rejected": -3.9490509033203125, + "step": 15542 + }, + { + "epoch": 0.9, + "learning_rate": 2.3726023796126314e-09, + "logits/chosen": -2.090646266937256, + "logits/rejected": -2.096050500869751, + "logps/chosen": -0.09291433542966843, + "logps/rejected": -24.386966705322266, + "loss": 0.584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002874771598726511, + "rewards/margins": 0.4225479066371918, + "rewards/rejected": -0.42542266845703125, + "step": 15543 + }, + { + "epoch": 0.9, + "learning_rate": 2.3697346604571235e-09, + "logits/chosen": -1.9069550037384033, + "logits/rejected": -1.9238773584365845, + "logps/chosen": -258.5745849609375, + "logps/rejected": -374.68743896484375, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6693971157073975, + "rewards/margins": 2.9568941593170166, + "rewards/rejected": -0.287496954202652, + "step": 15544 + }, + { + "epoch": 0.9, + "learning_rate": 2.366868633361624e-09, + "logits/chosen": -1.796242594718933, + "logits/rejected": -1.794668197631836, + "logps/chosen": -9.703446388244629, + "logps/rejected": -17.45816993713379, + "loss": 0.6459, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.20992831885814667, + "rewards/margins": -0.030956655740737915, + "rewards/rejected": 0.24088497459888458, + "step": 15545 + }, + { + "epoch": 0.9, + "learning_rate": 2.3640042984279464e-09, + "logits/chosen": -1.842427134513855, + "logits/rejected": -1.8435205221176147, + "logps/chosen": -21.16828727722168, + "logps/rejected": -102.71981811523438, + "loss": 0.449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08679237216711044, + "rewards/margins": 1.2673397064208984, + "rewards/rejected": -1.1805473566055298, + "step": 15546 + }, + { + "epoch": 0.9, + "learning_rate": 2.3611416557578587e-09, + "logits/chosen": -1.6948622465133667, + "logits/rejected": -1.685876488685608, + "logps/chosen": -209.3712615966797, + "logps/rejected": -267.4761047363281, + "loss": 0.4594, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7979965209960938, + "rewards/margins": -0.33301234245300293, + "rewards/rejected": 2.1310088634490967, + "step": 15547 + }, + { + "epoch": 0.9, + "learning_rate": 2.358280705453042e-09, + "logits/chosen": -1.8722476959228516, + "logits/rejected": -1.8642606735229492, + "logps/chosen": -4.219976835884154e-05, + "logps/rejected": -170.80760192871094, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.192074705613777e-06, + "rewards/margins": 3.9624478816986084, + "rewards/rejected": -3.962446689605713, + "step": 15548 + }, + { + "epoch": 0.9, + "learning_rate": 2.35542144761513e-09, + "logits/chosen": -1.8772729635238647, + "logits/rejected": -1.929168939590454, + "logps/chosen": -211.5025634765625, + "logps/rejected": -403.10626220703125, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9513336420059204, + "rewards/margins": 6.050686836242676, + "rewards/rejected": -4.099353313446045, + "step": 15549 + }, + { + "epoch": 0.9, + "learning_rate": 2.3525638823457096e-09, + "logits/chosen": -1.9005074501037598, + "logits/rejected": -1.9010355472564697, + "logps/chosen": -0.7602538466453552, + "logps/rejected": -85.96046447753906, + "loss": 0.3922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01244302373379469, + "rewards/margins": 1.8808019161224365, + "rewards/rejected": -1.893244981765747, + "step": 15550 + }, + { + "epoch": 0.9, + "learning_rate": 2.3497080097462927e-09, + "logits/chosen": -1.868667721748352, + "logits/rejected": -1.734324336051941, + "logps/chosen": -187.63656616210938, + "logps/rejected": -601.1038818359375, + "loss": 0.0449, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7106568813323975, + "rewards/margins": 3.065203905105591, + "rewards/rejected": -0.3545471131801605, + "step": 15551 + }, + { + "epoch": 0.91, + "learning_rate": 2.3468538299183216e-09, + "logits/chosen": -1.9085158109664917, + "logits/rejected": -1.9080514907836914, + "logps/chosen": -0.4301101565361023, + "logps/rejected": -40.08922576904297, + "loss": 0.5412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03287022188305855, + "rewards/margins": 0.6919171810150146, + "rewards/rejected": -0.6590469479560852, + "step": 15552 + }, + { + "epoch": 0.91, + "learning_rate": 2.3440013429632033e-09, + "logits/chosen": -1.9602515697479248, + "logits/rejected": -1.9617040157318115, + "logps/chosen": -16.042314529418945, + "logps/rejected": -130.12582397460938, + "loss": 0.6809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4922335743904114, + "rewards/margins": 0.5360837578773499, + "rewards/rejected": -1.0283173322677612, + "step": 15553 + }, + { + "epoch": 0.91, + "learning_rate": 2.3411505489822623e-09, + "logits/chosen": -1.9584996700286865, + "logits/rejected": -1.9569072723388672, + "logps/chosen": -120.04219055175781, + "logps/rejected": -201.22340393066406, + "loss": 0.4032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3172851502895355, + "rewards/margins": 0.10193328559398651, + "rewards/rejected": 0.215351864695549, + "step": 15554 + }, + { + "epoch": 0.91, + "learning_rate": 2.3383014480767903e-09, + "logits/chosen": -2.1046228408813477, + "logits/rejected": -2.1072144508361816, + "logps/chosen": -71.55106353759766, + "logps/rejected": -347.8746643066406, + "loss": 0.1571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7575713992118835, + "rewards/margins": 5.50140380859375, + "rewards/rejected": -4.743832588195801, + "step": 15555 + }, + { + "epoch": 0.91, + "learning_rate": 2.3354540403479784e-09, + "logits/chosen": -1.794833779335022, + "logits/rejected": -1.7549484968185425, + "logps/chosen": -306.20001220703125, + "logps/rejected": -533.04541015625, + "loss": 0.1105, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4851136207580566, + "rewards/margins": 2.0984559059143066, + "rewards/rejected": 0.38665771484375, + "step": 15556 + }, + { + "epoch": 0.91, + "learning_rate": 2.3326083258969952e-09, + "logits/chosen": -1.8625621795654297, + "logits/rejected": -1.9152289628982544, + "logps/chosen": -314.26031494140625, + "logps/rejected": -278.0101623535156, + "loss": 0.1783, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1391571760177612, + "rewards/margins": 1.3459259271621704, + "rewards/rejected": -0.20676879584789276, + "step": 15557 + }, + { + "epoch": 0.91, + "learning_rate": 2.3297643048249273e-09, + "logits/chosen": -1.9088963270187378, + "logits/rejected": -1.89794921875, + "logps/chosen": -106.3482666015625, + "logps/rejected": -200.2074737548828, + "loss": 0.1623, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6944283246994019, + "rewards/margins": 1.6375313997268677, + "rewards/rejected": 0.05689697340130806, + "step": 15558 + }, + { + "epoch": 0.91, + "learning_rate": 2.326921977232815e-09, + "logits/chosen": -1.9010366201400757, + "logits/rejected": -1.898112177848816, + "logps/chosen": -62.15560531616211, + "logps/rejected": -213.0714569091797, + "loss": 0.2127, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0432194471359253, + "rewards/margins": 1.9092960357666016, + "rewards/rejected": -0.866076648235321, + "step": 15559 + }, + { + "epoch": 0.91, + "learning_rate": 2.3240813432216343e-09, + "logits/chosen": -1.9370613098144531, + "logits/rejected": -1.9318454265594482, + "logps/chosen": -39.07479476928711, + "logps/rejected": -189.0034942626953, + "loss": 0.2196, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2614964246749878, + "rewards/margins": 1.1191273927688599, + "rewards/rejected": 0.1423690766096115, + "step": 15560 + }, + { + "epoch": 0.91, + "learning_rate": 2.321242402892276e-09, + "logits/chosen": -1.9021151065826416, + "logits/rejected": -1.8926838636398315, + "logps/chosen": -5.12184476852417, + "logps/rejected": -193.40402221679688, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19994254410266876, + "rewards/margins": 2.4996161460876465, + "rewards/rejected": -2.299673557281494, + "step": 15561 + }, + { + "epoch": 0.91, + "learning_rate": 2.3184051563456265e-09, + "logits/chosen": -1.978057622909546, + "logits/rejected": -1.9780793190002441, + "logps/chosen": -79.24529266357422, + "logps/rejected": -167.50210571289062, + "loss": 0.1287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2032631635665894, + "rewards/margins": 2.2918312549591064, + "rewards/rejected": -1.088568091392517, + "step": 15562 + }, + { + "epoch": 0.91, + "learning_rate": 2.3155696036824546e-09, + "logits/chosen": -1.9244530200958252, + "logits/rejected": -1.9415526390075684, + "logps/chosen": -211.79042053222656, + "logps/rejected": -343.2295227050781, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.430651903152466, + "rewards/margins": 2.1116790771484375, + "rewards/rejected": 0.31897279620170593, + "step": 15563 + }, + { + "epoch": 0.91, + "learning_rate": 2.3127357450035023e-09, + "logits/chosen": -1.7455620765686035, + "logits/rejected": -1.7513870000839233, + "logps/chosen": -39.887901306152344, + "logps/rejected": -187.65283203125, + "loss": 0.406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00414619455114007, + "rewards/margins": 1.9545719623565674, + "rewards/rejected": -1.9504257440567017, + "step": 15564 + }, + { + "epoch": 0.91, + "learning_rate": 2.3099035804094336e-09, + "logits/chosen": -1.7836202383041382, + "logits/rejected": -1.7857213020324707, + "logps/chosen": -15.861494064331055, + "logps/rejected": -111.74283599853516, + "loss": 0.5245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013344001956284046, + "rewards/margins": 0.4701410233974457, + "rewards/rejected": -0.4567970335483551, + "step": 15565 + }, + { + "epoch": 0.91, + "learning_rate": 2.3070731100008735e-09, + "logits/chosen": -1.709939956665039, + "logits/rejected": -1.7135202884674072, + "logps/chosen": -176.1581268310547, + "logps/rejected": -193.08030700683594, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7411545515060425, + "rewards/margins": 0.6858032941818237, + "rewards/rejected": 1.0553512573242188, + "step": 15566 + }, + { + "epoch": 0.91, + "learning_rate": 2.3042443338783636e-09, + "logits/chosen": -2.0246245861053467, + "logits/rejected": -2.0229203701019287, + "logps/chosen": -39.48841094970703, + "logps/rejected": -183.01663208007812, + "loss": 0.1852, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0630989074707031, + "rewards/margins": 2.931185245513916, + "rewards/rejected": -1.8680862188339233, + "step": 15567 + }, + { + "epoch": 0.91, + "learning_rate": 2.301417252142401e-09, + "logits/chosen": -2.019867181777954, + "logits/rejected": -2.017322301864624, + "logps/chosen": -1.9328651428222656, + "logps/rejected": -272.35797119140625, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1419099122285843, + "rewards/margins": 4.80741024017334, + "rewards/rejected": -4.665500164031982, + "step": 15568 + }, + { + "epoch": 0.91, + "learning_rate": 2.2985918648934177e-09, + "logits/chosen": -1.889814019203186, + "logits/rejected": -1.8836795091629028, + "logps/chosen": -0.014851562678813934, + "logps/rejected": -157.79873657226562, + "loss": 0.3518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019217148423194885, + "rewards/margins": 3.8416004180908203, + "rewards/rejected": -3.822383165359497, + "step": 15569 + }, + { + "epoch": 0.91, + "learning_rate": 2.2957681722317934e-09, + "logits/chosen": -1.7814151048660278, + "logits/rejected": -1.7828673124313354, + "logps/chosen": -246.100341796875, + "logps/rejected": -364.158935546875, + "loss": 0.0985, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7636231184005737, + "rewards/margins": 1.708367943763733, + "rewards/rejected": 0.05525512620806694, + "step": 15570 + }, + { + "epoch": 0.91, + "learning_rate": 2.2929461742578205e-09, + "logits/chosen": -1.9048852920532227, + "logits/rejected": -1.9065909385681152, + "logps/chosen": -0.03893293812870979, + "logps/rejected": -102.33834838867188, + "loss": 0.6601, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0013134945183992386, + "rewards/margins": -0.02737455442547798, + "rewards/rejected": 0.02868804894387722, + "step": 15571 + }, + { + "epoch": 0.91, + "learning_rate": 2.2901258710717575e-09, + "logits/chosen": -1.8581758737564087, + "logits/rejected": -1.8315072059631348, + "logps/chosen": -273.1712951660156, + "logps/rejected": -499.87615966796875, + "loss": 0.055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2600494623184204, + "rewards/margins": 5.748980522155762, + "rewards/rejected": -4.488931179046631, + "step": 15572 + }, + { + "epoch": 0.91, + "learning_rate": 2.287307262773802e-09, + "logits/chosen": -1.7722281217575073, + "logits/rejected": -1.8245794773101807, + "logps/chosen": -171.95217895507812, + "logps/rejected": -334.9986572265625, + "loss": 0.0928, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0659469366073608, + "rewards/margins": 3.299229621887207, + "rewards/rejected": -2.2332825660705566, + "step": 15573 + }, + { + "epoch": 0.91, + "learning_rate": 2.28449034946408e-09, + "logits/chosen": -1.8804444074630737, + "logits/rejected": -1.8856053352355957, + "logps/chosen": -28.680849075317383, + "logps/rejected": -186.92095947265625, + "loss": 0.2185, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.397321105003357, + "rewards/margins": 1.2231817245483398, + "rewards/rejected": 0.17413941025733948, + "step": 15574 + }, + { + "epoch": 0.91, + "learning_rate": 2.2816751312426663e-09, + "logits/chosen": -1.9000920057296753, + "logits/rejected": -1.8954122066497803, + "logps/chosen": -0.022158028557896614, + "logps/rejected": -245.27064514160156, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011701536132022738, + "rewards/margins": 4.034111499786377, + "rewards/rejected": -4.035281658172607, + "step": 15575 + }, + { + "epoch": 0.91, + "learning_rate": 2.278861608209559e-09, + "logits/chosen": -2.075017213821411, + "logits/rejected": -2.0699665546417236, + "logps/chosen": -30.436269760131836, + "logps/rejected": -240.96585083007812, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2304113358259201, + "rewards/margins": 1.471606969833374, + "rewards/rejected": -1.2411956787109375, + "step": 15576 + }, + { + "epoch": 0.91, + "learning_rate": 2.2760497804647225e-09, + "logits/chosen": -1.81748366355896, + "logits/rejected": -1.850048542022705, + "logps/chosen": -169.41282653808594, + "logps/rejected": -396.88385009765625, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1638596057891846, + "rewards/margins": 4.499495029449463, + "rewards/rejected": -2.3356354236602783, + "step": 15577 + }, + { + "epoch": 0.91, + "learning_rate": 2.273239648108044e-09, + "logits/chosen": -2.0355546474456787, + "logits/rejected": -2.0167317390441895, + "logps/chosen": -260.19903564453125, + "logps/rejected": -420.90167236328125, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.911083936691284, + "rewards/margins": 4.282281398773193, + "rewards/rejected": -1.3711975812911987, + "step": 15578 + }, + { + "epoch": 0.91, + "learning_rate": 2.270431211239343e-09, + "logits/chosen": -1.9151939153671265, + "logits/rejected": -1.922087550163269, + "logps/chosen": -64.02505493164062, + "logps/rejected": -169.83921813964844, + "loss": 0.5427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38014718890190125, + "rewards/margins": 1.1068599224090576, + "rewards/rejected": -1.4870071411132812, + "step": 15579 + }, + { + "epoch": 0.91, + "learning_rate": 2.2676244699583903e-09, + "logits/chosen": -1.972617268562317, + "logits/rejected": -1.9728832244873047, + "logps/chosen": -0.03729161620140076, + "logps/rejected": -210.03933715820312, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.095009945333004, + "rewards/margins": 3.1056973934173584, + "rewards/rejected": -3.0106873512268066, + "step": 15580 + }, + { + "epoch": 0.91, + "learning_rate": 2.2648194243649166e-09, + "logits/chosen": -1.814085841178894, + "logits/rejected": -1.809678077697754, + "logps/chosen": -1.9409793615341187, + "logps/rejected": -293.1123352050781, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1349487602710724, + "rewards/margins": 4.451878547668457, + "rewards/rejected": -4.586827278137207, + "step": 15581 + }, + { + "epoch": 0.91, + "learning_rate": 2.2620160745585424e-09, + "logits/chosen": -1.9605481624603271, + "logits/rejected": -1.960854172706604, + "logps/chosen": -0.05941619724035263, + "logps/rejected": -188.3590087890625, + "loss": 0.3452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0017357387114316225, + "rewards/margins": 3.1473443508148193, + "rewards/rejected": -3.14560866355896, + "step": 15582 + }, + { + "epoch": 0.91, + "learning_rate": 2.259214420638883e-09, + "logits/chosen": -1.8880457878112793, + "logits/rejected": -1.8862955570220947, + "logps/chosen": -6.85384464263916, + "logps/rejected": -122.94601440429688, + "loss": 0.5788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09412240982055664, + "rewards/margins": 0.5640318989753723, + "rewards/rejected": -0.658154308795929, + "step": 15583 + }, + { + "epoch": 0.91, + "learning_rate": 2.2564144627054303e-09, + "logits/chosen": -1.994035243988037, + "logits/rejected": -1.989039659500122, + "logps/chosen": -12.215981483459473, + "logps/rejected": -120.50614929199219, + "loss": 0.6929, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.04449443891644478, + "rewards/margins": -0.037099648267030716, + "rewards/rejected": 0.0815940871834755, + "step": 15584 + }, + { + "epoch": 0.91, + "learning_rate": 2.253616200857694e-09, + "logits/chosen": -1.7902523279190063, + "logits/rejected": -1.7947922945022583, + "logps/chosen": -0.5488158464431763, + "logps/rejected": -81.14215850830078, + "loss": 0.5415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17040011286735535, + "rewards/margins": 0.3773314356803894, + "rewards/rejected": -0.20693130791187286, + "step": 15585 + }, + { + "epoch": 0.91, + "learning_rate": 2.250819635195056e-09, + "logits/chosen": -1.7771350145339966, + "logits/rejected": -1.7945318222045898, + "logps/chosen": -231.39166259765625, + "logps/rejected": -457.59259033203125, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1774489879608154, + "rewards/margins": 3.615858554840088, + "rewards/rejected": -1.438409447669983, + "step": 15586 + }, + { + "epoch": 0.91, + "learning_rate": 2.248024765816875e-09, + "logits/chosen": -1.6082513332366943, + "logits/rejected": -1.6141626834869385, + "logps/chosen": -19.498064041137695, + "logps/rejected": -280.7681884765625, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6994152069091797, + "rewards/margins": 2.9294323921203613, + "rewards/rejected": -2.2300171852111816, + "step": 15587 + }, + { + "epoch": 0.91, + "learning_rate": 2.2452315928224275e-09, + "logits/chosen": -1.7478933334350586, + "logits/rejected": -1.739471197128296, + "logps/chosen": -32.918792724609375, + "logps/rejected": -205.44287109375, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10928573459386826, + "rewards/margins": 2.0064291954040527, + "rewards/rejected": -2.1157150268554688, + "step": 15588 + }, + { + "epoch": 0.91, + "learning_rate": 2.242440116310956e-09, + "logits/chosen": -2.023207426071167, + "logits/rejected": -2.0259344577789307, + "logps/chosen": -13.385575294494629, + "logps/rejected": -173.8605499267578, + "loss": 0.201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.646568775177002, + "rewards/margins": 4.330600738525391, + "rewards/rejected": -3.6840317249298096, + "step": 15589 + }, + { + "epoch": 0.91, + "learning_rate": 2.2396503363816155e-09, + "logits/chosen": -1.9256234169006348, + "logits/rejected": -1.9387621879577637, + "logps/chosen": -9.3372802734375, + "logps/rejected": -190.18212890625, + "loss": 0.5319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3608807623386383, + "rewards/margins": 0.08705872297286987, + "rewards/rejected": 0.27382203936576843, + "step": 15590 + }, + { + "epoch": 0.91, + "learning_rate": 2.236862253133515e-09, + "logits/chosen": -1.9757245779037476, + "logits/rejected": -1.9713610410690308, + "logps/chosen": -0.016436662524938583, + "logps/rejected": -160.27816772460938, + "loss": 0.4464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014565969468094409, + "rewards/margins": 1.4354958534240723, + "rewards/rejected": -1.4356415271759033, + "step": 15591 + }, + { + "epoch": 0.91, + "learning_rate": 2.234075866665702e-09, + "logits/chosen": -1.7286638021469116, + "logits/rejected": -1.7367138862609863, + "logps/chosen": -241.60711669921875, + "logps/rejected": -330.8248291015625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.373779296875, + "rewards/margins": 4.21807861328125, + "rewards/rejected": -0.84429931640625, + "step": 15592 + }, + { + "epoch": 0.91, + "learning_rate": 2.2312911770771713e-09, + "logits/chosen": -2.035867691040039, + "logits/rejected": -2.040250301361084, + "logps/chosen": -30.543102264404297, + "logps/rejected": -243.65777587890625, + "loss": 0.1936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6784343719482422, + "rewards/margins": 5.891341686248779, + "rewards/rejected": -5.212907314300537, + "step": 15593 + }, + { + "epoch": 0.91, + "learning_rate": 2.228508184466826e-09, + "logits/chosen": -1.9845364093780518, + "logits/rejected": -1.9582891464233398, + "logps/chosen": -128.5852508544922, + "logps/rejected": -427.062255859375, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8314239978790283, + "rewards/margins": 2.4179413318634033, + "rewards/rejected": -0.586517333984375, + "step": 15594 + }, + { + "epoch": 0.91, + "learning_rate": 2.2257268889335544e-09, + "logits/chosen": -1.939692735671997, + "logits/rejected": -1.939508080482483, + "logps/chosen": -0.027219174429774284, + "logps/rejected": -275.6626281738281, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002217812230810523, + "rewards/margins": 3.6321985721588135, + "rewards/rejected": -3.634416341781616, + "step": 15595 + }, + { + "epoch": 0.91, + "learning_rate": 2.2229472905761493e-09, + "logits/chosen": -1.777669072151184, + "logits/rejected": -1.7720856666564941, + "logps/chosen": -231.24386596679688, + "logps/rejected": -491.0561218261719, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6591827869415283, + "rewards/margins": 4.543972969055176, + "rewards/rejected": -1.884790062904358, + "step": 15596 + }, + { + "epoch": 0.91, + "learning_rate": 2.220169389493365e-09, + "logits/chosen": -1.9098612070083618, + "logits/rejected": -1.9103271961212158, + "logps/chosen": -17.808168411254883, + "logps/rejected": -187.39688110351562, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13939686119556427, + "rewards/margins": 2.3052477836608887, + "rewards/rejected": -2.165850877761841, + "step": 15597 + }, + { + "epoch": 0.91, + "learning_rate": 2.2173931857838723e-09, + "logits/chosen": -1.9350253343582153, + "logits/rejected": -1.9293794631958008, + "logps/chosen": -307.9615783691406, + "logps/rejected": -484.884033203125, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.786486864089966, + "rewards/margins": 4.515100002288818, + "rewards/rejected": -1.728613257408142, + "step": 15598 + }, + { + "epoch": 0.91, + "learning_rate": 2.2146186795462983e-09, + "logits/chosen": -1.9265692234039307, + "logits/rejected": -1.9221378564834595, + "logps/chosen": -26.969820022583008, + "logps/rejected": -157.31704711914062, + "loss": 0.587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08596935123205185, + "rewards/margins": 0.3820207715034485, + "rewards/rejected": -0.46799013018608093, + "step": 15599 + }, + { + "epoch": 0.91, + "learning_rate": 2.2118458708792255e-09, + "logits/chosen": -1.9762053489685059, + "logits/rejected": -1.9845675230026245, + "logps/chosen": -5.447093963623047, + "logps/rejected": -202.8837432861328, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08708906173706055, + "rewards/margins": 1.728119969367981, + "rewards/rejected": -1.6410309076309204, + "step": 15600 + }, + { + "epoch": 0.91, + "learning_rate": 2.209074759881141e-09, + "logits/chosen": -1.7942380905151367, + "logits/rejected": -1.7947907447814941, + "logps/chosen": -150.5856475830078, + "logps/rejected": -197.67214965820312, + "loss": 0.3825, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.6249297857284546, + "rewards/margins": -0.0470733642578125, + "rewards/rejected": 1.672003149986267, + "step": 15601 + }, + { + "epoch": 0.91, + "learning_rate": 2.2063053466504944e-09, + "logits/chosen": -1.9859576225280762, + "logits/rejected": -1.9740334749221802, + "logps/chosen": -27.66322898864746, + "logps/rejected": -287.81427001953125, + "loss": 0.7672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1698276996612549, + "rewards/margins": 1.66795015335083, + "rewards/rejected": -2.837777853012085, + "step": 15602 + }, + { + "epoch": 0.91, + "learning_rate": 2.2035376312856566e-09, + "logits/chosen": -1.987107753753662, + "logits/rejected": -1.9769107103347778, + "logps/chosen": -0.014849399216473103, + "logps/rejected": -104.36638641357422, + "loss": 0.5171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004113294184207916, + "rewards/margins": 0.8164874315261841, + "rewards/rejected": -0.8123741149902344, + "step": 15603 + }, + { + "epoch": 0.91, + "learning_rate": 2.200771613884972e-09, + "logits/chosen": -2.01206636428833, + "logits/rejected": -2.0172204971313477, + "logps/chosen": -153.26895141601562, + "logps/rejected": -280.44769287109375, + "loss": 0.1298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3031647205352783, + "rewards/margins": 1.4473724365234375, + "rewards/rejected": -0.14420776069164276, + "step": 15604 + }, + { + "epoch": 0.91, + "learning_rate": 2.198007294546683e-09, + "logits/chosen": -1.8018558025360107, + "logits/rejected": -1.7923144102096558, + "logps/chosen": -173.93777465820312, + "logps/rejected": -529.2327880859375, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2386916875839233, + "rewards/margins": 6.529524326324463, + "rewards/rejected": -5.29083251953125, + "step": 15605 + }, + { + "epoch": 0.91, + "learning_rate": 2.195244673369001e-09, + "logits/chosen": -1.9072681665420532, + "logits/rejected": -1.9099582433700562, + "logps/chosen": -0.00034590429277159274, + "logps/rejected": -172.0209197998047, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0420025723287836e-05, + "rewards/margins": 3.064532518386841, + "rewards/rejected": -3.064502000808716, + "step": 15606 + }, + { + "epoch": 0.91, + "learning_rate": 2.192483750450064e-09, + "logits/chosen": -1.879804253578186, + "logits/rejected": -1.8805168867111206, + "logps/chosen": -0.0001392275735270232, + "logps/rejected": -181.5428009033203, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.851298854191555e-06, + "rewards/margins": 3.184730291366577, + "rewards/rejected": -3.184735059738159, + "step": 15607 + }, + { + "epoch": 0.91, + "learning_rate": 2.1897245258879647e-09, + "logits/chosen": -1.8072092533111572, + "logits/rejected": -1.7588163614273071, + "logps/chosen": -208.66299438476562, + "logps/rejected": -431.2528991699219, + "loss": 0.2245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7860580682754517, + "rewards/margins": 0.9048080444335938, + "rewards/rejected": 0.8812500238418579, + "step": 15608 + }, + { + "epoch": 0.91, + "learning_rate": 2.1869669997807093e-09, + "logits/chosen": -1.6514697074890137, + "logits/rejected": -1.6580500602722168, + "logps/chosen": -205.85516357421875, + "logps/rejected": -323.9547424316406, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.891680896282196, + "rewards/margins": 2.068106174468994, + "rewards/rejected": -1.1764252185821533, + "step": 15609 + }, + { + "epoch": 0.91, + "learning_rate": 2.1842111722262634e-09, + "logits/chosen": -1.94338059425354, + "logits/rejected": -1.964659333229065, + "logps/chosen": -286.660400390625, + "logps/rejected": -461.11376953125, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5834808349609375, + "rewards/margins": 5.3794708251953125, + "rewards/rejected": -3.795989990234375, + "step": 15610 + }, + { + "epoch": 0.91, + "learning_rate": 2.1814570433225265e-09, + "logits/chosen": -1.8495501279830933, + "logits/rejected": -1.8449925184249878, + "logps/chosen": -309.997802734375, + "logps/rejected": -391.485595703125, + "loss": 0.27, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.549908399581909, + "rewards/margins": 0.3716888427734375, + "rewards/rejected": 2.1782195568084717, + "step": 15611 + }, + { + "epoch": 0.91, + "learning_rate": 2.1787046131673536e-09, + "logits/chosen": -1.7973084449768066, + "logits/rejected": -1.799494743347168, + "logps/chosen": -196.2119903564453, + "logps/rejected": -367.4673156738281, + "loss": 0.0672, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8603928089141846, + "rewards/margins": 2.2498550415039062, + "rewards/rejected": -0.38946229219436646, + "step": 15612 + }, + { + "epoch": 0.91, + "learning_rate": 2.1759538818585e-09, + "logits/chosen": -1.7741990089416504, + "logits/rejected": -1.7484861612319946, + "logps/chosen": -203.87387084960938, + "logps/rejected": -390.2798767089844, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.351977586746216, + "rewards/margins": 3.293499708175659, + "rewards/rejected": 0.05847778543829918, + "step": 15613 + }, + { + "epoch": 0.91, + "learning_rate": 2.173204849493704e-09, + "logits/chosen": -1.8115726709365845, + "logits/rejected": -1.8089287281036377, + "logps/chosen": -0.25221171975135803, + "logps/rejected": -147.78363037109375, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04412266984581947, + "rewards/margins": 3.3967511653900146, + "rewards/rejected": -3.352628469467163, + "step": 15614 + }, + { + "epoch": 0.91, + "learning_rate": 2.170457516170615e-09, + "logits/chosen": -1.6620599031448364, + "logits/rejected": -1.6570281982421875, + "logps/chosen": -81.47186279296875, + "logps/rejected": -287.5252380371094, + "loss": 0.1079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8376297354698181, + "rewards/margins": 3.236192464828491, + "rewards/rejected": -2.3985626697540283, + "step": 15615 + }, + { + "epoch": 0.91, + "learning_rate": 2.167711881986839e-09, + "logits/chosen": -2.005571126937866, + "logits/rejected": -2.0093350410461426, + "logps/chosen": -2.0634922981262207, + "logps/rejected": -53.415443420410156, + "loss": 0.6379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09069974720478058, + "rewards/margins": 0.19208332896232605, + "rewards/rejected": -0.10138358920812607, + "step": 15616 + }, + { + "epoch": 0.91, + "learning_rate": 2.1649679470399027e-09, + "logits/chosen": -1.9486632347106934, + "logits/rejected": -1.9493815898895264, + "logps/chosen": -68.66114044189453, + "logps/rejected": -268.7616882324219, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33288803696632385, + "rewards/margins": 1.2130790948867798, + "rewards/rejected": -0.8801910281181335, + "step": 15617 + }, + { + "epoch": 0.91, + "learning_rate": 2.1622257114272956e-09, + "logits/chosen": -1.7633171081542969, + "logits/rejected": -1.7454521656036377, + "logps/chosen": -91.0472640991211, + "logps/rejected": -390.0135192871094, + "loss": 0.2048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21145859360694885, + "rewards/margins": 2.833111047744751, + "rewards/rejected": -2.621652364730835, + "step": 15618 + }, + { + "epoch": 0.91, + "learning_rate": 2.1594851752464283e-09, + "logits/chosen": -2.0138370990753174, + "logits/rejected": -2.00850772857666, + "logps/chosen": -63.88496780395508, + "logps/rejected": -346.868896484375, + "loss": 0.2006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8817470669746399, + "rewards/margins": 4.325494289398193, + "rewards/rejected": -3.443747043609619, + "step": 15619 + }, + { + "epoch": 0.91, + "learning_rate": 2.1567463385946618e-09, + "logits/chosen": -1.9057936668395996, + "logits/rejected": -1.9007840156555176, + "logps/chosen": -172.4466552734375, + "logps/rejected": -277.50390625, + "loss": 0.2059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9898360967636108, + "rewards/margins": 0.7873458862304688, + "rewards/rejected": 1.202490210533142, + "step": 15620 + }, + { + "epoch": 0.91, + "learning_rate": 2.1540092015692955e-09, + "logits/chosen": -1.9224529266357422, + "logits/rejected": -1.9880911111831665, + "logps/chosen": -235.8267364501953, + "logps/rejected": -465.9371337890625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.911656141281128, + "rewards/margins": 9.054265022277832, + "rewards/rejected": -6.142608642578125, + "step": 15621 + }, + { + "epoch": 0.91, + "learning_rate": 2.1512737642675525e-09, + "logits/chosen": -1.9913870096206665, + "logits/rejected": -1.924950122833252, + "logps/chosen": -229.2224578857422, + "logps/rejected": -279.1416931152344, + "loss": 0.1366, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4256515502929688, + "rewards/margins": 1.934248447418213, + "rewards/rejected": -0.5085968375205994, + "step": 15622 + }, + { + "epoch": 0.91, + "learning_rate": 2.148540026786633e-09, + "logits/chosen": -1.9040085077285767, + "logits/rejected": -1.9032217264175415, + "logps/chosen": -37.70359420776367, + "logps/rejected": -53.31248474121094, + "loss": 0.721, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.006870269775390625, + "rewards/margins": -0.19830627739429474, + "rewards/rejected": 0.20517654716968536, + "step": 15623 + }, + { + "epoch": 0.91, + "learning_rate": 2.14580798922363e-09, + "logits/chosen": -1.6359044313430786, + "logits/rejected": -1.6866846084594727, + "logps/chosen": -188.35678100585938, + "logps/rejected": -336.4180908203125, + "loss": 0.0961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3017196655273438, + "rewards/margins": 1.9695663452148438, + "rewards/rejected": -0.6678466796875, + "step": 15624 + }, + { + "epoch": 0.91, + "learning_rate": 2.143077651675612e-09, + "logits/chosen": -1.8034672737121582, + "logits/rejected": -1.8012499809265137, + "logps/chosen": -0.0012398153776302934, + "logps/rejected": -127.34886932373047, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4011751267826185e-05, + "rewards/margins": 3.5761547088623047, + "rewards/rejected": -3.576188802719116, + "step": 15625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1403490142395565e-09, + "logits/chosen": -2.0062713623046875, + "logits/rejected": -1.9957619905471802, + "logps/chosen": -107.08124542236328, + "logps/rejected": -207.6405029296875, + "loss": 0.1515, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6109169721603394, + "rewards/margins": 1.3261680603027344, + "rewards/rejected": 0.2847488522529602, + "step": 15626 + }, + { + "epoch": 0.91, + "learning_rate": 2.1376220770124297e-09, + "logits/chosen": -1.859839677810669, + "logits/rejected": -1.8453353643417358, + "logps/chosen": -7.625934600830078, + "logps/rejected": -234.5456085205078, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.199964240193367, + "rewards/margins": 3.662282705307007, + "rewards/rejected": -3.4623184204101562, + "step": 15627 + }, + { + "epoch": 0.91, + "learning_rate": 2.1348968400910773e-09, + "logits/chosen": -1.882691502571106, + "logits/rejected": -1.8485950231552124, + "logps/chosen": -169.31260681152344, + "logps/rejected": -282.9112548828125, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4108474254608154, + "rewards/margins": 0.57990562915802, + "rewards/rejected": 1.8309417963027954, + "step": 15628 + }, + { + "epoch": 0.91, + "learning_rate": 2.1321733035723212e-09, + "logits/chosen": -1.8974099159240723, + "logits/rejected": -1.9107367992401123, + "logps/chosen": -175.562255859375, + "logps/rejected": -462.0075378417969, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0090560913085938, + "rewards/margins": 7.878575325012207, + "rewards/rejected": -5.869519233703613, + "step": 15629 + }, + { + "epoch": 0.91, + "learning_rate": 2.1294514675529173e-09, + "logits/chosen": -1.9368230104446411, + "logits/rejected": -1.9166933298110962, + "logps/chosen": -229.6744842529297, + "logps/rejected": -434.4446105957031, + "loss": 0.1911, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.768724203109741, + "rewards/margins": 0.8211076259613037, + "rewards/rejected": 1.9476165771484375, + "step": 15630 + }, + { + "epoch": 0.91, + "learning_rate": 2.126731332129561e-09, + "logits/chosen": -1.83767569065094, + "logits/rejected": -1.8574409484863281, + "logps/chosen": -223.8678436279297, + "logps/rejected": -412.17413330078125, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.059004306793213, + "rewards/margins": 4.412971496582031, + "rewards/rejected": -0.3539672791957855, + "step": 15631 + }, + { + "epoch": 0.91, + "learning_rate": 2.12401289739888e-09, + "logits/chosen": -1.74252450466156, + "logits/rejected": -1.7453502416610718, + "logps/chosen": -0.09219218790531158, + "logps/rejected": -72.49159240722656, + "loss": 0.6451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004248771350830793, + "rewards/margins": 0.10731510818004608, + "rewards/rejected": -0.11156387627124786, + "step": 15632 + }, + { + "epoch": 0.91, + "learning_rate": 2.121296163457442e-09, + "logits/chosen": -1.8385206460952759, + "logits/rejected": -1.900824785232544, + "logps/chosen": -240.9945068359375, + "logps/rejected": -388.7511901855469, + "loss": 0.0621, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.488739013671875, + "rewards/margins": 2.589016914367676, + "rewards/rejected": -1.1002777814865112, + "step": 15633 + }, + { + "epoch": 0.91, + "learning_rate": 2.1185811304017697e-09, + "logits/chosen": -2.106531858444214, + "logits/rejected": -2.1116673946380615, + "logps/chosen": -51.73207473754883, + "logps/rejected": -281.9300842285156, + "loss": 0.1557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8662410974502563, + "rewards/margins": 3.010650157928467, + "rewards/rejected": -2.1444091796875, + "step": 15634 + }, + { + "epoch": 0.91, + "learning_rate": 2.1158677983283137e-09, + "logits/chosen": -1.7327932119369507, + "logits/rejected": -1.7129253149032593, + "logps/chosen": -114.71159362792969, + "logps/rejected": -539.6683349609375, + "loss": 0.1044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3670242428779602, + "rewards/margins": 11.202754974365234, + "rewards/rejected": -10.83573055267334, + "step": 15635 + }, + { + "epoch": 0.91, + "learning_rate": 2.113156167333452e-09, + "logits/chosen": -1.8244812488555908, + "logits/rejected": -1.8382538557052612, + "logps/chosen": -196.4725341796875, + "logps/rejected": -398.7535705566406, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2044646739959717, + "rewards/margins": 4.063494682312012, + "rewards/rejected": -1.8590301275253296, + "step": 15636 + }, + { + "epoch": 0.91, + "learning_rate": 2.1104462375135245e-09, + "logits/chosen": -1.845592975616455, + "logits/rejected": -1.8421415090560913, + "logps/chosen": -53.25495529174805, + "logps/rejected": -130.25172424316406, + "loss": 0.3067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5015193819999695, + "rewards/margins": 1.9525387287139893, + "rewards/rejected": -1.451019287109375, + "step": 15637 + }, + { + "epoch": 0.91, + "learning_rate": 2.107738008964799e-09, + "logits/chosen": -1.950576901435852, + "logits/rejected": -1.9321658611297607, + "logps/chosen": -247.83023071289062, + "logps/rejected": -434.504150390625, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.889593482017517, + "rewards/margins": 4.426062107086182, + "rewards/rejected": -2.536468505859375, + "step": 15638 + }, + { + "epoch": 0.91, + "learning_rate": 2.1050314817834925e-09, + "logits/chosen": -2.00376558303833, + "logits/rejected": -2.007850170135498, + "logps/chosen": -26.48974609375, + "logps/rejected": -117.25464630126953, + "loss": 0.4034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08218631893396378, + "rewards/margins": 1.3507333993911743, + "rewards/rejected": -1.2685470581054688, + "step": 15639 + }, + { + "epoch": 0.91, + "learning_rate": 2.1023266560657393e-09, + "logits/chosen": -1.8894436359405518, + "logits/rejected": -1.902281641960144, + "logps/chosen": -189.58132934570312, + "logps/rejected": -389.0349426269531, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.138742208480835, + "rewards/margins": 6.969000339508057, + "rewards/rejected": -3.8302581310272217, + "step": 15640 + }, + { + "epoch": 0.91, + "learning_rate": 2.0996235319076295e-09, + "logits/chosen": -1.9822484254837036, + "logits/rejected": -1.9783686399459839, + "logps/chosen": -17.14653778076172, + "logps/rejected": -98.07898712158203, + "loss": 0.5446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11385154724121094, + "rewards/margins": 0.4387272000312805, + "rewards/rejected": -0.3248756527900696, + "step": 15641 + }, + { + "epoch": 0.91, + "learning_rate": 2.0969221094052136e-09, + "logits/chosen": -2.0098462104797363, + "logits/rejected": -2.006432056427002, + "logps/chosen": -52.14394760131836, + "logps/rejected": -165.75045776367188, + "loss": 0.4364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38912391662597656, + "rewards/margins": 0.7537250518798828, + "rewards/rejected": -0.36460113525390625, + "step": 15642 + }, + { + "epoch": 0.91, + "learning_rate": 2.0942223886544376e-09, + "logits/chosen": -1.6194761991500854, + "logits/rejected": -1.6741143465042114, + "logps/chosen": -243.61337280273438, + "logps/rejected": -271.26153564453125, + "loss": 0.3811, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.2642914056777954, + "rewards/margins": -0.02297055721282959, + "rewards/rejected": 1.287261962890625, + "step": 15643 + }, + { + "epoch": 0.91, + "learning_rate": 2.0915243697512185e-09, + "logits/chosen": -1.7060617208480835, + "logits/rejected": -1.6960383653640747, + "logps/chosen": -41.434532165527344, + "logps/rejected": -255.47010803222656, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1961311399936676, + "rewards/margins": 4.499753952026367, + "rewards/rejected": -4.303622722625732, + "step": 15644 + }, + { + "epoch": 0.91, + "learning_rate": 2.0888280527913916e-09, + "logits/chosen": -1.9801355600357056, + "logits/rejected": -1.971200704574585, + "logps/chosen": -25.800214767456055, + "logps/rejected": -248.91781616210938, + "loss": 0.2739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2615707516670227, + "rewards/margins": 3.8388381004333496, + "rewards/rejected": -3.5772674083709717, + "step": 15645 + }, + { + "epoch": 0.91, + "learning_rate": 2.0861334378707574e-09, + "logits/chosen": -1.8168672323226929, + "logits/rejected": -1.7655413150787354, + "logps/chosen": -240.06259155273438, + "logps/rejected": -436.0348815917969, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.333691358566284, + "rewards/margins": 3.365917921066284, + "rewards/rejected": -1.0322265625, + "step": 15646 + }, + { + "epoch": 0.91, + "learning_rate": 2.083440525085034e-09, + "logits/chosen": -1.8002619743347168, + "logits/rejected": -1.8020929098129272, + "logps/chosen": -38.89402770996094, + "logps/rejected": -280.0669860839844, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.767877995967865, + "rewards/margins": 2.9034035205841064, + "rewards/rejected": -2.1355254650115967, + "step": 15647 + }, + { + "epoch": 0.91, + "learning_rate": 2.0807493145298837e-09, + "logits/chosen": -1.87053382396698, + "logits/rejected": -1.8764539957046509, + "logps/chosen": -6.449093780247495e-05, + "logps/rejected": -298.7429504394531, + "loss": 0.3209, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.152455623327114e-07, + "rewards/margins": 7.632922172546387, + "rewards/rejected": -7.632922649383545, + "step": 15648 + }, + { + "epoch": 0.91, + "learning_rate": 2.078059806300919e-09, + "logits/chosen": -2.0131137371063232, + "logits/rejected": -1.9915568828582764, + "logps/chosen": -10.445168495178223, + "logps/rejected": -218.9418182373047, + "loss": 0.2286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5865675806999207, + "rewards/margins": 3.007986545562744, + "rewards/rejected": -2.4214189052581787, + "step": 15649 + }, + { + "epoch": 0.91, + "learning_rate": 2.0753720004936914e-09, + "logits/chosen": -1.7987717390060425, + "logits/rejected": -1.797295331954956, + "logps/chosen": -171.0526885986328, + "logps/rejected": -292.2603759765625, + "loss": 0.4253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.745086669921875, + "rewards/margins": 0.48286131024360657, + "rewards/rejected": 0.26222535967826843, + "step": 15650 + }, + { + "epoch": 0.91, + "learning_rate": 2.072685897203663e-09, + "logits/chosen": -2.0018134117126465, + "logits/rejected": -1.991313099861145, + "logps/chosen": -34.1540412902832, + "logps/rejected": -420.70684814453125, + "loss": 0.2216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22470436990261078, + "rewards/margins": 10.830533981323242, + "rewards/rejected": -10.605829238891602, + "step": 15651 + }, + { + "epoch": 0.91, + "learning_rate": 2.0700014965262746e-09, + "logits/chosen": -1.9871867895126343, + "logits/rejected": -1.9848992824554443, + "logps/chosen": -90.19273376464844, + "logps/rejected": -205.77005004882812, + "loss": 0.1232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1090484857559204, + "rewards/margins": 3.546051025390625, + "rewards/rejected": -2.437002658843994, + "step": 15652 + }, + { + "epoch": 0.91, + "learning_rate": 2.0673187985568774e-09, + "logits/chosen": -1.934147596359253, + "logits/rejected": -1.9262301921844482, + "logps/chosen": -69.35955047607422, + "logps/rejected": -276.31256103515625, + "loss": 0.1102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3588234186172485, + "rewards/margins": 3.2419137954711914, + "rewards/rejected": -1.8830902576446533, + "step": 15653 + }, + { + "epoch": 0.91, + "learning_rate": 2.0646378033907897e-09, + "logits/chosen": -1.8934028148651123, + "logits/rejected": -1.8930875062942505, + "logps/chosen": -150.99618530273438, + "logps/rejected": -296.7803649902344, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7886886596679688, + "rewards/margins": 4.928523063659668, + "rewards/rejected": -2.1398346424102783, + "step": 15654 + }, + { + "epoch": 0.91, + "learning_rate": 2.0619585111232405e-09, + "logits/chosen": -1.890532374382019, + "logits/rejected": -1.8802800178527832, + "logps/chosen": -183.77101135253906, + "logps/rejected": -253.114501953125, + "loss": 0.2393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0099503993988037, + "rewards/margins": 0.6013016700744629, + "rewards/rejected": 1.4086487293243408, + "step": 15655 + }, + { + "epoch": 0.91, + "learning_rate": 2.0592809218494157e-09, + "logits/chosen": -1.7855219841003418, + "logits/rejected": -1.780227541923523, + "logps/chosen": -46.81999206542969, + "logps/rejected": -165.6151885986328, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5024105310440063, + "rewards/margins": 2.4988110065460205, + "rewards/rejected": -1.9964004755020142, + "step": 15656 + }, + { + "epoch": 0.91, + "learning_rate": 2.0566050356644327e-09, + "logits/chosen": -1.89588463306427, + "logits/rejected": -1.9327809810638428, + "logps/chosen": -164.041748046875, + "logps/rejected": -302.96929931640625, + "loss": 0.2217, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7664825916290283, + "rewards/margins": 0.7123688459396362, + "rewards/rejected": 1.054113745689392, + "step": 15657 + }, + { + "epoch": 0.91, + "learning_rate": 2.0539308526633603e-09, + "logits/chosen": -1.970257043838501, + "logits/rejected": -1.9753963947296143, + "logps/chosen": -0.0055573927238583565, + "logps/rejected": -70.80156707763672, + "loss": 0.7245, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0004174404020886868, + "rewards/margins": -0.08065807074308395, + "rewards/rejected": 0.0802406296133995, + "step": 15658 + }, + { + "epoch": 0.91, + "learning_rate": 2.0512583729411893e-09, + "logits/chosen": -1.8682150840759277, + "logits/rejected": -1.867653489112854, + "logps/chosen": -14.336836814880371, + "logps/rejected": -75.39704895019531, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0180760622024536, + "rewards/margins": 0.8135014772415161, + "rewards/rejected": 0.2045745849609375, + "step": 15659 + }, + { + "epoch": 0.91, + "learning_rate": 2.04858759659286e-09, + "logits/chosen": -1.9499554634094238, + "logits/rejected": -1.949800729751587, + "logps/chosen": -223.88758850097656, + "logps/rejected": -408.7154541015625, + "loss": 0.0515, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3652222156524658, + "rewards/margins": 3.7713258266448975, + "rewards/rejected": -2.4061036109924316, + "step": 15660 + }, + { + "epoch": 0.91, + "learning_rate": 2.0459185237132637e-09, + "logits/chosen": -1.7352756261825562, + "logits/rejected": -1.7437450885772705, + "logps/chosen": -0.00039289682172238827, + "logps/rejected": -122.7119140625, + "loss": 0.3598, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7987431419896893e-05, + "rewards/margins": 3.398850679397583, + "rewards/rejected": -3.3988685607910156, + "step": 15661 + }, + { + "epoch": 0.91, + "learning_rate": 2.0432511543972077e-09, + "logits/chosen": -1.845737338066101, + "logits/rejected": -1.826653242111206, + "logps/chosen": -256.60968017578125, + "logps/rejected": -367.9220886230469, + "loss": 0.2308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.999298095703125, + "rewards/margins": 0.6564697027206421, + "rewards/rejected": 1.342828392982483, + "step": 15662 + }, + { + "epoch": 0.91, + "learning_rate": 2.0405854887394548e-09, + "logits/chosen": -1.9733123779296875, + "logits/rejected": -1.9536151885986328, + "logps/chosen": -3.5404842492425814e-05, + "logps/rejected": -213.04071044921875, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.1973852350784e-06, + "rewards/margins": 3.4955527782440186, + "rewards/rejected": -3.4955475330352783, + "step": 15663 + }, + { + "epoch": 0.91, + "learning_rate": 2.0379215268346907e-09, + "logits/chosen": -1.9741721153259277, + "logits/rejected": -1.9627478122711182, + "logps/chosen": -164.66537475585938, + "logps/rejected": -303.2339782714844, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2960968017578125, + "rewards/margins": 2.6617493629455566, + "rewards/rejected": -0.365652471780777, + "step": 15664 + }, + { + "epoch": 0.91, + "learning_rate": 2.0352592687775728e-09, + "logits/chosen": -1.9514504671096802, + "logits/rejected": -1.9600361585617065, + "logps/chosen": -39.573097229003906, + "logps/rejected": -238.71795654296875, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09493446350097656, + "rewards/margins": 2.240253210067749, + "rewards/rejected": -2.1453187465667725, + "step": 15665 + }, + { + "epoch": 0.91, + "learning_rate": 2.0325987146626645e-09, + "logits/chosen": -1.7342032194137573, + "logits/rejected": -1.762046217918396, + "logps/chosen": -146.20550537109375, + "logps/rejected": -346.96514892578125, + "loss": 0.065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9015899896621704, + "rewards/margins": 2.293466091156006, + "rewards/rejected": -0.391876220703125, + "step": 15666 + }, + { + "epoch": 0.91, + "learning_rate": 2.0299398645844847e-09, + "logits/chosen": -1.8845007419586182, + "logits/rejected": -1.8760473728179932, + "logps/chosen": -15.189070701599121, + "logps/rejected": -236.75747680664062, + "loss": 0.2597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38138705492019653, + "rewards/margins": 4.913073539733887, + "rewards/rejected": -4.531686305999756, + "step": 15667 + }, + { + "epoch": 0.91, + "learning_rate": 2.027282718637474e-09, + "logits/chosen": -1.845017910003662, + "logits/rejected": -1.8493015766143799, + "logps/chosen": -183.37086486816406, + "logps/rejected": -268.66839599609375, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8560731410980225, + "rewards/margins": 2.2870819568634033, + "rewards/rejected": 0.5689911246299744, + "step": 15668 + }, + { + "epoch": 0.91, + "learning_rate": 2.024627276916058e-09, + "logits/chosen": -1.8377031087875366, + "logits/rejected": -1.8256949186325073, + "logps/chosen": -175.98739624023438, + "logps/rejected": -370.92645263671875, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5642166137695312, + "rewards/margins": 3.0040602684020996, + "rewards/rejected": -1.439843773841858, + "step": 15669 + }, + { + "epoch": 0.91, + "learning_rate": 2.0219735395145486e-09, + "logits/chosen": -2.0012753009796143, + "logits/rejected": -2.0051655769348145, + "logps/chosen": -32.05461883544922, + "logps/rejected": -202.3662567138672, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06595668941736221, + "rewards/margins": 4.141097545623779, + "rewards/rejected": -4.207054138183594, + "step": 15670 + }, + { + "epoch": 0.91, + "learning_rate": 2.019321506527222e-09, + "logits/chosen": -1.8437503576278687, + "logits/rejected": -1.8329492807388306, + "logps/chosen": -305.5566101074219, + "logps/rejected": -504.24310302734375, + "loss": 0.1361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.381256103515625, + "rewards/margins": 2.423999071121216, + "rewards/rejected": -2.042742967605591, + "step": 15671 + }, + { + "epoch": 0.91, + "learning_rate": 2.016671178048296e-09, + "logits/chosen": -1.8606659173965454, + "logits/rejected": -1.8594682216644287, + "logps/chosen": -204.05734252929688, + "logps/rejected": -229.76748657226562, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7528961896896362, + "rewards/margins": 0.2647796869277954, + "rewards/rejected": 1.4881165027618408, + "step": 15672 + }, + { + "epoch": 0.91, + "learning_rate": 2.0140225541719235e-09, + "logits/chosen": -1.9521023035049438, + "logits/rejected": -1.9461866617202759, + "logps/chosen": -16.270360946655273, + "logps/rejected": -173.44883728027344, + "loss": 0.214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5737123489379883, + "rewards/margins": 2.562666416168213, + "rewards/rejected": -1.9889541864395142, + "step": 15673 + }, + { + "epoch": 0.91, + "learning_rate": 2.011375634992191e-09, + "logits/chosen": -1.810267448425293, + "logits/rejected": -1.8233811855316162, + "logps/chosen": -119.63557434082031, + "logps/rejected": -204.4075469970703, + "loss": 0.1308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4914932250976562, + "rewards/margins": 1.5109055042266846, + "rewards/rejected": -0.01941223256289959, + "step": 15674 + }, + { + "epoch": 0.91, + "learning_rate": 2.008730420603133e-09, + "logits/chosen": -1.9952489137649536, + "logits/rejected": -2.004906415939331, + "logps/chosen": -221.93260192871094, + "logps/rejected": -374.4219055175781, + "loss": 0.0275, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9396560192108154, + "rewards/margins": 3.2032699584960938, + "rewards/rejected": -0.26361390948295593, + "step": 15675 + }, + { + "epoch": 0.91, + "learning_rate": 2.0060869110987255e-09, + "logits/chosen": -2.1070668697357178, + "logits/rejected": -2.0967531204223633, + "logps/chosen": -5.149195671081543, + "logps/rejected": -65.3790283203125, + "loss": 0.6111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.275663822889328, + "rewards/margins": 0.03668829798698425, + "rewards/rejected": 0.23897552490234375, + "step": 15676 + }, + { + "epoch": 0.91, + "learning_rate": 2.0034451065728762e-09, + "logits/chosen": -1.8682500123977661, + "logits/rejected": -1.9328563213348389, + "logps/chosen": -238.9542999267578, + "logps/rejected": -353.60296630859375, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0958054065704346, + "rewards/margins": 3.106144905090332, + "rewards/rejected": -1.010339379310608, + "step": 15677 + }, + { + "epoch": 0.91, + "learning_rate": 2.0008050071194272e-09, + "logits/chosen": -1.9493106603622437, + "logits/rejected": -1.9514920711517334, + "logps/chosen": -0.00033910697675310075, + "logps/rejected": -169.66912841796875, + "loss": 0.3666, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6865569452638738e-05, + "rewards/margins": 3.5165975093841553, + "rewards/rejected": -3.5166244506835938, + "step": 15678 + }, + { + "epoch": 0.91, + "learning_rate": 1.998166612832175e-09, + "logits/chosen": -1.7149678468704224, + "logits/rejected": -1.7992510795593262, + "logps/chosen": -216.87652587890625, + "logps/rejected": -332.671875, + "loss": 0.1511, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3843291997909546, + "rewards/margins": 1.2074187994003296, + "rewards/rejected": 0.176910400390625, + "step": 15679 + }, + { + "epoch": 0.91, + "learning_rate": 1.9955299238048506e-09, + "logits/chosen": -1.8486177921295166, + "logits/rejected": -1.858026385307312, + "logps/chosen": -145.9358673095703, + "logps/rejected": -230.27455139160156, + "loss": 0.1184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8572494983673096, + "rewards/margins": 1.4901123046875, + "rewards/rejected": 0.3671371638774872, + "step": 15680 + }, + { + "epoch": 0.91, + "learning_rate": 1.9928949401311125e-09, + "logits/chosen": -1.7339059114456177, + "logits/rejected": -1.7516355514526367, + "logps/chosen": -243.18069458007812, + "logps/rejected": -417.2076721191406, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6468536853790283, + "rewards/margins": 3.102203369140625, + "rewards/rejected": -0.45534974336624146, + "step": 15681 + }, + { + "epoch": 0.91, + "learning_rate": 1.9902616619045855e-09, + "logits/chosen": -2.0103650093078613, + "logits/rejected": -2.0099217891693115, + "logps/chosen": -4.641198635101318, + "logps/rejected": -41.576480865478516, + "loss": 0.4429, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27560853958129883, + "rewards/margins": 0.9986287355422974, + "rewards/rejected": -0.7230201959609985, + "step": 15682 + }, + { + "epoch": 0.91, + "learning_rate": 1.987630089218789e-09, + "logits/chosen": -1.8989381790161133, + "logits/rejected": -1.8775911331176758, + "logps/chosen": -226.49697875976562, + "logps/rejected": -409.66845703125, + "loss": 0.2563, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2050323486328125, + "rewards/margins": 0.4670654535293579, + "rewards/rejected": 1.7379668951034546, + "step": 15683 + }, + { + "epoch": 0.91, + "learning_rate": 1.9850002221672436e-09, + "logits/chosen": -1.80712890625, + "logits/rejected": -1.8033446073532104, + "logps/chosen": -0.5474719405174255, + "logps/rejected": -46.10035705566406, + "loss": 0.7408, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.009847283363342285, + "rewards/margins": -0.23217236995697021, + "rewards/rejected": 0.2420196533203125, + "step": 15684 + }, + { + "epoch": 0.91, + "learning_rate": 1.982372060843346e-09, + "logits/chosen": -1.697153925895691, + "logits/rejected": -1.6921405792236328, + "logps/chosen": -2.4836442470550537, + "logps/rejected": -197.22830200195312, + "loss": 0.3145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1415180265903473, + "rewards/margins": 3.457799196243286, + "rewards/rejected": -3.3162810802459717, + "step": 15685 + }, + { + "epoch": 0.91, + "learning_rate": 1.979745605340477e-09, + "logits/chosen": -1.8724024295806885, + "logits/rejected": -1.8740599155426025, + "logps/chosen": -162.3419189453125, + "logps/rejected": -304.40899658203125, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6327805519104004, + "rewards/margins": 3.255406379699707, + "rewards/rejected": -0.6226257681846619, + "step": 15686 + }, + { + "epoch": 0.91, + "learning_rate": 1.977120855751929e-09, + "logits/chosen": -1.9549040794372559, + "logits/rejected": -1.9401543140411377, + "logps/chosen": -0.0013180229580029845, + "logps/rejected": -382.0545349121094, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0002934187650680542, + "rewards/margins": 6.541714668273926, + "rewards/rejected": -6.541421413421631, + "step": 15687 + }, + { + "epoch": 0.91, + "learning_rate": 1.974497812170961e-09, + "logits/chosen": -1.8148753643035889, + "logits/rejected": -1.8048374652862549, + "logps/chosen": -3.6000794352730736e-05, + "logps/rejected": -178.34213256835938, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2278487702133134e-06, + "rewards/margins": 3.6507275104522705, + "rewards/rejected": -3.650726318359375, + "step": 15688 + }, + { + "epoch": 0.91, + "learning_rate": 1.971876474690748e-09, + "logits/chosen": -1.915956735610962, + "logits/rejected": -1.9073203802108765, + "logps/chosen": -131.73614501953125, + "logps/rejected": -417.43212890625, + "loss": 0.1311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.197180151939392, + "rewards/margins": 2.32354736328125, + "rewards/rejected": -1.126367211341858, + "step": 15689 + }, + { + "epoch": 0.91, + "learning_rate": 1.9692568434044097e-09, + "logits/chosen": -1.9729384183883667, + "logits/rejected": -1.9556304216384888, + "logps/chosen": -167.84095764160156, + "logps/rejected": -279.9709777832031, + "loss": 0.0889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7046676874160767, + "rewards/margins": 2.0734939575195312, + "rewards/rejected": -0.368826299905777, + "step": 15690 + }, + { + "epoch": 0.91, + "learning_rate": 1.966638918405011e-09, + "logits/chosen": -1.9988460540771484, + "logits/rejected": -1.9982388019561768, + "logps/chosen": -7.52186169847846e-05, + "logps/rejected": -109.19669342041016, + "loss": 0.5752, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.205206101280055e-06, + "rewards/margins": 0.5188750624656677, + "rewards/rejected": -0.518872857093811, + "step": 15691 + }, + { + "epoch": 0.91, + "learning_rate": 1.9640226997855613e-09, + "logits/chosen": -1.973999261856079, + "logits/rejected": -1.965279221534729, + "logps/chosen": -9.82945728302002, + "logps/rejected": -112.86994934082031, + "loss": 0.2335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5934123396873474, + "rewards/margins": 2.3323071002960205, + "rewards/rejected": -1.7388947010040283, + "step": 15692 + }, + { + "epoch": 0.91, + "learning_rate": 1.961408187638991e-09, + "logits/chosen": -2.1121222972869873, + "logits/rejected": -2.0974109172821045, + "logps/chosen": -231.8052978515625, + "logps/rejected": -391.42205810546875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.708526611328125, + "rewards/margins": 4.818695068359375, + "rewards/rejected": -3.11016845703125, + "step": 15693 + }, + { + "epoch": 0.91, + "learning_rate": 1.9587953820581817e-09, + "logits/chosen": -1.7457162141799927, + "logits/rejected": -1.7394260168075562, + "logps/chosen": -304.6716613769531, + "logps/rejected": -498.4271240234375, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4636993408203125, + "rewards/margins": 4.377469062805176, + "rewards/rejected": -1.9137696027755737, + "step": 15694 + }, + { + "epoch": 0.91, + "learning_rate": 1.9561842831359585e-09, + "logits/chosen": -1.7488828897476196, + "logits/rejected": -1.7518844604492188, + "logps/chosen": -85.1446533203125, + "logps/rejected": -240.38031005859375, + "loss": 0.0962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7051399946212769, + "rewards/margins": 3.4634926319122314, + "rewards/rejected": -1.7583526372909546, + "step": 15695 + }, + { + "epoch": 0.91, + "learning_rate": 1.953574890965082e-09, + "logits/chosen": -2.0226786136627197, + "logits/rejected": -2.0218212604522705, + "logps/chosen": -13.70142650604248, + "logps/rejected": -148.20008850097656, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07304830849170685, + "rewards/margins": 2.982318162918091, + "rewards/rejected": -2.9092698097229004, + "step": 15696 + }, + { + "epoch": 0.91, + "learning_rate": 1.9509672056382375e-09, + "logits/chosen": -1.8462382555007935, + "logits/rejected": -1.8437272310256958, + "logps/chosen": -16.876861572265625, + "logps/rejected": -148.4625244140625, + "loss": 0.4785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23735542595386505, + "rewards/margins": 0.7505329251289368, + "rewards/rejected": -0.5131775140762329, + "step": 15697 + }, + { + "epoch": 0.91, + "learning_rate": 1.9483612272480743e-09, + "logits/chosen": -1.9493545293807983, + "logits/rejected": -1.9522162675857544, + "logps/chosen": -35.027976989746094, + "logps/rejected": -101.48889923095703, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7345665097236633, + "rewards/margins": 0.7788910269737244, + "rewards/rejected": -0.044324494898319244, + "step": 15698 + }, + { + "epoch": 0.91, + "learning_rate": 1.9457569558871623e-09, + "logits/chosen": -2.004002571105957, + "logits/rejected": -2.0081629753112793, + "logps/chosen": -0.002028481103479862, + "logps/rejected": -143.19888305664062, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011365846876287833, + "rewards/margins": 4.1085100173950195, + "rewards/rejected": -4.108623504638672, + "step": 15699 + }, + { + "epoch": 0.91, + "learning_rate": 1.943154391648033e-09, + "logits/chosen": -2.0925581455230713, + "logits/rejected": -2.0909974575042725, + "logps/chosen": -41.219329833984375, + "logps/rejected": -238.69467163085938, + "loss": 0.2301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4022178649902344, + "rewards/margins": 4.324306488037109, + "rewards/rejected": -3.922088623046875, + "step": 15700 + }, + { + "epoch": 0.91, + "learning_rate": 1.9405535346231237e-09, + "logits/chosen": -2.0216915607452393, + "logits/rejected": -2.0110814571380615, + "logps/chosen": -54.51182556152344, + "logps/rejected": -223.89198303222656, + "loss": 0.255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6752743124961853, + "rewards/margins": 1.9989116191864014, + "rewards/rejected": -1.3236373662948608, + "step": 15701 + }, + { + "epoch": 0.91, + "learning_rate": 1.9379543849048274e-09, + "logits/chosen": -1.910841464996338, + "logits/rejected": -1.9072104692459106, + "logps/chosen": -29.312602996826172, + "logps/rejected": -214.40452575683594, + "loss": 0.2718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2938808500766754, + "rewards/margins": 2.5990002155303955, + "rewards/rejected": -2.305119276046753, + "step": 15702 + }, + { + "epoch": 0.91, + "learning_rate": 1.9353569425855034e-09, + "logits/chosen": -1.683618426322937, + "logits/rejected": -1.683467984199524, + "logps/chosen": -6.508021831512451, + "logps/rejected": -218.89129638671875, + "loss": 0.316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007719326298683882, + "rewards/margins": 4.9267120361328125, + "rewards/rejected": -4.918992519378662, + "step": 15703 + }, + { + "epoch": 0.91, + "learning_rate": 1.932761207757405e-09, + "logits/chosen": -1.7949212789535522, + "logits/rejected": -1.805375337600708, + "logps/chosen": -212.69723510742188, + "logps/rejected": -404.5400390625, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5096588134765625, + "rewards/margins": 4.310360908508301, + "rewards/rejected": -0.800701916217804, + "step": 15704 + }, + { + "epoch": 0.91, + "learning_rate": 1.930167180512754e-09, + "logits/chosen": -1.9061989784240723, + "logits/rejected": -1.8949886560440063, + "logps/chosen": -40.90949249267578, + "logps/rejected": -260.5885009765625, + "loss": 0.154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6584636569023132, + "rewards/margins": 3.745033025741577, + "rewards/rejected": -3.086569309234619, + "step": 15705 + }, + { + "epoch": 0.91, + "learning_rate": 1.9275748609436927e-09, + "logits/chosen": -1.6362911462783813, + "logits/rejected": -1.6913137435913086, + "logps/chosen": -281.11639404296875, + "logps/rejected": -462.4856262207031, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3567291498184204, + "rewards/margins": 3.4603729248046875, + "rewards/rejected": -2.1036438941955566, + "step": 15706 + }, + { + "epoch": 0.91, + "learning_rate": 1.924984249142325e-09, + "logits/chosen": -1.7688207626342773, + "logits/rejected": -1.8154611587524414, + "logps/chosen": -321.4566650390625, + "logps/rejected": -438.39678955078125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.50840163230896, + "rewards/margins": 5.289221286773682, + "rewards/rejected": -1.7808197736740112, + "step": 15707 + }, + { + "epoch": 0.91, + "learning_rate": 1.922395345200678e-09, + "logits/chosen": -2.049098491668701, + "logits/rejected": -2.041309118270874, + "logps/chosen": -50.91654586791992, + "logps/rejected": -192.25778198242188, + "loss": 0.1372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2455753087997437, + "rewards/margins": 2.9145755767822266, + "rewards/rejected": -1.669000267982483, + "step": 15708 + }, + { + "epoch": 0.91, + "learning_rate": 1.9198081492107167e-09, + "logits/chosen": -1.8850300312042236, + "logits/rejected": -1.9130299091339111, + "logps/chosen": -136.84011840820312, + "logps/rejected": -188.12527465820312, + "loss": 0.1733, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7417923212051392, + "rewards/margins": 1.0002304315567017, + "rewards/rejected": 0.7415618896484375, + "step": 15709 + }, + { + "epoch": 0.91, + "learning_rate": 1.917222661264356e-09, + "logits/chosen": -1.8991841077804565, + "logits/rejected": -1.8976231813430786, + "logps/chosen": -215.7452392578125, + "logps/rejected": -295.1495361328125, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8106720447540283, + "rewards/margins": 3.533252000808716, + "rewards/rejected": -1.7225799560546875, + "step": 15710 + }, + { + "epoch": 0.91, + "learning_rate": 1.914638881453451e-09, + "logits/chosen": -1.9165105819702148, + "logits/rejected": -1.9204410314559937, + "logps/chosen": -53.11920166015625, + "logps/rejected": -233.96241760253906, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3283897340297699, + "rewards/margins": 3.087930917739868, + "rewards/rejected": -2.7595412731170654, + "step": 15711 + }, + { + "epoch": 0.91, + "learning_rate": 1.9120568098697775e-09, + "logits/chosen": -1.717049479484558, + "logits/rejected": -1.7174201011657715, + "logps/chosen": -70.82440185546875, + "logps/rejected": -281.0887451171875, + "loss": 0.1513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7472915649414062, + "rewards/margins": 4.888804912567139, + "rewards/rejected": -4.141513347625732, + "step": 15712 + }, + { + "epoch": 0.91, + "learning_rate": 1.9094764466050738e-09, + "logits/chosen": -1.8707764148712158, + "logits/rejected": -1.864444613456726, + "logps/chosen": -45.68617248535156, + "logps/rejected": -244.40478515625, + "loss": 0.1352, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0515769720077515, + "rewards/margins": 4.978627681732178, + "rewards/rejected": -3.927050828933716, + "step": 15713 + }, + { + "epoch": 0.91, + "learning_rate": 1.906897791750994e-09, + "logits/chosen": -1.7031245231628418, + "logits/rejected": -1.6400481462478638, + "logps/chosen": -154.7826690673828, + "logps/rejected": -330.8736572265625, + "loss": 0.4064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8728790283203125, + "rewards/margins": 0.24604493379592896, + "rewards/rejected": 0.6268340945243835, + "step": 15714 + }, + { + "epoch": 0.91, + "learning_rate": 1.9043208453991653e-09, + "logits/chosen": -2.0128118991851807, + "logits/rejected": -2.0044755935668945, + "logps/chosen": -1.832079529762268, + "logps/rejected": -157.87997436523438, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12724050879478455, + "rewards/margins": 3.2912464141845703, + "rewards/rejected": -3.4184868335723877, + "step": 15715 + }, + { + "epoch": 0.91, + "learning_rate": 1.9017456076411196e-09, + "logits/chosen": -1.9192829132080078, + "logits/rejected": -1.9174025058746338, + "logps/chosen": -9.918050636770204e-05, + "logps/rejected": -219.311767578125, + "loss": 0.345, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.987595720100217e-06, + "rewards/margins": 4.65325927734375, + "rewards/rejected": -4.653250217437744, + "step": 15716 + }, + { + "epoch": 0.91, + "learning_rate": 1.8991720785683397e-09, + "logits/chosen": -1.887594223022461, + "logits/rejected": -1.9198687076568604, + "logps/chosen": -295.60015869140625, + "logps/rejected": -376.1313781738281, + "loss": 0.0524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8487548828125, + "rewards/margins": 2.6765289306640625, + "rewards/rejected": -0.8277740478515625, + "step": 15717 + }, + { + "epoch": 0.91, + "learning_rate": 1.896600258272252e-09, + "logits/chosen": -1.7152724266052246, + "logits/rejected": -1.728913426399231, + "logps/chosen": -157.45310974121094, + "logps/rejected": -327.4888000488281, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2387313842773438, + "rewards/margins": 2.8532791137695312, + "rewards/rejected": -0.6145477294921875, + "step": 15718 + }, + { + "epoch": 0.91, + "learning_rate": 1.894030146844233e-09, + "logits/chosen": -1.9299376010894775, + "logits/rejected": -1.9226397275924683, + "logps/chosen": -204.15771484375, + "logps/rejected": -350.471923828125, + "loss": 0.134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4644012451171875, + "rewards/margins": 1.7023773193359375, + "rewards/rejected": -0.23797607421875, + "step": 15719 + }, + { + "epoch": 0.91, + "learning_rate": 1.8914617443755665e-09, + "logits/chosen": -1.8682774305343628, + "logits/rejected": -1.86276113986969, + "logps/chosen": -18.49155044555664, + "logps/rejected": -118.98543548583984, + "loss": 0.3615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1880950927734375, + "rewards/margins": 1.5546821355819702, + "rewards/rejected": -1.3665870428085327, + "step": 15720 + }, + { + "epoch": 0.91, + "learning_rate": 1.888895050957495e-09, + "logits/chosen": -1.8642157316207886, + "logits/rejected": -1.8455108404159546, + "logps/chosen": -7.520868301391602, + "logps/rejected": -272.8700866699219, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14836955070495605, + "rewards/margins": 5.927231788635254, + "rewards/rejected": -6.075601100921631, + "step": 15721 + }, + { + "epoch": 0.91, + "learning_rate": 1.8863300666812188e-09, + "logits/chosen": -1.9660968780517578, + "logits/rejected": -1.960849642753601, + "logps/chosen": -181.99244689941406, + "logps/rejected": -294.7940368652344, + "loss": 0.2054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5540313720703125, + "rewards/margins": 1.6526612043380737, + "rewards/rejected": -1.0986298322677612, + "step": 15722 + }, + { + "epoch": 0.91, + "learning_rate": 1.8837667916378474e-09, + "logits/chosen": -1.8632457256317139, + "logits/rejected": -1.8624279499053955, + "logps/chosen": -16.285614013671875, + "logps/rejected": -193.2803955078125, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7290752530097961, + "rewards/margins": 3.662086009979248, + "rewards/rejected": -2.9330108165740967, + "step": 15723 + }, + { + "epoch": 0.92, + "learning_rate": 1.8812052259184473e-09, + "logits/chosen": -1.8502987623214722, + "logits/rejected": -1.9547317028045654, + "logps/chosen": -218.30996704101562, + "logps/rejected": -474.7689208984375, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7873672246932983, + "rewards/margins": 7.6250901222229, + "rewards/rejected": -5.8377227783203125, + "step": 15724 + }, + { + "epoch": 0.92, + "learning_rate": 1.878645369613996e-09, + "logits/chosen": -1.7257194519042969, + "logits/rejected": -1.7252528667449951, + "logps/chosen": -215.28077697753906, + "logps/rejected": -270.8418884277344, + "loss": 0.1024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8144822120666504, + "rewards/margins": 1.9112167358398438, + "rewards/rejected": 0.9032654166221619, + "step": 15725 + }, + { + "epoch": 0.92, + "learning_rate": 1.8760872228154646e-09, + "logits/chosen": -1.8962924480438232, + "logits/rejected": -1.8973076343536377, + "logps/chosen": -21.969196319580078, + "logps/rejected": -281.82745361328125, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18112526834011078, + "rewards/margins": 5.362267971038818, + "rewards/rejected": -5.543393135070801, + "step": 15726 + }, + { + "epoch": 0.92, + "learning_rate": 1.873530785613703e-09, + "logits/chosen": -1.8955278396606445, + "logits/rejected": -1.8970575332641602, + "logps/chosen": -0.00526523869484663, + "logps/rejected": -242.849365234375, + "loss": 0.3478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004777538124471903, + "rewards/margins": 4.693350315093994, + "rewards/rejected": -4.693828105926514, + "step": 15727 + }, + { + "epoch": 0.92, + "learning_rate": 1.870976058099549e-09, + "logits/chosen": -1.856796145439148, + "logits/rejected": -1.8700371980667114, + "logps/chosen": -238.03366088867188, + "logps/rejected": -396.52685546875, + "loss": 0.1263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20422668755054474, + "rewards/margins": 4.692684650421143, + "rewards/rejected": -4.488458156585693, + "step": 15728 + }, + { + "epoch": 0.92, + "learning_rate": 1.868423040363737e-09, + "logits/chosen": -1.9972257614135742, + "logits/rejected": -1.9938114881515503, + "logps/chosen": -13.526583671569824, + "logps/rejected": -127.4709701538086, + "loss": 0.3868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022823428735136986, + "rewards/margins": 2.1307570934295654, + "rewards/rejected": -2.107933759689331, + "step": 15729 + }, + { + "epoch": 0.92, + "learning_rate": 1.865871732496993e-09, + "logits/chosen": -1.9099583625793457, + "logits/rejected": -1.9087570905685425, + "logps/chosen": -6.115269660949707, + "logps/rejected": -182.01016235351562, + "loss": 0.3734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12677593529224396, + "rewards/margins": 1.8470579385757446, + "rewards/rejected": -1.720281958580017, + "step": 15730 + }, + { + "epoch": 0.92, + "learning_rate": 1.863322134589923e-09, + "logits/chosen": -2.0514044761657715, + "logits/rejected": -2.0923452377319336, + "logps/chosen": -182.66177368164062, + "logps/rejected": -351.7845764160156, + "loss": 0.0476, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.869757056236267, + "rewards/margins": 2.9153900146484375, + "rewards/rejected": -1.0456329584121704, + "step": 15731 + }, + { + "epoch": 0.92, + "learning_rate": 1.8607742467331156e-09, + "logits/chosen": -1.894912838935852, + "logits/rejected": -1.8933148384094238, + "logps/chosen": -22.705995559692383, + "logps/rejected": -192.65521240234375, + "loss": 0.1948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5089882016181946, + "rewards/margins": 3.0317986011505127, + "rewards/rejected": -2.522810459136963, + "step": 15732 + }, + { + "epoch": 0.92, + "learning_rate": 1.858228069017076e-09, + "logits/chosen": -1.9919445514678955, + "logits/rejected": -1.9629148244857788, + "logps/chosen": -32.401824951171875, + "logps/rejected": -220.4705810546875, + "loss": 0.5804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4748130738735199, + "rewards/margins": 0.7684943675994873, + "rewards/rejected": -1.2433074712753296, + "step": 15733 + }, + { + "epoch": 0.92, + "learning_rate": 1.8556836015322709e-09, + "logits/chosen": -2.026644468307495, + "logits/rejected": -2.0268774032592773, + "logps/chosen": -2.8185455799102783, + "logps/rejected": -97.21695709228516, + "loss": 0.4532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06726384162902832, + "rewards/margins": 1.478118896484375, + "rewards/rejected": -1.5453827381134033, + "step": 15734 + }, + { + "epoch": 0.92, + "learning_rate": 1.8531408443690777e-09, + "logits/chosen": -1.9159634113311768, + "logits/rejected": -1.8950799703598022, + "logps/chosen": -15.09061050415039, + "logps/rejected": -358.5771179199219, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3006179928779602, + "rewards/margins": 3.9023728370666504, + "rewards/rejected": -3.601754903793335, + "step": 15735 + }, + { + "epoch": 0.92, + "learning_rate": 1.8505997976178356e-09, + "logits/chosen": -1.7600440979003906, + "logits/rejected": -1.7471959590911865, + "logps/chosen": -198.33212280273438, + "logps/rejected": -462.7940368652344, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0871765613555908, + "rewards/margins": 4.742520332336426, + "rewards/rejected": -3.655343770980835, + "step": 15736 + }, + { + "epoch": 0.92, + "learning_rate": 1.8480604613688111e-09, + "logits/chosen": -1.7831931114196777, + "logits/rejected": -1.8025749921798706, + "logps/chosen": -28.67142105102539, + "logps/rejected": -274.0857238769531, + "loss": 0.2477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3682418763637543, + "rewards/margins": 3.069257974624634, + "rewards/rejected": -2.7010161876678467, + "step": 15737 + }, + { + "epoch": 0.92, + "learning_rate": 1.8455228357122265e-09, + "logits/chosen": -1.8695757389068604, + "logits/rejected": -1.8636740446090698, + "logps/chosen": -45.67969512939453, + "logps/rejected": -187.40835571289062, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35501939058303833, + "rewards/margins": 1.188082218170166, + "rewards/rejected": -0.8330627679824829, + "step": 15738 + }, + { + "epoch": 0.92, + "learning_rate": 1.8429869207382043e-09, + "logits/chosen": -1.9811255931854248, + "logits/rejected": -1.9866899251937866, + "logps/chosen": -12.511209487915039, + "logps/rejected": -158.7957000732422, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37257519364356995, + "rewards/margins": 2.598501443862915, + "rewards/rejected": -2.225926160812378, + "step": 15739 + }, + { + "epoch": 0.92, + "learning_rate": 1.8404527165368554e-09, + "logits/chosen": -1.7980754375457764, + "logits/rejected": -1.798954963684082, + "logps/chosen": -1.1151328086853027, + "logps/rejected": -161.21835327148438, + "loss": 0.3659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015602207742631435, + "rewards/margins": 2.171534776687622, + "rewards/rejected": -2.155932664871216, + "step": 15740 + }, + { + "epoch": 0.92, + "learning_rate": 1.8379202231981972e-09, + "logits/chosen": -1.879549264907837, + "logits/rejected": -1.8821159601211548, + "logps/chosen": -0.011211834847927094, + "logps/rejected": -167.61260986328125, + "loss": 0.3682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014880996895954013, + "rewards/margins": 2.601834535598755, + "rewards/rejected": -2.600346326828003, + "step": 15741 + }, + { + "epoch": 0.92, + "learning_rate": 1.8353894408122016e-09, + "logits/chosen": -2.1020495891571045, + "logits/rejected": -2.0993967056274414, + "logps/chosen": -15.676910400390625, + "logps/rejected": -197.85281372070312, + "loss": 0.2633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2891773283481598, + "rewards/margins": 4.401854515075684, + "rewards/rejected": -4.112677097320557, + "step": 15742 + }, + { + "epoch": 0.92, + "learning_rate": 1.832860369468775e-09, + "logits/chosen": -1.8540337085723877, + "logits/rejected": -1.856207013130188, + "logps/chosen": -0.00113210070412606, + "logps/rejected": -115.7725830078125, + "loss": 0.5174, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.517400419805199e-05, + "rewards/margins": 0.7309382557868958, + "rewards/rejected": -0.7310234308242798, + "step": 15743 + }, + { + "epoch": 0.92, + "learning_rate": 1.8303330092577452e-09, + "logits/chosen": -2.073507785797119, + "logits/rejected": -2.0652265548706055, + "logps/chosen": -44.17198181152344, + "logps/rejected": -360.9820251464844, + "loss": 0.2023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2459087371826172, + "rewards/margins": 6.045051097869873, + "rewards/rejected": -5.799142360687256, + "step": 15744 + }, + { + "epoch": 0.92, + "learning_rate": 1.8278073602689236e-09, + "logits/chosen": -1.7377581596374512, + "logits/rejected": -1.7591042518615723, + "logps/chosen": -268.37152099609375, + "logps/rejected": -343.6160583496094, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5345826148986816, + "rewards/margins": 2.9203522205352783, + "rewards/rejected": -0.38576966524124146, + "step": 15745 + }, + { + "epoch": 0.92, + "learning_rate": 1.8252834225920166e-09, + "logits/chosen": -1.8793678283691406, + "logits/rejected": -1.8763539791107178, + "logps/chosen": -42.409034729003906, + "logps/rejected": -240.26803588867188, + "loss": 0.3875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06669654697179794, + "rewards/margins": 2.0828542709350586, + "rewards/rejected": -2.016157627105713, + "step": 15746 + }, + { + "epoch": 0.92, + "learning_rate": 1.8227611963166967e-09, + "logits/chosen": -1.8927161693572998, + "logits/rejected": -1.894723892211914, + "logps/chosen": -15.611120223999023, + "logps/rejected": -42.73838806152344, + "loss": 0.3838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3791425824165344, + "rewards/margins": 0.8166875839233398, + "rewards/rejected": -0.4375450313091278, + "step": 15747 + }, + { + "epoch": 0.92, + "learning_rate": 1.8202406815325422e-09, + "logits/chosen": -1.805052399635315, + "logits/rejected": -1.7986983060836792, + "logps/chosen": -4.163333892822266, + "logps/rejected": -80.45242309570312, + "loss": 0.9434, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.013976812362670898, + "rewards/margins": -0.8663149476051331, + "rewards/rejected": 0.880291759967804, + "step": 15748 + }, + { + "epoch": 0.92, + "learning_rate": 1.8177218783291259e-09, + "logits/chosen": -1.9653161764144897, + "logits/rejected": -1.9649657011032104, + "logps/chosen": -0.6972822546958923, + "logps/rejected": -37.40912628173828, + "loss": 0.5637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.044504743069410324, + "rewards/margins": 0.5180410742759705, + "rewards/rejected": -0.47353631258010864, + "step": 15749 + }, + { + "epoch": 0.92, + "learning_rate": 1.8152047867959096e-09, + "logits/chosen": -1.9073752164840698, + "logits/rejected": -1.9369287490844727, + "logps/chosen": -247.66302490234375, + "logps/rejected": -470.26800537109375, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.815466284751892, + "rewards/margins": 4.858972072601318, + "rewards/rejected": -3.043505907058716, + "step": 15750 + }, + { + "epoch": 0.92, + "learning_rate": 1.8126894070223164e-09, + "logits/chosen": -1.666974663734436, + "logits/rejected": -1.6801893711090088, + "logps/chosen": -233.763916015625, + "logps/rejected": -412.58135986328125, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.16654372215271, + "rewards/margins": 4.2952117919921875, + "rewards/rejected": -2.1286683082580566, + "step": 15751 + }, + { + "epoch": 0.92, + "learning_rate": 1.8101757390977024e-09, + "logits/chosen": -1.7751356363296509, + "logits/rejected": -1.8263649940490723, + "logps/chosen": -167.42662048339844, + "logps/rejected": -306.35089111328125, + "loss": 0.1762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5955215692520142, + "rewards/margins": 1.4391586780548096, + "rewards/rejected": 0.15636292099952698, + "step": 15752 + }, + { + "epoch": 0.92, + "learning_rate": 1.8076637831113795e-09, + "logits/chosen": -1.8712964057922363, + "logits/rejected": -1.8580571413040161, + "logps/chosen": -52.98100280761719, + "logps/rejected": -285.6590881347656, + "loss": 0.2179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7416664361953735, + "rewards/margins": 2.726822853088379, + "rewards/rejected": -1.9851562976837158, + "step": 15753 + }, + { + "epoch": 0.92, + "learning_rate": 1.8051535391525596e-09, + "logits/chosen": -2.074627161026001, + "logits/rejected": -2.0692946910858154, + "logps/chosen": -0.25681623816490173, + "logps/rejected": -182.45791625976562, + "loss": 0.3243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07258888334035873, + "rewards/margins": 4.118952751159668, + "rewards/rejected": -4.046363830566406, + "step": 15754 + }, + { + "epoch": 0.92, + "learning_rate": 1.8026450073104382e-09, + "logits/chosen": -1.9052284955978394, + "logits/rejected": -1.9074064493179321, + "logps/chosen": -10.846929550170898, + "logps/rejected": -178.43783569335938, + "loss": 0.3693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30999526381492615, + "rewards/margins": 1.3304909467697144, + "rewards/rejected": -1.0204956531524658, + "step": 15755 + }, + { + "epoch": 0.92, + "learning_rate": 1.8001381876741217e-09, + "logits/chosen": -1.860718846321106, + "logits/rejected": -1.8949823379516602, + "logps/chosen": -335.81732177734375, + "logps/rejected": -334.599853515625, + "loss": 0.1072, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1508119106292725, + "rewards/margins": 1.8075928688049316, + "rewards/rejected": 0.34321901202201843, + "step": 15756 + }, + { + "epoch": 0.92, + "learning_rate": 1.7976330803326778e-09, + "logits/chosen": -1.9115855693817139, + "logits/rejected": -1.9008607864379883, + "logps/chosen": -173.04678344726562, + "logps/rejected": -369.3836975097656, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7286041975021362, + "rewards/margins": 3.5922303199768066, + "rewards/rejected": -1.8636261224746704, + "step": 15757 + }, + { + "epoch": 0.92, + "learning_rate": 1.7951296853750796e-09, + "logits/chosen": -2.068603515625, + "logits/rejected": -2.073589324951172, + "logps/chosen": -28.44929313659668, + "logps/rejected": -112.4654541015625, + "loss": 0.2568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5565584301948547, + "rewards/margins": 2.7003931999206543, + "rewards/rejected": -2.1438348293304443, + "step": 15758 + }, + { + "epoch": 0.92, + "learning_rate": 1.792628002890273e-09, + "logits/chosen": -1.6946096420288086, + "logits/rejected": -1.6978245973587036, + "logps/chosen": -0.3598581850528717, + "logps/rejected": -38.25522232055664, + "loss": 0.6497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024260995909571648, + "rewards/margins": 0.18794165551662445, + "rewards/rejected": -0.16368065774440765, + "step": 15759 + }, + { + "epoch": 0.92, + "learning_rate": 1.7901280329671254e-09, + "logits/chosen": -1.8588216304779053, + "logits/rejected": -1.8620661497116089, + "logps/chosen": -58.00749969482422, + "logps/rejected": -232.97601318359375, + "loss": 0.6271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16480408608913422, + "rewards/margins": 0.0110931396484375, + "rewards/rejected": 0.15371094644069672, + "step": 15760 + }, + { + "epoch": 0.92, + "learning_rate": 1.7876297756944548e-09, + "logits/chosen": -2.00838041305542, + "logits/rejected": -2.015805721282959, + "logps/chosen": -110.70529174804688, + "logps/rejected": -225.32052612304688, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1305023431777954, + "rewards/margins": 1.2733917236328125, + "rewards/rejected": -0.14288941025733948, + "step": 15761 + }, + { + "epoch": 0.92, + "learning_rate": 1.7851332311610069e-09, + "logits/chosen": -2.01680588722229, + "logits/rejected": -2.0083365440368652, + "logps/chosen": -1.6369516849517822, + "logps/rejected": -199.66404724121094, + "loss": 0.4568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02923537604510784, + "rewards/margins": 1.3229517936706543, + "rewards/rejected": -1.2937164306640625, + "step": 15762 + }, + { + "epoch": 0.92, + "learning_rate": 1.7826383994554607e-09, + "logits/chosen": -1.8198474645614624, + "logits/rejected": -1.818056583404541, + "logps/chosen": -245.9508514404297, + "logps/rejected": -347.4336242675781, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.517042636871338, + "rewards/margins": 3.5028061866760254, + "rewards/rejected": -0.9857635498046875, + "step": 15763 + }, + { + "epoch": 0.92, + "learning_rate": 1.7801452806664674e-09, + "logits/chosen": -1.9728264808654785, + "logits/rejected": -1.9648094177246094, + "logps/chosen": -10.144071578979492, + "logps/rejected": -104.32589721679688, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2638356387615204, + "rewards/margins": 3.1153295040130615, + "rewards/rejected": -2.8514938354492188, + "step": 15764 + }, + { + "epoch": 0.92, + "learning_rate": 1.7776538748825788e-09, + "logits/chosen": -1.7864031791687012, + "logits/rejected": -1.7913854122161865, + "logps/chosen": -2.8759615421295166, + "logps/rejected": -167.12646484375, + "loss": 0.4155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0965428277850151, + "rewards/margins": 1.537899374961853, + "rewards/rejected": -1.6344422101974487, + "step": 15765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7751641821923069e-09, + "logits/chosen": -1.9531816244125366, + "logits/rejected": -1.9444698095321655, + "logps/chosen": -106.00489044189453, + "logps/rejected": -282.43939208984375, + "loss": 0.3543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08746414631605148, + "rewards/margins": 1.7515510320663452, + "rewards/rejected": -1.664086937904358, + "step": 15766 + }, + { + "epoch": 0.92, + "learning_rate": 1.7726762026840869e-09, + "logits/chosen": -1.8799304962158203, + "logits/rejected": -1.883772373199463, + "logps/chosen": -176.31484985351562, + "logps/rejected": -203.020263671875, + "loss": 0.3763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2458466291427612, + "rewards/margins": 0.18230748176574707, + "rewards/rejected": 1.0635391473770142, + "step": 15767 + }, + { + "epoch": 0.92, + "learning_rate": 1.7701899364463258e-09, + "logits/chosen": -1.9919503927230835, + "logits/rejected": -1.9943636655807495, + "logps/chosen": -0.12709574401378632, + "logps/rejected": -142.7826385498047, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009726859629154205, + "rewards/margins": 3.909923553466797, + "rewards/rejected": -3.9196503162384033, + "step": 15768 + }, + { + "epoch": 0.92, + "learning_rate": 1.7677053835673305e-09, + "logits/chosen": -1.7192964553833008, + "logits/rejected": -1.7169021368026733, + "logps/chosen": -6.785264015197754, + "logps/rejected": -91.60759735107422, + "loss": 0.7544, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.026159096509218216, + "rewards/margins": -0.2904817759990692, + "rewards/rejected": 0.2643226683139801, + "step": 15769 + }, + { + "epoch": 0.92, + "learning_rate": 1.7652225441353753e-09, + "logits/chosen": -1.8410485982894897, + "logits/rejected": -1.8138331174850464, + "logps/chosen": -142.693603515625, + "logps/rejected": -284.3647155761719, + "loss": 0.142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7290314435958862, + "rewards/margins": 1.9639160633087158, + "rewards/rejected": -0.23488464951515198, + "step": 15770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7627414182386446e-09, + "logits/chosen": -2.166536569595337, + "logits/rejected": -2.1739001274108887, + "logps/chosen": -0.03909621015191078, + "logps/rejected": -33.2283821105957, + "loss": 1.0023, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0024587446823716164, + "rewards/margins": -1.0693604946136475, + "rewards/rejected": 1.0669018030166626, + "step": 15771 + }, + { + "epoch": 0.92, + "learning_rate": 1.7602620059653072e-09, + "logits/chosen": -1.8915479183197021, + "logits/rejected": -1.8819758892059326, + "logps/chosen": -246.91693115234375, + "logps/rejected": -420.13543701171875, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5905091762542725, + "rewards/margins": 5.282788276672363, + "rewards/rejected": -2.692279100418091, + "step": 15772 + }, + { + "epoch": 0.92, + "learning_rate": 1.7577843074034204e-09, + "logits/chosen": -1.859737515449524, + "logits/rejected": -1.9081342220306396, + "logps/chosen": -230.74267578125, + "logps/rejected": -297.99981689453125, + "loss": 0.073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3008606433868408, + "rewards/margins": 2.1568360328674316, + "rewards/rejected": -0.855975329875946, + "step": 15773 + }, + { + "epoch": 0.92, + "learning_rate": 1.7553083226410138e-09, + "logits/chosen": -1.8880650997161865, + "logits/rejected": -1.8801608085632324, + "logps/chosen": -23.92400550842285, + "logps/rejected": -243.52755737304688, + "loss": 0.3238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021211815997958183, + "rewards/margins": 5.121394634246826, + "rewards/rejected": -5.100183010101318, + "step": 15774 + }, + { + "epoch": 0.92, + "learning_rate": 1.7528340517660445e-09, + "logits/chosen": -1.957603931427002, + "logits/rejected": -1.9584506750106812, + "logps/chosen": -113.49539184570312, + "logps/rejected": -191.1015167236328, + "loss": 0.5214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48937758803367615, + "rewards/margins": 0.2466987669467926, + "rewards/rejected": -0.7360763549804688, + "step": 15775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7503614948664203e-09, + "logits/chosen": -1.9509873390197754, + "logits/rejected": -1.9485586881637573, + "logps/chosen": -76.41654968261719, + "logps/rejected": -260.8709716796875, + "loss": 0.2604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3531433045864105, + "rewards/margins": 4.409112453460693, + "rewards/rejected": -4.05596923828125, + "step": 15776 + }, + { + "epoch": 0.92, + "learning_rate": 1.7478906520299652e-09, + "logits/chosen": -1.9944219589233398, + "logits/rejected": -1.987188696861267, + "logps/chosen": -9.344335556030273, + "logps/rejected": -203.39407348632812, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009918212890625, + "rewards/margins": 4.125583171844482, + "rewards/rejected": -4.12459135055542, + "step": 15777 + }, + { + "epoch": 0.92, + "learning_rate": 1.7454215233444537e-09, + "logits/chosen": -1.8023415803909302, + "logits/rejected": -1.850297212600708, + "logps/chosen": -276.3826904296875, + "logps/rejected": -425.4444274902344, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.355609178543091, + "rewards/margins": 3.840170383453369, + "rewards/rejected": -1.4845612049102783, + "step": 15778 + }, + { + "epoch": 0.92, + "learning_rate": 1.7429541088976151e-09, + "logits/chosen": -2.181100368499756, + "logits/rejected": -2.1717681884765625, + "logps/chosen": -23.667526245117188, + "logps/rejected": -286.91937255859375, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26131612062454224, + "rewards/margins": 5.5270209312438965, + "rewards/rejected": -5.26570463180542, + "step": 15779 + }, + { + "epoch": 0.92, + "learning_rate": 1.7404884087771022e-09, + "logits/chosen": -1.9785422086715698, + "logits/rejected": -1.9693540334701538, + "logps/chosen": -18.570457458496094, + "logps/rejected": -307.63507080078125, + "loss": 0.1561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9821504950523376, + "rewards/margins": 4.992569446563721, + "rewards/rejected": -4.010418891906738, + "step": 15780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7380244230704998e-09, + "logits/chosen": -1.854035496711731, + "logits/rejected": -1.8514310121536255, + "logps/chosen": -2.5419511795043945, + "logps/rejected": -269.0819091796875, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03511965274810791, + "rewards/margins": 3.860063076019287, + "rewards/rejected": -3.8951828479766846, + "step": 15781 + }, + { + "epoch": 0.92, + "learning_rate": 1.7355621518653384e-09, + "logits/chosen": -1.9135342836380005, + "logits/rejected": -1.8609458208084106, + "logps/chosen": -249.40469360351562, + "logps/rejected": -622.7689208984375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0878937244415283, + "rewards/margins": 8.4566011428833, + "rewards/rejected": -5.368707180023193, + "step": 15782 + }, + { + "epoch": 0.92, + "learning_rate": 1.7331015952491035e-09, + "logits/chosen": -1.6669464111328125, + "logits/rejected": -1.661199688911438, + "logps/chosen": -39.735191345214844, + "logps/rejected": -143.21607971191406, + "loss": 0.2119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7937225699424744, + "rewards/margins": 2.8227570056915283, + "rewards/rejected": -2.029034376144409, + "step": 15783 + }, + { + "epoch": 0.92, + "learning_rate": 1.7306427533091972e-09, + "logits/chosen": -1.967799186706543, + "logits/rejected": -1.953209400177002, + "logps/chosen": -157.4998779296875, + "logps/rejected": -360.7723083496094, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.674072265625, + "rewards/margins": 3.3287415504455566, + "rewards/rejected": -0.6546692252159119, + "step": 15784 + }, + { + "epoch": 0.92, + "learning_rate": 1.728185626132972e-09, + "logits/chosen": -1.7926454544067383, + "logits/rejected": -1.7648804187774658, + "logps/chosen": -222.47412109375, + "logps/rejected": -343.439697265625, + "loss": 0.5099, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.664874315261841, + "rewards/margins": -0.5554718971252441, + "rewards/rejected": 3.220346212387085, + "step": 15785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7257302138077079e-09, + "logits/chosen": -1.793501853942871, + "logits/rejected": -1.7900501489639282, + "logps/chosen": -74.51583099365234, + "logps/rejected": -216.07171630859375, + "loss": 0.2267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.484017938375473, + "rewards/margins": 2.8194289207458496, + "rewards/rejected": -2.3354110717773438, + "step": 15786 + }, + { + "epoch": 0.92, + "learning_rate": 1.723276516420652e-09, + "logits/chosen": -1.9840848445892334, + "logits/rejected": -1.9909265041351318, + "logps/chosen": -0.9406362175941467, + "logps/rejected": -171.70779418945312, + "loss": 0.2691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20294971764087677, + "rewards/margins": 4.565911769866943, + "rewards/rejected": -4.362962245941162, + "step": 15787 + }, + { + "epoch": 0.92, + "learning_rate": 1.7208245340589566e-09, + "logits/chosen": -1.8073887825012207, + "logits/rejected": -1.8101705312728882, + "logps/chosen": -3.319544792175293, + "logps/rejected": -129.0906219482422, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012367486953735352, + "rewards/margins": 1.3224531412124634, + "rewards/rejected": -1.3348206281661987, + "step": 15788 + }, + { + "epoch": 0.92, + "learning_rate": 1.7183742668097411e-09, + "logits/chosen": -1.977024793624878, + "logits/rejected": -2.0135538578033447, + "logps/chosen": -178.56199645996094, + "logps/rejected": -400.1732482910156, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2389099597930908, + "rewards/margins": 5.227407932281494, + "rewards/rejected": -3.9884979724884033, + "step": 15789 + }, + { + "epoch": 0.92, + "learning_rate": 1.7159257147600303e-09, + "logits/chosen": -1.8931362628936768, + "logits/rejected": -1.9030157327651978, + "logps/chosen": -182.6482696533203, + "logps/rejected": -360.2259216308594, + "loss": 0.0474, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3829116821289062, + "rewards/margins": 2.6226913928985596, + "rewards/rejected": -0.23977966606616974, + "step": 15790 + }, + { + "epoch": 0.92, + "learning_rate": 1.7134788779968324e-09, + "logits/chosen": -1.7606310844421387, + "logits/rejected": -1.765161395072937, + "logps/chosen": -183.72239685058594, + "logps/rejected": -264.66455078125, + "loss": 0.073, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2445709705352783, + "rewards/margins": 2.0548675060272217, + "rewards/rejected": 0.18970337510108948, + "step": 15791 + }, + { + "epoch": 0.92, + "learning_rate": 1.71103375660705e-09, + "logits/chosen": -1.9294383525848389, + "logits/rejected": -1.919661521911621, + "logps/chosen": -137.7880401611328, + "logps/rejected": -309.31304931640625, + "loss": 0.4608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4506545960903168, + "rewards/margins": 1.55848228931427, + "rewards/rejected": -2.009136915206909, + "step": 15792 + }, + { + "epoch": 0.92, + "learning_rate": 1.7085903506775578e-09, + "logits/chosen": -2.0191991329193115, + "logits/rejected": -2.016815185546875, + "logps/chosen": -7.170477390289307, + "logps/rejected": -219.02781677246094, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28707289695739746, + "rewards/margins": 1.7433427572250366, + "rewards/rejected": -1.4562698602676392, + "step": 15793 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061486602951535e-09, + "logits/chosen": -2.004746437072754, + "logits/rejected": -2.001227855682373, + "logps/chosen": -0.008678700774908066, + "logps/rejected": -225.99449157714844, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031491482513956726, + "rewards/margins": 5.097733974456787, + "rewards/rejected": -5.098048686981201, + "step": 15794 + }, + { + "epoch": 0.92, + "learning_rate": 1.70370868554659e-09, + "logits/chosen": -1.882543683052063, + "logits/rejected": -1.882266640663147, + "logps/chosen": -199.1038818359375, + "logps/rejected": -267.75445556640625, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.969186544418335, + "rewards/margins": 3.279684543609619, + "rewards/rejected": -0.31049805879592896, + "step": 15795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7012704265185251e-09, + "logits/chosen": -1.9774256944656372, + "logits/rejected": -1.9622845649719238, + "logps/chosen": -7.283544255187735e-05, + "logps/rejected": -262.4687194824219, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5759150307512755e-08, + "rewards/margins": 5.70947265625, + "rewards/rejected": -5.70947265625, + "step": 15796 + }, + { + "epoch": 0.92, + "learning_rate": 1.6988338832975958e-09, + "logits/chosen": -1.6711037158966064, + "logits/rejected": -1.6630003452301025, + "logps/chosen": -0.03410650044679642, + "logps/rejected": -123.97235107421875, + "loss": 0.3826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024047151673585176, + "rewards/margins": 2.5195086002349854, + "rewards/rejected": -2.5219132900238037, + "step": 15797 + }, + { + "epoch": 0.92, + "learning_rate": 1.696399055970349e-09, + "logits/chosen": -1.6857861280441284, + "logits/rejected": -1.6787830591201782, + "logps/chosen": -219.1053466796875, + "logps/rejected": -264.2867431640625, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3384231328964233, + "rewards/margins": 0.6922957897186279, + "rewards/rejected": 0.6461273431777954, + "step": 15798 + }, + { + "epoch": 0.92, + "learning_rate": 1.6939659446232934e-09, + "logits/chosen": -1.7128580808639526, + "logits/rejected": -1.7117606401443481, + "logps/chosen": -237.5875244140625, + "logps/rejected": -417.18536376953125, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.908346652984619, + "rewards/margins": 3.827017307281494, + "rewards/rejected": -0.918670654296875, + "step": 15799 + }, + { + "epoch": 0.92, + "learning_rate": 1.6915345493428491e-09, + "logits/chosen": -1.8761948347091675, + "logits/rejected": -1.8724565505981445, + "logps/chosen": -110.40376281738281, + "logps/rejected": -213.1608123779297, + "loss": 0.1989, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8444732427597046, + "rewards/margins": 1.0892242193222046, + "rewards/rejected": 0.7552490234375, + "step": 15800 + }, + { + "epoch": 0.92, + "learning_rate": 1.689104870215402e-09, + "logits/chosen": -1.8880170583724976, + "logits/rejected": -1.8859866857528687, + "logps/chosen": -25.452985763549805, + "logps/rejected": -154.26425170898438, + "loss": 0.4541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15911255776882172, + "rewards/margins": 0.3664200007915497, + "rewards/rejected": -0.20730744302272797, + "step": 15801 + }, + { + "epoch": 0.92, + "learning_rate": 1.6866769073272668e-09, + "logits/chosen": -2.0441136360168457, + "logits/rejected": -2.0442986488342285, + "logps/chosen": -0.02272886596620083, + "logps/rejected": -40.884605407714844, + "loss": 0.6207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009814952500164509, + "rewards/margins": 0.26931002736091614, + "rewards/rejected": -0.2702915370464325, + "step": 15802 + }, + { + "epoch": 0.92, + "learning_rate": 1.6842506607646911e-09, + "logits/chosen": -1.9439632892608643, + "logits/rejected": -1.8804347515106201, + "logps/chosen": -267.55194091796875, + "logps/rejected": -363.79498291015625, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.055090308189392, + "rewards/margins": 1.138555884361267, + "rewards/rejected": -0.083465576171875, + "step": 15803 + }, + { + "epoch": 0.92, + "learning_rate": 1.6818261306138725e-09, + "logits/chosen": -1.6973053216934204, + "logits/rejected": -1.671558141708374, + "logps/chosen": -221.17355346679688, + "logps/rejected": -382.82562255859375, + "loss": 0.2323, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2520583868026733, + "rewards/margins": 0.6687179207801819, + "rewards/rejected": 0.5833404660224915, + "step": 15804 + }, + { + "epoch": 0.92, + "learning_rate": 1.6794033169609311e-09, + "logits/chosen": -1.880795955657959, + "logits/rejected": -1.8739287853240967, + "logps/chosen": -4.716238975524902, + "logps/rejected": -96.50492095947266, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14960356056690216, + "rewards/margins": 2.0263819694519043, + "rewards/rejected": -1.876778483390808, + "step": 15805 + }, + { + "epoch": 0.92, + "learning_rate": 1.6769822198919591e-09, + "logits/chosen": -1.6889231204986572, + "logits/rejected": -1.6956826448440552, + "logps/chosen": -100.7589111328125, + "logps/rejected": -233.8082275390625, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2412201166152954, + "rewards/margins": 0.7913239002227783, + "rewards/rejected": 0.4498962461948395, + "step": 15806 + }, + { + "epoch": 0.92, + "learning_rate": 1.6745628394929433e-09, + "logits/chosen": -1.8576457500457764, + "logits/rejected": -1.8569011688232422, + "logps/chosen": -213.25479125976562, + "logps/rejected": -394.4326171875, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6906814575195312, + "rewards/margins": 4.229484558105469, + "rewards/rejected": -2.5388031005859375, + "step": 15807 + }, + { + "epoch": 0.92, + "learning_rate": 1.6721451758498484e-09, + "logits/chosen": -1.8897323608398438, + "logits/rejected": -1.9112803936004639, + "logps/chosen": -220.923583984375, + "logps/rejected": -336.15399169921875, + "loss": 0.2011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.537634253501892, + "rewards/margins": 1.0138792991638184, + "rewards/rejected": 0.523754894733429, + "step": 15808 + }, + { + "epoch": 0.92, + "learning_rate": 1.6697292290485443e-09, + "logits/chosen": -1.7214096784591675, + "logits/rejected": -1.7177367210388184, + "logps/chosen": -26.99748992919922, + "logps/rejected": -207.42803955078125, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07705707848072052, + "rewards/margins": 5.793576240539551, + "rewards/rejected": -5.716519355773926, + "step": 15809 + }, + { + "epoch": 0.92, + "learning_rate": 1.6673149991748735e-09, + "logits/chosen": -1.8328622579574585, + "logits/rejected": -1.8259072303771973, + "logps/chosen": -0.062110185623168945, + "logps/rejected": -143.0226287841797, + "loss": 0.4711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011743346229195595, + "rewards/margins": 1.1979844570159912, + "rewards/rejected": -1.1862411499023438, + "step": 15810 + }, + { + "epoch": 0.92, + "learning_rate": 1.6649024863145899e-09, + "logits/chosen": -1.7841521501541138, + "logits/rejected": -1.7843436002731323, + "logps/chosen": -47.08343505859375, + "logps/rejected": -180.57369995117188, + "loss": 0.1963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7917030453681946, + "rewards/margins": 2.4444618225097656, + "rewards/rejected": -1.6527588367462158, + "step": 15811 + }, + { + "epoch": 0.92, + "learning_rate": 1.6624916905534026e-09, + "logits/chosen": -1.7981947660446167, + "logits/rejected": -1.7559905052185059, + "logps/chosen": -247.43211364746094, + "logps/rejected": -442.02532958984375, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1003921031951904, + "rewards/margins": 5.0559797286987305, + "rewards/rejected": -1.9555877447128296, + "step": 15812 + }, + { + "epoch": 0.92, + "learning_rate": 1.6600826119769596e-09, + "logits/chosen": -2.053847312927246, + "logits/rejected": -2.0550971031188965, + "logps/chosen": -0.052958883345127106, + "logps/rejected": -142.7973175048828, + "loss": 0.4815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004570324905216694, + "rewards/margins": 1.158498764038086, + "rewards/rejected": -1.1630691289901733, + "step": 15813 + }, + { + "epoch": 0.92, + "learning_rate": 1.657675250670837e-09, + "logits/chosen": -1.7189475297927856, + "logits/rejected": -1.7208119630813599, + "logps/chosen": -234.13711547851562, + "logps/rejected": -468.6586608886719, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.10991370677948, + "rewards/margins": 5.382945537567139, + "rewards/rejected": -4.273031711578369, + "step": 15814 + }, + { + "epoch": 0.92, + "learning_rate": 1.6552696067205551e-09, + "logits/chosen": -1.6700621843338013, + "logits/rejected": -1.6478610038757324, + "logps/chosen": -159.79342651367188, + "logps/rejected": -387.35125732421875, + "loss": 0.114, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1261048316955566, + "rewards/margins": 1.6740235090255737, + "rewards/rejected": 0.4520812928676605, + "step": 15815 + }, + { + "epoch": 0.92, + "learning_rate": 1.6528656802115792e-09, + "logits/chosen": -1.8459962606430054, + "logits/rejected": -1.6983636617660522, + "logps/chosen": -220.4887237548828, + "logps/rejected": -556.9998168945312, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.087506055831909, + "rewards/margins": 2.8078551292419434, + "rewards/rejected": -0.720349133014679, + "step": 15816 + }, + { + "epoch": 0.92, + "learning_rate": 1.650463471229302e-09, + "logits/chosen": -1.861448049545288, + "logits/rejected": -1.811827301979065, + "logps/chosen": -198.62445068359375, + "logps/rejected": -335.0177001953125, + "loss": 0.1935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7030670642852783, + "rewards/margins": 1.1923797130584717, + "rewards/rejected": 0.5106872916221619, + "step": 15817 + }, + { + "epoch": 0.92, + "learning_rate": 1.6480629798590718e-09, + "logits/chosen": -2.1162097454071045, + "logits/rejected": -2.115385055541992, + "logps/chosen": -2.4372220039367676, + "logps/rejected": -198.78439331054688, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006280803587287664, + "rewards/margins": 4.171191215515137, + "rewards/rejected": -4.177472114562988, + "step": 15818 + }, + { + "epoch": 0.92, + "learning_rate": 1.6456642061861537e-09, + "logits/chosen": -2.014366865158081, + "logits/rejected": -2.0069851875305176, + "logps/chosen": -0.000411798624554649, + "logps/rejected": -122.90609741210938, + "loss": 0.5008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00024250338901765645, + "rewards/margins": 0.9706847667694092, + "rewards/rejected": -0.9704422354698181, + "step": 15819 + }, + { + "epoch": 0.92, + "learning_rate": 1.6432671502957685e-09, + "logits/chosen": -2.1647510528564453, + "logits/rejected": -2.1680569648742676, + "logps/chosen": -60.884620666503906, + "logps/rejected": -402.43310546875, + "loss": 0.2142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6314041018486023, + "rewards/margins": 5.795616149902344, + "rewards/rejected": -5.164212226867676, + "step": 15820 + }, + { + "epoch": 0.92, + "learning_rate": 1.6408718122730757e-09, + "logits/chosen": -1.9362478256225586, + "logits/rejected": -1.919542908668518, + "logps/chosen": -0.20191359519958496, + "logps/rejected": -248.83868408203125, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055589254945516586, + "rewards/margins": 4.966239929199219, + "rewards/rejected": -4.910650730133057, + "step": 15821 + }, + { + "epoch": 0.92, + "learning_rate": 1.6384781922031688e-09, + "logits/chosen": -1.9257962703704834, + "logits/rejected": -1.907538890838623, + "logps/chosen": -0.04982075095176697, + "logps/rejected": -327.44732666015625, + "loss": 0.3476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004601968452334404, + "rewards/margins": 8.09223461151123, + "rewards/rejected": -8.096837043762207, + "step": 15822 + }, + { + "epoch": 0.92, + "learning_rate": 1.6360862901710737e-09, + "logits/chosen": -1.6764382123947144, + "logits/rejected": -1.6638789176940918, + "logps/chosen": -251.693603515625, + "logps/rejected": -314.05352783203125, + "loss": 0.1108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.923669457435608, + "rewards/margins": 2.293560743331909, + "rewards/rejected": -0.36989137530326843, + "step": 15823 + }, + { + "epoch": 0.92, + "learning_rate": 1.6336961062617616e-09, + "logits/chosen": -1.8573954105377197, + "logits/rejected": -1.8599413633346558, + "logps/chosen": -274.54107666015625, + "logps/rejected": -427.7657165527344, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9340423345565796, + "rewards/margins": 4.347699165344238, + "rewards/rejected": -2.413656711578369, + "step": 15824 + }, + { + "epoch": 0.92, + "learning_rate": 1.631307640560159e-09, + "logits/chosen": -1.9443124532699585, + "logits/rejected": -1.940603494644165, + "logps/chosen": -97.14044189453125, + "logps/rejected": -464.02001953125, + "loss": 0.1554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7428016662597656, + "rewards/margins": 10.816526412963867, + "rewards/rejected": -10.073724746704102, + "step": 15825 + }, + { + "epoch": 0.92, + "learning_rate": 1.6289208931511034e-09, + "logits/chosen": -1.820352554321289, + "logits/rejected": -1.8123364448547363, + "logps/chosen": -159.17990112304688, + "logps/rejected": -525.3306884765625, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.872564673423767, + "rewards/margins": 5.557717800140381, + "rewards/rejected": -3.6851532459259033, + "step": 15826 + }, + { + "epoch": 0.92, + "learning_rate": 1.6265358641193882e-09, + "logits/chosen": -1.814897060394287, + "logits/rejected": -1.8237236738204956, + "logps/chosen": -202.97486877441406, + "logps/rejected": -419.43536376953125, + "loss": 0.1494, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5095016956329346, + "rewards/margins": 1.3717514276504517, + "rewards/rejected": 0.13775025308132172, + "step": 15827 + }, + { + "epoch": 0.92, + "learning_rate": 1.6241525535497292e-09, + "logits/chosen": -1.4742763042449951, + "logits/rejected": -1.472024917602539, + "logps/chosen": -5.434648513793945, + "logps/rejected": -124.25227355957031, + "loss": 0.3511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035715341567993164, + "rewards/margins": 2.4797911643981934, + "rewards/rejected": -2.4440758228302, + "step": 15828 + }, + { + "epoch": 0.92, + "learning_rate": 1.6217709615268137e-09, + "logits/chosen": -1.8166178464889526, + "logits/rejected": -1.8083961009979248, + "logps/chosen": -272.6012878417969, + "logps/rejected": -377.9881591796875, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.852465867996216, + "rewards/margins": 3.0212128162384033, + "rewards/rejected": -0.1687469482421875, + "step": 15829 + }, + { + "epoch": 0.92, + "learning_rate": 1.6193910881352357e-09, + "logits/chosen": -1.859036922454834, + "logits/rejected": -1.8532781600952148, + "logps/chosen": -214.4520263671875, + "logps/rejected": -281.96478271484375, + "loss": 0.2478, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5815871953964233, + "rewards/margins": 0.6137130260467529, + "rewards/rejected": 0.9678741693496704, + "step": 15830 + }, + { + "epoch": 0.92, + "learning_rate": 1.617012933459544e-09, + "logits/chosen": -1.98055899143219, + "logits/rejected": -1.9579814672470093, + "logps/chosen": -171.71771240234375, + "logps/rejected": -250.04046630859375, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0402374267578125, + "rewards/margins": 0.6261688470840454, + "rewards/rejected": 0.4140686094760895, + "step": 15831 + }, + { + "epoch": 0.92, + "learning_rate": 1.6146364975842098e-09, + "logits/chosen": -2.0957062244415283, + "logits/rejected": -2.0964503288269043, + "logps/chosen": -18.501535415649414, + "logps/rejected": -112.86409759521484, + "loss": 0.2934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3958294093608856, + "rewards/margins": 2.6991238594055176, + "rewards/rejected": -2.3032944202423096, + "step": 15832 + }, + { + "epoch": 0.92, + "learning_rate": 1.6122617805936767e-09, + "logits/chosen": -2.0774738788604736, + "logits/rejected": -2.0204250812530518, + "logps/chosen": -173.34762573242188, + "logps/rejected": -569.305908203125, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2987382411956787, + "rewards/margins": 4.912153720855713, + "rewards/rejected": -2.613415479660034, + "step": 15833 + }, + { + "epoch": 0.92, + "learning_rate": 1.6098887825722884e-09, + "logits/chosen": -2.092602491378784, + "logits/rejected": -2.0902998447418213, + "logps/chosen": -40.001251220703125, + "logps/rejected": -247.2713623046875, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35329362750053406, + "rewards/margins": 0.9426368474960327, + "rewards/rejected": -0.589343249797821, + "step": 15834 + }, + { + "epoch": 0.92, + "learning_rate": 1.6075175036043498e-09, + "logits/chosen": -1.9565891027450562, + "logits/rejected": -1.931050419807434, + "logps/chosen": -0.00015139247989282012, + "logps/rejected": -189.4893798828125, + "loss": 0.4512, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.896715841416153e-06, + "rewards/margins": 1.4312089681625366, + "rewards/rejected": -1.4312118291854858, + "step": 15835 + }, + { + "epoch": 0.92, + "learning_rate": 1.6051479437741045e-09, + "logits/chosen": -1.9608116149902344, + "logits/rejected": -1.9511200189590454, + "logps/chosen": -9.919637680053711, + "logps/rejected": -219.17971801757812, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2249917984008789, + "rewards/margins": 5.372907638549805, + "rewards/rejected": -5.147915840148926, + "step": 15836 + }, + { + "epoch": 0.92, + "learning_rate": 1.6027801031657351e-09, + "logits/chosen": -1.87965989112854, + "logits/rejected": -1.8820253610610962, + "logps/chosen": -0.52488774061203, + "logps/rejected": -156.89166259765625, + "loss": 0.4972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012858510308433324, + "rewards/margins": 0.9547542333602905, + "rewards/rejected": -0.954882800579071, + "step": 15837 + }, + { + "epoch": 0.92, + "learning_rate": 1.6004139818633466e-09, + "logits/chosen": -1.7887095212936401, + "logits/rejected": -1.7924840450286865, + "logps/chosen": -4.488561630249023, + "logps/rejected": -91.71100616455078, + "loss": 0.521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10638914257287979, + "rewards/margins": 0.6214282512664795, + "rewards/rejected": -0.5150390863418579, + "step": 15838 + }, + { + "epoch": 0.92, + "learning_rate": 1.5980495799510051e-09, + "logits/chosen": -1.7796064615249634, + "logits/rejected": -1.7944996356964111, + "logps/chosen": -263.76251220703125, + "logps/rejected": -403.59600830078125, + "loss": 0.1844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7967895865440369, + "rewards/margins": 1.1412231922149658, + "rewards/rejected": -0.34443360567092896, + "step": 15839 + }, + { + "epoch": 0.92, + "learning_rate": 1.595686897512699e-09, + "logits/chosen": -1.8995285034179688, + "logits/rejected": -1.869291067123413, + "logps/chosen": -44.89881134033203, + "logps/rejected": -303.7044677734375, + "loss": 0.2392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3447677791118622, + "rewards/margins": 5.975389003753662, + "rewards/rejected": -5.630621433258057, + "step": 15840 + }, + { + "epoch": 0.92, + "learning_rate": 1.5933259346323725e-09, + "logits/chosen": -1.8588793277740479, + "logits/rejected": -1.8544949293136597, + "logps/chosen": -295.6195068359375, + "logps/rejected": -383.21539306640625, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.014224529266357, + "rewards/margins": 4.278268814086914, + "rewards/rejected": -0.2640441954135895, + "step": 15841 + }, + { + "epoch": 0.92, + "learning_rate": 1.5909666913938803e-09, + "logits/chosen": -2.0045547485351562, + "logits/rejected": -2.0035481452941895, + "logps/chosen": -200.79141235351562, + "logps/rejected": -405.95684814453125, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0658187866210938, + "rewards/margins": 4.197868347167969, + "rewards/rejected": -1.132049560546875, + "step": 15842 + }, + { + "epoch": 0.92, + "learning_rate": 1.5886091678810499e-09, + "logits/chosen": -1.9847800731658936, + "logits/rejected": -1.9081844091415405, + "logps/chosen": -146.81076049804688, + "logps/rejected": -346.9755859375, + "loss": 0.0746, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.887182593345642, + "rewards/margins": 3.285595655441284, + "rewards/rejected": -1.398413062095642, + "step": 15843 + }, + { + "epoch": 0.92, + "learning_rate": 1.586253364177631e-09, + "logits/chosen": -1.8728629350662231, + "logits/rejected": -1.871127724647522, + "logps/chosen": -8.677656173706055, + "logps/rejected": -149.33729553222656, + "loss": 0.235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5754667520523071, + "rewards/margins": 3.5335755348205566, + "rewards/rejected": -2.95810866355896, + "step": 15844 + }, + { + "epoch": 0.92, + "learning_rate": 1.5838992803673013e-09, + "logits/chosen": -2.077756643295288, + "logits/rejected": -2.0753257274627686, + "logps/chosen": -29.665855407714844, + "logps/rejected": -216.57814025878906, + "loss": 0.7854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7030121088027954, + "rewards/margins": 0.5011641979217529, + "rewards/rejected": -1.2041763067245483, + "step": 15845 + }, + { + "epoch": 0.92, + "learning_rate": 1.5815469165337103e-09, + "logits/chosen": -1.9378889799118042, + "logits/rejected": -1.9334160089492798, + "logps/chosen": -171.6175079345703, + "logps/rejected": -248.77146911621094, + "loss": 0.0649, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2851853370666504, + "rewards/margins": 2.1741440296173096, + "rewards/rejected": 0.11104126274585724, + "step": 15846 + }, + { + "epoch": 0.92, + "learning_rate": 1.5791962727604024e-09, + "logits/chosen": -1.9352006912231445, + "logits/rejected": -1.9288172721862793, + "logps/chosen": -154.87655639648438, + "logps/rejected": -208.09912109375, + "loss": 0.4183, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.2917726039886475, + "rewards/margins": -0.1878814697265625, + "rewards/rejected": 2.47965407371521, + "step": 15847 + }, + { + "epoch": 0.92, + "learning_rate": 1.5768473491308997e-09, + "logits/chosen": -1.8844252824783325, + "logits/rejected": -1.8855067491531372, + "logps/chosen": -151.23403930664062, + "logps/rejected": -370.6808166503906, + "loss": 0.1102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5993438959121704, + "rewards/margins": 3.698586940765381, + "rewards/rejected": -3.0992431640625, + "step": 15848 + }, + { + "epoch": 0.92, + "learning_rate": 1.574500145728641e-09, + "logits/chosen": -1.8067026138305664, + "logits/rejected": -1.8122276067733765, + "logps/chosen": -110.23373413085938, + "logps/rejected": -232.5296630859375, + "loss": 0.4315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4881477355957031, + "rewards/margins": 0.5904213190078735, + "rewards/rejected": -0.10227356106042862, + "step": 15849 + }, + { + "epoch": 0.92, + "learning_rate": 1.5721546626370153e-09, + "logits/chosen": -1.872374176979065, + "logits/rejected": -1.8702423572540283, + "logps/chosen": -216.563720703125, + "logps/rejected": -446.11688232421875, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0290298461914062, + "rewards/margins": 4.175013542175293, + "rewards/rejected": -2.145983934402466, + "step": 15850 + }, + { + "epoch": 0.92, + "learning_rate": 1.5698108999393334e-09, + "logits/chosen": -1.8268662691116333, + "logits/rejected": -1.8183480501174927, + "logps/chosen": -20.520957946777344, + "logps/rejected": -131.1039581298828, + "loss": 0.5668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.057062532752752304, + "rewards/margins": 0.45808565616607666, + "rewards/rejected": -0.40102311968803406, + "step": 15851 + }, + { + "epoch": 0.92, + "learning_rate": 1.567468857718879e-09, + "logits/chosen": -2.0207858085632324, + "logits/rejected": -2.0146713256835938, + "logps/chosen": -3.056070566177368, + "logps/rejected": -151.5497589111328, + "loss": 0.41, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08414240181446075, + "rewards/margins": 1.3949533700942993, + "rewards/rejected": -1.310810923576355, + "step": 15852 + }, + { + "epoch": 0.92, + "learning_rate": 1.5651285360588296e-09, + "logits/chosen": -1.8149406909942627, + "logits/rejected": -1.829798698425293, + "logps/chosen": -283.94659423828125, + "logps/rejected": -412.8087463378906, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.69046950340271, + "rewards/margins": 4.496954441070557, + "rewards/rejected": -1.8064850568771362, + "step": 15853 + }, + { + "epoch": 0.92, + "learning_rate": 1.5627899350423357e-09, + "logits/chosen": -1.820550560951233, + "logits/rejected": -1.8546961545944214, + "logps/chosen": -216.80987548828125, + "logps/rejected": -383.2148742675781, + "loss": 0.104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.776654064655304, + "rewards/margins": 3.3706696033477783, + "rewards/rejected": -2.594015598297119, + "step": 15854 + }, + { + "epoch": 0.92, + "learning_rate": 1.5604530547524809e-09, + "logits/chosen": -2.0369443893432617, + "logits/rejected": -2.074787139892578, + "logps/chosen": -203.780029296875, + "logps/rejected": -381.74200439453125, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.928564429283142, + "rewards/margins": 2.9169037342071533, + "rewards/rejected": -0.9883392453193665, + "step": 15855 + }, + { + "epoch": 0.92, + "learning_rate": 1.5581178952722762e-09, + "logits/chosen": -1.676491379737854, + "logits/rejected": -1.6738717555999756, + "logps/chosen": -35.25416564941406, + "logps/rejected": -100.0020751953125, + "loss": 0.5475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45234376192092896, + "rewards/margins": 0.2709770202636719, + "rewards/rejected": 0.1813667267560959, + "step": 15856 + }, + { + "epoch": 0.92, + "learning_rate": 1.5557844566846779e-09, + "logits/chosen": -1.6479710340499878, + "logits/rejected": -1.6544333696365356, + "logps/chosen": -259.89654541015625, + "logps/rejected": -381.90283203125, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9849426746368408, + "rewards/margins": 2.5293519496917725, + "rewards/rejected": -0.5444092154502869, + "step": 15857 + }, + { + "epoch": 0.92, + "learning_rate": 1.5534527390725805e-09, + "logits/chosen": -1.7654221057891846, + "logits/rejected": -1.7621930837631226, + "logps/chosen": -246.9735107421875, + "logps/rejected": -454.1341857910156, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1693389415740967, + "rewards/margins": 2.6615233421325684, + "rewards/rejected": -0.49218446016311646, + "step": 15858 + }, + { + "epoch": 0.92, + "learning_rate": 1.5511227425188178e-09, + "logits/chosen": -2.0999770164489746, + "logits/rejected": -2.0986838340759277, + "logps/chosen": -5.616469860076904, + "logps/rejected": -339.6352844238281, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08987150341272354, + "rewards/margins": 7.080013751983643, + "rewards/rejected": -7.169885158538818, + "step": 15859 + }, + { + "epoch": 0.92, + "learning_rate": 1.548794467106168e-09, + "logits/chosen": -1.7280821800231934, + "logits/rejected": -1.736682653427124, + "logps/chosen": -285.3736267089844, + "logps/rejected": -343.58251953125, + "loss": 0.0953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7694977521896362, + "rewards/margins": 1.760482907295227, + "rewards/rejected": 0.009014892391860485, + "step": 15860 + }, + { + "epoch": 0.92, + "learning_rate": 1.5464679129173375e-09, + "logits/chosen": -1.9369583129882812, + "logits/rejected": -1.9330543279647827, + "logps/chosen": -6.795263767242432, + "logps/rejected": -185.48245239257812, + "loss": 0.2874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25885435938835144, + "rewards/margins": 3.465894937515259, + "rewards/rejected": -3.207040548324585, + "step": 15861 + }, + { + "epoch": 0.92, + "learning_rate": 1.5441430800349764e-09, + "logits/chosen": -1.8327146768569946, + "logits/rejected": -1.8939815759658813, + "logps/chosen": -210.91091918945312, + "logps/rejected": -481.6996154785156, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8884124755859375, + "rewards/margins": 6.379525661468506, + "rewards/rejected": -4.491113185882568, + "step": 15862 + }, + { + "epoch": 0.92, + "learning_rate": 1.54181996854168e-09, + "logits/chosen": -1.7441802024841309, + "logits/rejected": -1.8203645944595337, + "logps/chosen": -221.88796997070312, + "logps/rejected": -396.005126953125, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.074331760406494, + "rewards/margins": 4.492703437805176, + "rewards/rejected": -2.4183716773986816, + "step": 15863 + }, + { + "epoch": 0.92, + "learning_rate": 1.5394985785199654e-09, + "logits/chosen": -1.816859483718872, + "logits/rejected": -1.8256796598434448, + "logps/chosen": -104.31338500976562, + "logps/rejected": -184.57958984375, + "loss": 0.1595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7863052487373352, + "rewards/margins": 2.5578231811523438, + "rewards/rejected": -1.7715179920196533, + "step": 15864 + }, + { + "epoch": 0.92, + "learning_rate": 1.5371789100523224e-09, + "logits/chosen": -1.6346981525421143, + "logits/rejected": -1.638718843460083, + "logps/chosen": -34.78036880493164, + "logps/rejected": -153.1368408203125, + "loss": 0.6949, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14428368210792542, + "rewards/margins": -0.49216803908348083, + "rewards/rejected": 0.6364517211914062, + "step": 15865 + }, + { + "epoch": 0.92, + "learning_rate": 1.534860963221124e-09, + "logits/chosen": -1.779264211654663, + "logits/rejected": -1.778977394104004, + "logps/chosen": -23.757295608520508, + "logps/rejected": -181.87350463867188, + "loss": 0.1921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0613855123519897, + "rewards/margins": 2.059508800506592, + "rewards/rejected": -0.9981231689453125, + "step": 15866 + }, + { + "epoch": 0.92, + "learning_rate": 1.5325447381087431e-09, + "logits/chosen": -1.9501285552978516, + "logits/rejected": -1.9275472164154053, + "logps/chosen": -277.66949462890625, + "logps/rejected": -462.1399230957031, + "loss": 0.1476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3542236387729645, + "rewards/margins": 3.1693787574768066, + "rewards/rejected": -2.815155029296875, + "step": 15867 + }, + { + "epoch": 0.92, + "learning_rate": 1.5302302347974527e-09, + "logits/chosen": -1.9828734397888184, + "logits/rejected": -1.9850295782089233, + "logps/chosen": -9.250477160094306e-05, + "logps/rejected": -236.85446166992188, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1444113852121518e-06, + "rewards/margins": 4.8774590492248535, + "rewards/rejected": -4.87746000289917, + "step": 15868 + }, + { + "epoch": 0.92, + "learning_rate": 1.527917453369476e-09, + "logits/chosen": -1.8747832775115967, + "logits/rejected": -1.872913122177124, + "logps/chosen": -185.72605895996094, + "logps/rejected": -214.10052490234375, + "loss": 0.0845, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9213974475860596, + "rewards/margins": 1.8807419538497925, + "rewards/rejected": 0.04065551981329918, + "step": 15869 + }, + { + "epoch": 0.92, + "learning_rate": 1.5256063939069696e-09, + "logits/chosen": -1.95625638961792, + "logits/rejected": -1.9580652713775635, + "logps/chosen": -9.975635528564453, + "logps/rejected": -239.65753173828125, + "loss": 0.3653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06885061413049698, + "rewards/margins": 3.407382011413574, + "rewards/rejected": -3.338531494140625, + "step": 15870 + }, + { + "epoch": 0.92, + "learning_rate": 1.5232970564920456e-09, + "logits/chosen": -1.8684320449829102, + "logits/rejected": -1.8743146657943726, + "logps/chosen": -108.16253662109375, + "logps/rejected": -187.45709228515625, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8312782645225525, + "rewards/margins": 0.6510368585586548, + "rewards/rejected": 0.1802413910627365, + "step": 15871 + }, + { + "epoch": 0.92, + "learning_rate": 1.520989441206727e-09, + "logits/chosen": -1.9533979892730713, + "logits/rejected": -1.9346065521240234, + "logps/chosen": -65.73104858398438, + "logps/rejected": -244.54400634765625, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0334739685058594, + "rewards/margins": 0.9819008111953735, + "rewards/rejected": 0.05157318338751793, + "step": 15872 + }, + { + "epoch": 0.92, + "learning_rate": 1.5186835481330096e-09, + "logits/chosen": -1.896458625793457, + "logits/rejected": -1.8919850587844849, + "logps/chosen": -155.97071838378906, + "logps/rejected": -163.59564208984375, + "loss": 1.6901, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.7098525762557983, + "rewards/margins": -1.1565184593200684, + "rewards/rejected": -0.5533340573310852, + "step": 15873 + }, + { + "epoch": 0.92, + "learning_rate": 1.5163793773527834e-09, + "logits/chosen": -1.7898203134536743, + "logits/rejected": -1.7756587266921997, + "logps/chosen": -232.57064819335938, + "logps/rejected": -359.8890075683594, + "loss": 0.145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.831625461578369, + "rewards/margins": 1.161492943763733, + "rewards/rejected": 1.6701325178146362, + "step": 15874 + }, + { + "epoch": 0.92, + "learning_rate": 1.5140769289479383e-09, + "logits/chosen": -2.06712007522583, + "logits/rejected": -2.0632741451263428, + "logps/chosen": -44.964176177978516, + "logps/rejected": -106.45276641845703, + "loss": 0.6122, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5297252535820007, + "rewards/margins": -0.11825984716415405, + "rewards/rejected": 0.6479851007461548, + "step": 15875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5117762030002423e-09, + "logits/chosen": -1.7330238819122314, + "logits/rejected": -1.7519097328186035, + "logps/chosen": -210.07046508789062, + "logps/rejected": -437.9414978027344, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2181549072265625, + "rewards/margins": 6.533596992492676, + "rewards/rejected": -4.315442085266113, + "step": 15876 + }, + { + "epoch": 0.92, + "learning_rate": 1.5094771995914356e-09, + "logits/chosen": -1.8746155500411987, + "logits/rejected": -1.869362473487854, + "logps/chosen": -47.72039794921875, + "logps/rejected": -267.08331298828125, + "loss": 0.1787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6591896414756775, + "rewards/margins": 3.074683427810669, + "rewards/rejected": -2.4154937267303467, + "step": 15877 + }, + { + "epoch": 0.92, + "learning_rate": 1.5071799188031975e-09, + "logits/chosen": -2.107172727584839, + "logits/rejected": -2.0888428688049316, + "logps/chosen": -0.0025472035631537437, + "logps/rejected": -201.4873046875, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0009542637853883207, + "rewards/margins": 5.800633907318115, + "rewards/rejected": -5.799679756164551, + "step": 15878 + }, + { + "epoch": 0.92, + "learning_rate": 1.504884360717129e-09, + "logits/chosen": -1.7569420337677002, + "logits/rejected": -1.7500313520431519, + "logps/chosen": -199.4304962158203, + "logps/rejected": -331.87628173828125, + "loss": 0.156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6950149536132812, + "rewards/margins": 1.3518569469451904, + "rewards/rejected": 0.34315797686576843, + "step": 15879 + }, + { + "epoch": 0.92, + "learning_rate": 1.5025905254147875e-09, + "logits/chosen": -1.8803482055664062, + "logits/rejected": -1.874225378036499, + "logps/chosen": -90.38343048095703, + "logps/rejected": -376.96270751953125, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36412736773490906, + "rewards/margins": 1.0776954889297485, + "rewards/rejected": -0.7135681509971619, + "step": 15880 + }, + { + "epoch": 0.92, + "learning_rate": 1.500298412977652e-09, + "logits/chosen": -1.877315640449524, + "logits/rejected": -1.8803566694259644, + "logps/chosen": -0.0016677146777510643, + "logps/rejected": -221.42652893066406, + "loss": 0.3397, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4504198487848043e-05, + "rewards/margins": 3.8304524421691895, + "rewards/rejected": -3.8305070400238037, + "step": 15881 + }, + { + "epoch": 0.92, + "learning_rate": 1.4980080234871517e-09, + "logits/chosen": -1.6820285320281982, + "logits/rejected": -1.7157225608825684, + "logps/chosen": -157.57913208007812, + "logps/rejected": -344.77337646484375, + "loss": 0.1117, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8837798833847046, + "rewards/margins": 1.6271392107009888, + "rewards/rejected": 0.25664064288139343, + "step": 15882 + }, + { + "epoch": 0.92, + "learning_rate": 1.4957193570246662e-09, + "logits/chosen": -1.8483515977859497, + "logits/rejected": -1.852295994758606, + "logps/chosen": -7.501875400543213, + "logps/rejected": -86.488525390625, + "loss": 0.4383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0510592944920063, + "rewards/margins": 1.4514278173446655, + "rewards/rejected": -1.400368571281433, + "step": 15883 + }, + { + "epoch": 0.92, + "learning_rate": 1.4934324136714805e-09, + "logits/chosen": -1.9105709791183472, + "logits/rejected": -1.912925362586975, + "logps/chosen": -25.578580856323242, + "logps/rejected": -260.6033020019531, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13816605508327484, + "rewards/margins": 6.22918176651001, + "rewards/rejected": -6.367347717285156, + "step": 15884 + }, + { + "epoch": 0.92, + "learning_rate": 1.4911471935088516e-09, + "logits/chosen": -1.5478098392486572, + "logits/rejected": -1.53839910030365, + "logps/chosen": -195.3079376220703, + "logps/rejected": -359.4888000488281, + "loss": 0.0642, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1268951892852783, + "rewards/margins": 2.3586792945861816, + "rewards/rejected": 0.7682159543037415, + "step": 15885 + }, + { + "epoch": 0.92, + "learning_rate": 1.4888636966179535e-09, + "logits/chosen": -1.7366915941238403, + "logits/rejected": -1.7422661781311035, + "logps/chosen": -161.7047119140625, + "logps/rejected": -314.8526611328125, + "loss": 0.0605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9519897699356079, + "rewards/margins": 3.4134764671325684, + "rewards/rejected": -2.46148681640625, + "step": 15886 + }, + { + "epoch": 0.92, + "learning_rate": 1.4865819230799048e-09, + "logits/chosen": -1.972891092300415, + "logits/rejected": -1.991073489189148, + "logps/chosen": -216.05245971679688, + "logps/rejected": -239.2826385498047, + "loss": 0.4684, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.7510833740234375, + "rewards/margins": -0.3637864589691162, + "rewards/rejected": 2.1148698329925537, + "step": 15887 + }, + { + "epoch": 0.92, + "learning_rate": 1.484301872975785e-09, + "logits/chosen": -1.781787395477295, + "logits/rejected": -1.7708771228790283, + "logps/chosen": -221.56942749023438, + "logps/rejected": -295.8600158691406, + "loss": 0.2324, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.390028476715088, + "rewards/margins": 0.5660263299942017, + "rewards/rejected": 1.8240021467208862, + "step": 15888 + }, + { + "epoch": 0.92, + "learning_rate": 1.4820235463865627e-09, + "logits/chosen": -1.8675535917282104, + "logits/rejected": -1.9037888050079346, + "logps/chosen": -197.03488159179688, + "logps/rejected": -432.7984924316406, + "loss": 0.1446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.043060302734375, + "rewards/margins": 4.8213348388671875, + "rewards/rejected": -4.7782745361328125, + "step": 15889 + }, + { + "epoch": 0.92, + "learning_rate": 1.4797469433932063e-09, + "logits/chosen": -1.8939317464828491, + "logits/rejected": -1.8988778591156006, + "logps/chosen": -42.64387893676758, + "logps/rejected": -198.58934020996094, + "loss": 0.1808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9021900296211243, + "rewards/margins": 2.0020527839660645, + "rewards/rejected": -1.0998626947402954, + "step": 15890 + }, + { + "epoch": 0.92, + "learning_rate": 1.477472064076568e-09, + "logits/chosen": -1.830635666847229, + "logits/rejected": -1.8531039953231812, + "logps/chosen": -215.1374969482422, + "logps/rejected": -422.45733642578125, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.22794508934021, + "rewards/margins": 2.5302460193634033, + "rewards/rejected": -0.3023010194301605, + "step": 15891 + }, + { + "epoch": 0.92, + "learning_rate": 1.4751989085174776e-09, + "logits/chosen": -1.8265883922576904, + "logits/rejected": -1.8290927410125732, + "logps/chosen": -247.68832397460938, + "logps/rejected": -384.1888427734375, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.38157057762146, + "rewards/margins": 3.7271180152893066, + "rewards/rejected": -1.3455475568771362, + "step": 15892 + }, + { + "epoch": 0.92, + "learning_rate": 1.4729274767966704e-09, + "logits/chosen": -2.0546185970306396, + "logits/rejected": -2.056701898574829, + "logps/chosen": -9.047749519348145, + "logps/rejected": -177.00827026367188, + "loss": 0.4492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2182828038930893, + "rewards/margins": 1.0172741413116455, + "rewards/rejected": -0.7989913821220398, + "step": 15893 + }, + { + "epoch": 0.92, + "learning_rate": 1.470657768994865e-09, + "logits/chosen": -1.764391303062439, + "logits/rejected": -1.7966405153274536, + "logps/chosen": -197.6673583984375, + "logps/rejected": -434.3134765625, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8350143432617188, + "rewards/margins": 5.6954450607299805, + "rewards/rejected": -2.860430955886841, + "step": 15894 + }, + { + "epoch": 0.92, + "learning_rate": 1.4683897851926697e-09, + "logits/chosen": -1.8227834701538086, + "logits/rejected": -1.8007081747055054, + "logps/chosen": -65.34647369384766, + "logps/rejected": -228.582763671875, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28697434067726135, + "rewards/margins": 1.5293296575546265, + "rewards/rejected": -1.2423553466796875, + "step": 15895 + }, + { + "epoch": 0.93, + "learning_rate": 1.4661235254706639e-09, + "logits/chosen": -1.7645219564437866, + "logits/rejected": -1.7392627000808716, + "logps/chosen": -203.30154418945312, + "logps/rejected": -332.8946533203125, + "loss": 0.1045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2526490688323975, + "rewards/margins": 1.8934082984924316, + "rewards/rejected": 0.35924074053764343, + "step": 15896 + }, + { + "epoch": 0.93, + "learning_rate": 1.4638589899093557e-09, + "logits/chosen": -1.6964088678359985, + "logits/rejected": -1.6897691488265991, + "logps/chosen": -1.0420517921447754, + "logps/rejected": -394.0062561035156, + "loss": 0.2524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24445314705371857, + "rewards/margins": 7.124964714050293, + "rewards/rejected": -6.88051176071167, + "step": 15897 + }, + { + "epoch": 0.93, + "learning_rate": 1.4615961785891973e-09, + "logits/chosen": -2.049280881881714, + "logits/rejected": -2.038451671600342, + "logps/chosen": -43.981773376464844, + "logps/rejected": -281.949951171875, + "loss": 0.2595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2859199643135071, + "rewards/margins": 4.51276969909668, + "rewards/rejected": -4.226849555969238, + "step": 15898 + }, + { + "epoch": 0.93, + "learning_rate": 1.4593350915905634e-09, + "logits/chosen": -1.9974690675735474, + "logits/rejected": -1.9617685079574585, + "logps/chosen": -131.28843688964844, + "logps/rejected": -285.4117126464844, + "loss": 0.2279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1353806257247925, + "rewards/margins": 1.4609239101409912, + "rewards/rejected": -0.32554322481155396, + "step": 15899 + }, + { + "epoch": 0.93, + "learning_rate": 1.4570757289937841e-09, + "logits/chosen": -1.782741904258728, + "logits/rejected": -1.7799103260040283, + "logps/chosen": -149.22198486328125, + "logps/rejected": -266.79327392578125, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.781420946121216, + "rewards/margins": 3.7621796131134033, + "rewards/rejected": -0.9807586669921875, + "step": 15900 + }, + { + "epoch": 0.93, + "learning_rate": 1.4548180908791285e-09, + "logits/chosen": -1.9204320907592773, + "logits/rejected": -1.922895908355713, + "logps/chosen": -67.78514099121094, + "logps/rejected": -211.8638458251953, + "loss": 0.2581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00929870642721653, + "rewards/margins": 5.566296577453613, + "rewards/rejected": -5.575595378875732, + "step": 15901 + }, + { + "epoch": 0.93, + "learning_rate": 1.4525621773267938e-09, + "logits/chosen": -2.011270046234131, + "logits/rejected": -1.996842861175537, + "logps/chosen": -76.64411926269531, + "logps/rejected": -283.53863525390625, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.472041368484497, + "rewards/margins": 3.668715000152588, + "rewards/rejected": -2.196673631668091, + "step": 15902 + }, + { + "epoch": 0.93, + "learning_rate": 1.4503079884169211e-09, + "logits/chosen": -1.8967602252960205, + "logits/rejected": -1.8792566061019897, + "logps/chosen": -0.00026915629860013723, + "logps/rejected": -235.11021423339844, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4971275049902033e-05, + "rewards/margins": 3.320457696914673, + "rewards/rejected": -3.3204727172851562, + "step": 15903 + }, + { + "epoch": 0.93, + "learning_rate": 1.4480555242295912e-09, + "logits/chosen": -1.8744221925735474, + "logits/rejected": -1.8569879531860352, + "logps/chosen": -112.67481231689453, + "logps/rejected": -302.80120849609375, + "loss": 0.0535, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8845008611679077, + "rewards/margins": 3.388718366622925, + "rewards/rejected": -1.504217505455017, + "step": 15904 + }, + { + "epoch": 0.93, + "learning_rate": 1.4458047848448174e-09, + "logits/chosen": -1.8512476682662964, + "logits/rejected": -1.8489972352981567, + "logps/chosen": -3.071000099182129, + "logps/rejected": -142.18951416015625, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04855921491980553, + "rewards/margins": 2.2181308269500732, + "rewards/rejected": -2.2666900157928467, + "step": 15905 + }, + { + "epoch": 0.93, + "learning_rate": 1.4435557703425693e-09, + "logits/chosen": -1.9006651639938354, + "logits/rejected": -1.894463300704956, + "logps/chosen": -15.166348457336426, + "logps/rejected": -73.7762451171875, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03767042234539986, + "rewards/margins": 0.3626338243484497, + "rewards/rejected": -0.32496339082717896, + "step": 15906 + }, + { + "epoch": 0.93, + "learning_rate": 1.4413084808027332e-09, + "logits/chosen": -2.0272083282470703, + "logits/rejected": -2.0063962936401367, + "logps/chosen": -143.9342041015625, + "logps/rejected": -223.47288513183594, + "loss": 0.0453, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4110610485076904, + "rewards/margins": 3.885733127593994, + "rewards/rejected": -1.4746719598770142, + "step": 15907 + }, + { + "epoch": 0.93, + "learning_rate": 1.4390629163051393e-09, + "logits/chosen": -1.8409491777420044, + "logits/rejected": -1.843366265296936, + "logps/chosen": -7.1879191398620605, + "logps/rejected": -158.8038330078125, + "loss": 0.316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14884887635707855, + "rewards/margins": 2.919541358947754, + "rewards/rejected": -2.7706925868988037, + "step": 15908 + }, + { + "epoch": 0.93, + "learning_rate": 1.4368190769295796e-09, + "logits/chosen": -1.9505523443222046, + "logits/rejected": -1.94407057762146, + "logps/chosen": -241.645263671875, + "logps/rejected": -564.626708984375, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.906268358230591, + "rewards/margins": 7.019085884094238, + "rewards/rejected": -4.112817287445068, + "step": 15909 + }, + { + "epoch": 0.93, + "learning_rate": 1.4345769627557457e-09, + "logits/chosen": -1.8070780038833618, + "logits/rejected": -1.8063633441925049, + "logps/chosen": -61.468292236328125, + "logps/rejected": -221.7379150390625, + "loss": 0.5421, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.32417067885398865, + "rewards/margins": -0.04542618989944458, + "rewards/rejected": 0.3695968687534332, + "step": 15910 + }, + { + "epoch": 0.93, + "learning_rate": 1.432336573863302e-09, + "logits/chosen": -1.8148452043533325, + "logits/rejected": -1.829573154449463, + "logps/chosen": -17.199132919311523, + "logps/rejected": -200.81787109375, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1238798126578331, + "rewards/margins": 2.5124754905700684, + "rewards/rejected": -2.3885955810546875, + "step": 15911 + }, + { + "epoch": 0.93, + "learning_rate": 1.430097910331829e-09, + "logits/chosen": -1.7993394136428833, + "logits/rejected": -1.7965275049209595, + "logps/chosen": -0.11507363617420197, + "logps/rejected": -250.95303344726562, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004161167424172163, + "rewards/margins": 6.336591720581055, + "rewards/rejected": -6.340753078460693, + "step": 15912 + }, + { + "epoch": 0.93, + "learning_rate": 1.427860972240863e-09, + "logits/chosen": -1.9243195056915283, + "logits/rejected": -1.9246035814285278, + "logps/chosen": -109.58489990234375, + "logps/rejected": -187.8665771484375, + "loss": 0.2276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5027000308036804, + "rewards/margins": 3.0793817043304443, + "rewards/rejected": -2.576681613922119, + "step": 15913 + }, + { + "epoch": 0.93, + "learning_rate": 1.4256257596698684e-09, + "logits/chosen": -1.8880988359451294, + "logits/rejected": -1.8891727924346924, + "logps/chosen": -0.00037127823452465236, + "logps/rejected": -124.71080017089844, + "loss": 0.4034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.954345634658239e-06, + "rewards/margins": 1.8121410608291626, + "rewards/rejected": -1.8121429681777954, + "step": 15914 + }, + { + "epoch": 0.93, + "learning_rate": 1.4233922726982429e-09, + "logits/chosen": -1.9792274236679077, + "logits/rejected": -1.9423867464065552, + "logps/chosen": -144.9063262939453, + "logps/rejected": -602.0488891601562, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1692383289337158, + "rewards/margins": 6.560376167297363, + "rewards/rejected": -5.391137599945068, + "step": 15915 + }, + { + "epoch": 0.93, + "learning_rate": 1.4211605114053448e-09, + "logits/chosen": -2.0130677223205566, + "logits/rejected": -2.0051794052124023, + "logps/chosen": -34.36471176147461, + "logps/rejected": -148.50982666015625, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.459450125694275, + "rewards/margins": 0.5950610637664795, + "rewards/rejected": 0.8643890619277954, + "step": 15916 + }, + { + "epoch": 0.93, + "learning_rate": 1.4189304758704501e-09, + "logits/chosen": -1.9506672620773315, + "logits/rejected": -1.947159767150879, + "logps/chosen": -37.11408996582031, + "logps/rejected": -168.59487915039062, + "loss": 0.3483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.580828845500946, + "rewards/margins": 0.7708083987236023, + "rewards/rejected": -0.18997955322265625, + "step": 15917 + }, + { + "epoch": 0.93, + "learning_rate": 1.4167021661727784e-09, + "logits/chosen": -1.821796178817749, + "logits/rejected": -1.8279937505722046, + "logps/chosen": -296.8380126953125, + "logps/rejected": -369.54412841796875, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6603150367736816, + "rewards/margins": 3.3848085403442383, + "rewards/rejected": -0.7244934439659119, + "step": 15918 + }, + { + "epoch": 0.93, + "learning_rate": 1.414475582391489e-09, + "logits/chosen": -2.081151247024536, + "logits/rejected": -2.0522964000701904, + "logps/chosen": -164.95469665527344, + "logps/rejected": -302.0111083984375, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0707292556762695, + "rewards/margins": 2.199598789215088, + "rewards/rejected": 1.871130347251892, + "step": 15919 + }, + { + "epoch": 0.93, + "learning_rate": 1.4122507246056846e-09, + "logits/chosen": -2.012909412384033, + "logits/rejected": -2.0032079219818115, + "logps/chosen": -27.368751525878906, + "logps/rejected": -193.46766662597656, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12600098550319672, + "rewards/margins": 2.4883437156677246, + "rewards/rejected": -2.3623428344726562, + "step": 15920 + }, + { + "epoch": 0.93, + "learning_rate": 1.410027592894414e-09, + "logits/chosen": -1.8791773319244385, + "logits/rejected": -1.8500696420669556, + "logps/chosen": -245.97608947753906, + "logps/rejected": -304.55029296875, + "loss": 0.4969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6038131713867188, + "rewards/margins": 0.12562713027000427, + "rewards/rejected": 0.4781860411167145, + "step": 15921 + }, + { + "epoch": 0.93, + "learning_rate": 1.40780618733663e-09, + "logits/chosen": -1.9737330675125122, + "logits/rejected": -1.9802764654159546, + "logps/chosen": -10.451303482055664, + "logps/rejected": -132.26683044433594, + "loss": 0.2313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47101545333862305, + "rewards/margins": 2.965566635131836, + "rewards/rejected": -2.494551181793213, + "step": 15922 + }, + { + "epoch": 0.93, + "learning_rate": 1.405586508011264e-09, + "logits/chosen": -2.1108901500701904, + "logits/rejected": -2.111495018005371, + "logps/chosen": -3.9696395106147975e-05, + "logps/rejected": -173.6842803955078, + "loss": 0.4214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0728748520705267e-06, + "rewards/margins": 1.8696156740188599, + "rewards/rejected": -1.8696167469024658, + "step": 15923 + }, + { + "epoch": 0.93, + "learning_rate": 1.4033685549971642e-09, + "logits/chosen": -1.8906886577606201, + "logits/rejected": -1.8927124738693237, + "logps/chosen": -222.59786987304688, + "logps/rejected": -364.2298889160156, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.498455762863159, + "rewards/margins": 3.4437713623046875, + "rewards/rejected": 0.05468444898724556, + "step": 15924 + }, + { + "epoch": 0.93, + "learning_rate": 1.4011523283731286e-09, + "logits/chosen": -2.1195759773254395, + "logits/rejected": -2.106167793273926, + "logps/chosen": -0.010874936357140541, + "logps/rejected": -161.1458282470703, + "loss": 0.5449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025561332586221397, + "rewards/margins": 0.7152015566825867, + "rewards/rejected": -0.7154571413993835, + "step": 15925 + }, + { + "epoch": 0.93, + "learning_rate": 1.3989378282178888e-09, + "logits/chosen": -1.7579196691513062, + "logits/rejected": -1.7374513149261475, + "logps/chosen": -7.378811836242676, + "logps/rejected": -100.72637939453125, + "loss": 0.466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07237472385168076, + "rewards/margins": 1.2534356117248535, + "rewards/rejected": -1.3258103132247925, + "step": 15926 + }, + { + "epoch": 0.93, + "learning_rate": 1.3967250546100983e-09, + "logits/chosen": -1.981906771659851, + "logits/rejected": -1.9804275035858154, + "logps/chosen": -8.341022491455078, + "logps/rejected": -47.840782165527344, + "loss": 0.4028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4004115164279938, + "rewards/margins": 1.0572596788406372, + "rewards/rejected": -0.656848132610321, + "step": 15927 + }, + { + "epoch": 0.93, + "learning_rate": 1.394514007628389e-09, + "logits/chosen": -1.750903844833374, + "logits/rejected": -1.793084979057312, + "logps/chosen": -240.2066650390625, + "logps/rejected": -312.81060791015625, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9003418684005737, + "rewards/margins": 4.996664524078369, + "rewards/rejected": -3.096322774887085, + "step": 15928 + }, + { + "epoch": 0.93, + "learning_rate": 1.3923046873512978e-09, + "logits/chosen": -1.843559980392456, + "logits/rejected": -1.7931327819824219, + "logps/chosen": -207.83346557617188, + "logps/rejected": -527.3411865234375, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3117737770080566, + "rewards/margins": 3.9790468215942383, + "rewards/rejected": -1.667272925376892, + "step": 15929 + }, + { + "epoch": 0.93, + "learning_rate": 1.3900970938573064e-09, + "logits/chosen": -1.964105486869812, + "logits/rejected": -1.9924046993255615, + "logps/chosen": -41.210506439208984, + "logps/rejected": -261.8838806152344, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017156600952148438, + "rewards/margins": 5.264986038208008, + "rewards/rejected": -5.282142639160156, + "step": 15930 + }, + { + "epoch": 0.93, + "learning_rate": 1.387891227224841e-09, + "logits/chosen": -1.8297337293624878, + "logits/rejected": -1.830723762512207, + "logps/chosen": -141.17835998535156, + "logps/rejected": -265.2918701171875, + "loss": 0.0679, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3439667224884033, + "rewards/margins": 2.348489284515381, + "rewards/rejected": -1.004522681236267, + "step": 15931 + }, + { + "epoch": 0.93, + "learning_rate": 1.3856870875322723e-09, + "logits/chosen": -1.7760205268859863, + "logits/rejected": -1.7673699855804443, + "logps/chosen": -0.06520216912031174, + "logps/rejected": -52.640907287597656, + "loss": 0.7087, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.12866757810115814, + "rewards/margins": -0.18038813769817352, + "rewards/rejected": 0.30905571579933167, + "step": 15932 + }, + { + "epoch": 0.93, + "learning_rate": 1.3834846748578876e-09, + "logits/chosen": -1.7798840999603271, + "logits/rejected": -1.7784501314163208, + "logps/chosen": -0.00011920404358534142, + "logps/rejected": -274.25799560546875, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.921682491636602e-06, + "rewards/margins": 6.742446422576904, + "rewards/rejected": -6.74245023727417, + "step": 15933 + }, + { + "epoch": 0.93, + "learning_rate": 1.3812839892799465e-09, + "logits/chosen": -1.9753475189208984, + "logits/rejected": -1.9698033332824707, + "logps/chosen": -204.90980529785156, + "logps/rejected": -256.2483825683594, + "loss": 0.3469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12101592868566513, + "rewards/margins": 1.0971558094024658, + "rewards/rejected": -0.9761398434638977, + "step": 15934 + }, + { + "epoch": 0.93, + "learning_rate": 1.3790850308766088e-09, + "logits/chosen": -1.7802480459213257, + "logits/rejected": -1.824952244758606, + "logps/chosen": -373.64453125, + "logps/rejected": -232.20199584960938, + "loss": 0.7422, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.22647705674171448, + "rewards/margins": -0.9208739995956421, + "rewards/rejected": 0.69439697265625, + "step": 15935 + }, + { + "epoch": 0.93, + "learning_rate": 1.376887799726012e-09, + "logits/chosen": -1.9570671319961548, + "logits/rejected": -1.9512561559677124, + "logps/chosen": -0.00027307873824611306, + "logps/rejected": -143.59146118164062, + "loss": 0.3545, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.211041828559246e-05, + "rewards/margins": 3.392444610595703, + "rewards/rejected": -3.3924667835235596, + "step": 15936 + }, + { + "epoch": 0.93, + "learning_rate": 1.3746922959061934e-09, + "logits/chosen": -1.9856294393539429, + "logits/rejected": -1.9806581735610962, + "logps/chosen": -27.604793548583984, + "logps/rejected": -147.400146484375, + "loss": 0.4281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28424912691116333, + "rewards/margins": 1.1219154596328735, + "rewards/rejected": -0.8376663327217102, + "step": 15937 + }, + { + "epoch": 0.93, + "learning_rate": 1.3724985194951578e-09, + "logits/chosen": -1.8656678199768066, + "logits/rejected": -1.8679721355438232, + "logps/chosen": -190.5194549560547, + "logps/rejected": -355.0472412109375, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.276750326156616, + "rewards/margins": 6.075166702270508, + "rewards/rejected": -2.7984161376953125, + "step": 15938 + }, + { + "epoch": 0.93, + "learning_rate": 1.3703064705708367e-09, + "logits/chosen": -2.07952618598938, + "logits/rejected": -2.1240317821502686, + "logps/chosen": -287.19140625, + "logps/rejected": -332.3880310058594, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4638917446136475, + "rewards/margins": 2.6990602016448975, + "rewards/rejected": -0.23516845703125, + "step": 15939 + }, + { + "epoch": 0.93, + "learning_rate": 1.3681161492111071e-09, + "logits/chosen": -2.0007872581481934, + "logits/rejected": -2.0047388076782227, + "logps/chosen": -0.00875072181224823, + "logps/rejected": -126.04440307617188, + "loss": 0.4521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006095808348618448, + "rewards/margins": 1.2156463861465454, + "rewards/rejected": -1.2150367498397827, + "step": 15940 + }, + { + "epoch": 0.93, + "learning_rate": 1.3659275554937733e-09, + "logits/chosen": -1.8943461179733276, + "logits/rejected": -1.873792290687561, + "logps/chosen": -155.56907653808594, + "logps/rejected": -243.00180053710938, + "loss": 0.3804, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.735005259513855, + "rewards/margins": -0.02472066879272461, + "rewards/rejected": 1.7597259283065796, + "step": 15941 + }, + { + "epoch": 0.93, + "learning_rate": 1.3637406894965897e-09, + "logits/chosen": -1.8980913162231445, + "logits/rejected": -1.9005886316299438, + "logps/chosen": -0.00034842564491555095, + "logps/rejected": -241.03976440429688, + "loss": 0.3223, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.99889892025385e-06, + "rewards/margins": 5.629112243652344, + "rewards/rejected": -5.62912130355835, + "step": 15942 + }, + { + "epoch": 0.93, + "learning_rate": 1.3615555512972388e-09, + "logits/chosen": -1.9310685396194458, + "logits/rejected": -1.9276565313339233, + "logps/chosen": -0.00041801808401942253, + "logps/rejected": -137.0320281982422, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.376212458126247e-05, + "rewards/margins": 3.4458725452423096, + "rewards/rejected": -3.4458587169647217, + "step": 15943 + }, + { + "epoch": 0.93, + "learning_rate": 1.3593721409733584e-09, + "logits/chosen": -1.9311563968658447, + "logits/rejected": -1.936287522315979, + "logps/chosen": -56.35134506225586, + "logps/rejected": -100.45555114746094, + "loss": 0.6137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3588848114013672, + "rewards/margins": 0.3613773584365845, + "rewards/rejected": -0.7202621698379517, + "step": 15944 + }, + { + "epoch": 0.93, + "learning_rate": 1.3571904586025029e-09, + "logits/chosen": -1.9604400396347046, + "logits/rejected": -1.9462568759918213, + "logps/chosen": -135.61968994140625, + "logps/rejected": -211.063232421875, + "loss": 0.2329, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7012542486190796, + "rewards/margins": 1.0325530767440796, + "rewards/rejected": 0.668701171875, + "step": 15945 + }, + { + "epoch": 0.93, + "learning_rate": 1.3550105042621774e-09, + "logits/chosen": -1.8215559720993042, + "logits/rejected": -1.8181203603744507, + "logps/chosen": -67.24922180175781, + "logps/rejected": -205.14083862304688, + "loss": 0.5903, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.14882813394069672, + "rewards/margins": -0.12589110434055328, + "rewards/rejected": 0.27471923828125, + "step": 15946 + }, + { + "epoch": 0.93, + "learning_rate": 1.3528322780298251e-09, + "logits/chosen": -2.0416018962860107, + "logits/rejected": -2.0670857429504395, + "logps/chosen": -24.371639251708984, + "logps/rejected": -165.81687927246094, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29294607043266296, + "rewards/margins": 3.418759346008301, + "rewards/rejected": -3.1258132457733154, + "step": 15947 + }, + { + "epoch": 0.93, + "learning_rate": 1.3506557799828343e-09, + "logits/chosen": -1.8119916915893555, + "logits/rejected": -1.8079884052276611, + "logps/chosen": -58.29988098144531, + "logps/rejected": -204.51446533203125, + "loss": 0.3276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.003710150718689, + "rewards/margins": 0.8856223821640015, + "rewards/rejected": 0.1180877685546875, + "step": 15948 + }, + { + "epoch": 0.93, + "learning_rate": 1.3484810101985211e-09, + "logits/chosen": -1.6659934520721436, + "logits/rejected": -1.6802127361297607, + "logps/chosen": -11.138009071350098, + "logps/rejected": -225.28469848632812, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061483096331357956, + "rewards/margins": 2.6205224990844727, + "rewards/rejected": -2.559039354324341, + "step": 15949 + }, + { + "epoch": 0.93, + "learning_rate": 1.3463079687541346e-09, + "logits/chosen": -1.9502451419830322, + "logits/rejected": -1.9475396871566772, + "logps/chosen": -193.525146484375, + "logps/rejected": -439.6300964355469, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3102387189865112, + "rewards/margins": 2.69635009765625, + "rewards/rejected": -1.3861114978790283, + "step": 15950 + }, + { + "epoch": 0.93, + "learning_rate": 1.3441366557268907e-09, + "logits/chosen": -1.7802790403366089, + "logits/rejected": -1.7357677221298218, + "logps/chosen": -262.93463134765625, + "logps/rejected": -426.35662841796875, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1037964820861816, + "rewards/margins": 3.0101380348205566, + "rewards/rejected": 0.093658447265625, + "step": 15951 + }, + { + "epoch": 0.93, + "learning_rate": 1.3419670711939058e-09, + "logits/chosen": -1.9024122953414917, + "logits/rejected": -1.8985507488250732, + "logps/chosen": -6.453128814697266, + "logps/rejected": -224.88369750976562, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21704129874706268, + "rewards/margins": 2.6730685234069824, + "rewards/rejected": -2.4560272693634033, + "step": 15952 + }, + { + "epoch": 0.93, + "learning_rate": 1.3397992152322735e-09, + "logits/chosen": -1.934213638305664, + "logits/rejected": -1.9261656999588013, + "logps/chosen": -45.885765075683594, + "logps/rejected": -228.93218994140625, + "loss": 0.1295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9043899774551392, + "rewards/margins": 3.33974027633667, + "rewards/rejected": -2.435350179672241, + "step": 15953 + }, + { + "epoch": 0.93, + "learning_rate": 1.3376330879189823e-09, + "logits/chosen": -1.8455851078033447, + "logits/rejected": -1.8355263471603394, + "logps/chosen": -16.416126251220703, + "logps/rejected": -252.00827026367188, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2588268220424652, + "rewards/margins": 4.595313549041748, + "rewards/rejected": -4.33648681640625, + "step": 15954 + }, + { + "epoch": 0.93, + "learning_rate": 1.3354686893310041e-09, + "logits/chosen": -1.834280252456665, + "logits/rejected": -1.8295694589614868, + "logps/chosen": -54.11344909667969, + "logps/rejected": -273.5212097167969, + "loss": 0.094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.277034044265747, + "rewards/margins": 3.0918960571289062, + "rewards/rejected": -1.8148621320724487, + "step": 15955 + }, + { + "epoch": 0.93, + "learning_rate": 1.3333060195452218e-09, + "logits/chosen": -1.8901997804641724, + "logits/rejected": -1.9063715934753418, + "logps/chosen": -201.85389709472656, + "logps/rejected": -466.98419189453125, + "loss": 0.1128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1452072858810425, + "rewards/margins": 2.1363143920898438, + "rewards/rejected": -0.991107165813446, + "step": 15956 + }, + { + "epoch": 0.93, + "learning_rate": 1.3311450786384627e-09, + "logits/chosen": -1.6333520412445068, + "logits/rejected": -1.6322746276855469, + "logps/chosen": -0.8284857869148254, + "logps/rejected": -193.84869384765625, + "loss": 0.4162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06262960284948349, + "rewards/margins": 2.18621826171875, + "rewards/rejected": -2.2488479614257812, + "step": 15957 + }, + { + "epoch": 0.93, + "learning_rate": 1.3289858666874988e-09, + "logits/chosen": -1.901868462562561, + "logits/rejected": -1.8837004899978638, + "logps/chosen": -323.1592712402344, + "logps/rejected": -417.66015625, + "loss": 0.4455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2788543701171875, + "rewards/margins": 0.456411749124527, + "rewards/rejected": -0.17755737900733948, + "step": 15958 + }, + { + "epoch": 0.93, + "learning_rate": 1.3268283837690297e-09, + "logits/chosen": -1.8735334873199463, + "logits/rejected": -1.8863351345062256, + "logps/chosen": -159.78579711914062, + "logps/rejected": -231.6119384765625, + "loss": 0.1612, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.20947265625, + "rewards/margins": 1.4237334728240967, + "rewards/rejected": -0.21426086127758026, + "step": 15959 + }, + { + "epoch": 0.93, + "learning_rate": 1.3246726299597056e-09, + "logits/chosen": -2.016266107559204, + "logits/rejected": -2.0085041522979736, + "logps/chosen": -12.406394004821777, + "logps/rejected": -168.5562744140625, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12446384876966476, + "rewards/margins": 1.4182394742965698, + "rewards/rejected": -1.5427032709121704, + "step": 15960 + }, + { + "epoch": 0.93, + "learning_rate": 1.3225186053361037e-09, + "logits/chosen": -1.8164079189300537, + "logits/rejected": -1.818389892578125, + "logps/chosen": -10.51154613494873, + "logps/rejected": -186.80181884765625, + "loss": 0.5026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07882509380578995, + "rewards/margins": 0.7318860292434692, + "rewards/rejected": -0.6530609130859375, + "step": 15961 + }, + { + "epoch": 0.93, + "learning_rate": 1.3203663099747463e-09, + "logits/chosen": -1.737547755241394, + "logits/rejected": -1.7448066473007202, + "logps/chosen": -170.6426239013672, + "logps/rejected": -273.530029296875, + "loss": 0.1432, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.318524122238159, + "rewards/margins": 1.302923560142517, + "rewards/rejected": 1.015600562095642, + "step": 15962 + }, + { + "epoch": 0.93, + "learning_rate": 1.3182157439521003e-09, + "logits/chosen": -1.7991849184036255, + "logits/rejected": -1.7658441066741943, + "logps/chosen": -176.00271606445312, + "logps/rejected": -303.1429443359375, + "loss": 0.0751, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.28464674949646, + "rewards/margins": 2.6202149391174316, + "rewards/rejected": -0.33556824922561646, + "step": 15963 + }, + { + "epoch": 0.93, + "learning_rate": 1.316066907344554e-09, + "logits/chosen": -1.9974877834320068, + "logits/rejected": -1.9968063831329346, + "logps/chosen": -69.0293960571289, + "logps/rejected": -111.29570770263672, + "loss": 0.6971, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.15383301675319672, + "rewards/margins": -0.18595200777053833, + "rewards/rejected": 0.032118987292051315, + "step": 15964 + }, + { + "epoch": 0.93, + "learning_rate": 1.3139198002284525e-09, + "logits/chosen": -1.7453901767730713, + "logits/rejected": -1.7274147272109985, + "logps/chosen": -249.14952087402344, + "logps/rejected": -394.32989501953125, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1235549449920654, + "rewards/margins": 4.100019931793213, + "rewards/rejected": -1.976464867591858, + "step": 15965 + }, + { + "epoch": 0.93, + "learning_rate": 1.3117744226800676e-09, + "logits/chosen": -1.8324205875396729, + "logits/rejected": -1.8311909437179565, + "logps/chosen": -0.11079319566488266, + "logps/rejected": -49.630126953125, + "loss": 0.6386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003953543957322836, + "rewards/margins": 0.1829841583967209, + "rewards/rejected": -0.1790306121110916, + "step": 15966 + }, + { + "epoch": 0.93, + "learning_rate": 1.3096307747756163e-09, + "logits/chosen": -2.020608425140381, + "logits/rejected": -2.0194551944732666, + "logps/chosen": -17.526782989501953, + "logps/rejected": -61.29275894165039, + "loss": 1.0491, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.45431968569755554, + "rewards/margins": -0.8405413627624512, + "rewards/rejected": 0.386221706867218, + "step": 15967 + }, + { + "epoch": 0.93, + "learning_rate": 1.3074888565912545e-09, + "logits/chosen": -1.9592211246490479, + "logits/rejected": -1.9467979669570923, + "logps/chosen": -51.85197067260742, + "logps/rejected": -101.86927032470703, + "loss": 0.2773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9975833892822266, + "rewards/margins": 0.952688992023468, + "rewards/rejected": 0.04489440843462944, + "step": 15968 + }, + { + "epoch": 0.93, + "learning_rate": 1.30534866820306e-09, + "logits/chosen": -1.75701105594635, + "logits/rejected": -1.7336225509643555, + "logps/chosen": -285.19085693359375, + "logps/rejected": -339.67498779296875, + "loss": 0.2273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.429656982421875, + "rewards/margins": 0.6072784662246704, + "rewards/rejected": 1.8223785161972046, + "step": 15969 + }, + { + "epoch": 0.93, + "learning_rate": 1.3032102096870834e-09, + "logits/chosen": -1.8780466318130493, + "logits/rejected": -1.880987524986267, + "logps/chosen": -353.00213623046875, + "logps/rejected": -502.4561767578125, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.918774425983429, + "rewards/margins": 3.7424468994140625, + "rewards/rejected": -2.8236725330352783, + "step": 15970 + }, + { + "epoch": 0.93, + "learning_rate": 1.3010734811192748e-09, + "logits/chosen": -1.9052406549453735, + "logits/rejected": -1.9082499742507935, + "logps/chosen": -1.2408761978149414, + "logps/rejected": -68.05951690673828, + "loss": 0.6088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01830064132809639, + "rewards/margins": 0.13685911893844604, + "rewards/rejected": -0.15515975654125214, + "step": 15971 + }, + { + "epoch": 0.93, + "learning_rate": 1.2989384825755512e-09, + "logits/chosen": -1.7738703489303589, + "logits/rejected": -1.7496370077133179, + "logps/chosen": -324.0184326171875, + "logps/rejected": -417.415283203125, + "loss": 0.0966, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.540771484375, + "rewards/margins": 2.7712221145629883, + "rewards/rejected": -1.2304505109786987, + "step": 15972 + }, + { + "epoch": 0.93, + "learning_rate": 1.296805214131752e-09, + "logits/chosen": -2.036165714263916, + "logits/rejected": -2.0317680835723877, + "logps/chosen": -15.922951698303223, + "logps/rejected": -29.615581512451172, + "loss": 0.5923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22762231528759003, + "rewards/margins": 0.05017632246017456, + "rewards/rejected": 0.17744599282741547, + "step": 15973 + }, + { + "epoch": 0.93, + "learning_rate": 1.2946736758636667e-09, + "logits/chosen": -1.8350975513458252, + "logits/rejected": -1.8336511850357056, + "logps/chosen": -9.453149687033147e-05, + "logps/rejected": -262.13006591796875, + "loss": 0.3225, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8578826206503436e-05, + "rewards/margins": 8.185803413391113, + "rewards/rejected": -8.185765266418457, + "step": 15974 + }, + { + "epoch": 0.93, + "learning_rate": 1.292543867847018e-09, + "logits/chosen": -1.9734644889831543, + "logits/rejected": -2.018280506134033, + "logps/chosen": -211.13031005859375, + "logps/rejected": -398.7603454589844, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.114091634750366, + "rewards/margins": 7.379790306091309, + "rewards/rejected": -4.265698432922363, + "step": 15975 + }, + { + "epoch": 0.93, + "learning_rate": 1.2904157901574675e-09, + "logits/chosen": -2.0011959075927734, + "logits/rejected": -2.0021708011627197, + "logps/chosen": -14.681218147277832, + "logps/rejected": -142.75131225585938, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4774083197116852, + "rewards/margins": 1.8410741090774536, + "rewards/rejected": -1.3636658191680908, + "step": 15976 + }, + { + "epoch": 0.93, + "learning_rate": 1.2882894428705993e-09, + "logits/chosen": -2.0671753883361816, + "logits/rejected": -2.06233286857605, + "logps/chosen": -45.78803253173828, + "logps/rejected": -287.43951416015625, + "loss": 0.1227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1916706562042236, + "rewards/margins": 6.605788230895996, + "rewards/rejected": -5.414117336273193, + "step": 15977 + }, + { + "epoch": 0.93, + "learning_rate": 1.2861648260619751e-09, + "logits/chosen": -1.937990665435791, + "logits/rejected": -1.9144330024719238, + "logps/chosen": -227.66729736328125, + "logps/rejected": -368.27392578125, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0181000232696533, + "rewards/margins": 2.994650363922119, + "rewards/rejected": 0.02344970777630806, + "step": 15978 + }, + { + "epoch": 0.93, + "learning_rate": 1.2840419398070624e-09, + "logits/chosen": -2.1411826610565186, + "logits/rejected": -2.1477105617523193, + "logps/chosen": -71.73983764648438, + "logps/rejected": -305.53985595703125, + "loss": 0.1819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5547637939453125, + "rewards/margins": 5.916206359863281, + "rewards/rejected": -5.361442565917969, + "step": 15979 + }, + { + "epoch": 0.93, + "learning_rate": 1.2819207841812673e-09, + "logits/chosen": -1.8286755084991455, + "logits/rejected": -1.8189308643341064, + "logps/chosen": -88.26300048828125, + "logps/rejected": -238.12185668945312, + "loss": 0.2317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8767181634902954, + "rewards/margins": 1.5147491693496704, + "rewards/rejected": -0.638031005859375, + "step": 15980 + }, + { + "epoch": 0.93, + "learning_rate": 1.2798013592599576e-09, + "logits/chosen": -1.879461646080017, + "logits/rejected": -1.8986459970474243, + "logps/chosen": -194.52215576171875, + "logps/rejected": -235.3487548828125, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1764848232269287, + "rewards/margins": 1.7274034023284912, + "rewards/rejected": 0.4490814208984375, + "step": 15981 + }, + { + "epoch": 0.93, + "learning_rate": 1.277683665118423e-09, + "logits/chosen": -1.9626976251602173, + "logits/rejected": -1.9581705331802368, + "logps/chosen": -215.68270874023438, + "logps/rejected": -319.6297607421875, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.704211413860321, + "rewards/margins": 0.49368590116500854, + "rewards/rejected": 0.2105255126953125, + "step": 15982 + }, + { + "epoch": 0.93, + "learning_rate": 1.2755677018318867e-09, + "logits/chosen": -1.9376286268234253, + "logits/rejected": -1.9291880130767822, + "logps/chosen": -289.0467529296875, + "logps/rejected": -395.9716796875, + "loss": 0.1077, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.616571068763733, + "rewards/margins": 1.6669708490371704, + "rewards/rejected": -0.0503997802734375, + "step": 15983 + }, + { + "epoch": 0.93, + "learning_rate": 1.2734534694755217e-09, + "logits/chosen": -1.8452621698379517, + "logits/rejected": -1.8384572267532349, + "logps/chosen": -59.81905746459961, + "logps/rejected": -334.7435607910156, + "loss": 0.1939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6507587432861328, + "rewards/margins": 3.688878297805786, + "rewards/rejected": -3.0381195545196533, + "step": 15984 + }, + { + "epoch": 0.93, + "learning_rate": 1.2713409681244402e-09, + "logits/chosen": -1.8336992263793945, + "logits/rejected": -1.8315993547439575, + "logps/chosen": -31.172775268554688, + "logps/rejected": -271.9551086425781, + "loss": 0.2168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5225502252578735, + "rewards/margins": 2.488906145095825, + "rewards/rejected": -1.9663559198379517, + "step": 15985 + }, + { + "epoch": 0.93, + "learning_rate": 1.2692301978536824e-09, + "logits/chosen": -1.8806570768356323, + "logits/rejected": -1.8773847818374634, + "logps/chosen": -12.410036087036133, + "logps/rejected": -89.52983093261719, + "loss": 0.585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0233001708984375, + "rewards/margins": 0.35858154296875, + "rewards/rejected": -0.3818817138671875, + "step": 15986 + }, + { + "epoch": 0.93, + "learning_rate": 1.2671211587382436e-09, + "logits/chosen": -1.6445832252502441, + "logits/rejected": -1.6576454639434814, + "logps/chosen": -289.8025207519531, + "logps/rejected": -467.1555480957031, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0549895763397217, + "rewards/margins": 3.938305616378784, + "rewards/rejected": -0.8833160400390625, + "step": 15987 + }, + { + "epoch": 0.93, + "learning_rate": 1.2650138508530306e-09, + "logits/chosen": -1.9512290954589844, + "logits/rejected": -1.9476889371871948, + "logps/chosen": -2.841766595840454, + "logps/rejected": -158.55703735351562, + "loss": 0.3814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.046022605150938034, + "rewards/margins": 1.8282889127731323, + "rewards/rejected": -1.7822662591934204, + "step": 15988 + }, + { + "epoch": 0.93, + "learning_rate": 1.262908274272917e-09, + "logits/chosen": -1.9507246017456055, + "logits/rejected": -1.9571081399917603, + "logps/chosen": -16.145954132080078, + "logps/rejected": -261.0243835449219, + "loss": 0.384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0373445525765419, + "rewards/margins": 2.2044923305511475, + "rewards/rejected": -2.1671478748321533, + "step": 15989 + }, + { + "epoch": 0.93, + "learning_rate": 1.2608044290726982e-09, + "logits/chosen": -1.8839071989059448, + "logits/rejected": -1.873207449913025, + "logps/chosen": -152.56634521484375, + "logps/rejected": -295.0985107421875, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3096985816955566, + "rewards/margins": 3.1950836181640625, + "rewards/rejected": -0.8853851556777954, + "step": 15990 + }, + { + "epoch": 0.93, + "learning_rate": 1.2587023153271204e-09, + "logits/chosen": -1.7904503345489502, + "logits/rejected": -1.795587420463562, + "logps/chosen": -9.434819221496582, + "logps/rejected": -205.19439697265625, + "loss": 0.3834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15290489792823792, + "rewards/margins": 1.3946011066436768, + "rewards/rejected": -1.2416962385177612, + "step": 15991 + }, + { + "epoch": 0.93, + "learning_rate": 1.2566019331108456e-09, + "logits/chosen": -1.9291495084762573, + "logits/rejected": -1.8452956676483154, + "logps/chosen": -314.8453369140625, + "logps/rejected": -513.9482421875, + "loss": 0.2553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7040557861328125, + "rewards/margins": 1.5180878639221191, + "rewards/rejected": -0.8140320181846619, + "step": 15992 + }, + { + "epoch": 0.93, + "learning_rate": 1.254503282498509e-09, + "logits/chosen": -2.002488851547241, + "logits/rejected": -1.9979325532913208, + "logps/chosen": -0.0003623595112003386, + "logps/rejected": -267.3210754394531, + "loss": 0.3667, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4660916981010814e-07, + "rewards/margins": 5.156013011932373, + "rewards/rejected": -5.156013488769531, + "step": 15993 + }, + { + "epoch": 0.93, + "learning_rate": 1.2524063635646564e-09, + "logits/chosen": -1.9953547716140747, + "logits/rejected": -1.9845528602600098, + "logps/chosen": -21.86549949645996, + "logps/rejected": -189.39776611328125, + "loss": 0.283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34276333451271057, + "rewards/margins": 1.763063669204712, + "rewards/rejected": -1.4203003644943237, + "step": 15994 + }, + { + "epoch": 0.93, + "learning_rate": 1.2503111763837782e-09, + "logits/chosen": -1.9718122482299805, + "logits/rejected": -1.9724798202514648, + "logps/chosen": -14.80479621887207, + "logps/rejected": -134.27273559570312, + "loss": 0.4682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06069936975836754, + "rewards/margins": 0.9342947602272034, + "rewards/rejected": -0.8735954165458679, + "step": 15995 + }, + { + "epoch": 0.93, + "learning_rate": 1.2482177210303035e-09, + "logits/chosen": -1.8735255002975464, + "logits/rejected": -1.8733110427856445, + "logps/chosen": -24.005659103393555, + "logps/rejected": -146.63587951660156, + "loss": 0.6196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23900394141674042, + "rewards/margins": 0.3514302968978882, + "rewards/rejected": -0.5904342532157898, + "step": 15996 + }, + { + "epoch": 0.93, + "learning_rate": 1.246125997578612e-09, + "logits/chosen": -1.8858990669250488, + "logits/rejected": -1.8853486776351929, + "logps/chosen": -126.266357421875, + "logps/rejected": -333.127197265625, + "loss": 0.1431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4613418579101562, + "rewards/margins": 1.5988296270370483, + "rewards/rejected": -0.13748779892921448, + "step": 15997 + }, + { + "epoch": 0.93, + "learning_rate": 1.2440360061029997e-09, + "logits/chosen": -1.943477749824524, + "logits/rejected": -1.9436683654785156, + "logps/chosen": -10.503305435180664, + "logps/rejected": -286.4581298828125, + "loss": 0.4115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14130201935768127, + "rewards/margins": 1.5486323833465576, + "rewards/rejected": -1.4073303937911987, + "step": 15998 + }, + { + "epoch": 0.93, + "learning_rate": 1.241947746677724e-09, + "logits/chosen": -1.662392020225525, + "logits/rejected": -1.6603742837905884, + "logps/chosen": -0.01864253170788288, + "logps/rejected": -228.2550048828125, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016198477242141962, + "rewards/margins": 4.491007328033447, + "rewards/rejected": -4.492627143859863, + "step": 15999 + }, + { + "epoch": 0.93, + "learning_rate": 1.2398612193769642e-09, + "logits/chosen": -1.8510288000106812, + "logits/rejected": -1.9421260356903076, + "logps/chosen": -236.20928955078125, + "logps/rejected": -334.04931640625, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9980896711349487, + "rewards/margins": 5.142340183258057, + "rewards/rejected": -3.1442506313323975, + "step": 16000 + }, + { + "epoch": 0.93, + "learning_rate": 1.23777642427485e-09, + "logits/chosen": -1.8697251081466675, + "logits/rejected": -1.8655760288238525, + "logps/chosen": -252.99560546875, + "logps/rejected": -417.1957092285156, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.796459913253784, + "rewards/margins": 5.325286865234375, + "rewards/rejected": -1.5288269519805908, + "step": 16001 + }, + { + "epoch": 0.93, + "learning_rate": 1.2356933614454334e-09, + "logits/chosen": -1.6262162923812866, + "logits/rejected": -1.554011344909668, + "logps/chosen": -166.91189575195312, + "logps/rejected": -395.2763671875, + "loss": 0.0684, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.299816846847534, + "rewards/margins": 2.914257764816284, + "rewards/rejected": -0.61444091796875, + "step": 16002 + }, + { + "epoch": 0.93, + "learning_rate": 1.2336120309627218e-09, + "logits/chosen": -1.9314483404159546, + "logits/rejected": -1.931039571762085, + "logps/chosen": -6.616532802581787, + "logps/rejected": -52.79113006591797, + "loss": 0.4229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23864321410655975, + "rewards/margins": 1.1670829057693481, + "rewards/rejected": -0.928439736366272, + "step": 16003 + }, + { + "epoch": 0.93, + "learning_rate": 1.2315324329006505e-09, + "logits/chosen": -1.9021100997924805, + "logits/rejected": -1.891662836074829, + "logps/chosen": -59.77475357055664, + "logps/rejected": -212.81964111328125, + "loss": 0.3527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2130718231201172, + "rewards/margins": 1.8308528661727905, + "rewards/rejected": -1.6177810430526733, + "step": 16004 + }, + { + "epoch": 0.93, + "learning_rate": 1.2294545673331102e-09, + "logits/chosen": -1.9343026876449585, + "logits/rejected": -1.9421919584274292, + "logps/chosen": -0.00013994595792610198, + "logps/rejected": -123.86782836914062, + "loss": 0.4637, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.675443842141249e-07, + "rewards/margins": 1.1971275806427002, + "rewards/rejected": -1.1971282958984375, + "step": 16005 + }, + { + "epoch": 0.93, + "learning_rate": 1.2273784343338978e-09, + "logits/chosen": -1.6842867136001587, + "logits/rejected": -1.6573879718780518, + "logps/chosen": -99.29523468017578, + "logps/rejected": -457.3155822753906, + "loss": 0.1315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.915539562702179, + "rewards/margins": 8.5458345413208, + "rewards/rejected": -7.6302947998046875, + "step": 16006 + }, + { + "epoch": 0.93, + "learning_rate": 1.2253040339767761e-09, + "logits/chosen": -2.0318007469177246, + "logits/rejected": -2.0131330490112305, + "logps/chosen": -0.00014328483666758984, + "logps/rejected": -284.1368408203125, + "loss": 0.3472, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.011812835524324e-06, + "rewards/margins": 4.9016265869140625, + "rewards/rejected": -4.901635646820068, + "step": 16007 + }, + { + "epoch": 0.93, + "learning_rate": 1.2232313663354365e-09, + "logits/chosen": -1.9356645345687866, + "logits/rejected": -1.9257783889770508, + "logps/chosen": -4.337915420532227, + "logps/rejected": -83.04386901855469, + "loss": 0.5583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24668613076210022, + "rewards/margins": 0.21862751245498657, + "rewards/rejected": 0.028058623895049095, + "step": 16008 + }, + { + "epoch": 0.93, + "learning_rate": 1.2211604314835145e-09, + "logits/chosen": -2.0466911792755127, + "logits/rejected": -2.061505079269409, + "logps/chosen": -159.14016723632812, + "logps/rejected": -267.0913391113281, + "loss": 0.0481, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.519662618637085, + "rewards/margins": 2.6016602516174316, + "rewards/rejected": -0.08199768513441086, + "step": 16009 + }, + { + "epoch": 0.93, + "learning_rate": 1.2190912294945789e-09, + "logits/chosen": -2.028219699859619, + "logits/rejected": -2.0334339141845703, + "logps/chosen": -0.1005387082695961, + "logps/rejected": -172.65850830078125, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3023143158934545e-06, + "rewards/margins": 3.5729236602783203, + "rewards/rejected": -3.5729310512542725, + "step": 16010 + }, + { + "epoch": 0.93, + "learning_rate": 1.2170237604421264e-09, + "logits/chosen": -1.9520231485366821, + "logits/rejected": -1.9547327756881714, + "logps/chosen": -0.0022043678909540176, + "logps/rejected": -127.98471069335938, + "loss": 0.3885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0003198114281985909, + "rewards/margins": 2.424038887023926, + "rewards/rejected": -2.4237191677093506, + "step": 16011 + }, + { + "epoch": 0.93, + "learning_rate": 1.214958024399626e-09, + "logits/chosen": -1.9490898847579956, + "logits/rejected": -1.9523069858551025, + "logps/chosen": -105.63787841796875, + "logps/rejected": -308.5317687988281, + "loss": 0.1245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.277045488357544, + "rewards/margins": 2.6844887733459473, + "rewards/rejected": -1.4074432849884033, + "step": 16012 + }, + { + "epoch": 0.93, + "learning_rate": 1.2128940214404414e-09, + "logits/chosen": -1.902395486831665, + "logits/rejected": -1.8980377912521362, + "logps/chosen": -49.546417236328125, + "logps/rejected": -338.785888671875, + "loss": 0.1289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.110266089439392, + "rewards/margins": 3.779296875, + "rewards/rejected": -2.6690309047698975, + "step": 16013 + }, + { + "epoch": 0.93, + "learning_rate": 1.2108317516379142e-09, + "logits/chosen": -1.7840397357940674, + "logits/rejected": -1.7842450141906738, + "logps/chosen": -50.55558395385742, + "logps/rejected": -244.3152618408203, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4812271296977997, + "rewards/margins": 2.133108615875244, + "rewards/rejected": -1.651881456375122, + "step": 16014 + }, + { + "epoch": 0.93, + "learning_rate": 1.2087712150652851e-09, + "logits/chosen": -1.775090217590332, + "logits/rejected": -1.800499677658081, + "logps/chosen": -346.3857421875, + "logps/rejected": -519.3431396484375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.668713331222534, + "rewards/margins": 7.438043594360352, + "rewards/rejected": -4.769330024719238, + "step": 16015 + }, + { + "epoch": 0.93, + "learning_rate": 1.2067124117957738e-09, + "logits/chosen": -1.614733099937439, + "logits/rejected": -1.6005717515945435, + "logps/chosen": -179.50221252441406, + "logps/rejected": -331.8067321777344, + "loss": 0.1452, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2584900856018066, + "rewards/margins": 1.52189040184021, + "rewards/rejected": 0.7365997433662415, + "step": 16016 + }, + { + "epoch": 0.93, + "learning_rate": 1.2046553419025106e-09, + "logits/chosen": -1.995866298675537, + "logits/rejected": -1.995527744293213, + "logps/chosen": -258.650146484375, + "logps/rejected": -375.53167724609375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2179534435272217, + "rewards/margins": 7.755251884460449, + "rewards/rejected": -4.537298679351807, + "step": 16017 + }, + { + "epoch": 0.93, + "learning_rate": 1.2026000054585706e-09, + "logits/chosen": -1.5636931657791138, + "logits/rejected": -1.5597648620605469, + "logps/chosen": -197.11053466796875, + "logps/rejected": -180.90313720703125, + "loss": 0.2186, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2671921253204346, + "rewards/margins": 0.6731277704238892, + "rewards/rejected": 1.5940643548965454, + "step": 16018 + }, + { + "epoch": 0.93, + "learning_rate": 1.2005464025369727e-09, + "logits/chosen": -2.0739099979400635, + "logits/rejected": -2.056633949279785, + "logps/chosen": -18.343088150024414, + "logps/rejected": -208.66680908203125, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09414463490247726, + "rewards/margins": 5.782660961151123, + "rewards/rejected": -5.688516139984131, + "step": 16019 + }, + { + "epoch": 0.93, + "learning_rate": 1.1984945332106755e-09, + "logits/chosen": -1.7893595695495605, + "logits/rejected": -1.7440418004989624, + "logps/chosen": -137.03543090820312, + "logps/rejected": -241.56524658203125, + "loss": 0.1166, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.500727891921997, + "rewards/margins": 1.7309662103652954, + "rewards/rejected": 0.7697616815567017, + "step": 16020 + }, + { + "epoch": 0.93, + "learning_rate": 1.1964443975525595e-09, + "logits/chosen": -1.882150650024414, + "logits/rejected": -1.858172059059143, + "logps/chosen": -242.49404907226562, + "logps/rejected": -326.01470947265625, + "loss": 0.2098, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6823394298553467, + "rewards/margins": 0.6955199241638184, + "rewards/rejected": 1.9868195056915283, + "step": 16021 + }, + { + "epoch": 0.93, + "learning_rate": 1.1943959956354666e-09, + "logits/chosen": -2.0138585567474365, + "logits/rejected": -2.009706735610962, + "logps/chosen": -0.00034128205152228475, + "logps/rejected": -90.07382202148438, + "loss": 0.5453, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.843321585212834e-05, + "rewards/margins": 0.6867122650146484, + "rewards/rejected": -0.686663806438446, + "step": 16022 + }, + { + "epoch": 0.93, + "learning_rate": 1.1923493275321607e-09, + "logits/chosen": -1.9343791007995605, + "logits/rejected": -1.9316681623458862, + "logps/chosen": -112.60660552978516, + "logps/rejected": -366.55572509765625, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2830650508403778, + "rewards/margins": 2.469106435775757, + "rewards/rejected": -2.1860413551330566, + "step": 16023 + }, + { + "epoch": 0.93, + "learning_rate": 1.19030439331535e-09, + "logits/chosen": -2.0615527629852295, + "logits/rejected": -2.047891855239868, + "logps/chosen": -0.998996913433075, + "logps/rejected": -143.29193115234375, + "loss": 0.5722, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02365683950483799, + "rewards/margins": 0.23435325920581818, + "rewards/rejected": -0.21069641411304474, + "step": 16024 + }, + { + "epoch": 0.93, + "learning_rate": 1.1882611930576826e-09, + "logits/chosen": -1.481943130493164, + "logits/rejected": -1.4852514266967773, + "logps/chosen": -16.79001235961914, + "logps/rejected": -140.62818908691406, + "loss": 0.5222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.176310732960701, + "rewards/margins": 0.013630673289299011, + "rewards/rejected": 0.16268005967140198, + "step": 16025 + }, + { + "epoch": 0.93, + "learning_rate": 1.1862197268317387e-09, + "logits/chosen": -1.8879302740097046, + "logits/rejected": -1.8797130584716797, + "logps/chosen": -213.43234252929688, + "logps/rejected": -355.26025390625, + "loss": 0.2581, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7212876081466675, + "rewards/margins": 0.8114578723907471, + "rewards/rejected": 0.9098297357559204, + "step": 16026 + }, + { + "epoch": 0.93, + "learning_rate": 1.1841799947100494e-09, + "logits/chosen": -1.8111523389816284, + "logits/rejected": -1.8057382106781006, + "logps/chosen": -132.18544006347656, + "logps/rejected": -261.56524658203125, + "loss": 0.1593, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.346090793609619, + "rewards/margins": 1.0513306856155396, + "rewards/rejected": 1.2947601079940796, + "step": 16027 + }, + { + "epoch": 0.93, + "learning_rate": 1.1821419967650625e-09, + "logits/chosen": -1.7486358880996704, + "logits/rejected": -1.7458385229110718, + "logps/chosen": -31.76567840576172, + "logps/rejected": -223.44418334960938, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1897304505109787, + "rewards/margins": 7.752102375030518, + "rewards/rejected": -7.562371730804443, + "step": 16028 + }, + { + "epoch": 0.93, + "learning_rate": 1.1801057330691976e-09, + "logits/chosen": -1.9965370893478394, + "logits/rejected": -2.0035641193389893, + "logps/chosen": -15.735158920288086, + "logps/rejected": -141.4580078125, + "loss": 0.2146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5208637118339539, + "rewards/margins": 4.047794818878174, + "rewards/rejected": -3.526931047439575, + "step": 16029 + }, + { + "epoch": 0.93, + "learning_rate": 1.1780712036947693e-09, + "logits/chosen": -1.9173754453659058, + "logits/rejected": -1.909325122833252, + "logps/chosen": -264.08544921875, + "logps/rejected": -485.7697448730469, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.322650194168091, + "rewards/margins": 3.9065613746643066, + "rewards/rejected": -0.583911120891571, + "step": 16030 + }, + { + "epoch": 0.93, + "learning_rate": 1.1760384087140751e-09, + "logits/chosen": -2.0456745624542236, + "logits/rejected": -2.0407068729400635, + "logps/chosen": -0.013881475664675236, + "logps/rejected": -157.82911682128906, + "loss": 0.3485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038915639743208885, + "rewards/margins": 5.6389970779418945, + "rewards/rejected": -5.639386177062988, + "step": 16031 + }, + { + "epoch": 0.93, + "learning_rate": 1.174007348199313e-09, + "logits/chosen": -1.7860708236694336, + "logits/rejected": -1.8024113178253174, + "logps/chosen": -166.50582885742188, + "logps/rejected": -517.8162841796875, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0361175537109375, + "rewards/margins": 6.427066326141357, + "rewards/rejected": -5.39094877243042, + "step": 16032 + }, + { + "epoch": 0.93, + "learning_rate": 1.1719780222226527e-09, + "logits/chosen": -1.9686713218688965, + "logits/rejected": -1.9742704629898071, + "logps/chosen": -5.502909183502197, + "logps/rejected": -74.7403793334961, + "loss": 0.5436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23068438470363617, + "rewards/margins": 0.3884083926677704, + "rewards/rejected": -0.15772400796413422, + "step": 16033 + }, + { + "epoch": 0.93, + "learning_rate": 1.1699504308561647e-09, + "logits/chosen": -1.9294999837875366, + "logits/rejected": -1.9475501775741577, + "logps/chosen": -211.16488647460938, + "logps/rejected": -525.0055541992188, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7552002668380737, + "rewards/margins": 6.304043769836426, + "rewards/rejected": -4.5488433837890625, + "step": 16034 + }, + { + "epoch": 0.93, + "learning_rate": 1.1679245741719023e-09, + "logits/chosen": -1.3704721927642822, + "logits/rejected": -1.362799048423767, + "logps/chosen": -81.04890441894531, + "logps/rejected": -325.9418640136719, + "loss": 0.4476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5620102286338806, + "rewards/margins": 4.117021083831787, + "rewards/rejected": -4.6790313720703125, + "step": 16035 + }, + { + "epoch": 0.93, + "learning_rate": 1.1659004522418137e-09, + "logits/chosen": -1.9279396533966064, + "logits/rejected": -1.9241927862167358, + "logps/chosen": -17.55622100830078, + "logps/rejected": -70.31097412109375, + "loss": 0.3529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7563854455947876, + "rewards/margins": 0.6923313140869141, + "rewards/rejected": 0.06405410915613174, + "step": 16036 + }, + { + "epoch": 0.93, + "learning_rate": 1.1638780651378243e-09, + "logits/chosen": -1.8777443170547485, + "logits/rejected": -1.8835474252700806, + "logps/chosen": -31.661487579345703, + "logps/rejected": -158.86558532714844, + "loss": 0.5166, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.5638489127159119, + "rewards/margins": -0.02795863151550293, + "rewards/rejected": 0.5918075442314148, + "step": 16037 + }, + { + "epoch": 0.93, + "learning_rate": 1.161857412931755e-09, + "logits/chosen": -1.9987531900405884, + "logits/rejected": -1.9965534210205078, + "logps/chosen": -20.946645736694336, + "logps/rejected": -233.38392639160156, + "loss": 0.125, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1702378988265991, + "rewards/margins": 4.916043281555176, + "rewards/rejected": -3.745805501937866, + "step": 16038 + }, + { + "epoch": 0.93, + "learning_rate": 1.1598384956954199e-09, + "logits/chosen": -1.9843482971191406, + "logits/rejected": -1.9774032831192017, + "logps/chosen": -0.02779327891767025, + "logps/rejected": -105.87788391113281, + "loss": 0.4577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031396474689245224, + "rewards/margins": 1.208699107170105, + "rewards/rejected": -1.177302598953247, + "step": 16039 + }, + { + "epoch": 0.93, + "learning_rate": 1.1578213135005122e-09, + "logits/chosen": -1.8363527059555054, + "logits/rejected": -1.8209229707717896, + "logps/chosen": -211.68862915039062, + "logps/rejected": -356.36724853515625, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1316163539886475, + "rewards/margins": 1.8894654512405396, + "rewards/rejected": 0.24215088784694672, + "step": 16040 + }, + { + "epoch": 0.93, + "learning_rate": 1.1558058664187077e-09, + "logits/chosen": -1.7883497476577759, + "logits/rejected": -1.7915568351745605, + "logps/chosen": -153.81336975097656, + "logps/rejected": -208.35598754882812, + "loss": 0.115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.824580430984497, + "rewards/margins": 2.1895737648010254, + "rewards/rejected": -0.36499330401420593, + "step": 16041 + }, + { + "epoch": 0.93, + "learning_rate": 1.1537921545216044e-09, + "logits/chosen": -2.0108842849731445, + "logits/rejected": -2.0263442993164062, + "logps/chosen": -130.17242431640625, + "logps/rejected": -332.280517578125, + "loss": 0.1268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7033737301826477, + "rewards/margins": 2.774876356124878, + "rewards/rejected": -2.071502685546875, + "step": 16042 + }, + { + "epoch": 0.93, + "learning_rate": 1.1517801778807346e-09, + "logits/chosen": -1.9804306030273438, + "logits/rejected": -1.9821845293045044, + "logps/chosen": -29.522201538085938, + "logps/rejected": -184.90574645996094, + "loss": 0.4004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6193779110908508, + "rewards/margins": 0.6751732230186462, + "rewards/rejected": -0.05579528957605362, + "step": 16043 + }, + { + "epoch": 0.93, + "learning_rate": 1.149769936567574e-09, + "logits/chosen": -1.7650011777877808, + "logits/rejected": -1.764034390449524, + "logps/chosen": -36.04335403442383, + "logps/rejected": -240.18460083007812, + "loss": 0.2801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32869264483451843, + "rewards/margins": 3.2182679176330566, + "rewards/rejected": -2.889575242996216, + "step": 16044 + }, + { + "epoch": 0.93, + "learning_rate": 1.1477614306535378e-09, + "logits/chosen": -1.9571746587753296, + "logits/rejected": -1.9393606185913086, + "logps/chosen": -80.03858947753906, + "logps/rejected": -344.63775634765625, + "loss": 0.1089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0426346063613892, + "rewards/margins": 4.313661098480225, + "rewards/rejected": -3.271026611328125, + "step": 16045 + }, + { + "epoch": 0.93, + "learning_rate": 1.14575466020998e-09, + "logits/chosen": -2.041999340057373, + "logits/rejected": -2.038123369216919, + "logps/chosen": -84.00367736816406, + "logps/rejected": -255.85281372070312, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20258788764476776, + "rewards/margins": 2.4961228370666504, + "rewards/rejected": -2.293534994125366, + "step": 16046 + }, + { + "epoch": 0.93, + "learning_rate": 1.143749625308188e-09, + "logits/chosen": -1.9523310661315918, + "logits/rejected": -1.9506076574325562, + "logps/chosen": -5.792544841766357, + "logps/rejected": -106.80850219726562, + "loss": 0.6798, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03151268884539604, + "rewards/margins": -0.12903085350990295, + "rewards/rejected": 0.09751816093921661, + "step": 16047 + }, + { + "epoch": 0.93, + "learning_rate": 1.1417463260193938e-09, + "logits/chosen": -1.7791885137557983, + "logits/rejected": -1.793519139289856, + "logps/chosen": -175.49008178710938, + "logps/rejected": -383.02703857421875, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.851007103919983, + "rewards/margins": 4.34413480758667, + "rewards/rejected": -2.4931275844573975, + "step": 16048 + }, + { + "epoch": 0.93, + "learning_rate": 1.139744762414757e-09, + "logits/chosen": -1.9151790142059326, + "logits/rejected": -1.9254488945007324, + "logps/chosen": -198.92820739746094, + "logps/rejected": -396.63470458984375, + "loss": 0.0626, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.306025743484497, + "rewards/margins": 2.8595657348632812, + "rewards/rejected": -0.553540050983429, + "step": 16049 + }, + { + "epoch": 0.93, + "learning_rate": 1.1377449345653877e-09, + "logits/chosen": -1.798722743988037, + "logits/rejected": -1.7736279964447021, + "logps/chosen": -285.3687744140625, + "logps/rejected": -440.0419006347656, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.092633008956909, + "rewards/margins": 3.641909599304199, + "rewards/rejected": -1.5492767095565796, + "step": 16050 + }, + { + "epoch": 0.93, + "learning_rate": 1.135746842542329e-09, + "logits/chosen": -2.001000165939331, + "logits/rejected": -2.0001401901245117, + "logps/chosen": -3.8925163745880127, + "logps/rejected": -173.12530517578125, + "loss": 0.5369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06625249236822128, + "rewards/margins": 0.7040302157402039, + "rewards/rejected": -0.6377777457237244, + "step": 16051 + }, + { + "epoch": 0.93, + "learning_rate": 1.133750486416568e-09, + "logits/chosen": -2.0216867923736572, + "logits/rejected": -2.0121967792510986, + "logps/chosen": -0.11270548403263092, + "logps/rejected": -175.0379180908203, + "loss": 0.3458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003991635050624609, + "rewards/margins": 2.886561632156372, + "rewards/rejected": -2.8905532360076904, + "step": 16052 + }, + { + "epoch": 0.93, + "learning_rate": 1.13175586625901e-09, + "logits/chosen": -1.8790662288665771, + "logits/rejected": -1.8665536642074585, + "logps/chosen": -99.42758178710938, + "logps/rejected": -169.98330688476562, + "loss": 0.1554, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0825066566467285, + "rewards/margins": 1.2305169105529785, + "rewards/rejected": 0.85198974609375, + "step": 16053 + }, + { + "epoch": 0.93, + "learning_rate": 1.1297629821405363e-09, + "logits/chosen": -1.7665672302246094, + "logits/rejected": -1.7561928033828735, + "logps/chosen": -26.286720275878906, + "logps/rejected": -193.78738403320312, + "loss": 0.1669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9183670282363892, + "rewards/margins": 2.8058578968048096, + "rewards/rejected": -1.8874908685684204, + "step": 16054 + }, + { + "epoch": 0.93, + "learning_rate": 1.1277718341319241e-09, + "logits/chosen": -2.020362377166748, + "logits/rejected": -2.0225846767425537, + "logps/chosen": -0.04904336482286453, + "logps/rejected": -83.04205322265625, + "loss": 0.5161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005847092834301293, + "rewards/margins": 0.699583888053894, + "rewards/rejected": -0.7001686096191406, + "step": 16055 + }, + { + "epoch": 0.93, + "learning_rate": 1.1257824223039224e-09, + "logits/chosen": -1.7812726497650146, + "logits/rejected": -1.8390339612960815, + "logps/chosen": -166.5707550048828, + "logps/rejected": -251.6073455810547, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6012557744979858, + "rewards/margins": 1.9530456066131592, + "rewards/rejected": -0.3517898619174957, + "step": 16056 + }, + { + "epoch": 0.93, + "learning_rate": 1.1237947467271914e-09, + "logits/chosen": -1.8930379152297974, + "logits/rejected": -1.894080638885498, + "logps/chosen": -11.171708106994629, + "logps/rejected": -194.00341796875, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09696712344884872, + "rewards/margins": 3.1097190380096436, + "rewards/rejected": -3.012751817703247, + "step": 16057 + }, + { + "epoch": 0.93, + "learning_rate": 1.1218088074723575e-09, + "logits/chosen": -1.9456264972686768, + "logits/rejected": -1.9565094709396362, + "logps/chosen": -356.0149841308594, + "logps/rejected": -446.7398681640625, + "loss": 0.1244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09805908054113388, + "rewards/margins": 2.926440477371216, + "rewards/rejected": -3.0244996547698975, + "step": 16058 + }, + { + "epoch": 0.93, + "learning_rate": 1.119824604609959e-09, + "logits/chosen": -1.9543577432632446, + "logits/rejected": -1.9438979625701904, + "logps/chosen": -35.37869644165039, + "logps/rejected": -244.977783203125, + "loss": 0.3052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11335640400648117, + "rewards/margins": 5.382225513458252, + "rewards/rejected": -5.268868923187256, + "step": 16059 + }, + { + "epoch": 0.93, + "learning_rate": 1.1178421382104896e-09, + "logits/chosen": -1.856977105140686, + "logits/rejected": -1.843388557434082, + "logps/chosen": -244.9537811279297, + "logps/rejected": -352.95806884765625, + "loss": 0.0749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3833328485488892, + "rewards/margins": 2.0756728649139404, + "rewards/rejected": -0.692340075969696, + "step": 16060 + }, + { + "epoch": 0.93, + "learning_rate": 1.1158614083443763e-09, + "logits/chosen": -1.7672383785247803, + "logits/rejected": -1.7740598917007446, + "logps/chosen": -18.29800033569336, + "logps/rejected": -205.0643310546875, + "loss": 0.261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32838669419288635, + "rewards/margins": 3.061870813369751, + "rewards/rejected": -2.7334840297698975, + "step": 16061 + }, + { + "epoch": 0.93, + "learning_rate": 1.1138824150819903e-09, + "logits/chosen": -1.9497309923171997, + "logits/rejected": -1.946352243423462, + "logps/chosen": -39.86322784423828, + "logps/rejected": -139.623291015625, + "loss": 0.3601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009138870052993298, + "rewards/margins": 2.2691287994384766, + "rewards/rejected": -2.2599899768829346, + "step": 16062 + }, + { + "epoch": 0.93, + "learning_rate": 1.11190515849362e-09, + "logits/chosen": -1.9776338338851929, + "logits/rejected": -1.9699006080627441, + "logps/chosen": -32.90081787109375, + "logps/rejected": -237.21853637695312, + "loss": 0.2018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6834434866905212, + "rewards/margins": 4.530171871185303, + "rewards/rejected": -3.846728563308716, + "step": 16063 + }, + { + "epoch": 0.93, + "learning_rate": 1.1099296386495205e-09, + "logits/chosen": -1.7688919305801392, + "logits/rejected": -1.761861801147461, + "logps/chosen": -42.89331817626953, + "logps/rejected": -136.82464599609375, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7491855621337891, + "rewards/margins": 1.5789799690246582, + "rewards/rejected": -0.8297943472862244, + "step": 16064 + }, + { + "epoch": 0.93, + "learning_rate": 1.1079558556198631e-09, + "logits/chosen": -1.7124695777893066, + "logits/rejected": -1.7312486171722412, + "logps/chosen": -232.9507293701172, + "logps/rejected": -339.47308349609375, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.785505771636963, + "rewards/margins": 2.8236618041992188, + "rewards/rejected": -0.03815612941980362, + "step": 16065 + }, + { + "epoch": 0.93, + "learning_rate": 1.1059838094747808e-09, + "logits/chosen": -2.073911428451538, + "logits/rejected": -2.0779812335968018, + "logps/chosen": -295.4350280761719, + "logps/rejected": -532.9619140625, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3099334239959717, + "rewards/margins": 6.113381862640381, + "rewards/rejected": -2.803448438644409, + "step": 16066 + }, + { + "epoch": 0.93, + "learning_rate": 1.104013500284312e-09, + "logits/chosen": -1.998012900352478, + "logits/rejected": -1.998399257659912, + "logps/chosen": -294.5049743652344, + "logps/rejected": -405.2732849121094, + "loss": 0.1384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04093628004193306, + "rewards/margins": 2.7920472621917725, + "rewards/rejected": -2.8329834938049316, + "step": 16067 + }, + { + "epoch": 0.94, + "learning_rate": 1.1020449281184562e-09, + "logits/chosen": -1.8855839967727661, + "logits/rejected": -1.910223126411438, + "logps/chosen": -207.30935668945312, + "logps/rejected": -324.84149169921875, + "loss": 0.055, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.917614698410034, + "rewards/margins": 2.2781736850738525, + "rewards/rejected": 0.6394409537315369, + "step": 16068 + }, + { + "epoch": 0.94, + "learning_rate": 1.100078093047152e-09, + "logits/chosen": -1.8334873914718628, + "logits/rejected": -1.8309080600738525, + "logps/chosen": -222.61032104492188, + "logps/rejected": -314.05621337890625, + "loss": 0.3655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.617016613483429, + "rewards/margins": 1.0370910167694092, + "rewards/rejected": -0.420074462890625, + "step": 16069 + }, + { + "epoch": 0.94, + "learning_rate": 1.0981129951402713e-09, + "logits/chosen": -1.9463390111923218, + "logits/rejected": -1.9362053871154785, + "logps/chosen": -5.3524592658504844e-05, + "logps/rejected": -296.4093322753906, + "loss": 0.3395, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.001300574898778e-06, + "rewards/margins": 6.215168476104736, + "rewards/rejected": -6.21516752243042, + "step": 16070 + }, + { + "epoch": 0.94, + "learning_rate": 1.0961496344676302e-09, + "logits/chosen": -1.9430304765701294, + "logits/rejected": -1.9403343200683594, + "logps/chosen": -43.67527389526367, + "logps/rejected": -57.20075607299805, + "loss": 0.7047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2491302490234375, + "rewards/margins": 0.06107446551322937, + "rewards/rejected": -0.31020471453666687, + "step": 16071 + }, + { + "epoch": 0.94, + "learning_rate": 1.0941880110989565e-09, + "logits/chosen": -1.999751329421997, + "logits/rejected": -2.021070718765259, + "logps/chosen": -240.11563110351562, + "logps/rejected": -248.31849670410156, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7231597900390625, + "rewards/margins": 2.0631117820739746, + "rewards/rejected": 0.6600479483604431, + "step": 16072 + }, + { + "epoch": 0.94, + "learning_rate": 1.0922281251039556e-09, + "logits/chosen": -2.0065226554870605, + "logits/rejected": -1.9917062520980835, + "logps/chosen": -47.18573760986328, + "logps/rejected": -222.12220764160156, + "loss": 0.0848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7602516412734985, + "rewards/margins": 5.292147159576416, + "rewards/rejected": -3.531895399093628, + "step": 16073 + }, + { + "epoch": 0.94, + "learning_rate": 1.090269976552244e-09, + "logits/chosen": -1.9711575508117676, + "logits/rejected": -1.9659905433654785, + "logps/chosen": -1.7523739337921143, + "logps/rejected": -121.39360046386719, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07311628013849258, + "rewards/margins": 2.3412468433380127, + "rewards/rejected": -2.2681305408477783, + "step": 16074 + }, + { + "epoch": 0.94, + "learning_rate": 1.088313565513388e-09, + "logits/chosen": -1.7632814645767212, + "logits/rejected": -1.6099933385849, + "logps/chosen": -213.82601928710938, + "logps/rejected": -641.0297241210938, + "loss": 0.0731, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8211182355880737, + "rewards/margins": 3.91701078414917, + "rewards/rejected": -2.0958924293518066, + "step": 16075 + }, + { + "epoch": 0.94, + "learning_rate": 1.0863588920568766e-09, + "logits/chosen": -1.8117362260818481, + "logits/rejected": -1.8378958702087402, + "logps/chosen": -171.84474182128906, + "logps/rejected": -345.0580749511719, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.014268636703491, + "rewards/margins": 3.0885636806488037, + "rewards/rejected": -1.0742950439453125, + "step": 16076 + }, + { + "epoch": 0.94, + "learning_rate": 1.0844059562521712e-09, + "logits/chosen": -2.0720367431640625, + "logits/rejected": -2.0577354431152344, + "logps/chosen": -50.23524856567383, + "logps/rejected": -228.33934020996094, + "loss": 0.1027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5570789575576782, + "rewards/margins": 2.757582664489746, + "rewards/rejected": -1.2005035877227783, + "step": 16077 + }, + { + "epoch": 0.94, + "learning_rate": 1.0824547581686383e-09, + "logits/chosen": -1.9630751609802246, + "logits/rejected": -1.9127912521362305, + "logps/chosen": -181.16358947753906, + "logps/rejected": -462.3457336425781, + "loss": 0.2193, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9828170537948608, + "rewards/margins": 0.9429519176483154, + "rewards/rejected": 1.0398651361465454, + "step": 16078 + }, + { + "epoch": 0.94, + "learning_rate": 1.0805052978755946e-09, + "logits/chosen": -1.8721846342086792, + "logits/rejected": -1.8701980113983154, + "logps/chosen": -19.175411224365234, + "logps/rejected": -140.9835662841797, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26845207810401917, + "rewards/margins": 2.2633235454559326, + "rewards/rejected": -1.9948714971542358, + "step": 16079 + }, + { + "epoch": 0.94, + "learning_rate": 1.0785575754422793e-09, + "logits/chosen": -1.9099189043045044, + "logits/rejected": -1.9473521709442139, + "logps/chosen": -136.2154998779297, + "logps/rejected": -379.03277587890625, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.641993761062622, + "rewards/margins": 3.7430741786956787, + "rewards/rejected": -2.1010804176330566, + "step": 16080 + }, + { + "epoch": 0.94, + "learning_rate": 1.0766115909379148e-09, + "logits/chosen": -1.7107815742492676, + "logits/rejected": -1.620937705039978, + "logps/chosen": -165.32110595703125, + "logps/rejected": -388.56390380859375, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.147966146469116, + "rewards/margins": 3.2905688285827637, + "rewards/rejected": -1.142602562904358, + "step": 16081 + }, + { + "epoch": 0.94, + "learning_rate": 1.0746673444316124e-09, + "logits/chosen": -1.9430686235427856, + "logits/rejected": -1.9438626766204834, + "logps/chosen": -0.0032191064674407244, + "logps/rejected": -129.38853454589844, + "loss": 0.4043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020275055430829525, + "rewards/margins": 2.1430208683013916, + "rewards/rejected": -2.143223524093628, + "step": 16082 + }, + { + "epoch": 0.94, + "learning_rate": 1.0727248359924446e-09, + "logits/chosen": -1.841302752494812, + "logits/rejected": -1.8421285152435303, + "logps/chosen": -1.0748368501663208, + "logps/rejected": -53.84256362915039, + "loss": 0.4251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12328054010868073, + "rewards/margins": 1.391738772392273, + "rewards/rejected": -1.2684582471847534, + "step": 16083 + }, + { + "epoch": 0.94, + "learning_rate": 1.0707840656894173e-09, + "logits/chosen": -1.9956953525543213, + "logits/rejected": -2.0025627613067627, + "logps/chosen": -0.5254760384559631, + "logps/rejected": -220.0716552734375, + "loss": 0.3668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024126678705215454, + "rewards/margins": 3.75530743598938, + "rewards/rejected": -3.7794342041015625, + "step": 16084 + }, + { + "epoch": 0.94, + "learning_rate": 1.0688450335914868e-09, + "logits/chosen": -2.0189638137817383, + "logits/rejected": -2.017496109008789, + "logps/chosen": -3.049194812774658, + "logps/rejected": -42.711524963378906, + "loss": 0.3938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22162948548793793, + "rewards/margins": 1.3212873935699463, + "rewards/rejected": -1.0996578931808472, + "step": 16085 + }, + { + "epoch": 0.94, + "learning_rate": 1.0669077397675253e-09, + "logits/chosen": -1.9293361902236938, + "logits/rejected": -1.9104068279266357, + "logps/chosen": -155.12936401367188, + "logps/rejected": -330.50006103515625, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.019418478012085, + "rewards/margins": 4.587018013000488, + "rewards/rejected": -2.5675995349884033, + "step": 16086 + }, + { + "epoch": 0.94, + "learning_rate": 1.0649721842863557e-09, + "logits/chosen": -1.8199596405029297, + "logits/rejected": -1.8532596826553345, + "logps/chosen": -238.79885864257812, + "logps/rejected": -306.7494201660156, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1484649181365967, + "rewards/margins": 3.4514098167419434, + "rewards/rejected": -0.30294495820999146, + "step": 16087 + }, + { + "epoch": 0.94, + "learning_rate": 1.0630383672167398e-09, + "logits/chosen": -1.8889881372451782, + "logits/rejected": -1.8820316791534424, + "logps/chosen": -33.60823440551758, + "logps/rejected": -223.43333435058594, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23817062377929688, + "rewards/margins": 3.2800240516662598, + "rewards/rejected": -3.041853427886963, + "step": 16088 + }, + { + "epoch": 0.94, + "learning_rate": 1.061106288627378e-09, + "logits/chosen": -1.7673194408416748, + "logits/rejected": -1.7661807537078857, + "logps/chosen": -76.13014221191406, + "logps/rejected": -224.1571044921875, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02770843543112278, + "rewards/margins": 2.9443559646606445, + "rewards/rejected": -2.916647434234619, + "step": 16089 + }, + { + "epoch": 0.94, + "learning_rate": 1.0591759485869156e-09, + "logits/chosen": -1.9998613595962524, + "logits/rejected": -1.9948705434799194, + "logps/chosen": -0.03331925719976425, + "logps/rejected": -49.15538024902344, + "loss": 0.5885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007837962475605309, + "rewards/margins": 0.435776025056839, + "rewards/rejected": -0.4349922239780426, + "step": 16090 + }, + { + "epoch": 0.94, + "learning_rate": 1.0572473471639087e-09, + "logits/chosen": -1.7216156721115112, + "logits/rejected": -1.7209166288375854, + "logps/chosen": -2.1918892860412598, + "logps/rejected": -143.25222778320312, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3744799792766571, + "rewards/margins": 2.570221424102783, + "rewards/rejected": -2.1957414150238037, + "step": 16091 + }, + { + "epoch": 0.94, + "learning_rate": 1.0553204844268859e-09, + "logits/chosen": -1.8555525541305542, + "logits/rejected": -1.8872641324996948, + "logps/chosen": -151.3948974609375, + "logps/rejected": -528.0222778320312, + "loss": 0.0373, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3693329095840454, + "rewards/margins": 6.403994560241699, + "rewards/rejected": -5.034661769866943, + "step": 16092 + }, + { + "epoch": 0.94, + "learning_rate": 1.0533953604442925e-09, + "logits/chosen": -1.769061803817749, + "logits/rejected": -1.748208999633789, + "logps/chosen": -108.14732360839844, + "logps/rejected": -531.4385375976562, + "loss": 0.1022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.18871009349823, + "rewards/margins": 4.230155944824219, + "rewards/rejected": -3.0414459705352783, + "step": 16093 + }, + { + "epoch": 0.94, + "learning_rate": 1.0514719752845236e-09, + "logits/chosen": -1.8777174949645996, + "logits/rejected": -1.8788061141967773, + "logps/chosen": -289.94561767578125, + "logps/rejected": -365.613525390625, + "loss": 0.0347, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1478271484375, + "rewards/margins": 3.2668213844299316, + "rewards/rejected": -1.118994116783142, + "step": 16094 + }, + { + "epoch": 0.94, + "learning_rate": 1.0495503290158914e-09, + "logits/chosen": -1.750616192817688, + "logits/rejected": -1.7513693571090698, + "logps/chosen": -145.71270751953125, + "logps/rejected": -442.96978759765625, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4869384765625, + "rewards/margins": 5.463635444641113, + "rewards/rejected": -3.976696729660034, + "step": 16095 + }, + { + "epoch": 0.94, + "learning_rate": 1.0476304217066856e-09, + "logits/chosen": -1.8810758590698242, + "logits/rejected": -1.936012864112854, + "logps/chosen": -157.70449829101562, + "logps/rejected": -477.59564208984375, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.822357177734375, + "rewards/margins": 3.8716981410980225, + "rewards/rejected": -2.0493409633636475, + "step": 16096 + }, + { + "epoch": 0.94, + "learning_rate": 1.0457122534250962e-09, + "logits/chosen": -2.086688995361328, + "logits/rejected": -2.070829153060913, + "logps/chosen": -13.01695728302002, + "logps/rejected": -214.11947631835938, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06157388910651207, + "rewards/margins": 3.9483842849731445, + "rewards/rejected": -3.886810302734375, + "step": 16097 + }, + { + "epoch": 0.94, + "learning_rate": 1.0437958242392742e-09, + "logits/chosen": -1.9255985021591187, + "logits/rejected": -1.884474515914917, + "logps/chosen": -162.7702178955078, + "logps/rejected": -374.39788818359375, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9959090948104858, + "rewards/margins": 3.419386386871338, + "rewards/rejected": -1.4234771728515625, + "step": 16098 + }, + { + "epoch": 0.94, + "learning_rate": 1.0418811342172817e-09, + "logits/chosen": -1.8850345611572266, + "logits/rejected": -1.8634051084518433, + "logps/chosen": -215.87164306640625, + "logps/rejected": -503.437255859375, + "loss": 0.0708, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2731902599334717, + "rewards/margins": 2.954019069671631, + "rewards/rejected": -0.680828869342804, + "step": 16099 + }, + { + "epoch": 0.94, + "learning_rate": 1.0399681834271646e-09, + "logits/chosen": -1.7889273166656494, + "logits/rejected": -1.794144630432129, + "logps/chosen": -11.82762622833252, + "logps/rejected": -141.77395629882812, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11000442504882812, + "rewards/margins": 3.586132764816284, + "rewards/rejected": -3.476128339767456, + "step": 16100 + }, + { + "epoch": 0.94, + "learning_rate": 1.038056971936857e-09, + "logits/chosen": -1.9444141387939453, + "logits/rejected": -1.9409767389297485, + "logps/chosen": -1.0394309759140015, + "logps/rejected": -119.58030700683594, + "loss": 0.3843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01932188868522644, + "rewards/margins": 2.334073305130005, + "rewards/rejected": -2.3533952236175537, + "step": 16101 + }, + { + "epoch": 0.94, + "learning_rate": 1.0361474998142717e-09, + "logits/chosen": -1.776228904724121, + "logits/rejected": -1.7390011548995972, + "logps/chosen": -438.38751220703125, + "logps/rejected": -577.3124389648438, + "loss": 0.1672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11049499362707138, + "rewards/margins": 3.5009491443634033, + "rewards/rejected": -3.390454053878784, + "step": 16102 + }, + { + "epoch": 0.94, + "learning_rate": 1.0342397671272319e-09, + "logits/chosen": -1.8673819303512573, + "logits/rejected": -1.8805878162384033, + "logps/chosen": -294.2135009765625, + "logps/rejected": -607.43994140625, + "loss": 0.0858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05840149149298668, + "rewards/margins": 6.648343086242676, + "rewards/rejected": -6.70674467086792, + "step": 16103 + }, + { + "epoch": 0.94, + "learning_rate": 1.0323337739435167e-09, + "logits/chosen": -1.8570293188095093, + "logits/rejected": -1.8984167575836182, + "logps/chosen": -153.11087036132812, + "logps/rejected": -275.2972412109375, + "loss": 0.1637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6038910150527954, + "rewards/margins": 1.165948510169983, + "rewards/rejected": 0.4379425048828125, + "step": 16104 + }, + { + "epoch": 0.94, + "learning_rate": 1.0304295203308278e-09, + "logits/chosen": -1.8958224058151245, + "logits/rejected": -1.8814983367919922, + "logps/chosen": -120.29070281982422, + "logps/rejected": -375.7935791015625, + "loss": 0.52, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9163063168525696, + "rewards/margins": 3.3926751613616943, + "rewards/rejected": -4.308981418609619, + "step": 16105 + }, + { + "epoch": 0.94, + "learning_rate": 1.0285270063568218e-09, + "logits/chosen": -1.806876540184021, + "logits/rejected": -1.8022934198379517, + "logps/chosen": -118.40127563476562, + "logps/rejected": -423.4293212890625, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.591070532798767, + "rewards/margins": 6.947668552398682, + "rewards/rejected": -5.356597900390625, + "step": 16106 + }, + { + "epoch": 0.94, + "learning_rate": 1.0266262320890783e-09, + "logits/chosen": -1.8013110160827637, + "logits/rejected": -1.8153759241104126, + "logps/chosen": -41.62987518310547, + "logps/rejected": -114.56619262695312, + "loss": 0.493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027640914544463158, + "rewards/margins": 0.5145503878593445, + "rewards/rejected": -0.4869094789028168, + "step": 16107 + }, + { + "epoch": 0.94, + "learning_rate": 1.0247271975951266e-09, + "logits/chosen": -1.8775426149368286, + "logits/rejected": -1.8678892850875854, + "logps/chosen": -30.508573532104492, + "logps/rejected": -169.48260498046875, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21960316598415375, + "rewards/margins": 2.405565023422241, + "rewards/rejected": -2.185961961746216, + "step": 16108 + }, + { + "epoch": 0.94, + "learning_rate": 1.0228299029424347e-09, + "logits/chosen": -2.0353212356567383, + "logits/rejected": -1.9839377403259277, + "logps/chosen": -197.7088623046875, + "logps/rejected": -361.34832763671875, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.546075463294983, + "rewards/margins": 3.058255195617676, + "rewards/rejected": -1.5121796131134033, + "step": 16109 + }, + { + "epoch": 0.94, + "learning_rate": 1.0209343481983879e-09, + "logits/chosen": -1.9486989974975586, + "logits/rejected": -1.9282821416854858, + "logps/chosen": -151.02938842773438, + "logps/rejected": -285.81036376953125, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9030365347862244, + "rewards/margins": 0.7927429676055908, + "rewards/rejected": 0.11029358208179474, + "step": 16110 + }, + { + "epoch": 0.94, + "learning_rate": 1.0190405334303431e-09, + "logits/chosen": -1.7631847858428955, + "logits/rejected": -1.7692807912826538, + "logps/chosen": -202.55975341796875, + "logps/rejected": -436.30322265625, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.374310255050659, + "rewards/margins": 3.399575710296631, + "rewards/rejected": -0.02526550367474556, + "step": 16111 + }, + { + "epoch": 0.94, + "learning_rate": 1.0171484587055634e-09, + "logits/chosen": -1.8780829906463623, + "logits/rejected": -1.8790417909622192, + "logps/chosen": -6.960545063018799, + "logps/rejected": -108.62458801269531, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34998250007629395, + "rewards/margins": 1.7385262250900269, + "rewards/rejected": -1.388543725013733, + "step": 16112 + }, + { + "epoch": 0.94, + "learning_rate": 1.0152581240912839e-09, + "logits/chosen": -1.765809416770935, + "logits/rejected": -1.7676441669464111, + "logps/chosen": -165.19134521484375, + "logps/rejected": -358.0484313964844, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9069595336914062, + "rewards/margins": 2.3824844360351562, + "rewards/rejected": -0.47552490234375, + "step": 16113 + }, + { + "epoch": 0.94, + "learning_rate": 1.0133695296546285e-09, + "logits/chosen": -2.0026016235351562, + "logits/rejected": -1.989977478981018, + "logps/chosen": -0.11894607543945312, + "logps/rejected": -166.2608184814453, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010450975969433784, + "rewards/margins": 3.988058090209961, + "rewards/rejected": -3.998509168624878, + "step": 16114 + }, + { + "epoch": 0.94, + "learning_rate": 1.0114826754627215e-09, + "logits/chosen": -1.781600832939148, + "logits/rejected": -1.7848236560821533, + "logps/chosen": -170.44520568847656, + "logps/rejected": -295.8255310058594, + "loss": 0.0497, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.003007650375366, + "rewards/margins": 2.791438579559326, + "rewards/rejected": -0.7884308099746704, + "step": 16115 + }, + { + "epoch": 0.94, + "learning_rate": 1.0095975615825758e-09, + "logits/chosen": -1.7987957000732422, + "logits/rejected": -1.7867259979248047, + "logps/chosen": -4.411166191101074, + "logps/rejected": -195.18594360351562, + "loss": 0.3259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09813747555017471, + "rewards/margins": 3.774310827255249, + "rewards/rejected": -3.676173448562622, + "step": 16116 + }, + { + "epoch": 0.94, + "learning_rate": 1.00771418808116e-09, + "logits/chosen": -1.8022944927215576, + "logits/rejected": -1.7948917150497437, + "logps/chosen": -141.56027221679688, + "logps/rejected": -322.73150634765625, + "loss": 0.2012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6946289539337158, + "rewards/margins": 1.3532379865646362, + "rewards/rejected": 0.341390997171402, + "step": 16117 + }, + { + "epoch": 0.94, + "learning_rate": 1.0058325550253822e-09, + "logits/chosen": -1.786547064781189, + "logits/rejected": -1.7905175685882568, + "logps/chosen": -65.25823974609375, + "logps/rejected": -106.11128234863281, + "loss": 0.5065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15300635993480682, + "rewards/margins": 1.2248241901397705, + "rewards/rejected": -1.3778305053710938, + "step": 16118 + }, + { + "epoch": 0.94, + "learning_rate": 1.0039526624820937e-09, + "logits/chosen": -1.874362826347351, + "logits/rejected": -1.8815582990646362, + "logps/chosen": -56.020328521728516, + "logps/rejected": -165.29962158203125, + "loss": 0.4574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3062584102153778, + "rewards/margins": 0.49758225679397583, + "rewards/rejected": -0.19132386147975922, + "step": 16119 + }, + { + "epoch": 0.94, + "learning_rate": 1.002074510518064e-09, + "logits/chosen": -1.7572851181030273, + "logits/rejected": -1.6587400436401367, + "logps/chosen": -221.01156616210938, + "logps/rejected": -520.97412109375, + "loss": 0.1516, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4055969715118408, + "rewards/margins": 2.1735901832580566, + "rewards/rejected": -0.767993152141571, + "step": 16120 + }, + { + "epoch": 0.94, + "learning_rate": 1.000198099200028e-09, + "logits/chosen": -1.7843250036239624, + "logits/rejected": -1.7675758600234985, + "logps/chosen": -188.1522216796875, + "logps/rejected": -327.8323059082031, + "loss": 0.1503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8433319330215454, + "rewards/margins": 1.7814514636993408, + "rewards/rejected": -0.9381195306777954, + "step": 16121 + }, + { + "epoch": 0.94, + "learning_rate": 9.983234285946384e-10, + "logits/chosen": -1.8268792629241943, + "logits/rejected": -1.871250867843628, + "logps/chosen": -154.96591186523438, + "logps/rejected": -386.0899963378906, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.748449683189392, + "rewards/margins": 3.390237331390381, + "rewards/rejected": -1.6417877674102783, + "step": 16122 + }, + { + "epoch": 0.94, + "learning_rate": 9.964504987684974e-10, + "logits/chosen": -1.7823212146759033, + "logits/rejected": -1.788405179977417, + "logps/chosen": -60.90141677856445, + "logps/rejected": -210.5589141845703, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1758037656545639, + "rewards/margins": 1.5067501068115234, + "rewards/rejected": -1.3309463262557983, + "step": 16123 + }, + { + "epoch": 0.94, + "learning_rate": 9.94579309788135e-10, + "logits/chosen": -1.8898732662200928, + "logits/rejected": -1.8831862211227417, + "logps/chosen": -35.482547760009766, + "logps/rejected": -128.269775390625, + "loss": 0.3746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42687225341796875, + "rewards/margins": 1.1269118785858154, + "rewards/rejected": -0.7000396847724915, + "step": 16124 + }, + { + "epoch": 0.94, + "learning_rate": 9.927098617200203e-10, + "logits/chosen": -1.984290361404419, + "logits/rejected": -1.9885683059692383, + "logps/chosen": -38.931800842285156, + "logps/rejected": -239.21920776367188, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41963425278663635, + "rewards/margins": 1.3494285345077515, + "rewards/rejected": -0.9297943115234375, + "step": 16125 + }, + { + "epoch": 0.94, + "learning_rate": 9.908421546305779e-10, + "logits/chosen": -2.0729920864105225, + "logits/rejected": -2.07411789894104, + "logps/chosen": -0.011660994961857796, + "logps/rejected": -51.35122299194336, + "loss": 0.571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007054207962937653, + "rewards/margins": 0.5604500770568848, + "rewards/rejected": -0.559744656085968, + "step": 16126 + }, + { + "epoch": 0.94, + "learning_rate": 9.889761885861548e-10, + "logits/chosen": -1.7218070030212402, + "logits/rejected": -1.7289903163909912, + "logps/chosen": -49.406272888183594, + "logps/rejected": -230.0897979736328, + "loss": 0.1191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9894515872001648, + "rewards/margins": 5.028929233551025, + "rewards/rejected": -4.039477825164795, + "step": 16127 + }, + { + "epoch": 0.94, + "learning_rate": 9.871119636530312e-10, + "logits/chosen": -1.8588727712631226, + "logits/rejected": -1.8517402410507202, + "logps/chosen": -56.07735824584961, + "logps/rejected": -237.62493896484375, + "loss": 0.4793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33041420578956604, + "rewards/margins": 3.916019916534424, + "rewards/rejected": -4.246434211730957, + "step": 16128 + }, + { + "epoch": 0.94, + "learning_rate": 9.852494798974375e-10, + "logits/chosen": -1.8376857042312622, + "logits/rejected": -1.833067774772644, + "logps/chosen": -38.77425765991211, + "logps/rejected": -141.55166625976562, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049156952649354935, + "rewards/margins": 2.407344102859497, + "rewards/rejected": -2.358187198638916, + "step": 16129 + }, + { + "epoch": 0.94, + "learning_rate": 9.83388737385543e-10, + "logits/chosen": -1.7865545749664307, + "logits/rejected": -1.7821711301803589, + "logps/chosen": -8.117770195007324, + "logps/rejected": -38.299095153808594, + "loss": 0.6541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15305256843566895, + "rewards/margins": 0.23818203806877136, + "rewards/rejected": -0.3912346065044403, + "step": 16130 + }, + { + "epoch": 0.94, + "learning_rate": 9.815297361834452e-10, + "logits/chosen": -1.8599309921264648, + "logits/rejected": -1.851204752922058, + "logps/chosen": -218.0174560546875, + "logps/rejected": -354.8404541015625, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.755479574203491, + "rewards/margins": 4.068364143371582, + "rewards/rejected": -0.31288453936576843, + "step": 16131 + }, + { + "epoch": 0.94, + "learning_rate": 9.796724763571905e-10, + "logits/chosen": -2.0078487396240234, + "logits/rejected": -2.026923418045044, + "logps/chosen": -194.62075805664062, + "logps/rejected": -420.80450439453125, + "loss": 0.0597, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3005218505859375, + "rewards/margins": 3.640158176422119, + "rewards/rejected": -2.3396363258361816, + "step": 16132 + }, + { + "epoch": 0.94, + "learning_rate": 9.778169579727436e-10, + "logits/chosen": -1.919866681098938, + "logits/rejected": -1.8436658382415771, + "logps/chosen": -169.22265625, + "logps/rejected": -608.2083740234375, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1754868030548096, + "rewards/margins": 7.1230573654174805, + "rewards/rejected": -4.94757080078125, + "step": 16133 + }, + { + "epoch": 0.94, + "learning_rate": 9.7596318109604e-10, + "logits/chosen": -1.7544691562652588, + "logits/rejected": -1.7561115026474, + "logps/chosen": -2.90999174118042, + "logps/rejected": -93.03764343261719, + "loss": 0.486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24095745384693146, + "rewards/margins": 0.6209608316421509, + "rewards/rejected": -0.3800033628940582, + "step": 16134 + }, + { + "epoch": 0.94, + "learning_rate": 9.741111457929274e-10, + "logits/chosen": -1.7528352737426758, + "logits/rejected": -1.7485570907592773, + "logps/chosen": -1.6110749244689941, + "logps/rejected": -53.949851989746094, + "loss": 0.3724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16866205632686615, + "rewards/margins": 1.9449468851089478, + "rewards/rejected": -1.7762848138809204, + "step": 16135 + }, + { + "epoch": 0.94, + "learning_rate": 9.722608521291974e-10, + "logits/chosen": -1.9741357564926147, + "logits/rejected": -1.9805562496185303, + "logps/chosen": -4.8933491706848145, + "logps/rejected": -79.51211547851562, + "loss": 0.3719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2997561991214752, + "rewards/margins": 1.3199946880340576, + "rewards/rejected": -1.0202385187149048, + "step": 16136 + }, + { + "epoch": 0.94, + "learning_rate": 9.704123001705755e-10, + "logits/chosen": -1.7850091457366943, + "logits/rejected": -1.770528793334961, + "logps/chosen": -63.176265716552734, + "logps/rejected": -201.71742248535156, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8506733179092407, + "rewards/margins": 2.4063541889190674, + "rewards/rejected": -0.5556808710098267, + "step": 16137 + }, + { + "epoch": 0.94, + "learning_rate": 9.685654899827478e-10, + "logits/chosen": -1.9595340490341187, + "logits/rejected": -1.9368079900741577, + "logps/chosen": -196.75045776367188, + "logps/rejected": -291.67193603515625, + "loss": 0.3295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.226678490638733, + "rewards/margins": 0.5556365847587585, + "rewards/rejected": 0.6710419058799744, + "step": 16138 + }, + { + "epoch": 0.94, + "learning_rate": 9.667204216313062e-10, + "logits/chosen": -1.6101208925247192, + "logits/rejected": -1.58169686794281, + "logps/chosen": -308.1266174316406, + "logps/rejected": -484.7406005859375, + "loss": 0.114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.974645972251892, + "rewards/margins": 2.0886473655700684, + "rewards/rejected": -0.11400146782398224, + "step": 16139 + }, + { + "epoch": 0.94, + "learning_rate": 9.648770951818096e-10, + "logits/chosen": -1.799881100654602, + "logits/rejected": -1.7993687391281128, + "logps/chosen": -1.4161527156829834, + "logps/rejected": -152.00808715820312, + "loss": 0.4811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10907653719186783, + "rewards/margins": 1.5652369260787964, + "rewards/rejected": -1.6743134260177612, + "step": 16140 + }, + { + "epoch": 0.94, + "learning_rate": 9.630355106997279e-10, + "logits/chosen": -1.8820698261260986, + "logits/rejected": -1.8119018077850342, + "logps/chosen": -121.9156265258789, + "logps/rejected": -276.08477783203125, + "loss": 0.4674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.144538164138794, + "rewards/margins": 0.08419883251190186, + "rewards/rejected": 1.060339331626892, + "step": 16141 + }, + { + "epoch": 0.94, + "learning_rate": 9.611956682504974e-10, + "logits/chosen": -2.029001474380493, + "logits/rejected": -1.9743961095809937, + "logps/chosen": -68.48772430419922, + "logps/rejected": -419.3138427734375, + "loss": 0.3813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3908348083496094, + "rewards/margins": 5.960517406463623, + "rewards/rejected": -6.351352214813232, + "step": 16142 + }, + { + "epoch": 0.94, + "learning_rate": 9.593575678994658e-10, + "logits/chosen": -1.7452189922332764, + "logits/rejected": -1.7454822063446045, + "logps/chosen": -27.861467361450195, + "logps/rejected": -158.0869903564453, + "loss": 0.2644, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0608274936676025, + "rewards/margins": 1.1261824369430542, + "rewards/rejected": -0.06535492092370987, + "step": 16143 + }, + { + "epoch": 0.94, + "learning_rate": 9.575212097119367e-10, + "logits/chosen": -1.9638594388961792, + "logits/rejected": -1.9622046947479248, + "logps/chosen": -5.825145244598389, + "logps/rejected": -271.0521545410156, + "loss": 0.2326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14076076447963715, + "rewards/margins": 3.997802734375, + "rewards/rejected": -3.857042074203491, + "step": 16144 + }, + { + "epoch": 0.94, + "learning_rate": 9.556865937531523e-10, + "logits/chosen": -1.9251152276992798, + "logits/rejected": -1.920745849609375, + "logps/chosen": -176.9922332763672, + "logps/rejected": -340.15728759765625, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.228628635406494, + "rewards/margins": 2.817282199859619, + "rewards/rejected": -0.588653564453125, + "step": 16145 + }, + { + "epoch": 0.94, + "learning_rate": 9.538537200882767e-10, + "logits/chosen": -1.8028172254562378, + "logits/rejected": -1.7819088697433472, + "logps/chosen": -216.62417602539062, + "logps/rejected": -344.5913391113281, + "loss": 0.1266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5891571044921875, + "rewards/margins": 1.929815649986267, + "rewards/rejected": -1.3406585454940796, + "step": 16146 + }, + { + "epoch": 0.94, + "learning_rate": 9.520225887824306e-10, + "logits/chosen": -1.8862414360046387, + "logits/rejected": -1.8857591152191162, + "logps/chosen": -3.816166877746582, + "logps/rejected": -87.72728729248047, + "loss": 0.968, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.18601518869400024, + "rewards/margins": -0.8533316254615784, + "rewards/rejected": 0.6673164367675781, + "step": 16147 + }, + { + "epoch": 0.94, + "learning_rate": 9.501931999006618e-10, + "logits/chosen": -1.8665874004364014, + "logits/rejected": -1.8743313550949097, + "logps/chosen": -11.759381294250488, + "logps/rejected": -228.1131591796875, + "loss": 0.2367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6495648622512817, + "rewards/margins": 3.0526747703552246, + "rewards/rejected": -2.4031097888946533, + "step": 16148 + }, + { + "epoch": 0.94, + "learning_rate": 9.48365553507957e-10, + "logits/chosen": -2.0248565673828125, + "logits/rejected": -2.017981767654419, + "logps/chosen": -8.153705857694149e-05, + "logps/rejected": -131.82803344726562, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.331859135068953e-06, + "rewards/margins": 2.608341932296753, + "rewards/rejected": -2.6083335876464844, + "step": 16149 + }, + { + "epoch": 0.94, + "learning_rate": 9.465396496692423e-10, + "logits/chosen": -1.956601858139038, + "logits/rejected": -1.961458683013916, + "logps/chosen": -10.871011734008789, + "logps/rejected": -95.73552703857422, + "loss": 0.3832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1464308798313141, + "rewards/margins": 1.379910945892334, + "rewards/rejected": -1.2334800958633423, + "step": 16150 + }, + { + "epoch": 0.94, + "learning_rate": 9.447154884493935e-10, + "logits/chosen": -1.759489893913269, + "logits/rejected": -1.774493932723999, + "logps/chosen": -166.69378662109375, + "logps/rejected": -305.8211669921875, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4212234020233154, + "rewards/margins": 3.505702018737793, + "rewards/rejected": -1.084478735923767, + "step": 16151 + }, + { + "epoch": 0.94, + "learning_rate": 9.428930699132032e-10, + "logits/chosen": -2.0431201457977295, + "logits/rejected": -2.0316061973571777, + "logps/chosen": -47.972496032714844, + "logps/rejected": -289.06683349609375, + "loss": 0.0979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0355175733566284, + "rewards/margins": 5.400743007659912, + "rewards/rejected": -4.365225315093994, + "step": 16152 + }, + { + "epoch": 0.94, + "learning_rate": 9.41072394125414e-10, + "logits/chosen": -1.833613634109497, + "logits/rejected": -1.8372451066970825, + "logps/chosen": -19.209033966064453, + "logps/rejected": -92.97055053710938, + "loss": 0.4008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4253324568271637, + "rewards/margins": 1.0832738876342773, + "rewards/rejected": -0.657941460609436, + "step": 16153 + }, + { + "epoch": 0.94, + "learning_rate": 9.392534611507074e-10, + "logits/chosen": -2.0563127994537354, + "logits/rejected": -2.053459644317627, + "logps/chosen": -18.730480194091797, + "logps/rejected": -314.765625, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22220955789089203, + "rewards/margins": 6.510776042938232, + "rewards/rejected": -6.288566589355469, + "step": 16154 + }, + { + "epoch": 0.94, + "learning_rate": 9.374362710536988e-10, + "logits/chosen": -2.0014684200286865, + "logits/rejected": -2.0002946853637695, + "logps/chosen": -27.655420303344727, + "logps/rejected": -169.67530822753906, + "loss": 0.2553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3569028973579407, + "rewards/margins": 3.882633686065674, + "rewards/rejected": -3.525730848312378, + "step": 16155 + }, + { + "epoch": 0.94, + "learning_rate": 9.356208238989415e-10, + "logits/chosen": -1.9264750480651855, + "logits/rejected": -1.905880331993103, + "logps/chosen": -209.91127014160156, + "logps/rejected": -359.79437255859375, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2784621715545654, + "rewards/margins": 2.4494307041168213, + "rewards/rejected": 0.8290314078330994, + "step": 16156 + }, + { + "epoch": 0.94, + "learning_rate": 9.338071197509402e-10, + "logits/chosen": -1.9559588432312012, + "logits/rejected": -1.9549251794815063, + "logps/chosen": -25.183643341064453, + "logps/rejected": -285.9219665527344, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01436691265553236, + "rewards/margins": 3.5315377712249756, + "rewards/rejected": -3.5459046363830566, + "step": 16157 + }, + { + "epoch": 0.94, + "learning_rate": 9.31995158674115e-10, + "logits/chosen": -1.893550992012024, + "logits/rejected": -1.882426142692566, + "logps/chosen": -0.00038583180867135525, + "logps/rejected": -365.3807067871094, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4600269171060063e-05, + "rewards/margins": 8.806063652038574, + "rewards/rejected": -8.8060884475708, + "step": 16158 + }, + { + "epoch": 0.94, + "learning_rate": 9.301849407328422e-10, + "logits/chosen": -1.8912273645401, + "logits/rejected": -1.8760260343551636, + "logps/chosen": -267.4096374511719, + "logps/rejected": -331.63214111328125, + "loss": 0.2869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8000702261924744, + "rewards/margins": 0.5947235226631165, + "rewards/rejected": 0.20534668862819672, + "step": 16159 + }, + { + "epoch": 0.94, + "learning_rate": 9.283764659914151e-10, + "logits/chosen": -1.9093095064163208, + "logits/rejected": -1.9032747745513916, + "logps/chosen": -130.498779296875, + "logps/rejected": -157.03636169433594, + "loss": 0.4189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.250909447669983, + "rewards/margins": 0.1619476079940796, + "rewards/rejected": 1.0889618396759033, + "step": 16160 + }, + { + "epoch": 0.94, + "learning_rate": 9.265697345141044e-10, + "logits/chosen": -1.9570492506027222, + "logits/rejected": -1.9572736024856567, + "logps/chosen": -50.694419860839844, + "logps/rejected": -293.693359375, + "loss": 0.1776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34869155287742615, + "rewards/margins": 5.542598724365234, + "rewards/rejected": -5.193907260894775, + "step": 16161 + }, + { + "epoch": 0.94, + "learning_rate": 9.247647463650754e-10, + "logits/chosen": -1.8464525938034058, + "logits/rejected": -1.8502169847488403, + "logps/chosen": -7.990972518920898, + "logps/rejected": -162.0723419189453, + "loss": 0.239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41708680987358093, + "rewards/margins": 3.324575901031494, + "rewards/rejected": -2.907489061355591, + "step": 16162 + }, + { + "epoch": 0.94, + "learning_rate": 9.229615016084546e-10, + "logits/chosen": -1.9756076335906982, + "logits/rejected": -1.9717164039611816, + "logps/chosen": -48.09458923339844, + "logps/rejected": -172.03396606445312, + "loss": 0.2106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.810738742351532, + "rewards/margins": 1.88621187210083, + "rewards/rejected": -1.0754730701446533, + "step": 16163 + }, + { + "epoch": 0.94, + "learning_rate": 9.21160000308302e-10, + "logits/chosen": -2.0467140674591064, + "logits/rejected": -2.0945489406585693, + "logps/chosen": -171.92198181152344, + "logps/rejected": -326.6521911621094, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.436113119125366, + "rewards/margins": 3.8709123134613037, + "rewards/rejected": -1.4347991943359375, + "step": 16164 + }, + { + "epoch": 0.94, + "learning_rate": 9.193602425286218e-10, + "logits/chosen": -1.8319238424301147, + "logits/rejected": -1.8274333477020264, + "logps/chosen": -21.908843994140625, + "logps/rejected": -55.595760345458984, + "loss": 0.6875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02738323248922825, + "rewards/margins": 0.0651208907365799, + "rewards/rejected": -0.092504121363163, + "step": 16165 + }, + { + "epoch": 0.94, + "learning_rate": 9.175622283333407e-10, + "logits/chosen": -1.770739197731018, + "logits/rejected": -1.7688734531402588, + "logps/chosen": -7.244012832641602, + "logps/rejected": -156.80654907226562, + "loss": 0.4232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22080783545970917, + "rewards/margins": 0.7894449234008789, + "rewards/rejected": -0.5686370730400085, + "step": 16166 + }, + { + "epoch": 0.94, + "learning_rate": 9.157659577863408e-10, + "logits/chosen": -1.921707034111023, + "logits/rejected": -1.9004946947097778, + "logps/chosen": -153.10302734375, + "logps/rejected": -252.0087890625, + "loss": 0.2217, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2580139636993408, + "rewards/margins": 1.2139999866485596, + "rewards/rejected": 0.04401397705078125, + "step": 16167 + }, + { + "epoch": 0.94, + "learning_rate": 9.139714309514268e-10, + "logits/chosen": -1.9512991905212402, + "logits/rejected": -1.9497737884521484, + "logps/chosen": -10.27585220336914, + "logps/rejected": -85.14785766601562, + "loss": 0.4529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10067596286535263, + "rewards/margins": 1.1548583507537842, + "rewards/rejected": -1.0541824102401733, + "step": 16168 + }, + { + "epoch": 0.94, + "learning_rate": 9.121786478923587e-10, + "logits/chosen": -1.8678184747695923, + "logits/rejected": -1.9375382661819458, + "logps/chosen": -265.2322998046875, + "logps/rejected": -468.579833984375, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0197479724884033, + "rewards/margins": 8.998245239257812, + "rewards/rejected": -6.978497505187988, + "step": 16169 + }, + { + "epoch": 0.94, + "learning_rate": 9.10387608672819e-10, + "logits/chosen": -1.955491065979004, + "logits/rejected": -1.933728814125061, + "logps/chosen": -185.99087524414062, + "logps/rejected": -280.85107421875, + "loss": 0.0717, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.219748020172119, + "rewards/margins": 2.561572313308716, + "rewards/rejected": -0.34182435274124146, + "step": 16170 + }, + { + "epoch": 0.94, + "learning_rate": 9.08598313356429e-10, + "logits/chosen": -1.705858588218689, + "logits/rejected": -1.704863429069519, + "logps/chosen": -173.50531005859375, + "logps/rejected": -264.2550964355469, + "loss": 0.0672, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0603272914886475, + "rewards/margins": 2.119589328765869, + "rewards/rejected": -0.05926208570599556, + "step": 16171 + }, + { + "epoch": 0.94, + "learning_rate": 9.068107620067656e-10, + "logits/chosen": -2.0088844299316406, + "logits/rejected": -2.0082895755767822, + "logps/chosen": -65.54689025878906, + "logps/rejected": -147.15870666503906, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6679618954658508, + "rewards/margins": 1.1707465648651123, + "rewards/rejected": -0.5027847290039062, + "step": 16172 + }, + { + "epoch": 0.94, + "learning_rate": 9.050249546873167e-10, + "logits/chosen": -1.875196099281311, + "logits/rejected": -1.855156660079956, + "logps/chosen": -317.3931884765625, + "logps/rejected": -594.8185424804688, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.466418504714966, + "rewards/margins": 1.4808471202850342, + "rewards/rejected": 0.9855713248252869, + "step": 16173 + }, + { + "epoch": 0.94, + "learning_rate": 9.032408914615431e-10, + "logits/chosen": -2.020123243331909, + "logits/rejected": -2.0188775062561035, + "logps/chosen": -88.32563018798828, + "logps/rejected": -210.40045166015625, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.590429663658142, + "rewards/margins": 0.3119049072265625, + "rewards/rejected": 1.2785247564315796, + "step": 16174 + }, + { + "epoch": 0.94, + "learning_rate": 9.014585723927937e-10, + "logits/chosen": -2.0075855255126953, + "logits/rejected": -2.004748821258545, + "logps/chosen": -70.5739517211914, + "logps/rejected": -160.4818572998047, + "loss": 0.315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6593849062919617, + "rewards/margins": 0.6985496282577515, + "rewards/rejected": -0.03916473314166069, + "step": 16175 + }, + { + "epoch": 0.94, + "learning_rate": 8.99677997544418e-10, + "logits/chosen": -1.804469347000122, + "logits/rejected": -1.806470513343811, + "logps/chosen": -216.48284912109375, + "logps/rejected": -382.60748291015625, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.929312229156494, + "rewards/margins": 4.200933933258057, + "rewards/rejected": -0.2716217041015625, + "step": 16176 + }, + { + "epoch": 0.94, + "learning_rate": 8.978991669796487e-10, + "logits/chosen": -1.8172705173492432, + "logits/rejected": -1.8148127794265747, + "logps/chosen": -24.749229431152344, + "logps/rejected": -217.2644805908203, + "loss": 0.5745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27985936403274536, + "rewards/margins": 0.07047063112258911, + "rewards/rejected": 0.20938873291015625, + "step": 16177 + }, + { + "epoch": 0.94, + "learning_rate": 8.961220807616909e-10, + "logits/chosen": -1.7881830930709839, + "logits/rejected": -1.8168705701828003, + "logps/chosen": -238.6132354736328, + "logps/rejected": -219.4991912841797, + "loss": 0.1603, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7328689098358154, + "rewards/margins": 1.026565432548523, + "rewards/rejected": 1.7063034772872925, + "step": 16178 + }, + { + "epoch": 0.94, + "learning_rate": 8.943467389536608e-10, + "logits/chosen": -1.8292927742004395, + "logits/rejected": -1.8344923257827759, + "logps/chosen": -39.29998779296875, + "logps/rejected": -254.20721435546875, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6303859949111938, + "rewards/margins": 1.857137680053711, + "rewards/rejected": -1.226751685142517, + "step": 16179 + }, + { + "epoch": 0.94, + "learning_rate": 8.925731416186466e-10, + "logits/chosen": -2.0337929725646973, + "logits/rejected": -2.0510895252227783, + "logps/chosen": -255.76278686523438, + "logps/rejected": -526.6392822265625, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6133880615234375, + "rewards/margins": 6.027001857757568, + "rewards/rejected": -5.413613796234131, + "step": 16180 + }, + { + "epoch": 0.94, + "learning_rate": 8.90801288819637e-10, + "logits/chosen": -1.756745457649231, + "logits/rejected": -1.7469151020050049, + "logps/chosen": -155.11793518066406, + "logps/rejected": -303.2187194824219, + "loss": 0.0941, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.485607862472534, + "rewards/margins": 1.8947844505310059, + "rewards/rejected": 0.5908233523368835, + "step": 16181 + }, + { + "epoch": 0.94, + "learning_rate": 8.890311806195927e-10, + "logits/chosen": -1.864944338798523, + "logits/rejected": -1.8428019285202026, + "logps/chosen": -172.9229278564453, + "logps/rejected": -287.06689453125, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8820144534111023, + "rewards/margins": 0.6782363653182983, + "rewards/rejected": 0.20377807319164276, + "step": 16182 + }, + { + "epoch": 0.94, + "learning_rate": 8.872628170813745e-10, + "logits/chosen": -1.7302420139312744, + "logits/rejected": -1.714709997177124, + "logps/chosen": -174.62692260742188, + "logps/rejected": -423.210693359375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.26641845703125, + "rewards/margins": 4.445818901062012, + "rewards/rejected": -1.1794006824493408, + "step": 16183 + }, + { + "epoch": 0.94, + "learning_rate": 8.854961982678322e-10, + "logits/chosen": -1.6603604555130005, + "logits/rejected": -1.6460824012756348, + "logps/chosen": -135.58596801757812, + "logps/rejected": -231.88442993164062, + "loss": 0.2639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6428253054618835, + "rewards/margins": 1.554692029953003, + "rewards/rejected": -0.9118667840957642, + "step": 16184 + }, + { + "epoch": 0.94, + "learning_rate": 8.837313242416988e-10, + "logits/chosen": -1.8936680555343628, + "logits/rejected": -1.899453043937683, + "logps/chosen": -9.881054878234863, + "logps/rejected": -168.5776824951172, + "loss": 0.3683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02570934407413006, + "rewards/margins": 3.491891622543335, + "rewards/rejected": -3.5176010131835938, + "step": 16185 + }, + { + "epoch": 0.94, + "learning_rate": 8.81968195065691e-10, + "logits/chosen": -1.9492032527923584, + "logits/rejected": -1.9376226663589478, + "logps/chosen": -18.500144958496094, + "logps/rejected": -214.78631591796875, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22500191628932953, + "rewards/margins": 3.8667469024658203, + "rewards/rejected": -3.641745090484619, + "step": 16186 + }, + { + "epoch": 0.94, + "learning_rate": 8.802068108024307e-10, + "logits/chosen": -1.8314226865768433, + "logits/rejected": -1.849610686302185, + "logps/chosen": -211.66148376464844, + "logps/rejected": -348.5379943847656, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.488124132156372, + "rewards/margins": 2.4112350940704346, + "rewards/rejected": 0.0768890380859375, + "step": 16187 + }, + { + "epoch": 0.94, + "learning_rate": 8.784471715144904e-10, + "logits/chosen": -1.7170391082763672, + "logits/rejected": -1.700735092163086, + "logps/chosen": -359.2326965332031, + "logps/rejected": -683.4849243164062, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.616830587387085, + "rewards/margins": 4.7271575927734375, + "rewards/rejected": -2.1103272438049316, + "step": 16188 + }, + { + "epoch": 0.94, + "learning_rate": 8.766892772643975e-10, + "logits/chosen": -1.6551896333694458, + "logits/rejected": -1.6557830572128296, + "logps/chosen": -330.10516357421875, + "logps/rejected": -415.3182373046875, + "loss": 0.1927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19472657144069672, + "rewards/margins": 1.3106812238693237, + "rewards/rejected": -1.1159546375274658, + "step": 16189 + }, + { + "epoch": 0.94, + "learning_rate": 8.749331281145855e-10, + "logits/chosen": -1.84128737449646, + "logits/rejected": -1.83925461769104, + "logps/chosen": -13.305908203125, + "logps/rejected": -221.02761840820312, + "loss": 0.4136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1545734405517578, + "rewards/margins": 1.0846378803253174, + "rewards/rejected": -0.9300643801689148, + "step": 16190 + }, + { + "epoch": 0.94, + "learning_rate": 8.73178724127438e-10, + "logits/chosen": -1.837336540222168, + "logits/rejected": -1.8806309700012207, + "logps/chosen": -278.38360595703125, + "logps/rejected": -322.3130798339844, + "loss": 0.0798, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.802752733230591, + "rewards/margins": 1.8164764642715454, + "rewards/rejected": 0.9862762689590454, + "step": 16191 + }, + { + "epoch": 0.94, + "learning_rate": 8.714260653652938e-10, + "logits/chosen": -1.684661865234375, + "logits/rejected": -1.6757383346557617, + "logps/chosen": -13.307334899902344, + "logps/rejected": -83.6092529296875, + "loss": 0.7019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1759907752275467, + "rewards/margins": 0.10313291847705841, + "rewards/rejected": -0.2791236937046051, + "step": 16192 + }, + { + "epoch": 0.94, + "learning_rate": 8.696751518904144e-10, + "logits/chosen": -1.9652953147888184, + "logits/rejected": -1.9664117097854614, + "logps/chosen": -43.37385559082031, + "logps/rejected": -135.8350830078125, + "loss": 0.5595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5718990564346313, + "rewards/margins": 2.0867013931274414, + "rewards/rejected": -2.6586005687713623, + "step": 16193 + }, + { + "epoch": 0.94, + "learning_rate": 8.679259837649889e-10, + "logits/chosen": -2.032493829727173, + "logits/rejected": -2.0305099487304688, + "logps/chosen": -49.22618103027344, + "logps/rejected": -179.96063232421875, + "loss": 0.3204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.935687243938446, + "rewards/margins": 0.8915985226631165, + "rewards/rejected": 0.04408874735236168, + "step": 16194 + }, + { + "epoch": 0.94, + "learning_rate": 8.661785610511618e-10, + "logits/chosen": -1.8836253881454468, + "logits/rejected": -1.8768670558929443, + "logps/chosen": -0.00018929461657535285, + "logps/rejected": -95.1910400390625, + "loss": 0.66, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5500859667081386e-05, + "rewards/margins": 0.08240168541669846, + "rewards/rejected": -0.08236618340015411, + "step": 16195 + }, + { + "epoch": 0.94, + "learning_rate": 8.644328838110171e-10, + "logits/chosen": -1.644492506980896, + "logits/rejected": -1.626080870628357, + "logps/chosen": -308.0679016113281, + "logps/rejected": -418.41619873046875, + "loss": 0.1101, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4459075927734375, + "rewards/margins": 1.8343169689178467, + "rewards/rejected": 0.611590564250946, + "step": 16196 + }, + { + "epoch": 0.94, + "learning_rate": 8.626889521065661e-10, + "logits/chosen": -1.9143823385238647, + "logits/rejected": -1.9103604555130005, + "logps/chosen": -5.345869541168213, + "logps/rejected": -230.2495880126953, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013432646170258522, + "rewards/margins": 3.428234577178955, + "rewards/rejected": -3.441667318344116, + "step": 16197 + }, + { + "epoch": 0.94, + "learning_rate": 8.609467659997482e-10, + "logits/chosen": -1.8382643461227417, + "logits/rejected": -1.7863993644714355, + "logps/chosen": -182.02394104003906, + "logps/rejected": -335.8995666503906, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1530182361602783, + "rewards/margins": 4.339221000671387, + "rewards/rejected": -2.1862030029296875, + "step": 16198 + }, + { + "epoch": 0.94, + "learning_rate": 8.592063255524806e-10, + "logits/chosen": -2.0707809925079346, + "logits/rejected": -2.053077459335327, + "logps/chosen": -163.20904541015625, + "logps/rejected": -269.6750793457031, + "loss": 0.1143, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9429672956466675, + "rewards/margins": 1.503199815750122, + "rewards/rejected": 0.439767450094223, + "step": 16199 + }, + { + "epoch": 0.94, + "learning_rate": 8.574676308265694e-10, + "logits/chosen": -1.8451324701309204, + "logits/rejected": -1.8387774229049683, + "logps/chosen": -17.51431655883789, + "logps/rejected": -229.05679321289062, + "loss": 0.2869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13006077706813812, + "rewards/margins": 3.8870248794555664, + "rewards/rejected": -3.7569642066955566, + "step": 16200 + }, + { + "epoch": 0.94, + "learning_rate": 8.557306818837984e-10, + "logits/chosen": -2.080216884613037, + "logits/rejected": -2.0815677642822266, + "logps/chosen": -5.435893763205968e-05, + "logps/rejected": -64.28350067138672, + "loss": 0.3877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2515958587755449e-05, + "rewards/margins": 2.4008798599243164, + "rewards/rejected": -2.400867462158203, + "step": 16201 + }, + { + "epoch": 0.94, + "learning_rate": 8.539954787858517e-10, + "logits/chosen": -1.822202444076538, + "logits/rejected": -1.8182815313339233, + "logps/chosen": -128.38729858398438, + "logps/rejected": -380.04296875, + "loss": 0.0851, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3783997297286987, + "rewards/margins": 6.308252334594727, + "rewards/rejected": -4.929852485656738, + "step": 16202 + }, + { + "epoch": 0.94, + "learning_rate": 8.52262021594391e-10, + "logits/chosen": -1.8671544790267944, + "logits/rejected": -1.8640546798706055, + "logps/chosen": -43.835487365722656, + "logps/rejected": -181.12635803222656, + "loss": 0.616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015642547979950905, + "rewards/margins": 0.4045356810092926, + "rewards/rejected": -0.42017823457717896, + "step": 16203 + }, + { + "epoch": 0.94, + "learning_rate": 8.505303103709948e-10, + "logits/chosen": -2.020512104034424, + "logits/rejected": -2.0572659969329834, + "logps/chosen": -132.08187866210938, + "logps/rejected": -413.53521728515625, + "loss": 0.3694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.245184302330017, + "rewards/margins": 0.07240593433380127, + "rewards/rejected": 1.1727783679962158, + "step": 16204 + }, + { + "epoch": 0.94, + "learning_rate": 8.488003451771697e-10, + "logits/chosen": -1.8733705282211304, + "logits/rejected": -1.880436658859253, + "logps/chosen": -5.703371047973633, + "logps/rejected": -86.02400207519531, + "loss": 0.8582, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.21974268555641174, + "rewards/margins": -0.8780410289764404, + "rewards/rejected": 1.0977836847305298, + "step": 16205 + }, + { + "epoch": 0.94, + "learning_rate": 8.470721260743885e-10, + "logits/chosen": -1.7490450143814087, + "logits/rejected": -1.756374478340149, + "logps/chosen": -16.60176658630371, + "logps/rejected": -120.98435974121094, + "loss": 0.5389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012564659118652344, + "rewards/margins": 0.6556116342544556, + "rewards/rejected": -0.6681762933731079, + "step": 16206 + }, + { + "epoch": 0.94, + "learning_rate": 8.453456531240355e-10, + "logits/chosen": -1.7546157836914062, + "logits/rejected": -1.7401561737060547, + "logps/chosen": -283.6446838378906, + "logps/rejected": -482.168212890625, + "loss": 0.2045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3664581775665283, + "rewards/margins": 1.321160912513733, + "rewards/rejected": 0.04529724270105362, + "step": 16207 + }, + { + "epoch": 0.94, + "learning_rate": 8.436209263874449e-10, + "logits/chosen": -1.6607500314712524, + "logits/rejected": -1.6572307348251343, + "logps/chosen": -39.08381652832031, + "logps/rejected": -266.7021484375, + "loss": 0.2809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3383232057094574, + "rewards/margins": 2.6269569396972656, + "rewards/rejected": -2.2886338233947754, + "step": 16208 + }, + { + "epoch": 0.94, + "learning_rate": 8.418979459258901e-10, + "logits/chosen": -2.0096242427825928, + "logits/rejected": -1.9616413116455078, + "logps/chosen": -299.727294921875, + "logps/rejected": -496.4252624511719, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.450396776199341, + "rewards/margins": 4.242251396179199, + "rewards/rejected": -0.7918548583984375, + "step": 16209 + }, + { + "epoch": 0.94, + "learning_rate": 8.401767118005776e-10, + "logits/chosen": -1.8332468271255493, + "logits/rejected": -1.84093177318573, + "logps/chosen": -132.34091186523438, + "logps/rejected": -361.46307373046875, + "loss": 0.0787, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2278594970703125, + "rewards/margins": 2.0481841564178467, + "rewards/rejected": 0.17967529594898224, + "step": 16210 + }, + { + "epoch": 0.94, + "learning_rate": 8.384572240726528e-10, + "logits/chosen": -1.8196301460266113, + "logits/rejected": -1.823154330253601, + "logps/chosen": -0.6407153010368347, + "logps/rejected": -192.5103759765625, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04993679001927376, + "rewards/margins": 2.33278751373291, + "rewards/rejected": -2.282850742340088, + "step": 16211 + }, + { + "epoch": 0.94, + "learning_rate": 8.367394828032115e-10, + "logits/chosen": -1.9673645496368408, + "logits/rejected": -1.962777853012085, + "logps/chosen": -246.094970703125, + "logps/rejected": -429.7681579589844, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8557891845703125, + "rewards/margins": 4.78887939453125, + "rewards/rejected": -2.9330902099609375, + "step": 16212 + }, + { + "epoch": 0.94, + "learning_rate": 8.350234880532603e-10, + "logits/chosen": -2.0014493465423584, + "logits/rejected": -1.9957672357559204, + "logps/chosen": -0.007317894138395786, + "logps/rejected": -201.15602111816406, + "loss": 0.3841, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00041395454900339246, + "rewards/margins": 3.4431216716766357, + "rewards/rejected": -3.4427077770233154, + "step": 16213 + }, + { + "epoch": 0.94, + "learning_rate": 8.333092398837671e-10, + "logits/chosen": -1.9999289512634277, + "logits/rejected": -1.9987059831619263, + "logps/chosen": -16.713668823242188, + "logps/rejected": -44.52318572998047, + "loss": 0.4929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056739043444395065, + "rewards/margins": 0.965201199054718, + "rewards/rejected": -0.9084621667861938, + "step": 16214 + }, + { + "epoch": 0.94, + "learning_rate": 8.315967383556278e-10, + "logits/chosen": -1.9434616565704346, + "logits/rejected": -1.9598815441131592, + "logps/chosen": -192.5361785888672, + "logps/rejected": -546.0869140625, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5212310552597046, + "rewards/margins": 8.302642822265625, + "rewards/rejected": -6.781411647796631, + "step": 16215 + }, + { + "epoch": 0.94, + "learning_rate": 8.298859835296878e-10, + "logits/chosen": -1.9698550701141357, + "logits/rejected": -1.9768061637878418, + "logps/chosen": -24.962739944458008, + "logps/rejected": -104.72090911865234, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3679996430873871, + "rewards/margins": 2.7617924213409424, + "rewards/rejected": -2.3937928676605225, + "step": 16216 + }, + { + "epoch": 0.94, + "learning_rate": 8.281769754667045e-10, + "logits/chosen": -1.9031447172164917, + "logits/rejected": -1.9063231945037842, + "logps/chosen": -0.007476063910871744, + "logps/rejected": -24.351070404052734, + "loss": 0.5935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045041172415949404, + "rewards/margins": 0.4058161973953247, + "rewards/rejected": -0.40626659989356995, + "step": 16217 + }, + { + "epoch": 0.94, + "learning_rate": 8.264697142274124e-10, + "logits/chosen": -1.9774202108383179, + "logits/rejected": -1.9772106409072876, + "logps/chosen": -15.349420547485352, + "logps/rejected": -207.04812622070312, + "loss": 0.2704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2218448668718338, + "rewards/margins": 4.254834175109863, + "rewards/rejected": -4.032989501953125, + "step": 16218 + }, + { + "epoch": 0.94, + "learning_rate": 8.247641998724408e-10, + "logits/chosen": -1.8377162218093872, + "logits/rejected": -1.8373477458953857, + "logps/chosen": -28.836835861206055, + "logps/rejected": -255.70399475097656, + "loss": 0.1433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7205888628959656, + "rewards/margins": 2.5654101371765137, + "rewards/rejected": -1.8448212146759033, + "step": 16219 + }, + { + "epoch": 0.94, + "learning_rate": 8.230604324623968e-10, + "logits/chosen": -2.070537805557251, + "logits/rejected": -2.0713231563568115, + "logps/chosen": -68.26319885253906, + "logps/rejected": -129.22634887695312, + "loss": 0.7105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46500205993652344, + "rewards/margins": 0.08643072843551636, + "rewards/rejected": -0.5514327883720398, + "step": 16220 + }, + { + "epoch": 0.94, + "learning_rate": 8.213584120577821e-10, + "logits/chosen": -1.7368351221084595, + "logits/rejected": -1.7433140277862549, + "logps/chosen": -70.77598571777344, + "logps/rejected": -325.3625183105469, + "loss": 0.0962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3145722150802612, + "rewards/margins": 4.285027980804443, + "rewards/rejected": -2.9704558849334717, + "step": 16221 + }, + { + "epoch": 0.94, + "learning_rate": 8.196581387190871e-10, + "logits/chosen": -1.8480545282363892, + "logits/rejected": -1.7927123308181763, + "logps/chosen": -210.0659942626953, + "logps/rejected": -418.98992919921875, + "loss": 0.2558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9960678219795227, + "rewards/margins": 1.3677352666854858, + "rewards/rejected": -0.3716674745082855, + "step": 16222 + }, + { + "epoch": 0.94, + "learning_rate": 8.179596125066968e-10, + "logits/chosen": -1.8346830606460571, + "logits/rejected": -1.8295423984527588, + "logps/chosen": -41.92806625366211, + "logps/rejected": -206.55067443847656, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2813377380371094, + "rewards/margins": 4.923160552978516, + "rewards/rejected": -3.6418228149414062, + "step": 16223 + }, + { + "epoch": 0.94, + "learning_rate": 8.162628334809574e-10, + "logits/chosen": -1.727412223815918, + "logits/rejected": -1.7762112617492676, + "logps/chosen": -214.95591735839844, + "logps/rejected": -397.3607177734375, + "loss": 0.1868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7401199340820312, + "rewards/margins": 2.0987014770507812, + "rewards/rejected": -1.35858154296875, + "step": 16224 + }, + { + "epoch": 0.94, + "learning_rate": 8.145678017021429e-10, + "logits/chosen": -1.9840410947799683, + "logits/rejected": -1.9786784648895264, + "logps/chosen": -2.3624958992004395, + "logps/rejected": -200.57757568359375, + "loss": 0.3183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010939192958176136, + "rewards/margins": 4.62205696105957, + "rewards/rejected": -4.611117839813232, + "step": 16225 + }, + { + "epoch": 0.94, + "learning_rate": 8.12874517230483e-10, + "logits/chosen": -1.919965386390686, + "logits/rejected": -1.893080472946167, + "logps/chosen": -190.4806365966797, + "logps/rejected": -545.1961059570312, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8630142211914062, + "rewards/margins": 6.083808898925781, + "rewards/rejected": -2.220794677734375, + "step": 16226 + }, + { + "epoch": 0.94, + "learning_rate": 8.111829801261072e-10, + "logits/chosen": -1.864058494567871, + "logits/rejected": -1.885507583618164, + "logps/chosen": -223.6778564453125, + "logps/rejected": -396.0917663574219, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.622699022293091, + "rewards/margins": 3.617462158203125, + "rewards/rejected": -0.994763195514679, + "step": 16227 + }, + { + "epoch": 0.94, + "learning_rate": 8.094931904491287e-10, + "logits/chosen": -2.038447141647339, + "logits/rejected": -2.0422604084014893, + "logps/chosen": -28.049564361572266, + "logps/rejected": -272.25213623046875, + "loss": 0.2099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5492389798164368, + "rewards/margins": 4.836917877197266, + "rewards/rejected": -4.2876787185668945, + "step": 16228 + }, + { + "epoch": 0.94, + "learning_rate": 8.07805148259566e-10, + "logits/chosen": -1.8633824586868286, + "logits/rejected": -1.8628275394439697, + "logps/chosen": -89.89801025390625, + "logps/rejected": -326.9126892089844, + "loss": 0.3793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020514680072665215, + "rewards/margins": 4.372780799865723, + "rewards/rejected": -4.3932952880859375, + "step": 16229 + }, + { + "epoch": 0.94, + "learning_rate": 8.061188536173879e-10, + "logits/chosen": -2.006190538406372, + "logits/rejected": -2.0084095001220703, + "logps/chosen": -40.65957260131836, + "logps/rejected": -167.1098175048828, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2756500244140625, + "rewards/margins": 1.1903107166290283, + "rewards/rejected": -0.914660632610321, + "step": 16230 + }, + { + "epoch": 0.94, + "learning_rate": 8.044343065825077e-10, + "logits/chosen": -1.8214014768600464, + "logits/rejected": -1.8098117113113403, + "logps/chosen": -49.61870574951172, + "logps/rejected": -161.73995971679688, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22268104553222656, + "rewards/margins": 1.0008121728897095, + "rewards/rejected": -0.7781311273574829, + "step": 16231 + }, + { + "epoch": 0.94, + "learning_rate": 8.02751507214755e-10, + "logits/chosen": -1.7993557453155518, + "logits/rejected": -1.8019276857376099, + "logps/chosen": -5.050081729888916, + "logps/rejected": -116.44389343261719, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13472986221313477, + "rewards/margins": 1.918668508529663, + "rewards/rejected": -1.7839386463165283, + "step": 16232 + }, + { + "epoch": 0.94, + "learning_rate": 8.010704555739211e-10, + "logits/chosen": -1.7994160652160645, + "logits/rejected": -1.8138970136642456, + "logps/chosen": -211.61865234375, + "logps/rejected": -250.37901306152344, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.523838758468628, + "rewards/margins": 0.0026092529296875, + "rewards/rejected": 2.5212295055389404, + "step": 16233 + }, + { + "epoch": 0.94, + "learning_rate": 7.993911517197194e-10, + "logits/chosen": -1.9010589122772217, + "logits/rejected": -1.901604413986206, + "logps/chosen": -0.1437484472990036, + "logps/rejected": -51.76546096801758, + "loss": 0.4851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054182978346943855, + "rewards/margins": 0.9912903904914856, + "rewards/rejected": -0.9967086911201477, + "step": 16234 + }, + { + "epoch": 0.94, + "learning_rate": 7.977135957118186e-10, + "logits/chosen": -1.812887191772461, + "logits/rejected": -1.8215558528900146, + "logps/chosen": -5.531226270250045e-05, + "logps/rejected": -288.23663330078125, + "loss": 0.3291, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2993438076591701e-06, + "rewards/margins": 5.632196426391602, + "rewards/rejected": -5.632197856903076, + "step": 16235 + }, + { + "epoch": 0.94, + "learning_rate": 7.96037787609799e-10, + "logits/chosen": -1.822830319404602, + "logits/rejected": -1.870941162109375, + "logps/chosen": -158.91790771484375, + "logps/rejected": -395.8783874511719, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2365174293518066, + "rewards/margins": 3.57151198387146, + "rewards/rejected": -1.3349945545196533, + "step": 16236 + }, + { + "epoch": 0.94, + "learning_rate": 7.943637274732018e-10, + "logits/chosen": -1.7535502910614014, + "logits/rejected": -1.885855793952942, + "logps/chosen": -288.9773254394531, + "logps/rejected": -193.507080078125, + "loss": 0.1372, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.906957983970642, + "rewards/margins": 1.2596588134765625, + "rewards/rejected": 0.6472992300987244, + "step": 16237 + }, + { + "epoch": 0.94, + "learning_rate": 7.926914153614961e-10, + "logits/chosen": -1.963999629020691, + "logits/rejected": -1.95111083984375, + "logps/chosen": -4.901495456695557, + "logps/rejected": -109.44708251953125, + "loss": 0.6327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023669147863984108, + "rewards/margins": 0.1990344077348709, + "rewards/rejected": -0.22270356118679047, + "step": 16238 + }, + { + "epoch": 0.95, + "learning_rate": 7.910208513340899e-10, + "logits/chosen": -1.8268446922302246, + "logits/rejected": -1.8254306316375732, + "logps/chosen": -8.770967483520508, + "logps/rejected": -54.480628967285156, + "loss": 0.5085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07084856182336807, + "rewards/margins": 0.6361222863197327, + "rewards/rejected": -0.5652737021446228, + "step": 16239 + }, + { + "epoch": 0.95, + "learning_rate": 7.893520354503247e-10, + "logits/chosen": -1.6329580545425415, + "logits/rejected": -1.6412954330444336, + "logps/chosen": -0.0009534449782222509, + "logps/rejected": -206.67068481445312, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5686134399147704e-05, + "rewards/margins": 5.396052837371826, + "rewards/rejected": -5.396098613739014, + "step": 16240 + }, + { + "epoch": 0.95, + "learning_rate": 7.876849677694975e-10, + "logits/chosen": -1.9060566425323486, + "logits/rejected": -1.9072190523147583, + "logps/chosen": -12.398193359375, + "logps/rejected": -220.0665283203125, + "loss": 0.2617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15793056786060333, + "rewards/margins": 3.546555280685425, + "rewards/rejected": -3.388624668121338, + "step": 16241 + }, + { + "epoch": 0.95, + "learning_rate": 7.860196483508219e-10, + "logits/chosen": -1.8192861080169678, + "logits/rejected": -1.8096811771392822, + "logps/chosen": -260.68719482421875, + "logps/rejected": -437.44561767578125, + "loss": 0.1467, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.324352979660034, + "rewards/margins": 1.1655181646347046, + "rewards/rejected": 1.1588348150253296, + "step": 16242 + }, + { + "epoch": 0.95, + "learning_rate": 7.843560772534618e-10, + "logits/chosen": -1.7563812732696533, + "logits/rejected": -1.7611031532287598, + "logps/chosen": -192.11163330078125, + "logps/rejected": -399.80078125, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.096038818359375, + "rewards/margins": 7.361828804016113, + "rewards/rejected": -5.265789985656738, + "step": 16243 + }, + { + "epoch": 0.95, + "learning_rate": 7.826942545365089e-10, + "logits/chosen": -1.9544682502746582, + "logits/rejected": -1.9535863399505615, + "logps/chosen": -6.765391826629639, + "logps/rejected": -130.8003387451172, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024158764630556107, + "rewards/margins": 2.4515607357025146, + "rewards/rejected": -2.475719451904297, + "step": 16244 + }, + { + "epoch": 0.95, + "learning_rate": 7.810341802590104e-10, + "logits/chosen": -1.761198878288269, + "logits/rejected": -1.7200181484222412, + "logps/chosen": -203.32542419433594, + "logps/rejected": -317.4797058105469, + "loss": 0.1493, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1384048461914062, + "rewards/margins": 1.317256212234497, + "rewards/rejected": 0.821148693561554, + "step": 16245 + }, + { + "epoch": 0.95, + "learning_rate": 7.7937585447993e-10, + "logits/chosen": -2.0415544509887695, + "logits/rejected": -2.033874988555908, + "logps/chosen": -10.241809844970703, + "logps/rejected": -86.26211547851562, + "loss": 0.4582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40556755661964417, + "rewards/margins": 0.684024453163147, + "rewards/rejected": -0.2784568965435028, + "step": 16246 + }, + { + "epoch": 0.95, + "learning_rate": 7.777192772581819e-10, + "logits/chosen": -1.9102221727371216, + "logits/rejected": -1.893194317817688, + "logps/chosen": -61.2879753112793, + "logps/rejected": -181.13763427734375, + "loss": 0.3928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07992744445800781, + "rewards/margins": 4.281649589538574, + "rewards/rejected": -4.361577033996582, + "step": 16247 + }, + { + "epoch": 0.95, + "learning_rate": 7.760644486526135e-10, + "logits/chosen": -1.8331677913665771, + "logits/rejected": -1.7729690074920654, + "logps/chosen": -204.3978271484375, + "logps/rejected": -492.8626403808594, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.905412197113037, + "rewards/margins": 4.331602573394775, + "rewards/rejected": 0.573809802532196, + "step": 16248 + }, + { + "epoch": 0.95, + "learning_rate": 7.744113687220222e-10, + "logits/chosen": -2.0182008743286133, + "logits/rejected": -2.0143136978149414, + "logps/chosen": -26.067760467529297, + "logps/rejected": -363.965087890625, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8114513158798218, + "rewards/margins": 9.000824928283691, + "rewards/rejected": -7.189373970031738, + "step": 16249 + }, + { + "epoch": 0.95, + "learning_rate": 7.727600375251331e-10, + "logits/chosen": -1.9065380096435547, + "logits/rejected": -1.888445496559143, + "logps/chosen": -207.0144805908203, + "logps/rejected": -340.067138671875, + "loss": 0.0501, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9637863636016846, + "rewards/margins": 2.410017490386963, + "rewards/rejected": 0.5537689328193665, + "step": 16250 + }, + { + "epoch": 0.95, + "learning_rate": 7.711104551205938e-10, + "logits/chosen": -1.9320063591003418, + "logits/rejected": -1.9131711721420288, + "logps/chosen": -29.00481605529785, + "logps/rejected": -356.31939697265625, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09926357120275497, + "rewards/margins": 7.053994178771973, + "rewards/rejected": -6.95473051071167, + "step": 16251 + }, + { + "epoch": 0.95, + "learning_rate": 7.694626215670185e-10, + "logits/chosen": -1.8699443340301514, + "logits/rejected": -1.8665525913238525, + "logps/chosen": -0.6334145069122314, + "logps/rejected": -158.5955810546875, + "loss": 0.427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10685183852910995, + "rewards/margins": 1.3849600553512573, + "rewards/rejected": -1.2781082391738892, + "step": 16252 + }, + { + "epoch": 0.95, + "learning_rate": 7.678165369229439e-10, + "logits/chosen": -1.8847465515136719, + "logits/rejected": -1.8801332712173462, + "logps/chosen": -26.564939498901367, + "logps/rejected": -148.3867645263672, + "loss": 0.245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7818880081176758, + "rewards/margins": 1.7684907913208008, + "rewards/rejected": -0.986602783203125, + "step": 16253 + }, + { + "epoch": 0.95, + "learning_rate": 7.661722012468508e-10, + "logits/chosen": -2.0610949993133545, + "logits/rejected": -2.0517327785491943, + "logps/chosen": -32.20437240600586, + "logps/rejected": -156.42897033691406, + "loss": 0.1571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7838165163993835, + "rewards/margins": 3.5155534744262695, + "rewards/rejected": -2.731736898422241, + "step": 16254 + }, + { + "epoch": 0.95, + "learning_rate": 7.645296145971425e-10, + "logits/chosen": -1.839095115661621, + "logits/rejected": -1.8344395160675049, + "logps/chosen": -18.758068084716797, + "logps/rejected": -201.03863525390625, + "loss": 0.4376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08745117485523224, + "rewards/margins": 1.7228240966796875, + "rewards/rejected": -1.8102753162384033, + "step": 16255 + }, + { + "epoch": 0.95, + "learning_rate": 7.628887770321835e-10, + "logits/chosen": -1.6220146417617798, + "logits/rejected": -1.6718686819076538, + "logps/chosen": -216.81442260742188, + "logps/rejected": -320.3355712890625, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.519195556640625, + "rewards/margins": 4.456948757171631, + "rewards/rejected": -1.9377533197402954, + "step": 16256 + }, + { + "epoch": 0.95, + "learning_rate": 7.612496886102604e-10, + "logits/chosen": -1.7890421152114868, + "logits/rejected": -1.7987805604934692, + "logps/chosen": -227.29721069335938, + "logps/rejected": -370.62860107421875, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2807159423828125, + "rewards/margins": 3.890887498855591, + "rewards/rejected": -3.6101715564727783, + "step": 16257 + }, + { + "epoch": 0.95, + "learning_rate": 7.59612349389599e-10, + "logits/chosen": -1.927269458770752, + "logits/rejected": -1.9288164377212524, + "logps/chosen": -3.3095247745513916, + "logps/rejected": -78.93302917480469, + "loss": 0.4759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06050529703497887, + "rewards/margins": 0.7236087918281555, + "rewards/rejected": -0.6631035208702087, + "step": 16258 + }, + { + "epoch": 0.95, + "learning_rate": 7.579767594283636e-10, + "logits/chosen": -1.8422856330871582, + "logits/rejected": -1.867754578590393, + "logps/chosen": -214.65516662597656, + "logps/rejected": -323.5418701171875, + "loss": 0.0657, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6516342163085938, + "rewards/margins": 2.2527389526367188, + "rewards/rejected": -0.601104736328125, + "step": 16259 + }, + { + "epoch": 0.95, + "learning_rate": 7.563429187846693e-10, + "logits/chosen": -1.8328266143798828, + "logits/rejected": -1.8334226608276367, + "logps/chosen": -15.533038139343262, + "logps/rejected": -85.12544250488281, + "loss": 0.3997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1649802178144455, + "rewards/margins": 1.0744292736053467, + "rewards/rejected": -0.90944904088974, + "step": 16260 + }, + { + "epoch": 0.95, + "learning_rate": 7.547108275165526e-10, + "logits/chosen": -1.684848427772522, + "logits/rejected": -1.6844769716262817, + "logps/chosen": -137.54971313476562, + "logps/rejected": -183.77993774414062, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5457763671875, + "rewards/margins": 0.4379394054412842, + "rewards/rejected": 1.1078369617462158, + "step": 16261 + }, + { + "epoch": 0.95, + "learning_rate": 7.530804856819895e-10, + "logits/chosen": -1.7478430271148682, + "logits/rejected": -1.7596356868743896, + "logps/chosen": -286.2639465332031, + "logps/rejected": -513.1409912109375, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7641419172286987, + "rewards/margins": 4.252130031585693, + "rewards/rejected": -2.487988233566284, + "step": 16262 + }, + { + "epoch": 0.95, + "learning_rate": 7.514518933388947e-10, + "logits/chosen": -1.9723031520843506, + "logits/rejected": -1.990809679031372, + "logps/chosen": -262.8224792480469, + "logps/rejected": -470.41729736328125, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8139495849609375, + "rewards/margins": 9.0946044921875, + "rewards/rejected": -6.2806549072265625, + "step": 16263 + }, + { + "epoch": 0.95, + "learning_rate": 7.498250505451387e-10, + "logits/chosen": -1.9661643505096436, + "logits/rejected": -2.0022428035736084, + "logps/chosen": -290.78826904296875, + "logps/rejected": -429.30615234375, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0229737758636475, + "rewards/margins": 2.481192111968994, + "rewards/rejected": -0.45821839570999146, + "step": 16264 + }, + { + "epoch": 0.95, + "learning_rate": 7.481999573584973e-10, + "logits/chosen": -1.8235976696014404, + "logits/rejected": -1.8234922885894775, + "logps/chosen": -3.1232218742370605, + "logps/rejected": -67.24934387207031, + "loss": 0.691, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09448855370283127, + "rewards/margins": 0.1061202809214592, + "rewards/rejected": -0.20060883462429047, + "step": 16265 + }, + { + "epoch": 0.95, + "learning_rate": 7.465766138367135e-10, + "logits/chosen": -1.973601222038269, + "logits/rejected": -1.9762547016143799, + "logps/chosen": -19.389190673828125, + "logps/rejected": -146.76492309570312, + "loss": 0.3609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.158564954996109, + "rewards/margins": 1.974064826965332, + "rewards/rejected": -1.8154999017715454, + "step": 16266 + }, + { + "epoch": 0.95, + "learning_rate": 7.449550200374522e-10, + "logits/chosen": -1.7179183959960938, + "logits/rejected": -1.7071460485458374, + "logps/chosen": -144.157470703125, + "logps/rejected": -400.78106689453125, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7970367670059204, + "rewards/margins": 4.234250068664551, + "rewards/rejected": -2.437213182449341, + "step": 16267 + }, + { + "epoch": 0.95, + "learning_rate": 7.433351760183227e-10, + "logits/chosen": -1.8298572301864624, + "logits/rejected": -1.7760376930236816, + "logps/chosen": -276.00048828125, + "logps/rejected": -530.28759765625, + "loss": 0.0801, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3901824951171875, + "rewards/margins": 3.290493965148926, + "rewards/rejected": -1.9003113508224487, + "step": 16268 + }, + { + "epoch": 0.95, + "learning_rate": 7.417170818368679e-10, + "logits/chosen": -2.033942222595215, + "logits/rejected": -2.0324137210845947, + "logps/chosen": -10.90788459777832, + "logps/rejected": -53.84952926635742, + "loss": 0.4916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21046142280101776, + "rewards/margins": 1.0494304895401, + "rewards/rejected": -1.2598918676376343, + "step": 16269 + }, + { + "epoch": 0.95, + "learning_rate": 7.401007375505697e-10, + "logits/chosen": -2.122455596923828, + "logits/rejected": -2.1177189350128174, + "logps/chosen": -75.25640106201172, + "logps/rejected": -182.04898071289062, + "loss": 0.7489, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.4975852966308594, + "rewards/margins": -0.686566948890686, + "rewards/rejected": 1.1841522455215454, + "step": 16270 + }, + { + "epoch": 0.95, + "learning_rate": 7.384861432168488e-10, + "logits/chosen": -1.6681121587753296, + "logits/rejected": -1.6561777591705322, + "logps/chosen": -73.24584197998047, + "logps/rejected": -100.35595703125, + "loss": 0.8795, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.115148164331913, + "rewards/margins": -0.6572357416152954, + "rewards/rejected": 0.5420875549316406, + "step": 16271 + }, + { + "epoch": 0.95, + "learning_rate": 7.368732988930648e-10, + "logits/chosen": -2.0022430419921875, + "logits/rejected": -2.000370502471924, + "logps/chosen": -2.5900442600250244, + "logps/rejected": -136.2939453125, + "loss": 0.4231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048065781593322754, + "rewards/margins": 1.4489996433258057, + "rewards/rejected": -1.400933861732483, + "step": 16272 + }, + { + "epoch": 0.95, + "learning_rate": 7.352622046365165e-10, + "logits/chosen": -1.9089076519012451, + "logits/rejected": -1.910923719406128, + "logps/chosen": -138.0491943359375, + "logps/rejected": -297.4830627441406, + "loss": 0.1025, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9553543329238892, + "rewards/margins": 1.8835738897323608, + "rewards/rejected": 0.07178039848804474, + "step": 16273 + }, + { + "epoch": 0.95, + "learning_rate": 7.336528605044301e-10, + "logits/chosen": -2.0802550315856934, + "logits/rejected": -2.0764033794403076, + "logps/chosen": -7.235855446197093e-05, + "logps/rejected": -151.24069213867188, + "loss": 0.3791, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0190867871860974e-05, + "rewards/margins": 2.5283772945404053, + "rewards/rejected": -2.5283570289611816, + "step": 16274 + }, + { + "epoch": 0.95, + "learning_rate": 7.320452665539822e-10, + "logits/chosen": -1.9842989444732666, + "logits/rejected": -2.0337727069854736, + "logps/chosen": -192.07630920410156, + "logps/rejected": -440.74615478515625, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.879756212234497, + "rewards/margins": 2.8030197620391846, + "rewards/rejected": -0.9232635498046875, + "step": 16275 + }, + { + "epoch": 0.95, + "learning_rate": 7.304394228422828e-10, + "logits/chosen": -1.9626643657684326, + "logits/rejected": -1.983206033706665, + "logps/chosen": -195.92947387695312, + "logps/rejected": -320.70611572265625, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2801055908203125, + "rewards/margins": 2.9156768321990967, + "rewards/rejected": -0.635571300983429, + "step": 16276 + }, + { + "epoch": 0.95, + "learning_rate": 7.288353294263805e-10, + "logits/chosen": -1.923698902130127, + "logits/rejected": -1.9258663654327393, + "logps/chosen": -32.753326416015625, + "logps/rejected": -162.0131072998047, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0019702911376953125, + "rewards/margins": 2.161055326461792, + "rewards/rejected": -2.1590850353240967, + "step": 16277 + }, + { + "epoch": 0.95, + "learning_rate": 7.272329863632576e-10, + "logits/chosen": -1.9614229202270508, + "logits/rejected": -1.923362135887146, + "logps/chosen": -227.55999755859375, + "logps/rejected": -396.703125, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.659921407699585, + "rewards/margins": 5.132711887359619, + "rewards/rejected": -1.4727905988693237, + "step": 16278 + }, + { + "epoch": 0.95, + "learning_rate": 7.256323937098407e-10, + "logits/chosen": -1.8397221565246582, + "logits/rejected": -1.8368597030639648, + "logps/chosen": -7.00937557220459, + "logps/rejected": -171.60324096679688, + "loss": 0.551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18065081536769867, + "rewards/margins": 0.32466021180152893, + "rewards/rejected": -0.14400939643383026, + "step": 16279 + }, + { + "epoch": 0.95, + "learning_rate": 7.240335515229845e-10, + "logits/chosen": -1.7195768356323242, + "logits/rejected": -1.7358590364456177, + "logps/chosen": -172.91531372070312, + "logps/rejected": -237.58090209960938, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.492028832435608, + "rewards/margins": 0.6320648193359375, + "rewards/rejected": 0.8599640130996704, + "step": 16280 + }, + { + "epoch": 0.95, + "learning_rate": 7.224364598594934e-10, + "logits/chosen": -1.9338281154632568, + "logits/rejected": -1.9208964109420776, + "logps/chosen": -134.4635467529297, + "logps/rejected": -324.04351806640625, + "loss": 0.1434, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9150390625, + "rewards/margins": 1.3464903831481934, + "rewards/rejected": 0.5685486197471619, + "step": 16281 + }, + { + "epoch": 0.95, + "learning_rate": 7.208411187760944e-10, + "logits/chosen": -1.8501777648925781, + "logits/rejected": -1.8468751907348633, + "logps/chosen": -70.21224975585938, + "logps/rejected": -124.64686584472656, + "loss": 0.1091, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.253431797027588, + "rewards/margins": 1.8317277431488037, + "rewards/rejected": 0.42170411348342896, + "step": 16282 + }, + { + "epoch": 0.95, + "learning_rate": 7.192475283294808e-10, + "logits/chosen": -1.7541126012802124, + "logits/rejected": -1.7641141414642334, + "logps/chosen": -237.3758544921875, + "logps/rejected": -401.101806640625, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3573548793792725, + "rewards/margins": 5.713879585266113, + "rewards/rejected": -3.356524705886841, + "step": 16283 + }, + { + "epoch": 0.95, + "learning_rate": 7.176556885762464e-10, + "logits/chosen": -1.89119553565979, + "logits/rejected": -1.9278039932250977, + "logps/chosen": -201.78399658203125, + "logps/rejected": -222.09803771972656, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.838578939437866, + "rewards/margins": 2.7078232765197754, + "rewards/rejected": 1.1307556629180908, + "step": 16284 + }, + { + "epoch": 0.95, + "learning_rate": 7.160655995729514e-10, + "logits/chosen": -1.9048776626586914, + "logits/rejected": -1.9076645374298096, + "logps/chosen": -2.0821213722229004, + "logps/rejected": -213.22164916992188, + "loss": 0.405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00558781623840332, + "rewards/margins": 2.1126556396484375, + "rewards/rejected": -2.118243455886841, + "step": 16285 + }, + { + "epoch": 0.95, + "learning_rate": 7.14477261376073e-10, + "logits/chosen": -1.6361078023910522, + "logits/rejected": -1.6309025287628174, + "logps/chosen": -96.53654479980469, + "logps/rejected": -352.1083984375, + "loss": 0.1523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5177230834960938, + "rewards/margins": 4.806846618652344, + "rewards/rejected": -4.28912353515625, + "step": 16286 + }, + { + "epoch": 0.95, + "learning_rate": 7.128906740420549e-10, + "logits/chosen": -1.8271045684814453, + "logits/rejected": -1.8102411031723022, + "logps/chosen": -0.00014423996617551893, + "logps/rejected": -153.355224609375, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.087079676333815e-06, + "rewards/margins": 3.512486457824707, + "rewards/rejected": -3.5124833583831787, + "step": 16287 + }, + { + "epoch": 0.95, + "learning_rate": 7.113058376272408e-10, + "logits/chosen": -1.9481650590896606, + "logits/rejected": -1.9622011184692383, + "logps/chosen": -184.2269287109375, + "logps/rejected": -570.29833984375, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7063201665878296, + "rewards/margins": 6.095834732055664, + "rewards/rejected": -4.389514446258545, + "step": 16288 + }, + { + "epoch": 0.95, + "learning_rate": 7.097227521879412e-10, + "logits/chosen": -1.9785900115966797, + "logits/rejected": -1.9663053750991821, + "logps/chosen": -146.52938842773438, + "logps/rejected": -347.016845703125, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6803650856018066, + "rewards/margins": 3.859671115875244, + "rewards/rejected": -1.1793060302734375, + "step": 16289 + }, + { + "epoch": 0.95, + "learning_rate": 7.081414177803946e-10, + "logits/chosen": -1.7027883529663086, + "logits/rejected": -1.6965359449386597, + "logps/chosen": -41.14937210083008, + "logps/rejected": -267.220458984375, + "loss": 0.3642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05765419080853462, + "rewards/margins": 6.008251667022705, + "rewards/rejected": -6.065906047821045, + "step": 16290 + }, + { + "epoch": 0.95, + "learning_rate": 7.065618344607782e-10, + "logits/chosen": -1.9489741325378418, + "logits/rejected": -1.9171032905578613, + "logps/chosen": -242.1169891357422, + "logps/rejected": -452.4390869140625, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9959182739257812, + "rewards/margins": 3.4201979637145996, + "rewards/rejected": -1.424279808998108, + "step": 16291 + }, + { + "epoch": 0.95, + "learning_rate": 7.049840022852083e-10, + "logits/chosen": -1.844001293182373, + "logits/rejected": -1.8313171863555908, + "logps/chosen": -275.2026062011719, + "logps/rejected": -413.80621337890625, + "loss": 0.0956, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.079907178878784, + "rewards/margins": 1.9930541515350342, + "rewards/rejected": 0.08685302734375, + "step": 16292 + }, + { + "epoch": 0.95, + "learning_rate": 7.034079213097344e-10, + "logits/chosen": -1.8354082107543945, + "logits/rejected": -1.8217459917068481, + "logps/chosen": -53.48398971557617, + "logps/rejected": -170.56719970703125, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5613307952880859, + "rewards/margins": 1.8059567213058472, + "rewards/rejected": -1.2446259260177612, + "step": 16293 + }, + { + "epoch": 0.95, + "learning_rate": 7.01833591590345e-10, + "logits/chosen": -1.8719875812530518, + "logits/rejected": -1.8743157386779785, + "logps/chosen": -0.10032643377780914, + "logps/rejected": -222.26275634765625, + "loss": 0.3263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008612737990915775, + "rewards/margins": 6.192093849182129, + "rewards/rejected": -6.200706481933594, + "step": 16294 + }, + { + "epoch": 0.95, + "learning_rate": 7.002610131829678e-10, + "logits/chosen": -1.8431062698364258, + "logits/rejected": -1.919809103012085, + "logps/chosen": -261.020751953125, + "logps/rejected": -455.7575988769531, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.496716260910034, + "rewards/margins": 6.868585586547852, + "rewards/rejected": -4.371869087219238, + "step": 16295 + }, + { + "epoch": 0.95, + "learning_rate": 6.986901861434746e-10, + "logits/chosen": -2.007016181945801, + "logits/rejected": -2.0016496181488037, + "logps/chosen": -1.2320860624313354, + "logps/rejected": -75.24215698242188, + "loss": 0.5874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06032703071832657, + "rewards/margins": 0.3565535247325897, + "rewards/rejected": -0.29622650146484375, + "step": 16296 + }, + { + "epoch": 0.95, + "learning_rate": 6.971211105276653e-10, + "logits/chosen": -1.8072357177734375, + "logits/rejected": -1.8019542694091797, + "logps/chosen": -11.749076843261719, + "logps/rejected": -213.21533203125, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06030712276697159, + "rewards/margins": 2.5679197311401367, + "rewards/rejected": -2.507612705230713, + "step": 16297 + }, + { + "epoch": 0.95, + "learning_rate": 6.955537863912731e-10, + "logits/chosen": -1.9144840240478516, + "logits/rejected": -1.9158484935760498, + "logps/chosen": -18.210582733154297, + "logps/rejected": -104.50297546386719, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13129234313964844, + "rewards/margins": 1.8443882465362549, + "rewards/rejected": -1.7130959033966064, + "step": 16298 + }, + { + "epoch": 0.95, + "learning_rate": 6.939882137899921e-10, + "logits/chosen": -2.052093505859375, + "logits/rejected": -2.0470166206359863, + "logps/chosen": -10.386640548706055, + "logps/rejected": -191.21932983398438, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4313966929912567, + "rewards/margins": 6.306646823883057, + "rewards/rejected": -5.875250339508057, + "step": 16299 + }, + { + "epoch": 0.95, + "learning_rate": 6.924243927794282e-10, + "logits/chosen": -1.7790966033935547, + "logits/rejected": -1.7621554136276245, + "logps/chosen": -95.87466430664062, + "logps/rejected": -166.35997009277344, + "loss": 0.1381, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0829651355743408, + "rewards/margins": 3.0631256103515625, + "rewards/rejected": -1.9801605939865112, + "step": 16300 + }, + { + "epoch": 0.95, + "learning_rate": 6.908623234151367e-10, + "logits/chosen": -1.8733044862747192, + "logits/rejected": -1.8718039989471436, + "logps/chosen": -24.728166580200195, + "logps/rejected": -122.30220794677734, + "loss": 0.4051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27030259370803833, + "rewards/margins": 1.6547112464904785, + "rewards/rejected": -1.3844085931777954, + "step": 16301 + }, + { + "epoch": 0.95, + "learning_rate": 6.893020057526178e-10, + "logits/chosen": -2.0637335777282715, + "logits/rejected": -2.0645651817321777, + "logps/chosen": -51.035892486572266, + "logps/rejected": -195.23228454589844, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0044078826904296875, + "rewards/margins": 2.7930386066436768, + "rewards/rejected": -2.788630723953247, + "step": 16302 + }, + { + "epoch": 0.95, + "learning_rate": 6.877434398472936e-10, + "logits/chosen": -2.074923276901245, + "logits/rejected": -2.068253993988037, + "logps/chosen": -8.704384803771973, + "logps/rejected": -54.030757904052734, + "loss": 0.7276, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.028646279126405716, + "rewards/margins": -0.20774975419044495, + "rewards/rejected": 0.23639602959156036, + "step": 16303 + }, + { + "epoch": 0.95, + "learning_rate": 6.861866257545423e-10, + "logits/chosen": -1.9595032930374146, + "logits/rejected": -1.9603521823883057, + "logps/chosen": -21.80974006652832, + "logps/rejected": -311.0703430175781, + "loss": 0.2409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4278862178325653, + "rewards/margins": 7.175487518310547, + "rewards/rejected": -6.747601509094238, + "step": 16304 + }, + { + "epoch": 0.95, + "learning_rate": 6.846315635296474e-10, + "logits/chosen": -1.6904001235961914, + "logits/rejected": -1.7281707525253296, + "logps/chosen": -234.14854431152344, + "logps/rejected": -349.33099365234375, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.851252794265747, + "rewards/margins": 2.8584671020507812, + "rewards/rejected": -0.007214355748146772, + "step": 16305 + }, + { + "epoch": 0.95, + "learning_rate": 6.830782532278812e-10, + "logits/chosen": -1.7854689359664917, + "logits/rejected": -1.7582719326019287, + "logps/chosen": -179.96908569335938, + "logps/rejected": -487.1304931640625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5021636486053467, + "rewards/margins": 8.905056953430176, + "rewards/rejected": -6.40289306640625, + "step": 16306 + }, + { + "epoch": 0.95, + "learning_rate": 6.815266949043996e-10, + "logits/chosen": -1.8782159090042114, + "logits/rejected": -1.8710598945617676, + "logps/chosen": -4.1500372886657715, + "logps/rejected": -226.10354614257812, + "loss": 0.4548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023886967450380325, + "rewards/margins": 1.7538734674453735, + "rewards/rejected": -1.729986548423767, + "step": 16307 + }, + { + "epoch": 0.95, + "learning_rate": 6.799768886143365e-10, + "logits/chosen": -1.7484526634216309, + "logits/rejected": -1.731422781944275, + "logps/chosen": -137.55291748046875, + "logps/rejected": -310.89776611328125, + "loss": 0.0744, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1602447032928467, + "rewards/margins": 3.107491970062256, + "rewards/rejected": -0.947247326374054, + "step": 16308 + }, + { + "epoch": 0.95, + "learning_rate": 6.784288344127476e-10, + "logits/chosen": -1.8723214864730835, + "logits/rejected": -1.8195334672927856, + "logps/chosen": -151.13858032226562, + "logps/rejected": -299.491943359375, + "loss": 0.1138, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6789824962615967, + "rewards/margins": 1.604934573173523, + "rewards/rejected": 1.0740479230880737, + "step": 16309 + }, + { + "epoch": 0.95, + "learning_rate": 6.768825323546223e-10, + "logits/chosen": -2.001401662826538, + "logits/rejected": -1.99528968334198, + "logps/chosen": -4.0173250454245135e-05, + "logps/rejected": -182.22357177734375, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0384559320518747e-06, + "rewards/margins": 4.805048942565918, + "rewards/rejected": -4.805050849914551, + "step": 16310 + }, + { + "epoch": 0.95, + "learning_rate": 6.753379824948946e-10, + "logits/chosen": -1.8268711566925049, + "logits/rejected": -1.8197832107543945, + "logps/chosen": -73.36389923095703, + "logps/rejected": -224.46372985839844, + "loss": 0.5252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5048584342002869, + "rewards/margins": 2.63295578956604, + "rewards/rejected": -3.1378142833709717, + "step": 16311 + }, + { + "epoch": 0.95, + "learning_rate": 6.737951848884316e-10, + "logits/chosen": -2.0147531032562256, + "logits/rejected": -2.0162301063537598, + "logps/chosen": -2.5629919036873616e-05, + "logps/rejected": -222.7595977783203, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.344615594069182e-07, + "rewards/margins": 5.363608360290527, + "rewards/rejected": -5.363609313964844, + "step": 16312 + }, + { + "epoch": 0.95, + "learning_rate": 6.722541395900505e-10, + "logits/chosen": -1.8177461624145508, + "logits/rejected": -1.8294845819473267, + "logps/chosen": -245.75759887695312, + "logps/rejected": -473.3502197265625, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3657928705215454, + "rewards/margins": 7.665270805358887, + "rewards/rejected": -6.299478054046631, + "step": 16313 + }, + { + "epoch": 0.95, + "learning_rate": 6.707148466544854e-10, + "logits/chosen": -1.9179482460021973, + "logits/rejected": -1.9229987859725952, + "logps/chosen": -14.14381217956543, + "logps/rejected": -43.07667541503906, + "loss": 0.6251, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.1916816681623459, + "rewards/margins": -0.049887850880622864, + "rewards/rejected": 0.24156951904296875, + "step": 16314 + }, + { + "epoch": 0.95, + "learning_rate": 6.691773061364314e-10, + "logits/chosen": -1.7874093055725098, + "logits/rejected": -1.7812756299972534, + "logps/chosen": -56.792396545410156, + "logps/rejected": -207.449462890625, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.640527367591858, + "rewards/margins": 2.038404941558838, + "rewards/rejected": -0.3978775143623352, + "step": 16315 + }, + { + "epoch": 0.95, + "learning_rate": 6.676415180904948e-10, + "logits/chosen": -1.911981463432312, + "logits/rejected": -1.8982011079788208, + "logps/chosen": -178.32955932617188, + "logps/rejected": -223.55221557617188, + "loss": 0.1824, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.048597812652588, + "rewards/margins": 1.1068894863128662, + "rewards/rejected": 0.9417083859443665, + "step": 16316 + }, + { + "epoch": 0.95, + "learning_rate": 6.661074825712432e-10, + "logits/chosen": -2.0169942378997803, + "logits/rejected": -2.012741804122925, + "logps/chosen": -163.99533081054688, + "logps/rejected": -329.367431640625, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4528825283050537, + "rewards/margins": 2.107682943344116, + "rewards/rejected": 0.3451995849609375, + "step": 16317 + }, + { + "epoch": 0.95, + "learning_rate": 6.645751996331717e-10, + "logits/chosen": -1.6192495822906494, + "logits/rejected": -1.6206190586090088, + "logps/chosen": -27.59429931640625, + "logps/rejected": -191.09983825683594, + "loss": 0.2201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7556697726249695, + "rewards/margins": 3.2550458908081055, + "rewards/rejected": -2.499376058578491, + "step": 16318 + }, + { + "epoch": 0.95, + "learning_rate": 6.630446693307201e-10, + "logits/chosen": -1.972061038017273, + "logits/rejected": -1.9705911874771118, + "logps/chosen": -6.7035231590271, + "logps/rejected": -94.45223999023438, + "loss": 0.5334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08047442883253098, + "rewards/margins": 0.48529621958732605, + "rewards/rejected": -0.4048217833042145, + "step": 16319 + }, + { + "epoch": 0.95, + "learning_rate": 6.615158917182506e-10, + "logits/chosen": -1.958523154258728, + "logits/rejected": -2.0137178897857666, + "logps/chosen": -244.37718200683594, + "logps/rejected": -464.97393798828125, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2436599731445312, + "rewards/margins": 4.678053379058838, + "rewards/rejected": -2.4343934059143066, + "step": 16320 + }, + { + "epoch": 0.95, + "learning_rate": 6.599888668500809e-10, + "logits/chosen": -1.919222354888916, + "logits/rejected": -1.9050523042678833, + "logps/chosen": -83.10503387451172, + "logps/rejected": -195.63800048828125, + "loss": 0.4218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4911079406738281, + "rewards/margins": 1.9401938915252686, + "rewards/rejected": -2.4313018321990967, + "step": 16321 + }, + { + "epoch": 0.95, + "learning_rate": 6.584635947804507e-10, + "logits/chosen": -2.004859209060669, + "logits/rejected": -1.9653881788253784, + "logps/chosen": -174.6455078125, + "logps/rejected": -556.9764404296875, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8828766345977783, + "rewards/margins": 7.635266304016113, + "rewards/rejected": -5.752389430999756, + "step": 16322 + }, + { + "epoch": 0.95, + "learning_rate": 6.569400755635501e-10, + "logits/chosen": -1.9164763689041138, + "logits/rejected": -1.9328399896621704, + "logps/chosen": -160.9376983642578, + "logps/rejected": -327.1234130859375, + "loss": 0.0911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.832220435142517, + "rewards/margins": 2.093640089035034, + "rewards/rejected": -0.2614196836948395, + "step": 16323 + }, + { + "epoch": 0.95, + "learning_rate": 6.55418309253497e-10, + "logits/chosen": -1.5900166034698486, + "logits/rejected": -1.5591473579406738, + "logps/chosen": -164.10275268554688, + "logps/rejected": -274.1962585449219, + "loss": 0.1036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.810589551925659, + "rewards/margins": 1.691879153251648, + "rewards/rejected": 1.1187103986740112, + "step": 16324 + }, + { + "epoch": 0.95, + "learning_rate": 6.538982959043649e-10, + "logits/chosen": -1.7283172607421875, + "logits/rejected": -1.7197022438049316, + "logps/chosen": -235.27005004882812, + "logps/rejected": -336.7976989746094, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.849971055984497, + "rewards/margins": 1.1373519897460938, + "rewards/rejected": 0.7126190066337585, + "step": 16325 + }, + { + "epoch": 0.95, + "learning_rate": 6.523800355701381e-10, + "logits/chosen": -1.8156160116195679, + "logits/rejected": -1.8026213645935059, + "logps/chosen": -5.159298419952393, + "logps/rejected": -164.78610229492188, + "loss": 0.4231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06204581260681152, + "rewards/margins": 1.7133631706237793, + "rewards/rejected": -1.7754089832305908, + "step": 16326 + }, + { + "epoch": 0.95, + "learning_rate": 6.50863528304757e-10, + "logits/chosen": -1.89788019657135, + "logits/rejected": -1.9235131740570068, + "logps/chosen": -217.04029846191406, + "logps/rejected": -484.7413330078125, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9861252307891846, + "rewards/margins": 8.735206604003906, + "rewards/rejected": -5.749081611633301, + "step": 16327 + }, + { + "epoch": 0.95, + "learning_rate": 6.493487741621006e-10, + "logits/chosen": -1.7941254377365112, + "logits/rejected": -1.805843710899353, + "logps/chosen": -247.7344207763672, + "logps/rejected": -345.9639587402344, + "loss": 0.0699, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5755081176757812, + "rewards/margins": 2.609959602355957, + "rewards/rejected": -1.0344513654708862, + "step": 16328 + }, + { + "epoch": 0.95, + "learning_rate": 6.478357731959761e-10, + "logits/chosen": -2.047667980194092, + "logits/rejected": -2.043863534927368, + "logps/chosen": -3.2925524711608887, + "logps/rejected": -253.22781372070312, + "loss": 0.2403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37113261222839355, + "rewards/margins": 5.824474334716797, + "rewards/rejected": -5.453341960906982, + "step": 16329 + }, + { + "epoch": 0.95, + "learning_rate": 6.463245254601236e-10, + "logits/chosen": -2.069462776184082, + "logits/rejected": -2.0549068450927734, + "logps/chosen": -4.926608085632324, + "logps/rejected": -234.4221954345703, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3066282868385315, + "rewards/margins": 3.496960401535034, + "rewards/rejected": -3.1903321743011475, + "step": 16330 + }, + { + "epoch": 0.95, + "learning_rate": 6.448150310082445e-10, + "logits/chosen": -1.7809535264968872, + "logits/rejected": -1.7829313278198242, + "logps/chosen": -62.494224548339844, + "logps/rejected": -225.1074981689453, + "loss": 0.7095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4898033142089844, + "rewards/margins": 0.22260057926177979, + "rewards/rejected": -0.7124038934707642, + "step": 16331 + }, + { + "epoch": 0.95, + "learning_rate": 6.433072898939574e-10, + "logits/chosen": -2.107210636138916, + "logits/rejected": -2.0901899337768555, + "logps/chosen": -135.22056579589844, + "logps/rejected": -273.230224609375, + "loss": 0.1824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9455001950263977, + "rewards/margins": 1.3469833135604858, + "rewards/rejected": -0.4014831483364105, + "step": 16332 + }, + { + "epoch": 0.95, + "learning_rate": 6.418013021708247e-10, + "logits/chosen": -1.8392366170883179, + "logits/rejected": -1.8291683197021484, + "logps/chosen": -243.3924560546875, + "logps/rejected": -485.8587646484375, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1246490478515625, + "rewards/margins": 8.393289566040039, + "rewards/rejected": -5.268640041351318, + "step": 16333 + }, + { + "epoch": 0.95, + "learning_rate": 6.402970678923537e-10, + "logits/chosen": -1.9973458051681519, + "logits/rejected": -1.9905576705932617, + "logps/chosen": -63.52961349487305, + "logps/rejected": -359.10528564453125, + "loss": 0.163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8310047388076782, + "rewards/margins": 3.4924306869506836, + "rewards/rejected": -2.661425828933716, + "step": 16334 + }, + { + "epoch": 0.95, + "learning_rate": 6.387945871119682e-10, + "logits/chosen": -2.0263874530792236, + "logits/rejected": -2.011460304260254, + "logps/chosen": -206.1480712890625, + "logps/rejected": -336.7698059082031, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.677337646484375, + "rewards/margins": 0.22260427474975586, + "rewards/rejected": 2.454733371734619, + "step": 16335 + }, + { + "epoch": 0.95, + "learning_rate": 6.372938598830479e-10, + "logits/chosen": -1.8420696258544922, + "logits/rejected": -1.8000285625457764, + "logps/chosen": -211.50408935546875, + "logps/rejected": -376.77490234375, + "loss": 0.106, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1665589809417725, + "rewards/margins": 2.0321474075317383, + "rewards/rejected": 0.13441161811351776, + "step": 16336 + }, + { + "epoch": 0.95, + "learning_rate": 6.357948862589113e-10, + "logits/chosen": -1.862939476966858, + "logits/rejected": -1.857158899307251, + "logps/chosen": -58.291954040527344, + "logps/rejected": -316.3179931640625, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.170691728591919, + "rewards/margins": 5.981040000915527, + "rewards/rejected": -4.8103485107421875, + "step": 16337 + }, + { + "epoch": 0.95, + "learning_rate": 6.342976662928101e-10, + "logits/chosen": -1.8513052463531494, + "logits/rejected": -1.8375365734100342, + "logps/chosen": -58.461856842041016, + "logps/rejected": -202.87060546875, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39465293288230896, + "rewards/margins": 2.076205015182495, + "rewards/rejected": -1.6815521717071533, + "step": 16338 + }, + { + "epoch": 0.95, + "learning_rate": 6.328022000379296e-10, + "logits/chosen": -1.924206256866455, + "logits/rejected": -1.9234415292739868, + "logps/chosen": -21.753721237182617, + "logps/rejected": -125.70565795898438, + "loss": 0.1295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6083627939224243, + "rewards/margins": 2.2337090969085693, + "rewards/rejected": -0.6253463625907898, + "step": 16339 + }, + { + "epoch": 0.95, + "learning_rate": 6.313084875473939e-10, + "logits/chosen": -1.9987766742706299, + "logits/rejected": -1.9929091930389404, + "logps/chosen": -3.048632860183716, + "logps/rejected": -105.5017318725586, + "loss": 0.5089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27046000957489014, + "rewards/margins": 0.5077257752418518, + "rewards/rejected": -0.23726578056812286, + "step": 16340 + }, + { + "epoch": 0.95, + "learning_rate": 6.298165288742663e-10, + "logits/chosen": -1.715279221534729, + "logits/rejected": -1.7033878564834595, + "logps/chosen": -289.1251220703125, + "logps/rejected": -495.8847961425781, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.05411696434021, + "rewards/margins": 5.457708835601807, + "rewards/rejected": -3.4035918712615967, + "step": 16341 + }, + { + "epoch": 0.95, + "learning_rate": 6.283263240715542e-10, + "logits/chosen": -1.8230843544006348, + "logits/rejected": -1.8594987392425537, + "logps/chosen": -212.5797882080078, + "logps/rejected": -324.3165588378906, + "loss": 0.08, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9582535028457642, + "rewards/margins": 2.8184616565704346, + "rewards/rejected": -1.8602081537246704, + "step": 16342 + }, + { + "epoch": 0.95, + "learning_rate": 6.268378731921931e-10, + "logits/chosen": -1.9085986614227295, + "logits/rejected": -1.8981876373291016, + "logps/chosen": -26.322675704956055, + "logps/rejected": -181.59811401367188, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1369405835866928, + "rewards/margins": 4.251975059509277, + "rewards/rejected": -4.115034580230713, + "step": 16343 + }, + { + "epoch": 0.95, + "learning_rate": 6.253511762890628e-10, + "logits/chosen": -2.0019733905792236, + "logits/rejected": -2.004960775375366, + "logps/chosen": -2.3973188400268555, + "logps/rejected": -204.97280883789062, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04963571950793266, + "rewards/margins": 4.471951961517334, + "rewards/rejected": -4.422316074371338, + "step": 16344 + }, + { + "epoch": 0.95, + "learning_rate": 6.238662334149769e-10, + "logits/chosen": -1.9269403219223022, + "logits/rejected": -1.9033052921295166, + "logps/chosen": -217.73931884765625, + "logps/rejected": -667.5535278320312, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4252991676330566, + "rewards/margins": 14.218572616577148, + "rewards/rejected": -10.79327392578125, + "step": 16345 + }, + { + "epoch": 0.95, + "learning_rate": 6.223830446226875e-10, + "logits/chosen": -1.7899327278137207, + "logits/rejected": -1.7838748693466187, + "logps/chosen": -145.75889587402344, + "logps/rejected": -220.6322021484375, + "loss": 0.1259, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6419296264648438, + "rewards/margins": 1.475256323814392, + "rewards/rejected": 1.1666733026504517, + "step": 16346 + }, + { + "epoch": 0.95, + "learning_rate": 6.209016099648745e-10, + "logits/chosen": -1.7671422958374023, + "logits/rejected": -1.769776463508606, + "logps/chosen": -0.00021134888811502606, + "logps/rejected": -166.99493408203125, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6187745131901465e-05, + "rewards/margins": 5.285404205322266, + "rewards/rejected": -5.2854204177856445, + "step": 16347 + }, + { + "epoch": 0.95, + "learning_rate": 6.194219294941905e-10, + "logits/chosen": -1.814767599105835, + "logits/rejected": -1.807173490524292, + "logps/chosen": -234.434326171875, + "logps/rejected": -310.1104736328125, + "loss": 0.4321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3995727598667145, + "rewards/margins": 0.13314208388328552, + "rewards/rejected": 0.26643067598342896, + "step": 16348 + }, + { + "epoch": 0.95, + "learning_rate": 6.179440032631766e-10, + "logits/chosen": -2.0438685417175293, + "logits/rejected": -2.0258285999298096, + "logps/chosen": -220.62826538085938, + "logps/rejected": -462.24884033203125, + "loss": 0.0735, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0254242420196533, + "rewards/margins": 3.9640135765075684, + "rewards/rejected": -1.9385894536972046, + "step": 16349 + }, + { + "epoch": 0.95, + "learning_rate": 6.164678313243466e-10, + "logits/chosen": -1.9309412240982056, + "logits/rejected": -1.9078019857406616, + "logps/chosen": -114.96221923828125, + "logps/rejected": -254.2837677001953, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5261566638946533, + "rewards/margins": 4.283563137054443, + "rewards/rejected": -1.7574065923690796, + "step": 16350 + }, + { + "epoch": 0.95, + "learning_rate": 6.149934137301416e-10, + "logits/chosen": -2.0331499576568604, + "logits/rejected": -2.028319835662842, + "logps/chosen": -19.525758743286133, + "logps/rejected": -178.86691284179688, + "loss": 0.4088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1678854078054428, + "rewards/margins": 3.2436153888702393, + "rewards/rejected": -3.4115006923675537, + "step": 16351 + }, + { + "epoch": 0.95, + "learning_rate": 6.135207505329365e-10, + "logits/chosen": -1.9297658205032349, + "logits/rejected": -1.9250524044036865, + "logps/chosen": -0.3936895728111267, + "logps/rejected": -147.05133056640625, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013721245341002941, + "rewards/margins": 1.7422449588775635, + "rewards/rejected": -1.7559661865234375, + "step": 16352 + }, + { + "epoch": 0.95, + "learning_rate": 6.120498417850562e-10, + "logits/chosen": -1.933430552482605, + "logits/rejected": -1.924558401107788, + "logps/chosen": -35.65584182739258, + "logps/rejected": -337.96453857421875, + "loss": 0.3867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14967842400074005, + "rewards/margins": 3.198319673538208, + "rewards/rejected": -3.3479981422424316, + "step": 16353 + }, + { + "epoch": 0.95, + "learning_rate": 6.10580687538742e-10, + "logits/chosen": -1.9965559244155884, + "logits/rejected": -1.9958817958831787, + "logps/chosen": -55.180564880371094, + "logps/rejected": -167.17691040039062, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4634597897529602, + "rewards/margins": 1.8776748180389404, + "rewards/rejected": -1.414215087890625, + "step": 16354 + }, + { + "epoch": 0.95, + "learning_rate": 6.091132878461913e-10, + "logits/chosen": -1.8592911958694458, + "logits/rejected": -1.8514740467071533, + "logps/chosen": -103.41133117675781, + "logps/rejected": -253.14425659179688, + "loss": 0.218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9258224368095398, + "rewards/margins": 1.5732406377792358, + "rewards/rejected": -0.647418200969696, + "step": 16355 + }, + { + "epoch": 0.95, + "learning_rate": 6.076476427595345e-10, + "logits/chosen": -1.8288617134094238, + "logits/rejected": -1.8628607988357544, + "logps/chosen": -203.18551635742188, + "logps/rejected": -425.3642883300781, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.217926025390625, + "rewards/margins": 3.4836792945861816, + "rewards/rejected": -1.265753149986267, + "step": 16356 + }, + { + "epoch": 0.95, + "learning_rate": 6.06183752330841e-10, + "logits/chosen": -1.9075556993484497, + "logits/rejected": -1.8477811813354492, + "logps/chosen": -169.45677185058594, + "logps/rejected": -361.052001953125, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.861436605453491, + "rewards/margins": 3.4809465408325195, + "rewards/rejected": -0.6195098757743835, + "step": 16357 + }, + { + "epoch": 0.95, + "learning_rate": 6.047216166121083e-10, + "logits/chosen": -1.935530185699463, + "logits/rejected": -1.9355961084365845, + "logps/chosen": -38.7695426940918, + "logps/rejected": -153.54385375976562, + "loss": 0.1687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8064247369766235, + "rewards/margins": 4.124300479888916, + "rewards/rejected": -3.317875623703003, + "step": 16358 + }, + { + "epoch": 0.95, + "learning_rate": 6.032612356552836e-10, + "logits/chosen": -1.992292046546936, + "logits/rejected": -1.992465853691101, + "logps/chosen": -0.0005760856438428164, + "logps/rejected": -188.5746307373047, + "loss": 0.3732, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7753896322101355e-05, + "rewards/margins": 2.682607650756836, + "rewards/rejected": -2.682569980621338, + "step": 16359 + }, + { + "epoch": 0.95, + "learning_rate": 6.01802609512242e-10, + "logits/chosen": -1.8861546516418457, + "logits/rejected": -1.8733243942260742, + "logps/chosen": -8.237271686084569e-05, + "logps/rejected": -252.39935302734375, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.821079973131418e-07, + "rewards/margins": 3.4162042140960693, + "rewards/rejected": -3.416203260421753, + "step": 16360 + }, + { + "epoch": 0.95, + "learning_rate": 6.003457382348143e-10, + "logits/chosen": -1.8269869089126587, + "logits/rejected": -1.8253003358840942, + "logps/chosen": -302.1177978515625, + "logps/rejected": -557.0655517578125, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3300538063049316, + "rewards/margins": 3.8727478981018066, + "rewards/rejected": -1.542694091796875, + "step": 16361 + }, + { + "epoch": 0.95, + "learning_rate": 5.988906218747314e-10, + "logits/chosen": -1.7483131885528564, + "logits/rejected": -1.7576287984848022, + "logps/chosen": -176.4302520751953, + "logps/rejected": -384.9200744628906, + "loss": 0.0684, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0505921840667725, + "rewards/margins": 2.663846015930176, + "rewards/rejected": -0.6132537722587585, + "step": 16362 + }, + { + "epoch": 0.95, + "learning_rate": 5.97437260483713e-10, + "logits/chosen": -1.8828260898590088, + "logits/rejected": -1.9187562465667725, + "logps/chosen": -162.07876586914062, + "logps/rejected": -374.29681396484375, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.121481418609619, + "rewards/margins": 6.733789443969727, + "rewards/rejected": -4.612308025360107, + "step": 16363 + }, + { + "epoch": 0.95, + "learning_rate": 5.959856541133679e-10, + "logits/chosen": -2.007131814956665, + "logits/rejected": -2.013606548309326, + "logps/chosen": -40.52919006347656, + "logps/rejected": -194.99359130859375, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19552002847194672, + "rewards/margins": 3.745922803878784, + "rewards/rejected": -3.550402879714966, + "step": 16364 + }, + { + "epoch": 0.95, + "learning_rate": 5.945358028152824e-10, + "logits/chosen": -1.956081748008728, + "logits/rejected": -1.9847625494003296, + "logps/chosen": -261.16876220703125, + "logps/rejected": -491.9150390625, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.55267333984375, + "rewards/margins": 5.764346122741699, + "rewards/rejected": -3.2116730213165283, + "step": 16365 + }, + { + "epoch": 0.95, + "learning_rate": 5.930877066409434e-10, + "logits/chosen": -1.8530428409576416, + "logits/rejected": -1.8603203296661377, + "logps/chosen": -246.53843688964844, + "logps/rejected": -241.1292724609375, + "loss": 0.1687, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.761592149734497, + "rewards/margins": 1.5039658546447754, + "rewards/rejected": 0.25762635469436646, + "step": 16366 + }, + { + "epoch": 0.95, + "learning_rate": 5.916413656418151e-10, + "logits/chosen": -1.9651639461517334, + "logits/rejected": -1.9655392169952393, + "logps/chosen": -6.0430006980896, + "logps/rejected": -90.09563446044922, + "loss": 0.6307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3577544093132019, + "rewards/margins": 0.7117516398429871, + "rewards/rejected": -1.069506049156189, + "step": 16367 + }, + { + "epoch": 0.95, + "learning_rate": 5.90196779869262e-10, + "logits/chosen": -1.8082987070083618, + "logits/rejected": -1.7924758195877075, + "logps/chosen": -54.865570068359375, + "logps/rejected": -359.1907653808594, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6605434417724609, + "rewards/margins": 4.699676990509033, + "rewards/rejected": -5.360220432281494, + "step": 16368 + }, + { + "epoch": 0.95, + "learning_rate": 5.887539493746097e-10, + "logits/chosen": -1.7140511274337769, + "logits/rejected": -1.720992922782898, + "logps/chosen": -135.96424865722656, + "logps/rejected": -199.26751708984375, + "loss": 0.1122, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.127105712890625, + "rewards/margins": 1.8260329961776733, + "rewards/rejected": 0.3010726869106293, + "step": 16369 + }, + { + "epoch": 0.95, + "learning_rate": 5.873128742091116e-10, + "logits/chosen": -1.6732170581817627, + "logits/rejected": -1.6782011985778809, + "logps/chosen": -15.139302253723145, + "logps/rejected": -129.15713500976562, + "loss": 0.3526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1186276450753212, + "rewards/margins": 2.0513546466827393, + "rewards/rejected": -1.9327270984649658, + "step": 16370 + }, + { + "epoch": 0.95, + "learning_rate": 5.858735544239657e-10, + "logits/chosen": -2.065572500228882, + "logits/rejected": -2.051403045654297, + "logps/chosen": -0.7781661152839661, + "logps/rejected": -237.50897216796875, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013411414809525013, + "rewards/margins": 2.015692710876465, + "rewards/rejected": -2.0022811889648438, + "step": 16371 + }, + { + "epoch": 0.95, + "learning_rate": 5.844359900703033e-10, + "logits/chosen": -1.744780421257019, + "logits/rejected": -1.7445471286773682, + "logps/chosen": -264.1661376953125, + "logps/rejected": -427.0104675292969, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.237194776535034, + "rewards/margins": 3.6197571754455566, + "rewards/rejected": -1.382562279701233, + "step": 16372 + }, + { + "epoch": 0.95, + "learning_rate": 5.830001811991891e-10, + "logits/chosen": -1.9564834833145142, + "logits/rejected": -1.9477765560150146, + "logps/chosen": -3.713528871536255, + "logps/rejected": -203.46881103515625, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11045730113983154, + "rewards/margins": 2.2908711433410645, + "rewards/rejected": -2.1804139614105225, + "step": 16373 + }, + { + "epoch": 0.95, + "learning_rate": 5.815661278616324e-10, + "logits/chosen": -1.8457647562026978, + "logits/rejected": -1.835345983505249, + "logps/chosen": -231.4398193359375, + "logps/rejected": -398.7384948730469, + "loss": 0.1463, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.997222900390625, + "rewards/margins": 1.202606201171875, + "rewards/rejected": 0.79461669921875, + "step": 16374 + }, + { + "epoch": 0.95, + "learning_rate": 5.801338301085811e-10, + "logits/chosen": -1.6606369018554688, + "logits/rejected": -1.6503010988235474, + "logps/chosen": -193.8441925048828, + "logps/rejected": -315.031982421875, + "loss": 0.0949, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0669784545898438, + "rewards/margins": 1.8485275506973267, + "rewards/rejected": 0.21845093369483948, + "step": 16375 + }, + { + "epoch": 0.95, + "learning_rate": 5.787032879909171e-10, + "logits/chosen": -1.887864112854004, + "logits/rejected": -1.8870563507080078, + "logps/chosen": -0.021094465628266335, + "logps/rejected": -158.00137329101562, + "loss": 0.3846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005865438375622034, + "rewards/margins": 2.498178005218506, + "rewards/rejected": -2.4923126697540283, + "step": 16376 + }, + { + "epoch": 0.95, + "learning_rate": 5.772745015594549e-10, + "logits/chosen": -1.9006015062332153, + "logits/rejected": -1.961185336112976, + "logps/chosen": -205.29580688476562, + "logps/rejected": -300.513671875, + "loss": 0.1509, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.226031541824341, + "rewards/margins": 1.4058258533477783, + "rewards/rejected": 0.8202056884765625, + "step": 16377 + }, + { + "epoch": 0.95, + "learning_rate": 5.758474708649542e-10, + "logits/chosen": -1.5745543241500854, + "logits/rejected": -1.5383069515228271, + "logps/chosen": -163.00653076171875, + "logps/rejected": -351.51806640625, + "loss": 0.0715, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.442556858062744, + "rewards/margins": 2.075827121734619, + "rewards/rejected": 1.366729736328125, + "step": 16378 + }, + { + "epoch": 0.95, + "learning_rate": 5.74422195958113e-10, + "logits/chosen": -1.9125680923461914, + "logits/rejected": -1.9032752513885498, + "logps/chosen": -133.08322143554688, + "logps/rejected": -225.0450439453125, + "loss": 0.1992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.714880347251892, + "rewards/margins": 1.1039519309997559, + "rewards/rejected": 0.6109283566474915, + "step": 16379 + }, + { + "epoch": 0.95, + "learning_rate": 5.729986768895634e-10, + "logits/chosen": -1.6500358581542969, + "logits/rejected": -1.677971601486206, + "logps/chosen": -309.085205078125, + "logps/rejected": -397.98321533203125, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9036285877227783, + "rewards/margins": 3.1734955310821533, + "rewards/rejected": -0.269866943359375, + "step": 16380 + }, + { + "epoch": 0.95, + "learning_rate": 5.715769137098703e-10, + "logits/chosen": -2.0862650871276855, + "logits/rejected": -2.071155548095703, + "logps/chosen": -17.683326721191406, + "logps/rejected": -349.27734375, + "loss": 0.2159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32673951983451843, + "rewards/margins": 5.648248195648193, + "rewards/rejected": -5.321508884429932, + "step": 16381 + }, + { + "epoch": 0.95, + "learning_rate": 5.701569064695488e-10, + "logits/chosen": -2.0652592182159424, + "logits/rejected": -2.058182954788208, + "logps/chosen": -131.30642700195312, + "logps/rejected": -192.96615600585938, + "loss": 0.3074, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6284454464912415, + "rewards/margins": 0.7055725455284119, + "rewards/rejected": -0.07712707668542862, + "step": 16382 + }, + { + "epoch": 0.95, + "learning_rate": 5.68738655219042e-10, + "logits/chosen": -2.1085989475250244, + "logits/rejected": -2.1102867126464844, + "logps/chosen": -6.4123101234436035, + "logps/rejected": -114.15290069580078, + "loss": 0.6683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33255407214164734, + "rewards/margins": 0.6228078603744507, + "rewards/rejected": -0.9553619623184204, + "step": 16383 + }, + { + "epoch": 0.95, + "learning_rate": 5.673221600087319e-10, + "logits/chosen": -1.7593072652816772, + "logits/rejected": -1.7439392805099487, + "logps/chosen": -89.12922668457031, + "logps/rejected": -354.1199035644531, + "loss": 0.1698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7654510736465454, + "rewards/margins": 3.7161803245544434, + "rewards/rejected": -2.9507293701171875, + "step": 16384 + }, + { + "epoch": 0.95, + "learning_rate": 5.659074208889336e-10, + "logits/chosen": -1.9521217346191406, + "logits/rejected": -1.9115509986877441, + "logps/chosen": -241.4082794189453, + "logps/rejected": -495.3170166015625, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.6492111682891846, + "rewards/margins": 4.167640686035156, + "rewards/rejected": -0.5184295773506165, + "step": 16385 + }, + { + "epoch": 0.95, + "learning_rate": 5.644944379099237e-10, + "logits/chosen": -1.7453505992889404, + "logits/rejected": -1.7506802082061768, + "logps/chosen": -182.48751831054688, + "logps/rejected": -414.8492431640625, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.654620409011841, + "rewards/margins": 4.111987113952637, + "rewards/rejected": -1.457366943359375, + "step": 16386 + }, + { + "epoch": 0.95, + "learning_rate": 5.630832111218786e-10, + "logits/chosen": -1.9355106353759766, + "logits/rejected": -1.9339524507522583, + "logps/chosen": -24.947214126586914, + "logps/rejected": -178.6647186279297, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5425010919570923, + "rewards/margins": 1.0550575256347656, + "rewards/rejected": -0.5125564932823181, + "step": 16387 + }, + { + "epoch": 0.95, + "learning_rate": 5.616737405749417e-10, + "logits/chosen": -1.9739254713058472, + "logits/rejected": -1.975385308265686, + "logps/chosen": -11.604194641113281, + "logps/rejected": -114.74419403076172, + "loss": 0.5699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10519075393676758, + "rewards/margins": 0.6452715992927551, + "rewards/rejected": -0.7504623532295227, + "step": 16388 + }, + { + "epoch": 0.95, + "learning_rate": 5.602660263191783e-10, + "logits/chosen": -2.0249440670013428, + "logits/rejected": -2.012521743774414, + "logps/chosen": -9.37595272064209, + "logps/rejected": -160.55870056152344, + "loss": 0.3845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12494783848524094, + "rewards/margins": 1.9050686359405518, + "rewards/rejected": -1.780120849609375, + "step": 16389 + }, + { + "epoch": 0.95, + "learning_rate": 5.588600684046096e-10, + "logits/chosen": -2.000688314437866, + "logits/rejected": -1.9713319540023804, + "logps/chosen": -234.97402954101562, + "logps/rejected": -474.06268310546875, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2548797130584717, + "rewards/margins": 3.277474880218506, + "rewards/rejected": -1.0225952863693237, + "step": 16390 + }, + { + "epoch": 0.95, + "learning_rate": 5.574558668811736e-10, + "logits/chosen": -1.9019145965576172, + "logits/rejected": -1.9008498191833496, + "logps/chosen": -9.295024871826172, + "logps/rejected": -91.99530029296875, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06091919168829918, + "rewards/margins": 2.014752149581909, + "rewards/rejected": -1.953832983970642, + "step": 16391 + }, + { + "epoch": 0.95, + "learning_rate": 5.560534217987467e-10, + "logits/chosen": -1.7569022178649902, + "logits/rejected": -1.7484972476959229, + "logps/chosen": -195.555419921875, + "logps/rejected": -353.18280029296875, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2617995738983154, + "rewards/margins": 4.839363098144531, + "rewards/rejected": -2.577563524246216, + "step": 16392 + }, + { + "epoch": 0.95, + "learning_rate": 5.546527332071671e-10, + "logits/chosen": -1.7646979093551636, + "logits/rejected": -1.7617541551589966, + "logps/chosen": -16.45395278930664, + "logps/rejected": -60.290489196777344, + "loss": 0.6921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18164406716823578, + "rewards/margins": 0.12886695563793182, + "rewards/rejected": -0.3105110228061676, + "step": 16393 + }, + { + "epoch": 0.95, + "learning_rate": 5.532538011561783e-10, + "logits/chosen": -1.8769848346710205, + "logits/rejected": -1.8729861974716187, + "logps/chosen": -0.0789232924580574, + "logps/rejected": -168.5382843017578, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023467648774385452, + "rewards/margins": 5.876770973205566, + "rewards/rejected": -5.8533034324646, + "step": 16394 + }, + { + "epoch": 0.95, + "learning_rate": 5.518566256954904e-10, + "logits/chosen": -2.0660951137542725, + "logits/rejected": -2.055877447128296, + "logps/chosen": -11.006473541259766, + "logps/rejected": -164.98301696777344, + "loss": 0.5228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.255833238363266, + "rewards/margins": 1.0745089054107666, + "rewards/rejected": -1.330342173576355, + "step": 16395 + }, + { + "epoch": 0.95, + "learning_rate": 5.504612068747305e-10, + "logits/chosen": -1.8356837034225464, + "logits/rejected": -1.8528848886489868, + "logps/chosen": -179.01425170898438, + "logps/rejected": -372.7314758300781, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.351278781890869, + "rewards/margins": 6.803027629852295, + "rewards/rejected": -4.451748847961426, + "step": 16396 + }, + { + "epoch": 0.95, + "learning_rate": 5.490675447434701e-10, + "logits/chosen": -1.9721797704696655, + "logits/rejected": -1.9644966125488281, + "logps/chosen": -46.576133728027344, + "logps/rejected": -133.47747802734375, + "loss": 0.5588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38715019822120667, + "rewards/margins": 0.2098369598388672, + "rewards/rejected": 0.17731323838233948, + "step": 16397 + }, + { + "epoch": 0.95, + "learning_rate": 5.47675639351225e-10, + "logits/chosen": -2.0526742935180664, + "logits/rejected": -2.0467946529388428, + "logps/chosen": -19.599576950073242, + "logps/rejected": -134.73733520507812, + "loss": 0.2783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29934120178222656, + "rewards/margins": 2.327637195587158, + "rewards/rejected": -2.0282959938049316, + "step": 16398 + }, + { + "epoch": 0.95, + "learning_rate": 5.46285490747439e-10, + "logits/chosen": -1.9176442623138428, + "logits/rejected": -1.9169286489486694, + "logps/chosen": -32.968963623046875, + "logps/rejected": -196.25375366210938, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1907581388950348, + "rewards/margins": 1.7283252477645874, + "rewards/rejected": -1.537567138671875, + "step": 16399 + }, + { + "epoch": 0.95, + "learning_rate": 5.448970989814949e-10, + "logits/chosen": -1.6984401941299438, + "logits/rejected": -1.7020559310913086, + "logps/chosen": -43.31134796142578, + "logps/rejected": -253.21371459960938, + "loss": 0.179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7986099123954773, + "rewards/margins": 3.4764084815979004, + "rewards/rejected": -2.6777985095977783, + "step": 16400 + }, + { + "epoch": 0.95, + "learning_rate": 5.435104641027144e-10, + "logits/chosen": -1.7856664657592773, + "logits/rejected": -1.7997581958770752, + "logps/chosen": -209.03306579589844, + "logps/rejected": -292.1973571777344, + "loss": 0.1273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.776142954826355, + "rewards/margins": 1.8831528425216675, + "rewards/rejected": -0.1070098876953125, + "step": 16401 + }, + { + "epoch": 0.95, + "learning_rate": 5.421255861603635e-10, + "logits/chosen": -2.0353260040283203, + "logits/rejected": -2.0281476974487305, + "logps/chosen": -5.008154392242432, + "logps/rejected": -277.8951721191406, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13691139221191406, + "rewards/margins": 7.327356338500977, + "rewards/rejected": -7.1904449462890625, + "step": 16402 + }, + { + "epoch": 0.95, + "learning_rate": 5.407424652036363e-10, + "logits/chosen": -2.0995471477508545, + "logits/rejected": -2.094458818435669, + "logps/chosen": -12.961832046508789, + "logps/rejected": -103.12931823730469, + "loss": 0.6649, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.32012176513671875, + "rewards/margins": -0.2337692379951477, + "rewards/rejected": 0.5538910031318665, + "step": 16403 + }, + { + "epoch": 0.95, + "learning_rate": 5.393611012816657e-10, + "logits/chosen": -2.1155548095703125, + "logits/rejected": -2.1097171306610107, + "logps/chosen": -0.03380333259701729, + "logps/rejected": -180.02073669433594, + "loss": 0.3722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012215238530188799, + "rewards/margins": 2.7964820861816406, + "rewards/rejected": -2.797703504562378, + "step": 16404 + }, + { + "epoch": 0.95, + "learning_rate": 5.37981494443529e-10, + "logits/chosen": -1.826250672340393, + "logits/rejected": -1.8082612752914429, + "logps/chosen": -218.66917419433594, + "logps/rejected": -471.8780822753906, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3785431385040283, + "rewards/margins": 3.7865447998046875, + "rewards/rejected": -1.4080017805099487, + "step": 16405 + }, + { + "epoch": 0.95, + "learning_rate": 5.366036447382317e-10, + "logits/chosen": -1.8643102645874023, + "logits/rejected": -1.8684003353118896, + "logps/chosen": -0.025138452649116516, + "logps/rejected": -15.342300415039062, + "loss": 0.66, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006489450461231172, + "rewards/margins": 0.07397349923849106, + "rewards/rejected": -0.07462244480848312, + "step": 16406 + }, + { + "epoch": 0.95, + "learning_rate": 5.352275522147287e-10, + "logits/chosen": -1.7666622400283813, + "logits/rejected": -1.7735626697540283, + "logps/chosen": -206.23703002929688, + "logps/rejected": -350.46160888671875, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.744799852371216, + "rewards/margins": 3.060189962387085, + "rewards/rejected": -0.315390020608902, + "step": 16407 + }, + { + "epoch": 0.95, + "learning_rate": 5.338532169218979e-10, + "logits/chosen": -1.991924524307251, + "logits/rejected": -1.9741010665893555, + "logps/chosen": -23.529850006103516, + "logps/rejected": -196.709228515625, + "loss": 0.5638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46445924043655396, + "rewards/margins": 0.056645214557647705, + "rewards/rejected": 0.40781402587890625, + "step": 16408 + }, + { + "epoch": 0.95, + "learning_rate": 5.324806389085668e-10, + "logits/chosen": -1.8797407150268555, + "logits/rejected": -1.8749797344207764, + "logps/chosen": -13.843393325805664, + "logps/rejected": -196.85133361816406, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0496426597237587, + "rewards/margins": 2.588766098022461, + "rewards/rejected": -2.53912353515625, + "step": 16409 + }, + { + "epoch": 0.95, + "learning_rate": 5.311098182234963e-10, + "logits/chosen": -1.7476999759674072, + "logits/rejected": -1.763628602027893, + "logps/chosen": -11.287618637084961, + "logps/rejected": -109.21611022949219, + "loss": 0.3723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22628441452980042, + "rewards/margins": 1.089925765991211, + "rewards/rejected": -0.8636413812637329, + "step": 16410 + }, + { + "epoch": 0.96, + "learning_rate": 5.29740754915381e-10, + "logits/chosen": -1.940234661102295, + "logits/rejected": -1.936303734779358, + "logps/chosen": -0.019227314740419388, + "logps/rejected": -128.5817108154297, + "loss": 0.466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012914568651467562, + "rewards/margins": 1.3116227388381958, + "rewards/rejected": -1.312914252281189, + "step": 16411 + }, + { + "epoch": 0.96, + "learning_rate": 5.28373449032865e-10, + "logits/chosen": -1.9202277660369873, + "logits/rejected": -1.9138206243515015, + "logps/chosen": -125.2842025756836, + "logps/rejected": -172.31588745117188, + "loss": 0.1401, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1572303771972656, + "rewards/margins": 1.4542930126190186, + "rewards/rejected": 0.7029373049736023, + "step": 16412 + }, + { + "epoch": 0.96, + "learning_rate": 5.270079006245099e-10, + "logits/chosen": -1.9458644390106201, + "logits/rejected": -1.9396804571151733, + "logps/chosen": -139.1690673828125, + "logps/rejected": -199.78167724609375, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9935333728790283, + "rewards/margins": 2.73016357421875, + "rewards/rejected": -0.7366302609443665, + "step": 16413 + }, + { + "epoch": 0.96, + "learning_rate": 5.256441097388375e-10, + "logits/chosen": -2.031412124633789, + "logits/rejected": -2.022482395172119, + "logps/chosen": -134.80361938476562, + "logps/rejected": -347.3291931152344, + "loss": 0.0727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6843475103378296, + "rewards/margins": 2.682452440261841, + "rewards/rejected": -0.9981048703193665, + "step": 16414 + }, + { + "epoch": 0.96, + "learning_rate": 5.242820764242873e-10, + "logits/chosen": -1.8456374406814575, + "logits/rejected": -1.8443493843078613, + "logps/chosen": -33.95322036743164, + "logps/rejected": -75.91704559326172, + "loss": 0.196, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8578728437423706, + "rewards/margins": 1.21478271484375, + "rewards/rejected": 0.6430900692939758, + "step": 16415 + }, + { + "epoch": 0.96, + "learning_rate": 5.229218007292535e-10, + "logits/chosen": -1.8968119621276855, + "logits/rejected": -1.8906089067459106, + "logps/chosen": -80.93099975585938, + "logps/rejected": -209.287353515625, + "loss": 0.1297, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1091049909591675, + "rewards/margins": 2.3848695755004883, + "rewards/rejected": -1.2757644653320312, + "step": 16416 + }, + { + "epoch": 0.96, + "learning_rate": 5.215632827020533e-10, + "logits/chosen": -1.9909083843231201, + "logits/rejected": -1.9868425130844116, + "logps/chosen": -9.56966495513916, + "logps/rejected": -152.5506134033203, + "loss": 0.4949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3513398766517639, + "rewards/margins": 2.207537889480591, + "rewards/rejected": -2.55887770652771, + "step": 16417 + }, + { + "epoch": 0.96, + "learning_rate": 5.202065223909536e-10, + "logits/chosen": -1.772682547569275, + "logits/rejected": -1.7818500995635986, + "logps/chosen": -53.99515151977539, + "logps/rejected": -199.21786499023438, + "loss": 0.4153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17702141404151917, + "rewards/margins": 1.5236324071884155, + "rewards/rejected": -1.3466110229492188, + "step": 16418 + }, + { + "epoch": 0.96, + "learning_rate": 5.188515198441434e-10, + "logits/chosen": -2.1076033115386963, + "logits/rejected": -2.0927882194519043, + "logps/chosen": -6.675615441054106e-05, + "logps/rejected": -200.93980407714844, + "loss": 0.3272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5735422493889928e-06, + "rewards/margins": 5.6748456954956055, + "rewards/rejected": -5.674844264984131, + "step": 16419 + }, + { + "epoch": 0.96, + "learning_rate": 5.174982751097679e-10, + "logits/chosen": -1.8908964395523071, + "logits/rejected": -1.8890682458877563, + "logps/chosen": -220.40817260742188, + "logps/rejected": -301.2657775878906, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5575225949287415, + "rewards/margins": 1.450653076171875, + "rewards/rejected": -0.8931304812431335, + "step": 16420 + }, + { + "epoch": 0.96, + "learning_rate": 5.161467882358994e-10, + "logits/chosen": -1.9916239976882935, + "logits/rejected": -1.995375156402588, + "logps/chosen": -0.0018164922948926687, + "logps/rejected": -189.6168975830078, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001253301015822217, + "rewards/margins": 2.568455219268799, + "rewards/rejected": -2.5685806274414062, + "step": 16421 + }, + { + "epoch": 0.96, + "learning_rate": 5.147970592705553e-10, + "logits/chosen": -1.5633434057235718, + "logits/rejected": -1.5725018978118896, + "logps/chosen": -281.205078125, + "logps/rejected": -337.3843688964844, + "loss": 0.1829, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6363251209259033, + "rewards/margins": 0.9752716422080994, + "rewards/rejected": 0.661053478717804, + "step": 16422 + }, + { + "epoch": 0.96, + "learning_rate": 5.134490882616693e-10, + "logits/chosen": -1.7845791578292847, + "logits/rejected": -1.7793304920196533, + "logps/chosen": -12.12208080291748, + "logps/rejected": -63.73271179199219, + "loss": 0.4296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20070676505565643, + "rewards/margins": 0.9560912251472473, + "rewards/rejected": -0.7553844451904297, + "step": 16423 + }, + { + "epoch": 0.96, + "learning_rate": 5.121028752571421e-10, + "logits/chosen": -1.9790853261947632, + "logits/rejected": -1.976603627204895, + "logps/chosen": -10.905187606811523, + "logps/rejected": -142.5864715576172, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1519538015127182, + "rewards/margins": 3.4260032176971436, + "rewards/rejected": -3.2740495204925537, + "step": 16424 + }, + { + "epoch": 0.96, + "learning_rate": 5.10758420304791e-10, + "logits/chosen": -1.9617375135421753, + "logits/rejected": -1.9639272689819336, + "logps/chosen": -133.56298828125, + "logps/rejected": -393.957763671875, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4502182006835938, + "rewards/margins": 4.93789529800415, + "rewards/rejected": -3.4876770973205566, + "step": 16425 + }, + { + "epoch": 0.96, + "learning_rate": 5.094157234523777e-10, + "logits/chosen": -1.7988439798355103, + "logits/rejected": -1.8012114763259888, + "logps/chosen": -2.8848373403889127e-05, + "logps/rejected": -191.49716186523438, + "loss": 0.3528, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8609630930986896e-07, + "rewards/margins": 3.9488680362701416, + "rewards/rejected": -3.9488677978515625, + "step": 16426 + }, + { + "epoch": 0.96, + "learning_rate": 5.080747847475974e-10, + "logits/chosen": -1.8409255743026733, + "logits/rejected": -1.810705304145813, + "logps/chosen": -196.3651123046875, + "logps/rejected": -404.5549011230469, + "loss": 0.0626, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6742645502090454, + "rewards/margins": 2.6595001220703125, + "rewards/rejected": -0.9852356314659119, + "step": 16427 + }, + { + "epoch": 0.96, + "learning_rate": 5.06735604238101e-10, + "logits/chosen": -1.5437819957733154, + "logits/rejected": -1.5276367664337158, + "logps/chosen": -257.53424072265625, + "logps/rejected": -527.2203369140625, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.270983934402466, + "rewards/margins": 6.300494194030762, + "rewards/rejected": -4.029510498046875, + "step": 16428 + }, + { + "epoch": 0.96, + "learning_rate": 5.053981819714504e-10, + "logits/chosen": -1.7794623374938965, + "logits/rejected": -1.7635542154312134, + "logps/chosen": -12.560945510864258, + "logps/rejected": -212.8164825439453, + "loss": 0.2626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39227294921875, + "rewards/margins": 3.6055893898010254, + "rewards/rejected": -3.2133164405822754, + "step": 16429 + }, + { + "epoch": 0.96, + "learning_rate": 5.040625179951575e-10, + "logits/chosen": -1.7864835262298584, + "logits/rejected": -1.7206521034240723, + "logps/chosen": -378.0111389160156, + "logps/rejected": -643.953369140625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.576681613922119, + "rewards/margins": 4.299032688140869, + "rewards/rejected": -1.72235107421875, + "step": 16430 + }, + { + "epoch": 0.96, + "learning_rate": 5.027286123566787e-10, + "logits/chosen": -1.6759368181228638, + "logits/rejected": -1.7055763006210327, + "logps/chosen": -232.30458068847656, + "logps/rejected": -405.12164306640625, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.284837484359741, + "rewards/margins": 6.661421775817871, + "rewards/rejected": -4.376584053039551, + "step": 16431 + }, + { + "epoch": 0.96, + "learning_rate": 5.01396465103393e-10, + "logits/chosen": -1.73995041847229, + "logits/rejected": -1.7346543073654175, + "logps/chosen": -199.98049926757812, + "logps/rejected": -521.9884033203125, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.926556408405304, + "rewards/margins": 4.5232696533203125, + "rewards/rejected": -3.5967133045196533, + "step": 16432 + }, + { + "epoch": 0.96, + "learning_rate": 5.000660762826347e-10, + "logits/chosen": -2.0447192192077637, + "logits/rejected": -2.0441904067993164, + "logps/chosen": -41.23120880126953, + "logps/rejected": -125.22966766357422, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7383796572685242, + "rewards/margins": 1.4486076831817627, + "rewards/rejected": -0.7102279663085938, + "step": 16433 + }, + { + "epoch": 0.96, + "learning_rate": 4.987374459416549e-10, + "logits/chosen": -1.8537733554840088, + "logits/rejected": -1.8582159280776978, + "logps/chosen": -35.299190521240234, + "logps/rejected": -242.07391357421875, + "loss": 0.2335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5189846158027649, + "rewards/margins": 4.383126735687256, + "rewards/rejected": -3.8641419410705566, + "step": 16434 + }, + { + "epoch": 0.96, + "learning_rate": 4.974105741276602e-10, + "logits/chosen": -1.974111795425415, + "logits/rejected": -1.9473172426223755, + "logps/chosen": -31.84916877746582, + "logps/rejected": -356.27178955078125, + "loss": 0.3735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23419208824634552, + "rewards/margins": 1.7886807918548584, + "rewards/rejected": -2.0228729248046875, + "step": 16435 + }, + { + "epoch": 0.96, + "learning_rate": 4.960854608877796e-10, + "logits/chosen": -2.000807046890259, + "logits/rejected": -2.012645959854126, + "logps/chosen": -224.05397033691406, + "logps/rejected": -362.794189453125, + "loss": 0.0789, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.715658664703369, + "rewards/margins": 2.2045319080352783, + "rewards/rejected": 0.511126697063446, + "step": 16436 + }, + { + "epoch": 0.96, + "learning_rate": 4.947621062691032e-10, + "logits/chosen": -1.8173174858093262, + "logits/rejected": -1.842447280883789, + "logps/chosen": -136.27085876464844, + "logps/rejected": -399.5438232421875, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8414077758789062, + "rewards/margins": 5.144548416137695, + "rewards/rejected": -3.30314040184021, + "step": 16437 + }, + { + "epoch": 0.96, + "learning_rate": 4.934405103186268e-10, + "logits/chosen": -2.096536636352539, + "logits/rejected": -2.0873255729675293, + "logps/chosen": -0.19516810774803162, + "logps/rejected": -198.75027465820312, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0062296525575220585, + "rewards/margins": 5.978274822235107, + "rewards/rejected": -5.984504699707031, + "step": 16438 + }, + { + "epoch": 0.96, + "learning_rate": 4.921206730833072e-10, + "logits/chosen": -1.910299301147461, + "logits/rejected": -1.9125525951385498, + "logps/chosen": -78.93119049072266, + "logps/rejected": -248.5685272216797, + "loss": 0.14, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0107109546661377, + "rewards/margins": 4.159757614135742, + "rewards/rejected": -3.1490464210510254, + "step": 16439 + }, + { + "epoch": 0.96, + "learning_rate": 4.908025946100292e-10, + "logits/chosen": -2.1203558444976807, + "logits/rejected": -2.1234211921691895, + "logps/chosen": -0.00486531900241971, + "logps/rejected": -24.47607421875, + "loss": 0.5802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00030202051857486367, + "rewards/margins": 0.34634390473365784, + "rewards/rejected": -0.34604188799858093, + "step": 16440 + }, + { + "epoch": 0.96, + "learning_rate": 4.894862749456219e-10, + "logits/chosen": -1.8224064111709595, + "logits/rejected": -1.8054014444351196, + "logps/chosen": -185.28305053710938, + "logps/rejected": -303.86065673828125, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.527975559234619, + "rewards/margins": 2.5515382289886475, + "rewards/rejected": -0.02356262318789959, + "step": 16441 + }, + { + "epoch": 0.96, + "learning_rate": 4.881717141368425e-10, + "logits/chosen": -1.9653881788253784, + "logits/rejected": -1.9963500499725342, + "logps/chosen": -273.9667663574219, + "logps/rejected": -219.13546752929688, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7163665294647217, + "rewards/margins": 0.8014587163925171, + "rewards/rejected": 1.9149078130722046, + "step": 16442 + }, + { + "epoch": 0.96, + "learning_rate": 4.868589122303923e-10, + "logits/chosen": -2.0067291259765625, + "logits/rejected": -1.9824291467666626, + "logps/chosen": -28.092823028564453, + "logps/rejected": -373.0442199707031, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1912277191877365, + "rewards/margins": 8.990477561950684, + "rewards/rejected": -8.799249649047852, + "step": 16443 + }, + { + "epoch": 0.96, + "learning_rate": 4.855478692729064e-10, + "logits/chosen": -2.029059410095215, + "logits/rejected": -2.0055932998657227, + "logps/chosen": -90.86097717285156, + "logps/rejected": -316.9110412597656, + "loss": 0.3365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0566253662109375, + "rewards/margins": 3.492401123046875, + "rewards/rejected": -3.4357757568359375, + "step": 16444 + }, + { + "epoch": 0.96, + "learning_rate": 4.842385853109643e-10, + "logits/chosen": -1.9616369009017944, + "logits/rejected": -1.9627071619033813, + "logps/chosen": -199.99063110351562, + "logps/rejected": -420.04144287109375, + "loss": 0.1355, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6975525617599487, + "rewards/margins": 1.4323090314865112, + "rewards/rejected": 0.2652435302734375, + "step": 16445 + }, + { + "epoch": 0.96, + "learning_rate": 4.82931060391073e-10, + "logits/chosen": -1.9256118535995483, + "logits/rejected": -1.9225879907608032, + "logps/chosen": -0.2856901288032532, + "logps/rejected": -227.79953002929688, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02383784018456936, + "rewards/margins": 3.4138739109039307, + "rewards/rejected": -3.390036106109619, + "step": 16446 + }, + { + "epoch": 0.96, + "learning_rate": 4.8162529455969e-10, + "logits/chosen": -1.8898823261260986, + "logits/rejected": -1.872817039489746, + "logps/chosen": -46.12903594970703, + "logps/rejected": -283.87908935546875, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33112868666648865, + "rewards/margins": 5.725809574127197, + "rewards/rejected": -5.394680976867676, + "step": 16447 + }, + { + "epoch": 0.96, + "learning_rate": 4.80321287863189e-10, + "logits/chosen": -1.980350375175476, + "logits/rejected": -1.97379732131958, + "logps/chosen": -1.3330422639846802, + "logps/rejected": -163.84353637695312, + "loss": 0.3107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14977428317070007, + "rewards/margins": 4.0814361572265625, + "rewards/rejected": -3.931662082672119, + "step": 16448 + }, + { + "epoch": 0.96, + "learning_rate": 4.790190403479111e-10, + "logits/chosen": -2.074775457382202, + "logits/rejected": -2.069500684738159, + "logps/chosen": -23.519922256469727, + "logps/rejected": -249.91036987304688, + "loss": 0.3544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06315765529870987, + "rewards/margins": 4.463172912597656, + "rewards/rejected": -4.526330471038818, + "step": 16449 + }, + { + "epoch": 0.96, + "learning_rate": 4.777185520601023e-10, + "logits/chosen": -1.9293185472488403, + "logits/rejected": -1.9748896360397339, + "logps/chosen": -158.77511596679688, + "logps/rejected": -418.8150939941406, + "loss": 0.1448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33109742403030396, + "rewards/margins": 4.885653972625732, + "rewards/rejected": -4.554556369781494, + "step": 16450 + }, + { + "epoch": 0.96, + "learning_rate": 4.76419823045976e-10, + "logits/chosen": -1.86191725730896, + "logits/rejected": -1.8366535902023315, + "logps/chosen": -15.51742172241211, + "logps/rejected": -385.9171142578125, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1007465347647667, + "rewards/margins": 3.585280418395996, + "rewards/rejected": -3.4845337867736816, + "step": 16451 + }, + { + "epoch": 0.96, + "learning_rate": 4.751228533516616e-10, + "logits/chosen": -1.8764222860336304, + "logits/rejected": -1.9249345064163208, + "logps/chosen": -265.37689208984375, + "logps/rejected": -302.07550048828125, + "loss": 0.4081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6958252191543579, + "rewards/margins": 0.021392822265625, + "rewards/rejected": 0.6744323968887329, + "step": 16452 + }, + { + "epoch": 0.96, + "learning_rate": 4.738276430232335e-10, + "logits/chosen": -1.6467199325561523, + "logits/rejected": -1.6384308338165283, + "logps/chosen": -122.6552734375, + "logps/rejected": -356.8226623535156, + "loss": 0.1062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8183578848838806, + "rewards/margins": 8.019706726074219, + "rewards/rejected": -7.201348781585693, + "step": 16453 + }, + { + "epoch": 0.96, + "learning_rate": 4.725341921067105e-10, + "logits/chosen": -1.9489660263061523, + "logits/rejected": -1.9466503858566284, + "logps/chosen": -0.006932454649358988, + "logps/rejected": -114.98985290527344, + "loss": 0.4779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005004348349757493, + "rewards/margins": 1.1925759315490723, + "rewards/rejected": -1.1930763721466064, + "step": 16454 + }, + { + "epoch": 0.96, + "learning_rate": 4.712425006480336e-10, + "logits/chosen": -1.9015285968780518, + "logits/rejected": -1.9094046354293823, + "logps/chosen": -2.7518458366394043, + "logps/rejected": -180.23928833007812, + "loss": 0.3659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012401580810546875, + "rewards/margins": 2.505229949951172, + "rewards/rejected": -2.492828369140625, + "step": 16455 + }, + { + "epoch": 0.96, + "learning_rate": 4.699525686930994e-10, + "logits/chosen": -1.939327597618103, + "logits/rejected": -1.934819221496582, + "logps/chosen": -1.0080089569091797, + "logps/rejected": -91.4045181274414, + "loss": 0.4121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01943444088101387, + "rewards/margins": 1.7787331342697144, + "rewards/rejected": -1.7592986822128296, + "step": 16456 + }, + { + "epoch": 0.96, + "learning_rate": 4.686643962877268e-10, + "logits/chosen": -1.7585482597351074, + "logits/rejected": -1.8128052949905396, + "logps/chosen": -211.23367309570312, + "logps/rejected": -328.4715576171875, + "loss": 0.0999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.936366319656372, + "rewards/margins": 2.2643113136291504, + "rewards/rejected": -0.32794496417045593, + "step": 16457 + }, + { + "epoch": 0.96, + "learning_rate": 4.673779834776737e-10, + "logits/chosen": -1.8208065032958984, + "logits/rejected": -1.8140230178833008, + "logps/chosen": -97.60408020019531, + "logps/rejected": -268.8572692871094, + "loss": 0.2603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0390113592147827, + "rewards/margins": 0.5824531316757202, + "rewards/rejected": 0.4565582275390625, + "step": 16458 + }, + { + "epoch": 0.96, + "learning_rate": 4.660933303086478e-10, + "logits/chosen": -1.6140310764312744, + "logits/rejected": -1.6175941228866577, + "logps/chosen": -4.146986961364746, + "logps/rejected": -44.782752990722656, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4231228828430176, + "rewards/margins": 1.0191327333450317, + "rewards/rejected": -0.5960098505020142, + "step": 16459 + }, + { + "epoch": 0.96, + "learning_rate": 4.648104368262851e-10, + "logits/chosen": -1.7694965600967407, + "logits/rejected": -1.7302601337432861, + "logps/chosen": -240.594482421875, + "logps/rejected": -493.9740905761719, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.738922119140625, + "rewards/margins": 3.4457061290740967, + "rewards/rejected": 0.29321596026420593, + "step": 16460 + }, + { + "epoch": 0.96, + "learning_rate": 4.6352930307616e-10, + "logits/chosen": -1.8870692253112793, + "logits/rejected": -1.8913692235946655, + "logps/chosen": -305.525634765625, + "logps/rejected": -543.3516845703125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8630859851837158, + "rewards/margins": 7.166818618774414, + "rewards/rejected": -5.303732395172119, + "step": 16461 + }, + { + "epoch": 0.96, + "learning_rate": 4.622499291037807e-10, + "logits/chosen": -1.938257098197937, + "logits/rejected": -1.9431431293487549, + "logps/chosen": -17.700124740600586, + "logps/rejected": -219.23422241210938, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07760220021009445, + "rewards/margins": 7.121062755584717, + "rewards/rejected": -7.043460369110107, + "step": 16462 + }, + { + "epoch": 0.96, + "learning_rate": 4.609723149545941e-10, + "logits/chosen": -1.854886531829834, + "logits/rejected": -1.8279733657836914, + "logps/chosen": -227.7059783935547, + "logps/rejected": -505.67572021484375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0569214820861816, + "rewards/margins": 6.584539890289307, + "rewards/rejected": -4.527618408203125, + "step": 16463 + }, + { + "epoch": 0.96, + "learning_rate": 4.596964606740028e-10, + "logits/chosen": -1.802940011024475, + "logits/rejected": -1.8011444807052612, + "logps/chosen": -63.751068115234375, + "logps/rejected": -107.97187042236328, + "loss": 1.2641, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9986678957939148, + "rewards/margins": -1.206494927406311, + "rewards/rejected": 0.20782700181007385, + "step": 16464 + }, + { + "epoch": 0.96, + "learning_rate": 4.584223663073095e-10, + "logits/chosen": -1.6897085905075073, + "logits/rejected": -1.6641877889633179, + "logps/chosen": -221.77078247070312, + "logps/rejected": -393.18414306640625, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4836366176605225, + "rewards/margins": 1.5565491914749146, + "rewards/rejected": 0.9270874261856079, + "step": 16465 + }, + { + "epoch": 0.96, + "learning_rate": 4.571500318998001e-10, + "logits/chosen": -1.945306658744812, + "logits/rejected": -1.9079736471176147, + "logps/chosen": -126.0462875366211, + "logps/rejected": -372.1008605957031, + "loss": 0.252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3281593322753906, + "rewards/margins": 5.502307891845703, + "rewards/rejected": -5.1741485595703125, + "step": 16466 + }, + { + "epoch": 0.96, + "learning_rate": 4.5587945749665513e-10, + "logits/chosen": -2.014749050140381, + "logits/rejected": -2.014434814453125, + "logps/chosen": -0.000651317008305341, + "logps/rejected": -188.1271209716797, + "loss": 0.3271, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4224746084655635e-05, + "rewards/margins": 5.983403205871582, + "rewards/rejected": -5.98342752456665, + "step": 16467 + }, + { + "epoch": 0.96, + "learning_rate": 4.5461064314301636e-10, + "logits/chosen": -1.8107106685638428, + "logits/rejected": -1.8203202486038208, + "logps/chosen": -169.0346221923828, + "logps/rejected": -277.2626953125, + "loss": 0.1782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6987289190292358, + "rewards/margins": 1.4377670288085938, + "rewards/rejected": 0.2609619200229645, + "step": 16468 + }, + { + "epoch": 0.96, + "learning_rate": 4.5334358888395874e-10, + "logits/chosen": -1.9393374919891357, + "logits/rejected": -1.9130568504333496, + "logps/chosen": -87.37205505371094, + "logps/rejected": -312.859130859375, + "loss": 0.1873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7435478568077087, + "rewards/margins": 2.560802459716797, + "rewards/rejected": -1.817254662513733, + "step": 16469 + }, + { + "epoch": 0.96, + "learning_rate": 4.520782947645019e-10, + "logits/chosen": -1.7691326141357422, + "logits/rejected": -1.7424530982971191, + "logps/chosen": -151.82501220703125, + "logps/rejected": -241.99917602539062, + "loss": 0.1961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2461715936660767, + "rewards/margins": 1.4517303705215454, + "rewards/rejected": -0.20555877685546875, + "step": 16470 + }, + { + "epoch": 0.96, + "learning_rate": 4.508147608295876e-10, + "logits/chosen": -2.0495872497558594, + "logits/rejected": -2.044069766998291, + "logps/chosen": -47.74681091308594, + "logps/rejected": -202.4937744140625, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5281818509101868, + "rewards/margins": 1.4987690448760986, + "rewards/rejected": -0.9705871939659119, + "step": 16471 + }, + { + "epoch": 0.96, + "learning_rate": 4.495529871241022e-10, + "logits/chosen": -1.887765884399414, + "logits/rejected": -1.8865970373153687, + "logps/chosen": -11.793947219848633, + "logps/rejected": -202.12496948242188, + "loss": 0.3806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.036345865577459335, + "rewards/margins": 2.201838254928589, + "rewards/rejected": -2.165492296218872, + "step": 16472 + }, + { + "epoch": 0.96, + "learning_rate": 4.4829297369287645e-10, + "logits/chosen": -1.7424513101577759, + "logits/rejected": -1.7323408126831055, + "logps/chosen": -246.341064453125, + "logps/rejected": -400.39276123046875, + "loss": 0.0619, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0667297840118408, + "rewards/margins": 4.954840183258057, + "rewards/rejected": -3.888110399246216, + "step": 16473 + }, + { + "epoch": 0.96, + "learning_rate": 4.470347205806635e-10, + "logits/chosen": -1.818341612815857, + "logits/rejected": -1.8169786930084229, + "logps/chosen": -179.22671508789062, + "logps/rejected": -345.12652587890625, + "loss": 0.2468, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.964898705482483, + "rewards/margins": 0.5186767578125, + "rewards/rejected": 1.446221947669983, + "step": 16474 + }, + { + "epoch": 0.96, + "learning_rate": 4.4577822783216647e-10, + "logits/chosen": -1.8088607788085938, + "logits/rejected": -1.806312918663025, + "logps/chosen": -258.1534118652344, + "logps/rejected": -333.367431640625, + "loss": 0.0921, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.355920314788818, + "rewards/margins": 1.6229734420776367, + "rewards/rejected": 2.7329468727111816, + "step": 16475 + }, + { + "epoch": 0.96, + "learning_rate": 4.445234954920274e-10, + "logits/chosen": -1.8321579694747925, + "logits/rejected": -1.833404779434204, + "logps/chosen": -22.702245712280273, + "logps/rejected": -189.54669189453125, + "loss": 0.3444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4695495665073395, + "rewards/margins": 1.0791901350021362, + "rewards/rejected": -0.6096405386924744, + "step": 16476 + }, + { + "epoch": 0.96, + "learning_rate": 4.4327052360481067e-10, + "logits/chosen": -2.0636250972747803, + "logits/rejected": -2.0539255142211914, + "logps/chosen": -104.90609741210938, + "logps/rejected": -263.108642578125, + "loss": 0.3631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.134837344288826, + "rewards/margins": 1.5506271123886108, + "rewards/rejected": -1.6854645013809204, + "step": 16477 + }, + { + "epoch": 0.96, + "learning_rate": 4.420193122150362e-10, + "logits/chosen": -1.7823232412338257, + "logits/rejected": -1.7519601583480835, + "logps/chosen": -282.2744445800781, + "logps/rejected": -485.98797607421875, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.572296142578125, + "rewards/margins": 3.6972076892852783, + "rewards/rejected": -1.1249115467071533, + "step": 16478 + }, + { + "epoch": 0.96, + "learning_rate": 4.407698613671518e-10, + "logits/chosen": -1.7278491258621216, + "logits/rejected": -1.725217580795288, + "logps/chosen": -0.00013958910130895674, + "logps/rejected": -107.35932922363281, + "loss": 0.5004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0329406298696995e-06, + "rewards/margins": 0.8064108490943909, + "rewards/rejected": -0.8064178824424744, + "step": 16479 + }, + { + "epoch": 0.96, + "learning_rate": 4.3952217110553857e-10, + "logits/chosen": -1.8537747859954834, + "logits/rejected": -1.8551958799362183, + "logps/chosen": -0.00010788260260596871, + "logps/rejected": -182.45379638671875, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9457404454879e-06, + "rewards/margins": 2.9952917098999023, + "rewards/rejected": -2.995295763015747, + "step": 16480 + }, + { + "epoch": 0.96, + "learning_rate": 4.3827624147452225e-10, + "logits/chosen": -1.8733190298080444, + "logits/rejected": -1.8723056316375732, + "logps/chosen": -187.08375549316406, + "logps/rejected": -344.06182861328125, + "loss": 0.1112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.879644751548767, + "rewards/margins": 2.245513916015625, + "rewards/rejected": -0.3658691346645355, + "step": 16481 + }, + { + "epoch": 0.96, + "learning_rate": 4.3703207251837294e-10, + "logits/chosen": -1.9253116846084595, + "logits/rejected": -1.9240140914916992, + "logps/chosen": -27.393901824951172, + "logps/rejected": -152.08282470703125, + "loss": 0.3302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22197552025318146, + "rewards/margins": 1.54066002368927, + "rewards/rejected": -1.318684458732605, + "step": 16482 + }, + { + "epoch": 0.96, + "learning_rate": 4.3578966428128304e-10, + "logits/chosen": -1.9901564121246338, + "logits/rejected": -2.006722927093506, + "logps/chosen": -236.42071533203125, + "logps/rejected": -446.58599853515625, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0686891078948975, + "rewards/margins": 6.249091148376465, + "rewards/rejected": -4.180401802062988, + "step": 16483 + }, + { + "epoch": 0.96, + "learning_rate": 4.3454901680737844e-10, + "logits/chosen": -2.123121500015259, + "logits/rejected": -2.1248233318328857, + "logps/chosen": -5.376265048980713, + "logps/rejected": -81.80928802490234, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17180843651294708, + "rewards/margins": 3.4149928092956543, + "rewards/rejected": -3.2431843280792236, + "step": 16484 + }, + { + "epoch": 0.96, + "learning_rate": 4.333101301407571e-10, + "logits/chosen": -1.6470444202423096, + "logits/rejected": -1.547196388244629, + "logps/chosen": -287.0343933105469, + "logps/rejected": -523.9517211914062, + "loss": 0.0943, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.187509298324585, + "rewards/margins": 2.014065742492676, + "rewards/rejected": 0.17344360053539276, + "step": 16485 + }, + { + "epoch": 0.96, + "learning_rate": 4.3207300432540617e-10, + "logits/chosen": -1.988373041152954, + "logits/rejected": -2.012842893600464, + "logps/chosen": -236.72134399414062, + "logps/rejected": -532.1434936523438, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3999602794647217, + "rewards/margins": 8.881756782531738, + "rewards/rejected": -6.4817962646484375, + "step": 16486 + }, + { + "epoch": 0.96, + "learning_rate": 4.3083763940529593e-10, + "logits/chosen": -1.857348918914795, + "logits/rejected": -1.861201524734497, + "logps/chosen": -3.218629353796132e-05, + "logps/rejected": -72.728515625, + "loss": 0.3952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.203995338983077e-06, + "rewards/margins": 2.234912633895874, + "rewards/rejected": -2.2349114418029785, + "step": 16487 + }, + { + "epoch": 0.96, + "learning_rate": 4.2960403542429137e-10, + "logits/chosen": -1.853009819984436, + "logits/rejected": -1.9174410104751587, + "logps/chosen": -253.99758911132812, + "logps/rejected": -463.208740234375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2662384510040283, + "rewards/margins": 8.670132637023926, + "rewards/rejected": -5.403893947601318, + "step": 16488 + }, + { + "epoch": 0.96, + "learning_rate": 4.283721924262296e-10, + "logits/chosen": -1.8668938875198364, + "logits/rejected": -1.848010778427124, + "logps/chosen": -345.64312744140625, + "logps/rejected": -493.5868835449219, + "loss": 0.1405, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8516845703125, + "rewards/margins": 1.1571379899978638, + "rewards/rejected": 1.6945465803146362, + "step": 16489 + }, + { + "epoch": 0.96, + "learning_rate": 4.2714211045487e-10, + "logits/chosen": -1.8561102151870728, + "logits/rejected": -1.909022331237793, + "logps/chosen": -262.6391906738281, + "logps/rejected": -266.38385009765625, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.46067214012146, + "rewards/margins": 3.081418037414551, + "rewards/rejected": 0.37925416231155396, + "step": 16490 + }, + { + "epoch": 0.96, + "learning_rate": 4.259137895539111e-10, + "logits/chosen": -1.8173749446868896, + "logits/rejected": -1.8043533563613892, + "logps/chosen": -30.409500122070312, + "logps/rejected": -118.83533477783203, + "loss": 0.5342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25184518098831177, + "rewards/margins": 0.19984552264213562, + "rewards/rejected": 0.051999665796756744, + "step": 16491 + }, + { + "epoch": 0.96, + "learning_rate": 4.246872297669846e-10, + "logits/chosen": -1.9830759763717651, + "logits/rejected": -1.9612586498260498, + "logps/chosen": -20.87447738647461, + "logps/rejected": -535.0382690429688, + "loss": 0.1949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5207281112670898, + "rewards/margins": 12.579095840454102, + "rewards/rejected": -12.058367729187012, + "step": 16492 + }, + { + "epoch": 0.96, + "learning_rate": 4.2346243113766665e-10, + "logits/chosen": -1.776216745376587, + "logits/rejected": -1.7865713834762573, + "logps/chosen": -233.7190399169922, + "logps/rejected": -404.051513671875, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.745131015777588, + "rewards/margins": 3.0444748401641846, + "rewards/rejected": -0.29934388399124146, + "step": 16493 + }, + { + "epoch": 0.96, + "learning_rate": 4.222393937094726e-10, + "logits/chosen": -1.6661550998687744, + "logits/rejected": -1.6957024335861206, + "logps/chosen": -189.83026123046875, + "logps/rejected": -477.3059387207031, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7236511707305908, + "rewards/margins": 5.109537124633789, + "rewards/rejected": -3.385885715484619, + "step": 16494 + }, + { + "epoch": 0.96, + "learning_rate": 4.2101811752583983e-10, + "logits/chosen": -1.7098734378814697, + "logits/rejected": -1.7019160985946655, + "logps/chosen": -7.438586908392608e-05, + "logps/rejected": -223.38858032226562, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.375489566475153e-05, + "rewards/margins": 6.081249713897705, + "rewards/rejected": -6.081225872039795, + "step": 16495 + }, + { + "epoch": 0.96, + "learning_rate": 4.1979860263016144e-10, + "logits/chosen": -1.8946020603179932, + "logits/rejected": -1.8783572912216187, + "logps/chosen": -148.7308349609375, + "logps/rejected": -251.81655883789062, + "loss": 0.0951, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0452942848205566, + "rewards/margins": 2.291600227355957, + "rewards/rejected": -0.24630585312843323, + "step": 16496 + }, + { + "epoch": 0.96, + "learning_rate": 4.1858084906576384e-10, + "logits/chosen": -1.7596609592437744, + "logits/rejected": -1.7503551244735718, + "logps/chosen": -173.12277221679688, + "logps/rejected": -314.0330505371094, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21971893310546875, + "rewards/margins": 1.8050460815429688, + "rewards/rejected": -2.0247650146484375, + "step": 16497 + }, + { + "epoch": 0.96, + "learning_rate": 4.173648568759014e-10, + "logits/chosen": -1.7778339385986328, + "logits/rejected": -1.7845720052719116, + "logps/chosen": -171.53253173828125, + "logps/rejected": -260.59661865234375, + "loss": 0.0819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2727631330490112, + "rewards/margins": 2.2905824184417725, + "rewards/rejected": -1.0178192853927612, + "step": 16498 + }, + { + "epoch": 0.96, + "learning_rate": 4.161506261037784e-10, + "logits/chosen": -1.9910844564437866, + "logits/rejected": -1.970690369606018, + "logps/chosen": -22.59880256652832, + "logps/rejected": -328.5257873535156, + "loss": 0.3819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3759773373603821, + "rewards/margins": 3.9246299266815186, + "rewards/rejected": -4.300607204437256, + "step": 16499 + }, + { + "epoch": 0.96, + "learning_rate": 4.1493815679252145e-10, + "logits/chosen": -1.891320824623108, + "logits/rejected": -1.808217167854309, + "logps/chosen": -363.74676513671875, + "logps/rejected": -737.641845703125, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2430970668792725, + "rewards/margins": 4.260675430297852, + "rewards/rejected": -1.017578125, + "step": 16500 + }, + { + "epoch": 0.96, + "learning_rate": 4.1372744898520716e-10, + "logits/chosen": -1.9173299074172974, + "logits/rejected": -1.910622000694275, + "logps/chosen": -3.464043617248535, + "logps/rejected": -245.7884063720703, + "loss": 0.1693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7437718510627747, + "rewards/margins": 3.8456945419311523, + "rewards/rejected": -3.1019227504730225, + "step": 16501 + }, + { + "epoch": 0.96, + "learning_rate": 4.1251850272484567e-10, + "logits/chosen": -1.7712002992630005, + "logits/rejected": -1.7789613008499146, + "logps/chosen": -40.86372756958008, + "logps/rejected": -214.71182250976562, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0738399550318718, + "rewards/margins": 3.484107494354248, + "rewards/rejected": -3.4102675914764404, + "step": 16502 + }, + { + "epoch": 0.96, + "learning_rate": 4.1131131805439147e-10, + "logits/chosen": -1.7770451307296753, + "logits/rejected": -1.77206289768219, + "logps/chosen": -4.915012836456299, + "logps/rejected": -184.78981018066406, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08387932926416397, + "rewards/margins": 3.1218247413635254, + "rewards/rejected": -3.037945508956909, + "step": 16503 + }, + { + "epoch": 0.96, + "learning_rate": 4.101058950167158e-10, + "logits/chosen": -1.8994580507278442, + "logits/rejected": -1.8760483264923096, + "logps/chosen": -20.533912658691406, + "logps/rejected": -120.72455596923828, + "loss": 0.6291, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.19633446633815765, + "rewards/margins": -0.1298549622297287, + "rewards/rejected": 0.32618942856788635, + "step": 16504 + }, + { + "epoch": 0.96, + "learning_rate": 4.0890223365465105e-10, + "logits/chosen": -1.8230977058410645, + "logits/rejected": -1.876114845275879, + "logps/chosen": -226.44325256347656, + "logps/rejected": -283.192626953125, + "loss": 0.1945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3897002935409546, + "rewards/margins": 0.9089202880859375, + "rewards/rejected": 0.4807800352573395, + "step": 16505 + }, + { + "epoch": 0.96, + "learning_rate": 4.0770033401096304e-10, + "logits/chosen": -1.9831212759017944, + "logits/rejected": -1.9577980041503906, + "logps/chosen": -162.79336547851562, + "logps/rejected": -463.3249816894531, + "loss": 0.0674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.722381591796875, + "rewards/margins": 2.561685085296631, + "rewards/rejected": -0.8393036127090454, + "step": 16506 + }, + { + "epoch": 0.96, + "learning_rate": 4.065001961283343e-10, + "logits/chosen": -1.9148625135421753, + "logits/rejected": -1.914865493774414, + "logps/chosen": -152.48751831054688, + "logps/rejected": -266.18499755859375, + "loss": 0.2056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8936783075332642, + "rewards/margins": 1.5229690074920654, + "rewards/rejected": -0.629290759563446, + "step": 16507 + }, + { + "epoch": 0.96, + "learning_rate": 4.053018200494085e-10, + "logits/chosen": -2.0133438110351562, + "logits/rejected": -1.9924687147140503, + "logps/chosen": -63.548404693603516, + "logps/rejected": -247.7700653076172, + "loss": 0.3145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5190243124961853, + "rewards/margins": 1.2068417072296143, + "rewards/rejected": -0.687817394733429, + "step": 16508 + }, + { + "epoch": 0.96, + "learning_rate": 4.041052058167516e-10, + "logits/chosen": -1.9560105800628662, + "logits/rejected": -1.9505912065505981, + "logps/chosen": -0.000990925240330398, + "logps/rejected": -184.21807861328125, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.29498689097818e-05, + "rewards/margins": 2.8266947269439697, + "rewards/rejected": -2.8266618251800537, + "step": 16509 + }, + { + "epoch": 0.96, + "learning_rate": 4.0291035347288504e-10, + "logits/chosen": -1.9924399852752686, + "logits/rejected": -1.9741466045379639, + "logps/chosen": -176.1050262451172, + "logps/rejected": -283.717041015625, + "loss": 0.0874, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7469955682754517, + "rewards/margins": 2.317488193511963, + "rewards/rejected": -0.5704925656318665, + "step": 16510 + }, + { + "epoch": 0.96, + "learning_rate": 4.0171726306024166e-10, + "logits/chosen": -1.9391974210739136, + "logits/rejected": -1.9882385730743408, + "logps/chosen": -245.72137451171875, + "logps/rejected": -343.8913269042969, + "loss": 0.0753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6154358386993408, + "rewards/margins": 2.160421848297119, + "rewards/rejected": -0.5449859499931335, + "step": 16511 + }, + { + "epoch": 0.96, + "learning_rate": 4.0052593462122086e-10, + "logits/chosen": -1.8014367818832397, + "logits/rejected": -1.7943568229675293, + "logps/chosen": -4.912371635437012, + "logps/rejected": -157.22006225585938, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2300366461277008, + "rewards/margins": 4.638564109802246, + "rewards/rejected": -4.408527374267578, + "step": 16512 + }, + { + "epoch": 0.96, + "learning_rate": 3.9933636819812766e-10, + "logits/chosen": -2.2048754692077637, + "logits/rejected": -2.2089385986328125, + "logps/chosen": -0.00945991836488247, + "logps/rejected": -107.34925842285156, + "loss": 0.3601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0040508764795959, + "rewards/margins": 2.7753758430480957, + "rewards/rejected": -2.771324872970581, + "step": 16513 + }, + { + "epoch": 0.96, + "learning_rate": 3.981485638332338e-10, + "logits/chosen": -1.816120982170105, + "logits/rejected": -1.8149679899215698, + "logps/chosen": -0.047211021184921265, + "logps/rejected": -114.78974914550781, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024067445192486048, + "rewards/margins": 1.930283784866333, + "rewards/rejected": -1.9326905012130737, + "step": 16514 + }, + { + "epoch": 0.96, + "learning_rate": 3.9696252156872776e-10, + "logits/chosen": -1.8223613500595093, + "logits/rejected": -1.7901031970977783, + "logps/chosen": -151.12045288085938, + "logps/rejected": -388.130126953125, + "loss": 0.1867, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2597687244415283, + "rewards/margins": 0.8893219232559204, + "rewards/rejected": 1.370446801185608, + "step": 16515 + }, + { + "epoch": 0.96, + "learning_rate": 3.9577824144675365e-10, + "logits/chosen": -1.7684481143951416, + "logits/rejected": -1.7588543891906738, + "logps/chosen": -209.90037536621094, + "logps/rejected": -374.65338134765625, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9424667358398438, + "rewards/margins": 3.506221055984497, + "rewards/rejected": -1.5637543201446533, + "step": 16516 + }, + { + "epoch": 0.96, + "learning_rate": 3.945957235093722e-10, + "logits/chosen": -1.9824069738388062, + "logits/rejected": -1.9553112983703613, + "logps/chosen": -196.42832946777344, + "logps/rejected": -649.1339111328125, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1925582885742188, + "rewards/margins": 5.483561992645264, + "rewards/rejected": -4.291003704071045, + "step": 16517 + }, + { + "epoch": 0.96, + "learning_rate": 3.9341496779859984e-10, + "logits/chosen": -1.9184950590133667, + "logits/rejected": -1.8709535598754883, + "logps/chosen": -165.70144653320312, + "logps/rejected": -621.676025390625, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0253982543945312, + "rewards/margins": 6.769404888153076, + "rewards/rejected": -4.744006633758545, + "step": 16518 + }, + { + "epoch": 0.96, + "learning_rate": 3.9223597435637525e-10, + "logits/chosen": -1.6965075731277466, + "logits/rejected": -1.707666277885437, + "logps/chosen": -0.0019172501051798463, + "logps/rejected": -307.3069763183594, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013395897985901684, + "rewards/margins": 7.522455215454102, + "rewards/rejected": -7.522589206695557, + "step": 16519 + }, + { + "epoch": 0.96, + "learning_rate": 3.9105874322459266e-10, + "logits/chosen": -1.9939942359924316, + "logits/rejected": -1.9976848363876343, + "logps/chosen": -117.29783630371094, + "logps/rejected": -122.2542724609375, + "loss": 0.6193, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.6507347226142883, + "rewards/margins": -0.31954729557037354, + "rewards/rejected": 0.9702820181846619, + "step": 16520 + }, + { + "epoch": 0.96, + "learning_rate": 3.8988327444506863e-10, + "logits/chosen": -1.881885290145874, + "logits/rejected": -1.8706083297729492, + "logps/chosen": -30.024534225463867, + "logps/rejected": -164.62600708007812, + "loss": 0.5892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5425930023193359, + "rewards/margins": 1.9698550701141357, + "rewards/rejected": -2.5124480724334717, + "step": 16521 + }, + { + "epoch": 0.96, + "learning_rate": 3.8870956805955313e-10, + "logits/chosen": -1.8136333227157593, + "logits/rejected": -1.8083369731903076, + "logps/chosen": -60.2242431640625, + "logps/rejected": -245.72886657714844, + "loss": 0.2531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5778549313545227, + "rewards/margins": 2.5192840099334717, + "rewards/rejected": -1.9414291381835938, + "step": 16522 + }, + { + "epoch": 0.96, + "learning_rate": 3.875376241097517e-10, + "logits/chosen": -2.0062451362609863, + "logits/rejected": -2.0083200931549072, + "logps/chosen": -34.13570022583008, + "logps/rejected": -140.41885375976562, + "loss": 0.2443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6127514243125916, + "rewards/margins": 1.4099972248077393, + "rewards/rejected": -0.7972458004951477, + "step": 16523 + }, + { + "epoch": 0.96, + "learning_rate": 3.8636744263729204e-10, + "logits/chosen": -1.9422725439071655, + "logits/rejected": -1.9508936405181885, + "logps/chosen": -155.55995178222656, + "logps/rejected": -320.96307373046875, + "loss": 0.0823, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4739350080490112, + "rewards/margins": 2.389334201812744, + "rewards/rejected": -0.9153991937637329, + "step": 16524 + }, + { + "epoch": 0.96, + "learning_rate": 3.8519902368375213e-10, + "logits/chosen": -1.6931655406951904, + "logits/rejected": -1.7153716087341309, + "logps/chosen": -265.67425537109375, + "logps/rejected": -401.3806457519531, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2077605724334717, + "rewards/margins": 3.780505418777466, + "rewards/rejected": -0.5727447867393494, + "step": 16525 + }, + { + "epoch": 0.96, + "learning_rate": 3.84032367290632e-10, + "logits/chosen": -1.8528896570205688, + "logits/rejected": -1.8523247241973877, + "logps/chosen": -26.909767150878906, + "logps/rejected": -335.6495361328125, + "loss": 0.1945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4247339367866516, + "rewards/margins": 6.472012042999268, + "rewards/rejected": -6.047277927398682, + "step": 16526 + }, + { + "epoch": 0.96, + "learning_rate": 3.82867473499382e-10, + "logits/chosen": -1.8763843774795532, + "logits/rejected": -1.8702654838562012, + "logps/chosen": -10.380367279052734, + "logps/rejected": -175.1060333251953, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03758258745074272, + "rewards/margins": 2.7684879302978516, + "rewards/rejected": -2.730905294418335, + "step": 16527 + }, + { + "epoch": 0.96, + "learning_rate": 3.8170434235137994e-10, + "logits/chosen": -2.1583478450775146, + "logits/rejected": -2.168271541595459, + "logps/chosen": -9.908411979675293, + "logps/rejected": -189.61862182617188, + "loss": 0.2617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4238789677619934, + "rewards/margins": 1.726398229598999, + "rewards/rejected": -1.3025192022323608, + "step": 16528 + }, + { + "epoch": 0.96, + "learning_rate": 3.80542973887954e-10, + "logits/chosen": -1.9212679862976074, + "logits/rejected": -1.9107741117477417, + "logps/chosen": -12.67491626739502, + "logps/rejected": -375.8797607421875, + "loss": 0.3095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13111524283885956, + "rewards/margins": 6.37623929977417, + "rewards/rejected": -6.507354736328125, + "step": 16529 + }, + { + "epoch": 0.96, + "learning_rate": 3.793833681503489e-10, + "logits/chosen": -1.9735440015792847, + "logits/rejected": -1.8720824718475342, + "logps/chosen": -243.07444763183594, + "logps/rejected": -593.5997314453125, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.084666728973389, + "rewards/margins": 4.224852085113525, + "rewards/rejected": -0.14018554985523224, + "step": 16530 + }, + { + "epoch": 0.96, + "learning_rate": 3.7822552517977614e-10, + "logits/chosen": -1.863448977470398, + "logits/rejected": -1.8648664951324463, + "logps/chosen": -32.53743362426758, + "logps/rejected": -139.28700256347656, + "loss": 0.4098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5922417044639587, + "rewards/margins": 0.5281578302383423, + "rewards/rejected": 0.06408386677503586, + "step": 16531 + }, + { + "epoch": 0.96, + "learning_rate": 3.7706944501735283e-10, + "logits/chosen": -2.0496065616607666, + "logits/rejected": -2.0351665019989014, + "logps/chosen": -114.5870361328125, + "logps/rejected": -239.365966796875, + "loss": 0.1049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8441696166992188, + "rewards/margins": 1.6863205432891846, + "rewards/rejected": 0.15784911811351776, + "step": 16532 + }, + { + "epoch": 0.96, + "learning_rate": 3.759151277041572e-10, + "logits/chosen": -1.9583982229232788, + "logits/rejected": -1.947343349456787, + "logps/chosen": -160.7121124267578, + "logps/rejected": -343.400390625, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7607834339141846, + "rewards/margins": 2.587559461593628, + "rewards/rejected": 0.17322388291358948, + "step": 16533 + }, + { + "epoch": 0.96, + "learning_rate": 3.747625732811954e-10, + "logits/chosen": -1.7706636190414429, + "logits/rejected": -1.7431714534759521, + "logps/chosen": -149.90115356445312, + "logps/rejected": -292.5758056640625, + "loss": 0.1207, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.622274875640869, + "rewards/margins": 1.554489254951477, + "rewards/rejected": 1.067785620689392, + "step": 16534 + }, + { + "epoch": 0.96, + "learning_rate": 3.7361178178940687e-10, + "logits/chosen": -1.7484914064407349, + "logits/rejected": -1.7381153106689453, + "logps/chosen": -18.354984283447266, + "logps/rejected": -300.3185119628906, + "loss": 0.1923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4920179545879364, + "rewards/margins": 5.694146633148193, + "rewards/rejected": -5.202128887176514, + "step": 16535 + }, + { + "epoch": 0.96, + "learning_rate": 3.7246275326968115e-10, + "logits/chosen": -1.6561342477798462, + "logits/rejected": -1.65582275390625, + "logps/chosen": -250.15060424804688, + "logps/rejected": -237.6375274658203, + "loss": 0.3918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6645783185958862, + "rewards/margins": 0.055145263671875, + "rewards/rejected": 1.6094330549240112, + "step": 16536 + }, + { + "epoch": 0.96, + "learning_rate": 3.713154877628244e-10, + "logits/chosen": -1.8908309936523438, + "logits/rejected": -1.8941010236740112, + "logps/chosen": -21.47471809387207, + "logps/rejected": -108.74475860595703, + "loss": 0.3254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2608680725097656, + "rewards/margins": 2.83957839012146, + "rewards/rejected": -2.5787103176116943, + "step": 16537 + }, + { + "epoch": 0.96, + "learning_rate": 3.7016998530960963e-10, + "logits/chosen": -1.9090676307678223, + "logits/rejected": -1.9452272653579712, + "logps/chosen": -264.17547607421875, + "logps/rejected": -541.7362060546875, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.254034399986267, + "rewards/margins": 3.9165711402893066, + "rewards/rejected": -2.66253662109375, + "step": 16538 + }, + { + "epoch": 0.96, + "learning_rate": 3.6902624595072096e-10, + "logits/chosen": -2.0414669513702393, + "logits/rejected": -2.0308704376220703, + "logps/chosen": -14.46633529663086, + "logps/rejected": -250.33990478515625, + "loss": 0.1537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6021385192871094, + "rewards/margins": 4.323795318603516, + "rewards/rejected": -3.7216567993164062, + "step": 16539 + }, + { + "epoch": 0.96, + "learning_rate": 3.6788426972679255e-10, + "logits/chosen": -1.9930428266525269, + "logits/rejected": -1.985397219657898, + "logps/chosen": -56.29735565185547, + "logps/rejected": -195.83526611328125, + "loss": 1.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7995041608810425, + "rewards/margins": 1.6745270490646362, + "rewards/rejected": -3.4740312099456787, + "step": 16540 + }, + { + "epoch": 0.96, + "learning_rate": 3.6674405667838636e-10, + "logits/chosen": -1.9656096696853638, + "logits/rejected": -1.9703288078308105, + "logps/chosen": -192.58152770996094, + "logps/rejected": -341.80377197265625, + "loss": 0.0498, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.249920606613159, + "rewards/margins": 2.8587403297424316, + "rewards/rejected": -0.6088196039199829, + "step": 16541 + }, + { + "epoch": 0.96, + "learning_rate": 3.6560560684602007e-10, + "logits/chosen": -1.7641243934631348, + "logits/rejected": -1.758823275566101, + "logps/chosen": -36.44453430175781, + "logps/rejected": -172.168212890625, + "loss": 0.2914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21966858208179474, + "rewards/margins": 1.772607445716858, + "rewards/rejected": -1.5529388189315796, + "step": 16542 + }, + { + "epoch": 0.96, + "learning_rate": 3.6446892027012785e-10, + "logits/chosen": -1.9529414176940918, + "logits/rejected": -1.9943575859069824, + "logps/chosen": -158.9949951171875, + "logps/rejected": -282.06097412109375, + "loss": 0.1075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.772241234779358, + "rewards/margins": 1.6117279529571533, + "rewards/rejected": 0.16051331162452698, + "step": 16543 + }, + { + "epoch": 0.96, + "learning_rate": 3.6333399699109424e-10, + "logits/chosen": -1.9717470407485962, + "logits/rejected": -2.0180797576904297, + "logps/chosen": -295.76800537109375, + "logps/rejected": -436.9769287109375, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6227082014083862, + "rewards/margins": 5.629345893859863, + "rewards/rejected": -4.0066375732421875, + "step": 16544 + }, + { + "epoch": 0.96, + "learning_rate": 3.622008370492369e-10, + "logits/chosen": -1.931893229484558, + "logits/rejected": -1.878308653831482, + "logps/chosen": -0.010759084485471249, + "logps/rejected": -622.2889404296875, + "loss": 0.3355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0013219022657722235, + "rewards/margins": 10.3939790725708, + "rewards/rejected": -10.392657279968262, + "step": 16545 + }, + { + "epoch": 0.96, + "learning_rate": 3.610694404848069e-10, + "logits/chosen": -1.9811522960662842, + "logits/rejected": -1.978847622871399, + "logps/chosen": -24.813106536865234, + "logps/rejected": -158.64837646484375, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2589092254638672, + "rewards/margins": 4.238903045654297, + "rewards/rejected": -3.9799935817718506, + "step": 16546 + }, + { + "epoch": 0.96, + "learning_rate": 3.59939807338e-10, + "logits/chosen": -1.7401201725006104, + "logits/rejected": -1.6891708374023438, + "logps/chosen": -192.78863525390625, + "logps/rejected": -510.63336181640625, + "loss": 0.0578, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.448132276535034, + "rewards/margins": 2.807940721511841, + "rewards/rejected": -0.3598083555698395, + "step": 16547 + }, + { + "epoch": 0.96, + "learning_rate": 3.5881193764895623e-10, + "logits/chosen": -2.0686492919921875, + "logits/rejected": -2.0657877922058105, + "logps/chosen": -80.8250503540039, + "logps/rejected": -180.96878051757812, + "loss": 0.232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2457894086837769, + "rewards/margins": 1.3566200733184814, + "rewards/rejected": -0.11083068698644638, + "step": 16548 + }, + { + "epoch": 0.96, + "learning_rate": 3.5768583145772133e-10, + "logits/chosen": -1.7207108736038208, + "logits/rejected": -1.6999071836471558, + "logps/chosen": -190.64852905273438, + "logps/rejected": -493.5960388183594, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.302709937095642, + "rewards/margins": 8.260149955749512, + "rewards/rejected": -6.957440376281738, + "step": 16549 + }, + { + "epoch": 0.96, + "learning_rate": 3.5656148880431893e-10, + "logits/chosen": -1.9536701440811157, + "logits/rejected": -1.9649864435195923, + "logps/chosen": -198.55682373046875, + "logps/rejected": -372.81658935546875, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1160247325897217, + "rewards/margins": 3.276620388031006, + "rewards/rejected": -1.1605957746505737, + "step": 16550 + }, + { + "epoch": 0.96, + "learning_rate": 3.5543890972868364e-10, + "logits/chosen": -1.9011077880859375, + "logits/rejected": -1.9376435279846191, + "logps/chosen": -124.73174285888672, + "logps/rejected": -267.3533630371094, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3244576454162598, + "rewards/margins": 4.374228477478027, + "rewards/rejected": -2.0497710704803467, + "step": 16551 + }, + { + "epoch": 0.96, + "learning_rate": 3.5431809427070025e-10, + "logits/chosen": -1.8042616844177246, + "logits/rejected": -1.8117979764938354, + "logps/chosen": -141.02200317382812, + "logps/rejected": -463.45513916015625, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.187152147293091, + "rewards/margins": 8.227801322937012, + "rewards/rejected": -6.0406494140625, + "step": 16552 + }, + { + "epoch": 0.96, + "learning_rate": 3.5319904247017585e-10, + "logits/chosen": -1.8896090984344482, + "logits/rejected": -1.936202883720398, + "logps/chosen": -233.19216918945312, + "logps/rejected": -547.80615234375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7006165981292725, + "rewards/margins": 7.827280044555664, + "rewards/rejected": -5.1266632080078125, + "step": 16553 + }, + { + "epoch": 0.96, + "learning_rate": 3.5208175436687306e-10, + "logits/chosen": -1.7719100713729858, + "logits/rejected": -1.7839603424072266, + "logps/chosen": -345.50341796875, + "logps/rejected": -629.98193359375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7779481410980225, + "rewards/margins": 6.092419624328613, + "rewards/rejected": -3.314471483230591, + "step": 16554 + }, + { + "epoch": 0.96, + "learning_rate": 3.5096623000048234e-10, + "logits/chosen": -1.9167966842651367, + "logits/rejected": -1.9090520143508911, + "logps/chosen": -288.32305908203125, + "logps/rejected": -401.27191162109375, + "loss": 0.3947, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5294922590255737, + "rewards/margins": -0.09310293197631836, + "rewards/rejected": 1.622595191001892, + "step": 16555 + }, + { + "epoch": 0.96, + "learning_rate": 3.4985246941062754e-10, + "logits/chosen": -1.7905828952789307, + "logits/rejected": -1.795752763748169, + "logps/chosen": -182.8119354248047, + "logps/rejected": -352.4574890136719, + "loss": 0.0483, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3656082153320312, + "rewards/margins": 3.4166154861450195, + "rewards/rejected": -1.0510071516036987, + "step": 16556 + }, + { + "epoch": 0.96, + "learning_rate": 3.48740472636877e-10, + "logits/chosen": -1.8605109453201294, + "logits/rejected": -1.8741575479507446, + "logps/chosen": -161.62127685546875, + "logps/rejected": -262.8753662109375, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8636093139648438, + "rewards/margins": 2.859963893890381, + "rewards/rejected": -0.9963546991348267, + "step": 16557 + }, + { + "epoch": 0.96, + "learning_rate": 3.4763023971873807e-10, + "logits/chosen": -1.6227738857269287, + "logits/rejected": -1.631386637687683, + "logps/chosen": -273.513671875, + "logps/rejected": -570.7618408203125, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.57647705078125, + "rewards/margins": 6.559350490570068, + "rewards/rejected": -4.982873439788818, + "step": 16558 + }, + { + "epoch": 0.96, + "learning_rate": 3.465217706956458e-10, + "logits/chosen": -1.8619904518127441, + "logits/rejected": -1.849843144416809, + "logps/chosen": -29.549055099487305, + "logps/rejected": -281.2322998046875, + "loss": 0.222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33187732100486755, + "rewards/margins": 3.201748847961426, + "rewards/rejected": -2.8698716163635254, + "step": 16559 + }, + { + "epoch": 0.96, + "learning_rate": 3.454150656069854e-10, + "logits/chosen": -1.7279597520828247, + "logits/rejected": -1.7194621562957764, + "logps/chosen": -135.2661895751953, + "logps/rejected": -196.06777954101562, + "loss": 0.2727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5877655744552612, + "rewards/margins": 0.6976608633995056, + "rewards/rejected": 0.8901047110557556, + "step": 16560 + }, + { + "epoch": 0.96, + "learning_rate": 3.4431012449206433e-10, + "logits/chosen": -1.9223822355270386, + "logits/rejected": -1.9252465963363647, + "logps/chosen": -21.089204788208008, + "logps/rejected": -192.3131866455078, + "loss": 0.1855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5940767526626587, + "rewards/margins": 3.037497043609619, + "rewards/rejected": -2.44342041015625, + "step": 16561 + }, + { + "epoch": 0.96, + "learning_rate": 3.4320694739014555e-10, + "logits/chosen": -1.8897098302841187, + "logits/rejected": -1.9085415601730347, + "logps/chosen": -253.0802001953125, + "logps/rejected": -516.06005859375, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4493377208709717, + "rewards/margins": 3.957925319671631, + "rewards/rejected": -1.5085877180099487, + "step": 16562 + }, + { + "epoch": 0.96, + "learning_rate": 3.4210553434041443e-10, + "logits/chosen": -1.767897367477417, + "logits/rejected": -1.7223083972930908, + "logps/chosen": -146.44451904296875, + "logps/rejected": -335.3760986328125, + "loss": 0.1977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2931915521621704, + "rewards/margins": 1.6427063941955566, + "rewards/rejected": -0.34951478242874146, + "step": 16563 + }, + { + "epoch": 0.96, + "learning_rate": 3.410058853819897e-10, + "logits/chosen": -1.7791215181350708, + "logits/rejected": -1.779267430305481, + "logps/chosen": -0.06543533504009247, + "logps/rejected": -102.90079498291016, + "loss": 0.4681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001732699602143839, + "rewards/margins": 1.3027496337890625, + "rewards/rejected": -1.3029228448867798, + "step": 16564 + }, + { + "epoch": 0.96, + "learning_rate": 3.3990800055395117e-10, + "logits/chosen": -1.8383907079696655, + "logits/rejected": -1.837669014930725, + "logps/chosen": -108.29132080078125, + "logps/rejected": -179.67173767089844, + "loss": 0.4403, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.255652666091919, + "rewards/margins": 0.13388288021087646, + "rewards/rejected": 1.1217697858810425, + "step": 16565 + }, + { + "epoch": 0.96, + "learning_rate": 3.3881187989528994e-10, + "logits/chosen": -1.8867673873901367, + "logits/rejected": -1.8827359676361084, + "logps/chosen": -29.94524383544922, + "logps/rejected": -203.28485107421875, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23787422478199005, + "rewards/margins": 1.3314152956008911, + "rewards/rejected": -1.0935410261154175, + "step": 16566 + }, + { + "epoch": 0.96, + "learning_rate": 3.3771752344495255e-10, + "logits/chosen": -1.7205239534378052, + "logits/rejected": -1.72257661819458, + "logps/chosen": -0.0022556728217750788, + "logps/rejected": -84.24136352539062, + "loss": 0.5701, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3061310710327234e-05, + "rewards/margins": 0.5725486278533936, + "rewards/rejected": -0.5725616812705994, + "step": 16567 + }, + { + "epoch": 0.96, + "learning_rate": 3.3662493124181344e-10, + "logits/chosen": -1.8216960430145264, + "logits/rejected": -1.8197728395462036, + "logps/chosen": -244.19882202148438, + "logps/rejected": -408.74481201171875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.849252462387085, + "rewards/margins": 4.279779434204102, + "rewards/rejected": -0.4305267333984375, + "step": 16568 + }, + { + "epoch": 0.96, + "learning_rate": 3.3553410332468615e-10, + "logits/chosen": -1.9125943183898926, + "logits/rejected": -1.9021406173706055, + "logps/chosen": -30.775083541870117, + "logps/rejected": -271.5181884765625, + "loss": 0.2586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40506038069725037, + "rewards/margins": 4.3905463218688965, + "rewards/rejected": -3.985485792160034, + "step": 16569 + }, + { + "epoch": 0.96, + "learning_rate": 3.3444503973232285e-10, + "logits/chosen": -1.9518877267837524, + "logits/rejected": -1.9375126361846924, + "logps/chosen": -313.7012634277344, + "logps/rejected": -650.145751953125, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.314352512359619, + "rewards/margins": 7.170248508453369, + "rewards/rejected": -4.85589599609375, + "step": 16570 + }, + { + "epoch": 0.96, + "learning_rate": 3.333577405034149e-10, + "logits/chosen": -1.889540672302246, + "logits/rejected": -1.8800932168960571, + "logps/chosen": -38.164947509765625, + "logps/rejected": -264.7244567871094, + "loss": 0.1885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47444459795951843, + "rewards/margins": 4.425512790679932, + "rewards/rejected": -3.951068162918091, + "step": 16571 + }, + { + "epoch": 0.96, + "learning_rate": 3.3227220567657587e-10, + "logits/chosen": -1.8520312309265137, + "logits/rejected": -1.8537757396697998, + "logps/chosen": -9.89834976196289, + "logps/rejected": -60.66371536254883, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2517363131046295, + "rewards/margins": 1.3067888021469116, + "rewards/rejected": -1.5585250854492188, + "step": 16572 + }, + { + "epoch": 0.96, + "learning_rate": 3.3118843529039155e-10, + "logits/chosen": -1.5545417070388794, + "logits/rejected": -1.5626493692398071, + "logps/chosen": -296.61553955078125, + "logps/rejected": -475.4371643066406, + "loss": 0.0502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2260010242462158, + "rewards/margins": 4.015780448913574, + "rewards/rejected": -2.7897796630859375, + "step": 16573 + }, + { + "epoch": 0.96, + "learning_rate": 3.301064293833422e-10, + "logits/chosen": -1.7726565599441528, + "logits/rejected": -1.7811050415039062, + "logps/chosen": -104.42562866210938, + "logps/rejected": -209.0616912841797, + "loss": 0.2088, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0414559841156006, + "rewards/margins": 1.0832421779632568, + "rewards/rejected": 0.9582138061523438, + "step": 16574 + }, + { + "epoch": 0.96, + "learning_rate": 3.2902618799387493e-10, + "logits/chosen": -2.0803630352020264, + "logits/rejected": -2.085843563079834, + "logps/chosen": -6.172765254974365, + "logps/rejected": -161.27700805664062, + "loss": 0.4109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02352457121014595, + "rewards/margins": 1.6619263887405396, + "rewards/rejected": -1.6384018659591675, + "step": 16575 + }, + { + "epoch": 0.96, + "learning_rate": 3.279477111603646e-10, + "logits/chosen": -1.8773081302642822, + "logits/rejected": -1.8712869882583618, + "logps/chosen": -60.874088287353516, + "logps/rejected": -196.78213500976562, + "loss": 0.0988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.78913152217865, + "rewards/margins": 2.033025026321411, + "rewards/rejected": -0.24389342963695526, + "step": 16576 + }, + { + "epoch": 0.96, + "learning_rate": 3.268709989211249e-10, + "logits/chosen": -1.797123908996582, + "logits/rejected": -1.7711288928985596, + "logps/chosen": -304.08514404296875, + "logps/rejected": -428.21514892578125, + "loss": 0.118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.307897925376892, + "rewards/margins": 2.093768358230591, + "rewards/rejected": -0.785870373249054, + "step": 16577 + }, + { + "epoch": 0.96, + "learning_rate": 3.257960513144087e-10, + "logits/chosen": -1.986703872680664, + "logits/rejected": -1.9788289070129395, + "logps/chosen": -43.25502395629883, + "logps/rejected": -332.1610107421875, + "loss": 0.1207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9871475100517273, + "rewards/margins": 8.264418601989746, + "rewards/rejected": -7.277270793914795, + "step": 16578 + }, + { + "epoch": 0.96, + "learning_rate": 3.247228683783965e-10, + "logits/chosen": -1.6179925203323364, + "logits/rejected": -1.622153639793396, + "logps/chosen": -95.37825012207031, + "logps/rejected": -389.7088623046875, + "loss": 0.1815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6881256103515625, + "rewards/margins": 3.8054473400115967, + "rewards/rejected": -3.117321729660034, + "step": 16579 + }, + { + "epoch": 0.96, + "learning_rate": 3.2365145015121333e-10, + "logits/chosen": -1.9553017616271973, + "logits/rejected": -1.9554930925369263, + "logps/chosen": -0.00615524360910058, + "logps/rejected": -72.69473266601562, + "loss": 0.4093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005294604343362153, + "rewards/margins": 1.9358785152435303, + "rewards/rejected": -1.9364079236984253, + "step": 16580 + }, + { + "epoch": 0.96, + "learning_rate": 3.225817966709288e-10, + "logits/chosen": -1.9143656492233276, + "logits/rejected": -1.9394136667251587, + "logps/chosen": -205.77926635742188, + "logps/rejected": -252.4592742919922, + "loss": 0.0773, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1707305908203125, + "rewards/margins": 1.9419234991073608, + "rewards/rejected": 0.22880707681179047, + "step": 16581 + }, + { + "epoch": 0.96, + "learning_rate": 3.215139079755347e-10, + "logits/chosen": -1.9785182476043701, + "logits/rejected": -1.9837158918380737, + "logps/chosen": -0.334667831659317, + "logps/rejected": -175.60015869140625, + "loss": 0.3452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018338272348046303, + "rewards/margins": 3.7847564220428467, + "rewards/rejected": -3.8030946254730225, + "step": 16582 + }, + { + "epoch": 0.97, + "learning_rate": 3.204477841029729e-10, + "logits/chosen": -1.9140042066574097, + "logits/rejected": -1.925618290901184, + "logps/chosen": -117.92308044433594, + "logps/rejected": -239.24423217773438, + "loss": 0.1241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6327590942382812, + "rewards/margins": 2.1888656616210938, + "rewards/rejected": -1.5561065673828125, + "step": 16583 + }, + { + "epoch": 0.97, + "learning_rate": 3.1938342509111316e-10, + "logits/chosen": -1.7257754802703857, + "logits/rejected": -1.676953673362732, + "logps/chosen": -209.5208740234375, + "logps/rejected": -484.8597412109375, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7231476306915283, + "rewards/margins": 3.1304352283477783, + "rewards/rejected": -0.40728759765625, + "step": 16584 + }, + { + "epoch": 0.97, + "learning_rate": 3.1832083097776405e-10, + "logits/chosen": -1.8615845441818237, + "logits/rejected": -1.8646390438079834, + "logps/chosen": -26.22895050048828, + "logps/rejected": -195.64613342285156, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0765949487686157, + "rewards/margins": 3.327554702758789, + "rewards/rejected": -2.250959873199463, + "step": 16585 + }, + { + "epoch": 0.97, + "learning_rate": 3.1726000180068436e-10, + "logits/chosen": -1.9877898693084717, + "logits/rejected": -1.9857652187347412, + "logps/chosen": -11.803581237792969, + "logps/rejected": -154.9959716796875, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006770610809326172, + "rewards/margins": 1.4951388835906982, + "rewards/rejected": -1.488368272781372, + "step": 16586 + }, + { + "epoch": 0.97, + "learning_rate": 3.162009375975494e-10, + "logits/chosen": -1.5751798152923584, + "logits/rejected": -1.5737375020980835, + "logps/chosen": -18.41063117980957, + "logps/rejected": -56.43096160888672, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4664648175239563, + "rewards/margins": 1.2750146389007568, + "rewards/rejected": -0.8085498809814453, + "step": 16587 + }, + { + "epoch": 0.97, + "learning_rate": 3.151436384059902e-10, + "logits/chosen": -1.6711832284927368, + "logits/rejected": -1.666507363319397, + "logps/chosen": -0.9753945469856262, + "logps/rejected": -248.97799682617188, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16078518331050873, + "rewards/margins": 5.91794490814209, + "rewards/rejected": -5.75715970993042, + "step": 16588 + }, + { + "epoch": 0.97, + "learning_rate": 3.1408810426356015e-10, + "logits/chosen": -1.8210577964782715, + "logits/rejected": -1.7988263368606567, + "logps/chosen": -213.1906280517578, + "logps/rejected": -351.48321533203125, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0280961990356445, + "rewards/margins": 2.953569173812866, + "rewards/rejected": 1.0745270252227783, + "step": 16589 + }, + { + "epoch": 0.97, + "learning_rate": 3.13034335207768e-10, + "logits/chosen": -2.0265727043151855, + "logits/rejected": -2.0269429683685303, + "logps/chosen": -43.72647476196289, + "logps/rejected": -182.0238800048828, + "loss": 1.4503, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.338889718055725, + "rewards/margins": -1.1069759130477905, + "rewards/rejected": -0.231913760304451, + "step": 16590 + }, + { + "epoch": 0.97, + "learning_rate": 3.119823312760339e-10, + "logits/chosen": -1.8826433420181274, + "logits/rejected": -1.9190922975540161, + "logps/chosen": -150.52871704101562, + "logps/rejected": -202.35433959960938, + "loss": 0.114, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0382461547851562, + "rewards/margins": 1.5148041248321533, + "rewards/rejected": 0.5234420895576477, + "step": 16591 + }, + { + "epoch": 0.97, + "learning_rate": 3.109320925057446e-10, + "logits/chosen": -1.8636354207992554, + "logits/rejected": -1.864911675453186, + "logps/chosen": -2.0182254314422607, + "logps/rejected": -109.14091491699219, + "loss": 0.3724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17558465898036957, + "rewards/margins": 1.869537591934204, + "rewards/rejected": -1.6939529180526733, + "step": 16592 + }, + { + "epoch": 0.97, + "learning_rate": 3.09883618934198e-10, + "logits/chosen": -2.000143051147461, + "logits/rejected": -2.0052101612091064, + "logps/chosen": -27.196758270263672, + "logps/rejected": -303.1416015625, + "loss": 0.3881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14321556687355042, + "rewards/margins": 1.368984580039978, + "rewards/rejected": -1.22576904296875, + "step": 16593 + }, + { + "epoch": 0.97, + "learning_rate": 3.088369105986477e-10, + "logits/chosen": -1.9670439958572388, + "logits/rejected": -1.968637228012085, + "logps/chosen": -0.005044986493885517, + "logps/rejected": -179.11166381835938, + "loss": 0.3595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002870960161089897, + "rewards/margins": 3.0089261531829834, + "rewards/rejected": -3.0092132091522217, + "step": 16594 + }, + { + "epoch": 0.97, + "learning_rate": 3.0779196753626945e-10, + "logits/chosen": -1.6914290189743042, + "logits/rejected": -1.6876531839370728, + "logps/chosen": -7.198067665100098, + "logps/rejected": -109.94945526123047, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21983681619167328, + "rewards/margins": 2.0130810737609863, + "rewards/rejected": -1.7932442426681519, + "step": 16595 + }, + { + "epoch": 0.97, + "learning_rate": 3.0674878978420025e-10, + "logits/chosen": -2.0234837532043457, + "logits/rejected": -2.04058837890625, + "logps/chosen": -198.81185913085938, + "logps/rejected": -338.2640380859375, + "loss": 0.1237, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0350341796875, + "rewards/margins": 2.3893585205078125, + "rewards/rejected": -1.3543243408203125, + "step": 16596 + }, + { + "epoch": 0.97, + "learning_rate": 3.057073773794883e-10, + "logits/chosen": -1.818574070930481, + "logits/rejected": -1.8292760848999023, + "logps/chosen": -37.63679504394531, + "logps/rejected": -292.1159362792969, + "loss": 0.1854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5311199426651001, + "rewards/margins": 5.845025062561035, + "rewards/rejected": -5.313905239105225, + "step": 16597 + }, + { + "epoch": 0.97, + "learning_rate": 3.0466773035912606e-10, + "logits/chosen": -1.99859619140625, + "logits/rejected": -1.9985368251800537, + "logps/chosen": -188.98370361328125, + "logps/rejected": -350.13641357421875, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.078930616378784, + "rewards/margins": 2.599966287612915, + "rewards/rejected": 0.478964239358902, + "step": 16598 + }, + { + "epoch": 0.97, + "learning_rate": 3.036298487600564e-10, + "logits/chosen": -2.016263008117676, + "logits/rejected": -2.0123789310455322, + "logps/chosen": -0.0014226825442165136, + "logps/rejected": -204.56222534179688, + "loss": 0.3564, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.386700260918587e-05, + "rewards/margins": 2.9694337844848633, + "rewards/rejected": -2.9694976806640625, + "step": 16599 + }, + { + "epoch": 0.97, + "learning_rate": 3.025937326191441e-10, + "logits/chosen": -1.8886184692382812, + "logits/rejected": -1.8903440237045288, + "logps/chosen": -184.65350341796875, + "logps/rejected": -181.41366577148438, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7860352993011475, + "rewards/margins": 2.832820177078247, + "rewards/rejected": -0.04678497463464737, + "step": 16600 + }, + { + "epoch": 0.97, + "learning_rate": 3.015593819732043e-10, + "logits/chosen": -1.9399527311325073, + "logits/rejected": -1.935734748840332, + "logps/chosen": -77.2720947265625, + "logps/rejected": -259.77703857421875, + "loss": 0.1829, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8454086184501648, + "rewards/margins": 2.278090000152588, + "rewards/rejected": -1.4326813220977783, + "step": 16601 + }, + { + "epoch": 0.97, + "learning_rate": 3.005267968589742e-10, + "logits/chosen": -1.9796018600463867, + "logits/rejected": -2.0333690643310547, + "logps/chosen": -201.53482055664062, + "logps/rejected": -225.89797973632812, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2807953357696533, + "rewards/margins": 2.651075839996338, + "rewards/rejected": -0.3702804744243622, + "step": 16602 + }, + { + "epoch": 0.97, + "learning_rate": 2.994959773131356e-10, + "logits/chosen": -1.9104483127593994, + "logits/rejected": -1.915132999420166, + "logps/chosen": -5.693953514099121, + "logps/rejected": -139.7925567626953, + "loss": 0.937, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.18931666016578674, + "rewards/margins": -0.6335854530334473, + "rewards/rejected": 0.4442687928676605, + "step": 16603 + }, + { + "epoch": 0.97, + "learning_rate": 2.9846692337230915e-10, + "logits/chosen": -1.9449418783187866, + "logits/rejected": -1.9396898746490479, + "logps/chosen": -222.63059997558594, + "logps/rejected": -373.0709533691406, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0055465698242188, + "rewards/margins": 2.1036391258239746, + "rewards/rejected": -1.0980926752090454, + "step": 16604 + }, + { + "epoch": 0.97, + "learning_rate": 2.9743963507306014e-10, + "logits/chosen": -1.7315804958343506, + "logits/rejected": -1.7344740629196167, + "logps/chosen": -83.80770111083984, + "logps/rejected": -230.05978393554688, + "loss": 0.1612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8911392092704773, + "rewards/margins": 5.999635219573975, + "rewards/rejected": -5.108496189117432, + "step": 16605 + }, + { + "epoch": 0.97, + "learning_rate": 2.9641411245187596e-10, + "logits/chosen": -1.7642515897750854, + "logits/rejected": -1.7607810497283936, + "logps/chosen": -35.690635681152344, + "logps/rejected": -122.0430679321289, + "loss": 0.3884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4087776243686676, + "rewards/margins": 0.9012092351913452, + "rewards/rejected": -0.492431640625, + "step": 16606 + }, + { + "epoch": 0.97, + "learning_rate": 2.953903555451831e-10, + "logits/chosen": -1.8782812356948853, + "logits/rejected": -1.8566306829452515, + "logps/chosen": -158.86221313476562, + "logps/rejected": -306.71197509765625, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0533249378204346, + "rewards/margins": 3.4705307483673096, + "rewards/rejected": -0.417205810546875, + "step": 16607 + }, + { + "epoch": 0.97, + "learning_rate": 2.9436836438936353e-10, + "logits/chosen": -1.8683913946151733, + "logits/rejected": -1.8642799854278564, + "logps/chosen": -14.629280090332031, + "logps/rejected": -268.1617736816406, + "loss": 0.1941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6141563653945923, + "rewards/margins": 6.32586145401001, + "rewards/rejected": -5.711705207824707, + "step": 16608 + }, + { + "epoch": 0.97, + "learning_rate": 2.9334813902071045e-10, + "logits/chosen": -1.8070772886276245, + "logits/rejected": -1.8077125549316406, + "logps/chosen": -0.9930463433265686, + "logps/rejected": -156.0468292236328, + "loss": 0.4879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09551776200532913, + "rewards/margins": 1.2806129455566406, + "rewards/rejected": -1.3761307001113892, + "step": 16609 + }, + { + "epoch": 0.97, + "learning_rate": 2.923296794754726e-10, + "logits/chosen": -1.860321283340454, + "logits/rejected": -1.8441522121429443, + "logps/chosen": -192.44580078125, + "logps/rejected": -379.04437255859375, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.876185655593872, + "rewards/margins": 3.4978103637695312, + "rewards/rejected": -1.6216248273849487, + "step": 16610 + }, + { + "epoch": 0.97, + "learning_rate": 2.9131298578983776e-10, + "logits/chosen": -1.931146502494812, + "logits/rejected": -1.9128472805023193, + "logps/chosen": -116.5587158203125, + "logps/rejected": -285.3180847167969, + "loss": 0.0909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0272064208984375, + "rewards/margins": 2.185446262359619, + "rewards/rejected": -1.158239722251892, + "step": 16611 + }, + { + "epoch": 0.97, + "learning_rate": 2.902980579999159e-10, + "logits/chosen": -1.8180572986602783, + "logits/rejected": -1.809139370918274, + "logps/chosen": -85.99189758300781, + "logps/rejected": -265.4095458984375, + "loss": 0.1631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42500001192092896, + "rewards/margins": 5.448405742645264, + "rewards/rejected": -5.0234055519104, + "step": 16612 + }, + { + "epoch": 0.97, + "learning_rate": 2.8928489614176153e-10, + "logits/chosen": -1.8518692255020142, + "logits/rejected": -1.8632357120513916, + "logps/chosen": -267.0569763183594, + "logps/rejected": -465.82080078125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.166241407394409, + "rewards/margins": 5.333502292633057, + "rewards/rejected": -2.1672608852386475, + "step": 16613 + }, + { + "epoch": 0.97, + "learning_rate": 2.8827350025136257e-10, + "logits/chosen": -1.812631607055664, + "logits/rejected": -1.8139785528182983, + "logps/chosen": -42.031097412109375, + "logps/rejected": -264.9823303222656, + "loss": 0.0934, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.159380316734314, + "rewards/margins": 4.674190521240234, + "rewards/rejected": -3.51481032371521, + "step": 16614 + }, + { + "epoch": 0.97, + "learning_rate": 2.872638703646624e-10, + "logits/chosen": -1.8328992128372192, + "logits/rejected": -1.8406680822372437, + "logps/chosen": -140.62640380859375, + "logps/rejected": -279.0446472167969, + "loss": 0.148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8194778561592102, + "rewards/margins": 3.5450730323791504, + "rewards/rejected": -2.725595235824585, + "step": 16615 + }, + { + "epoch": 0.97, + "learning_rate": 2.8625600651752124e-10, + "logits/chosen": -1.861235499382019, + "logits/rejected": -1.8638840913772583, + "logps/chosen": -0.04061713069677353, + "logps/rejected": -184.02810668945312, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007210806361399591, + "rewards/margins": 5.377608776092529, + "rewards/rejected": -5.378329753875732, + "step": 16616 + }, + { + "epoch": 0.97, + "learning_rate": 2.8524990874574385e-10, + "logits/chosen": -1.886838674545288, + "logits/rejected": -1.7608380317687988, + "logps/chosen": -259.8980407714844, + "logps/rejected": -601.0631103515625, + "loss": 0.0492, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.535177707672119, + "rewards/margins": 3.2863006591796875, + "rewards/rejected": -0.7511230707168579, + "step": 16617 + }, + { + "epoch": 0.97, + "learning_rate": 2.842455770850627e-10, + "logits/chosen": -2.041375160217285, + "logits/rejected": -1.98381507396698, + "logps/chosen": -192.2280731201172, + "logps/rejected": -524.8944702148438, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8933913707733154, + "rewards/margins": 3.0830276012420654, + "rewards/rejected": -0.18963623046875, + "step": 16618 + }, + { + "epoch": 0.97, + "learning_rate": 2.832430115711715e-10, + "logits/chosen": -1.8220921754837036, + "logits/rejected": -1.8185813426971436, + "logps/chosen": -18.706745147705078, + "logps/rejected": -352.15155029296875, + "loss": 0.1981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5453785061836243, + "rewards/margins": 5.45398473739624, + "rewards/rejected": -4.908606052398682, + "step": 16619 + }, + { + "epoch": 0.97, + "learning_rate": 2.822422122396806e-10, + "logits/chosen": -1.9736348390579224, + "logits/rejected": -1.969557523727417, + "logps/chosen": -16.578733444213867, + "logps/rejected": -113.21063232421875, + "loss": 0.8249, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.10626678913831711, + "rewards/margins": -0.5602073669433594, + "rewards/rejected": 0.6664741635322571, + "step": 16620 + }, + { + "epoch": 0.97, + "learning_rate": 2.8124317912613937e-10, + "logits/chosen": -1.8411352634429932, + "logits/rejected": -1.8469014167785645, + "logps/chosen": -33.191078186035156, + "logps/rejected": -166.7103729248047, + "loss": 0.4807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34056147933006287, + "rewards/margins": 1.9249154329299927, + "rewards/rejected": -2.265476942062378, + "step": 16621 + }, + { + "epoch": 0.97, + "learning_rate": 2.8024591226604166e-10, + "logits/chosen": -1.856147289276123, + "logits/rejected": -1.8593353033065796, + "logps/chosen": -0.0007595160859636962, + "logps/rejected": -289.81591796875, + "loss": 0.3265, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.730911263730377e-05, + "rewards/margins": 9.315340995788574, + "rewards/rejected": -9.315408706665039, + "step": 16622 + }, + { + "epoch": 0.97, + "learning_rate": 2.7925041169481466e-10, + "logits/chosen": -1.7894306182861328, + "logits/rejected": -1.80697500705719, + "logps/chosen": -217.63189697265625, + "logps/rejected": -274.7693786621094, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9239838123321533, + "rewards/margins": 2.296307325363159, + "rewards/rejected": 0.6276764273643494, + "step": 16623 + }, + { + "epoch": 0.97, + "learning_rate": 2.7825667744782456e-10, + "logits/chosen": -1.8446290493011475, + "logits/rejected": -1.8335545063018799, + "logps/chosen": -13.44693660736084, + "logps/rejected": -226.41070556640625, + "loss": 0.4189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14189310371875763, + "rewards/margins": 4.8276262283325195, + "rewards/rejected": -4.969519138336182, + "step": 16624 + }, + { + "epoch": 0.97, + "learning_rate": 2.7726470956037085e-10, + "logits/chosen": -1.9951118230819702, + "logits/rejected": -1.9345401525497437, + "logps/chosen": -159.15956115722656, + "logps/rejected": -405.2690734863281, + "loss": 0.1011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5580490827560425, + "rewards/margins": 2.8534317016601562, + "rewards/rejected": -1.2953827381134033, + "step": 16625 + }, + { + "epoch": 0.97, + "learning_rate": 2.762745080676976e-10, + "logits/chosen": -1.811381459236145, + "logits/rejected": -1.9276541471481323, + "logps/chosen": -171.9447021484375, + "logps/rejected": -409.87847900390625, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.30419921875, + "rewards/margins": 2.1429686546325684, + "rewards/rejected": -0.8387695550918579, + "step": 16626 + }, + { + "epoch": 0.97, + "learning_rate": 2.752860730049711e-10, + "logits/chosen": -1.8802173137664795, + "logits/rejected": -1.8613414764404297, + "logps/chosen": -7.462307985406369e-05, + "logps/rejected": -199.51344299316406, + "loss": 0.3762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.817743577703368e-05, + "rewards/margins": 2.4438352584838867, + "rewards/rejected": -2.443817138671875, + "step": 16627 + }, + { + "epoch": 0.97, + "learning_rate": 2.742994044073244e-10, + "logits/chosen": -1.7607805728912354, + "logits/rejected": -1.757339596748352, + "logps/chosen": -22.61454963684082, + "logps/rejected": -149.1018524169922, + "loss": 0.4486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5471062064170837, + "rewards/margins": 0.49928975105285645, + "rewards/rejected": 0.04781646654009819, + "step": 16628 + }, + { + "epoch": 0.97, + "learning_rate": 2.733145023097905e-10, + "logits/chosen": -1.9180042743682861, + "logits/rejected": -1.9146077632904053, + "logps/chosen": -0.0018122394103556871, + "logps/rejected": -196.88572692871094, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015838470426388085, + "rewards/margins": 3.1059436798095703, + "rewards/rejected": -3.1061019897460938, + "step": 16629 + }, + { + "epoch": 0.97, + "learning_rate": 2.723313667473637e-10, + "logits/chosen": -2.091585159301758, + "logits/rejected": -2.0817039012908936, + "logps/chosen": -14.296514511108398, + "logps/rejected": -234.99032592773438, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20424441993236542, + "rewards/margins": 4.992100238800049, + "rewards/rejected": -4.787855625152588, + "step": 16630 + }, + { + "epoch": 0.97, + "learning_rate": 2.713499977549771e-10, + "logits/chosen": -1.927441120147705, + "logits/rejected": -1.9181580543518066, + "logps/chosen": -3.693568229675293, + "logps/rejected": -258.18023681640625, + "loss": 0.3789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11871111392974854, + "rewards/margins": 5.517769813537598, + "rewards/rejected": -5.636480808258057, + "step": 16631 + }, + { + "epoch": 0.97, + "learning_rate": 2.703703953674863e-10, + "logits/chosen": -2.014171838760376, + "logits/rejected": -2.0089635848999023, + "logps/chosen": -9.84644066193141e-05, + "logps/rejected": -70.13291931152344, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.026421952905366e-06, + "rewards/margins": 0.022121930494904518, + "rewards/rejected": -0.022119903936982155, + "step": 16632 + }, + { + "epoch": 0.97, + "learning_rate": 2.6939255961968553e-10, + "logits/chosen": -1.9142804145812988, + "logits/rejected": -1.9276514053344727, + "logps/chosen": -200.92935180664062, + "logps/rejected": -244.27947998046875, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3223389387130737, + "rewards/margins": 0.03003549575805664, + "rewards/rejected": 1.292303442955017, + "step": 16633 + }, + { + "epoch": 0.97, + "learning_rate": 2.6841649054633043e-10, + "logits/chosen": -1.785515546798706, + "logits/rejected": -1.847055196762085, + "logps/chosen": -291.5902404785156, + "logps/rejected": -442.0563049316406, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.732891798019409, + "rewards/margins": 6.030035495758057, + "rewards/rejected": -3.2971436977386475, + "step": 16634 + }, + { + "epoch": 0.97, + "learning_rate": 2.674421881820765e-10, + "logits/chosen": -1.8005486726760864, + "logits/rejected": -1.7985551357269287, + "logps/chosen": -158.682373046875, + "logps/rejected": -316.54071044921875, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2888457775115967, + "rewards/margins": 3.0855255126953125, + "rewards/rejected": -0.796679675579071, + "step": 16635 + }, + { + "epoch": 0.97, + "learning_rate": 2.664696525615462e-10, + "logits/chosen": -1.9585464000701904, + "logits/rejected": -1.9358311891555786, + "logps/chosen": -141.80914306640625, + "logps/rejected": -406.30462646484375, + "loss": 0.2277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5195327997207642, + "rewards/margins": 0.7754104733467102, + "rewards/rejected": 0.744122326374054, + "step": 16636 + }, + { + "epoch": 0.97, + "learning_rate": 2.654988837192895e-10, + "logits/chosen": -1.7765462398529053, + "logits/rejected": -1.7788809537887573, + "logps/chosen": -12.14785385131836, + "logps/rejected": -162.96127319335938, + "loss": 0.2625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5157710909843445, + "rewards/margins": 3.22324800491333, + "rewards/rejected": -2.707476854324341, + "step": 16637 + }, + { + "epoch": 0.97, + "learning_rate": 2.645298816897901e-10, + "logits/chosen": -1.7833300828933716, + "logits/rejected": -1.7534096240997314, + "logps/chosen": -208.77337646484375, + "logps/rejected": -306.5415954589844, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.970623731613159, + "rewards/margins": 3.064126491546631, + "rewards/rejected": -0.09350281208753586, + "step": 16638 + }, + { + "epoch": 0.97, + "learning_rate": 2.635626465074703e-10, + "logits/chosen": -1.7484692335128784, + "logits/rejected": -1.741645336151123, + "logps/chosen": -227.80007934570312, + "logps/rejected": -418.33642578125, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7895278930664062, + "rewards/margins": 4.603401184082031, + "rewards/rejected": -2.813873291015625, + "step": 16639 + }, + { + "epoch": 0.97, + "learning_rate": 2.6259717820669156e-10, + "logits/chosen": -1.7438397407531738, + "logits/rejected": -1.7301630973815918, + "logps/chosen": -72.89073944091797, + "logps/rejected": -419.7713623046875, + "loss": 0.4687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5697975158691406, + "rewards/margins": 10.77901840209961, + "rewards/rejected": -11.34881591796875, + "step": 16640 + }, + { + "epoch": 0.97, + "learning_rate": 2.616334768217543e-10, + "logits/chosen": -1.9134413003921509, + "logits/rejected": -1.9163579940795898, + "logps/chosen": -27.144271850585938, + "logps/rejected": -115.69810485839844, + "loss": 0.4304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40560492873191833, + "rewards/margins": 1.0079472064971924, + "rewards/rejected": -0.6023422479629517, + "step": 16641 + }, + { + "epoch": 0.97, + "learning_rate": 2.6067154238689214e-10, + "logits/chosen": -1.82148015499115, + "logits/rejected": -1.823870301246643, + "logps/chosen": -175.76007080078125, + "logps/rejected": -300.3359375, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4883651733398438, + "rewards/margins": 2.3680405616760254, + "rewards/rejected": 0.12032470852136612, + "step": 16642 + }, + { + "epoch": 0.97, + "learning_rate": 2.5971137493628336e-10, + "logits/chosen": -1.92978036403656, + "logits/rejected": -1.9352777004241943, + "logps/chosen": -46.88724136352539, + "logps/rejected": -72.36103057861328, + "loss": 0.9907, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2629005908966064, + "rewards/margins": 0.1795501708984375, + "rewards/rejected": -1.442450761795044, + "step": 16643 + }, + { + "epoch": 0.97, + "learning_rate": 2.5875297450402843e-10, + "logits/chosen": -1.885470986366272, + "logits/rejected": -1.8847118616104126, + "logps/chosen": -12.128474235534668, + "logps/rejected": -70.50568389892578, + "loss": 0.7879, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.11259632557630539, + "rewards/margins": -0.4677373766899109, + "rewards/rejected": 0.5803337097167969, + "step": 16644 + }, + { + "epoch": 0.97, + "learning_rate": 2.577963411241779e-10, + "logits/chosen": -1.9324973821640015, + "logits/rejected": -1.9556365013122559, + "logps/chosen": -183.66847229003906, + "logps/rejected": -268.28009033203125, + "loss": 0.0649, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5039443969726562, + "rewards/margins": 2.1855666637420654, + "rewards/rejected": 0.31837770342826843, + "step": 16645 + }, + { + "epoch": 0.97, + "learning_rate": 2.568414748307157e-10, + "logits/chosen": -1.9061110019683838, + "logits/rejected": -1.8908758163452148, + "logps/chosen": -3.421263681957498e-05, + "logps/rejected": -237.70547485351562, + "loss": 0.3432, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.7683716530855236e-08, + "rewards/margins": 5.783477783203125, + "rewards/rejected": -5.783477783203125, + "step": 16646 + }, + { + "epoch": 0.97, + "learning_rate": 2.5588837565756473e-10, + "logits/chosen": -1.8425298929214478, + "logits/rejected": -1.8437230587005615, + "logps/chosen": -29.925731658935547, + "logps/rejected": -197.24972534179688, + "loss": 0.5319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21334953606128693, + "rewards/margins": 1.366127371788025, + "rewards/rejected": -1.5794769525527954, + "step": 16647 + }, + { + "epoch": 0.97, + "learning_rate": 2.549370436385867e-10, + "logits/chosen": -1.8490266799926758, + "logits/rejected": -1.8457467555999756, + "logps/chosen": -192.4002685546875, + "logps/rejected": -264.5014953613281, + "loss": 0.129, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.158796787261963, + "rewards/margins": 1.761988878250122, + "rewards/rejected": 0.39680787920951843, + "step": 16648 + }, + { + "epoch": 0.97, + "learning_rate": 2.5398747880757134e-10, + "logits/chosen": -2.0701537132263184, + "logits/rejected": -2.067704677581787, + "logps/chosen": -23.755151748657227, + "logps/rejected": -119.94194030761719, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.759236752986908, + "rewards/margins": 1.140866756439209, + "rewards/rejected": -0.38162994384765625, + "step": 16649 + }, + { + "epoch": 0.97, + "learning_rate": 2.530396811982527e-10, + "logits/chosen": -2.1201066970825195, + "logits/rejected": -2.114694833755493, + "logps/chosen": -30.191810607910156, + "logps/rejected": -144.9174346923828, + "loss": 0.6406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21822242438793182, + "rewards/margins": 0.49359244108200073, + "rewards/rejected": -0.7118148803710938, + "step": 16650 + }, + { + "epoch": 0.97, + "learning_rate": 2.5209365084430943e-10, + "logits/chosen": -1.9758068323135376, + "logits/rejected": -1.9688916206359863, + "logps/chosen": -29.464929580688477, + "logps/rejected": -259.06939697265625, + "loss": 0.1163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0044759511947632, + "rewards/margins": 4.265564441680908, + "rewards/rejected": -3.2610886096954346, + "step": 16651 + }, + { + "epoch": 0.97, + "learning_rate": 2.511493877793369e-10, + "logits/chosen": -2.0459463596343994, + "logits/rejected": -2.0321223735809326, + "logps/chosen": -14.253101348876953, + "logps/rejected": -133.7333526611328, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38149699568748474, + "rewards/margins": 1.5427038669586182, + "rewards/rejected": -1.161206841468811, + "step": 16652 + }, + { + "epoch": 0.97, + "learning_rate": 2.5020689203688605e-10, + "logits/chosen": -1.9350334405899048, + "logits/rejected": -1.9160351753234863, + "logps/chosen": -157.22056579589844, + "logps/rejected": -349.44683837890625, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0048751831054688, + "rewards/margins": 2.292790174484253, + "rewards/rejected": -0.28791505098342896, + "step": 16653 + }, + { + "epoch": 0.97, + "learning_rate": 2.4926616365044117e-10, + "logits/chosen": -1.891890048980713, + "logits/rejected": -1.8961083889007568, + "logps/chosen": -0.0003082506882492453, + "logps/rejected": -237.49832153320312, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1860416634590365e-05, + "rewards/margins": 5.3543477058410645, + "rewards/rejected": -5.3543596267700195, + "step": 16654 + }, + { + "epoch": 0.97, + "learning_rate": 2.4832720265342557e-10, + "logits/chosen": -1.9949190616607666, + "logits/rejected": -1.9979777336120605, + "logps/chosen": -149.759521484375, + "logps/rejected": -229.10646057128906, + "loss": 0.1753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3361434936523438, + "rewards/margins": 1.1585906744003296, + "rewards/rejected": 0.17755280435085297, + "step": 16655 + }, + { + "epoch": 0.97, + "learning_rate": 2.473900090791792e-10, + "logits/chosen": -1.9657599925994873, + "logits/rejected": -1.9599493741989136, + "logps/chosen": -2.2168846130371094, + "logps/rejected": -27.635374069213867, + "loss": 0.6475, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.15565136075019836, + "rewards/margins": -0.025212779641151428, + "rewards/rejected": 0.1808641403913498, + "step": 16656 + }, + { + "epoch": 0.97, + "learning_rate": 2.4645458296100875e-10, + "logits/chosen": -1.8991883993148804, + "logits/rejected": -1.9076473712921143, + "logps/chosen": -33.157066345214844, + "logps/rejected": -185.87107849121094, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8002575039863586, + "rewards/margins": 1.2756359577178955, + "rewards/rejected": -0.4753784239292145, + "step": 16657 + }, + { + "epoch": 0.97, + "learning_rate": 2.455209243321488e-10, + "logits/chosen": -1.88324773311615, + "logits/rejected": -1.879595160484314, + "logps/chosen": -42.71490478515625, + "logps/rejected": -299.76434326171875, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0549472570419312, + "rewards/margins": 4.794074058532715, + "rewards/rejected": -3.739126682281494, + "step": 16658 + }, + { + "epoch": 0.97, + "learning_rate": 2.445890332257561e-10, + "logits/chosen": -1.8736454248428345, + "logits/rejected": -1.9147714376449585, + "logps/chosen": -244.1373748779297, + "logps/rejected": -240.98974609375, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6335036754608154, + "rewards/margins": 3.6673383712768555, + "rewards/rejected": -1.0338348150253296, + "step": 16659 + }, + { + "epoch": 0.97, + "learning_rate": 2.4365890967493753e-10, + "logits/chosen": -2.0862884521484375, + "logits/rejected": -2.084796190261841, + "logps/chosen": -45.73064041137695, + "logps/rejected": -234.7149200439453, + "loss": 0.5115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5792407989501953, + "rewards/margins": 2.4836788177490234, + "rewards/rejected": -3.0629196166992188, + "step": 16660 + }, + { + "epoch": 0.97, + "learning_rate": 2.427305537127444e-10, + "logits/chosen": -1.8330954313278198, + "logits/rejected": -1.8158472776412964, + "logps/chosen": -176.42416381835938, + "logps/rejected": -343.2879333496094, + "loss": 0.1257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2346954345703125, + "rewards/margins": 2.7490265369415283, + "rewards/rejected": -1.5143311023712158, + "step": 16661 + }, + { + "epoch": 0.97, + "learning_rate": 2.4180396537215043e-10, + "logits/chosen": -2.058364152908325, + "logits/rejected": -2.053497552871704, + "logps/chosen": -27.9971981048584, + "logps/rejected": -241.55221557617188, + "loss": 0.3504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0965091735124588, + "rewards/margins": 3.1392695903778076, + "rewards/rejected": -3.23577880859375, + "step": 16662 + }, + { + "epoch": 0.97, + "learning_rate": 2.408791446860736e-10, + "logits/chosen": -1.813333511352539, + "logits/rejected": -1.8081815242767334, + "logps/chosen": -45.10612106323242, + "logps/rejected": -241.3961181640625, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8857013583183289, + "rewards/margins": 3.217477798461914, + "rewards/rejected": -2.3317763805389404, + "step": 16663 + }, + { + "epoch": 0.97, + "learning_rate": 2.3995609168736555e-10, + "logits/chosen": -1.8842267990112305, + "logits/rejected": -1.8756158351898193, + "logps/chosen": -184.09828186035156, + "logps/rejected": -263.15655517578125, + "loss": 0.3117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.556445300579071, + "rewards/margins": 0.794512927532196, + "rewards/rejected": -0.238067626953125, + "step": 16664 + }, + { + "epoch": 0.97, + "learning_rate": 2.390348064088166e-10, + "logits/chosen": -1.8981504440307617, + "logits/rejected": -1.8947007656097412, + "logps/chosen": -26.386335372924805, + "logps/rejected": -221.29620361328125, + "loss": 0.2598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49540042877197266, + "rewards/margins": 1.855338454246521, + "rewards/rejected": -1.3599380254745483, + "step": 16665 + }, + { + "epoch": 0.97, + "learning_rate": 2.381152888831672e-10, + "logits/chosen": -1.9596049785614014, + "logits/rejected": -1.9271166324615479, + "logps/chosen": -169.86512756347656, + "logps/rejected": -326.1356201171875, + "loss": 0.0791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6395645141601562, + "rewards/margins": 2.880357503890991, + "rewards/rejected": -2.240792989730835, + "step": 16666 + }, + { + "epoch": 0.97, + "learning_rate": 2.371975391430692e-10, + "logits/chosen": -1.7549452781677246, + "logits/rejected": -1.7721184492111206, + "logps/chosen": -154.90908813476562, + "logps/rejected": -253.32205200195312, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5248841047286987, + "rewards/margins": 3.996631145477295, + "rewards/rejected": -2.4717469215393066, + "step": 16667 + }, + { + "epoch": 0.97, + "learning_rate": 2.3628155722112965e-10, + "logits/chosen": -1.9337464570999146, + "logits/rejected": -1.9288783073425293, + "logps/chosen": -23.16799545288086, + "logps/rejected": -183.13526916503906, + "loss": 0.2791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29519253969192505, + "rewards/margins": 4.649594783782959, + "rewards/rejected": -4.3544020652771, + "step": 16668 + }, + { + "epoch": 0.97, + "learning_rate": 2.3536734314988927e-10, + "logits/chosen": -2.0149588584899902, + "logits/rejected": -2.0132570266723633, + "logps/chosen": -95.88842010498047, + "logps/rejected": -300.5649108886719, + "loss": 0.1557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7046348452568054, + "rewards/margins": 4.349724769592285, + "rewards/rejected": -3.645089864730835, + "step": 16669 + }, + { + "epoch": 0.97, + "learning_rate": 2.344548969618332e-10, + "logits/chosen": -1.9050707817077637, + "logits/rejected": -1.8906078338623047, + "logps/chosen": -268.1595458984375, + "logps/rejected": -375.67303466796875, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8200012445449829, + "rewards/margins": 0.20289915800094604, + "rewards/rejected": 0.6171020865440369, + "step": 16670 + }, + { + "epoch": 0.97, + "learning_rate": 2.335442186893577e-10, + "logits/chosen": -1.6108187437057495, + "logits/rejected": -1.5712579488754272, + "logps/chosen": -199.6109619140625, + "logps/rejected": -321.8074645996094, + "loss": 0.1653, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9508575201034546, + "rewards/margins": 1.3296387195587158, + "rewards/rejected": 0.6212188601493835, + "step": 16671 + }, + { + "epoch": 0.97, + "learning_rate": 2.3263530836483692e-10, + "logits/chosen": -1.9592477083206177, + "logits/rejected": -1.9862909317016602, + "logps/chosen": -157.43667602539062, + "logps/rejected": -303.07415771484375, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0196595191955566, + "rewards/margins": 1.8228180408477783, + "rewards/rejected": 0.19684143364429474, + "step": 16672 + }, + { + "epoch": 0.97, + "learning_rate": 2.3172816602053946e-10, + "logits/chosen": -1.9445247650146484, + "logits/rejected": -1.906569480895996, + "logps/chosen": -195.769287109375, + "logps/rejected": -289.3786926269531, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7502152919769287, + "rewards/margins": 0.5320146083831787, + "rewards/rejected": 2.21820068359375, + "step": 16673 + }, + { + "epoch": 0.97, + "learning_rate": 2.308227916887062e-10, + "logits/chosen": -2.203075408935547, + "logits/rejected": -2.201934337615967, + "logps/chosen": -30.38667106628418, + "logps/rejected": -261.11944580078125, + "loss": 0.2322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20751933753490448, + "rewards/margins": 4.27334451675415, + "rewards/rejected": -4.06582498550415, + "step": 16674 + }, + { + "epoch": 0.97, + "learning_rate": 2.2991918540148926e-10, + "logits/chosen": -1.9781347513198853, + "logits/rejected": -1.9858145713806152, + "logps/chosen": -233.51950073242188, + "logps/rejected": -308.5263671875, + "loss": 0.1142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.747888207435608, + "rewards/margins": 1.843475341796875, + "rewards/rejected": -0.09558715671300888, + "step": 16675 + }, + { + "epoch": 0.97, + "learning_rate": 2.2901734719100174e-10, + "logits/chosen": -2.131561040878296, + "logits/rejected": -2.1022300720214844, + "logps/chosen": -115.65308380126953, + "logps/rejected": -237.94161987304688, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2923455238342285, + "rewards/margins": 0.2308342456817627, + "rewards/rejected": 2.061511278152466, + "step": 16676 + }, + { + "epoch": 0.97, + "learning_rate": 2.2811727708927363e-10, + "logits/chosen": -2.000973701477051, + "logits/rejected": -1.9909589290618896, + "logps/chosen": -48.08344650268555, + "logps/rejected": -247.29673767089844, + "loss": 0.2217, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.076607584953308, + "rewards/margins": 1.2832344770431519, + "rewards/rejected": -0.20662689208984375, + "step": 16677 + }, + { + "epoch": 0.97, + "learning_rate": 2.2721897512827382e-10, + "logits/chosen": -2.046501398086548, + "logits/rejected": -2.048719882965088, + "logps/chosen": -2.7779715061187744, + "logps/rejected": -69.60343170166016, + "loss": 0.4496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4410833418369293, + "rewards/margins": 0.3586675524711609, + "rewards/rejected": 0.08241577446460724, + "step": 16678 + }, + { + "epoch": 0.97, + "learning_rate": 2.2632244133992673e-10, + "logits/chosen": -1.9000678062438965, + "logits/rejected": -1.8940845727920532, + "logps/chosen": -20.602922439575195, + "logps/rejected": -55.79973602294922, + "loss": 0.4128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.740778923034668, + "rewards/margins": 0.41336116194725037, + "rewards/rejected": 0.3274177610874176, + "step": 16679 + }, + { + "epoch": 0.97, + "learning_rate": 2.2542767575607358e-10, + "logits/chosen": -1.7578089237213135, + "logits/rejected": -1.807568907737732, + "logps/chosen": -162.50070190429688, + "logps/rejected": -375.97265625, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1775238513946533, + "rewards/margins": 0.09122014045715332, + "rewards/rejected": 1.0863037109375, + "step": 16680 + }, + { + "epoch": 0.97, + "learning_rate": 2.245346784085056e-10, + "logits/chosen": -1.8656114339828491, + "logits/rejected": -1.8617141246795654, + "logps/chosen": -175.45729064941406, + "logps/rejected": -310.95001220703125, + "loss": 0.1668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2935043573379517, + "rewards/margins": 1.3715896606445312, + "rewards/rejected": -0.07808532565832138, + "step": 16681 + }, + { + "epoch": 0.97, + "learning_rate": 2.2364344932893632e-10, + "logits/chosen": -1.755967140197754, + "logits/rejected": -1.765730381011963, + "logps/chosen": -220.4197540283203, + "logps/rejected": -296.5599365234375, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2889404296875, + "rewards/margins": 2.868460178375244, + "rewards/rejected": 0.420480340719223, + "step": 16682 + }, + { + "epoch": 0.97, + "learning_rate": 2.2275398854904036e-10, + "logits/chosen": -1.6445531845092773, + "logits/rejected": -1.6339623928070068, + "logps/chosen": -165.31832885742188, + "logps/rejected": -415.36962890625, + "loss": 0.1024, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5354279279708862, + "rewards/margins": 2.3936586380004883, + "rewards/rejected": -0.8582305908203125, + "step": 16683 + }, + { + "epoch": 0.97, + "learning_rate": 2.218662961004092e-10, + "logits/chosen": -2.0062811374664307, + "logits/rejected": -2.0225205421447754, + "logps/chosen": -221.83599853515625, + "logps/rejected": -594.38916015625, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8298981189727783, + "rewards/margins": 10.900625228881836, + "rewards/rejected": -8.070727348327637, + "step": 16684 + }, + { + "epoch": 0.97, + "learning_rate": 2.2098037201457865e-10, + "logits/chosen": -1.8867496252059937, + "logits/rejected": -1.8936527967453003, + "logps/chosen": -252.72286987304688, + "logps/rejected": -473.558349609375, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5898834466934204, + "rewards/margins": 4.475030422210693, + "rewards/rejected": -2.8851470947265625, + "step": 16685 + }, + { + "epoch": 0.97, + "learning_rate": 2.2009621632302355e-10, + "logits/chosen": -1.815459966659546, + "logits/rejected": -1.774771809577942, + "logps/chosen": -271.28497314453125, + "logps/rejected": -562.781005859375, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.363568067550659, + "rewards/margins": 4.947125434875488, + "rewards/rejected": -1.58355712890625, + "step": 16686 + }, + { + "epoch": 0.97, + "learning_rate": 2.192138290571466e-10, + "logits/chosen": -2.0899744033813477, + "logits/rejected": -2.0877208709716797, + "logps/chosen": -47.13092803955078, + "logps/rejected": -198.74252319335938, + "loss": 0.8151, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5858635306358337, + "rewards/margins": -0.20192188024520874, + "rewards/rejected": -0.383941650390625, + "step": 16687 + }, + { + "epoch": 0.97, + "learning_rate": 2.1833321024829487e-10, + "logits/chosen": -2.02887225151062, + "logits/rejected": -2.0236051082611084, + "logps/chosen": -6.628387451171875, + "logps/rejected": -146.05445861816406, + "loss": 0.3553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13726253807544708, + "rewards/margins": 2.433884382247925, + "rewards/rejected": -2.296621799468994, + "step": 16688 + }, + { + "epoch": 0.97, + "learning_rate": 2.1745435992776562e-10, + "logits/chosen": -1.7829794883728027, + "logits/rejected": -1.8014259338378906, + "logps/chosen": -306.3883361816406, + "logps/rejected": -433.84246826171875, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.642102003097534, + "rewards/margins": 4.450213432312012, + "rewards/rejected": -1.808111548423767, + "step": 16689 + }, + { + "epoch": 0.97, + "learning_rate": 2.1657727812676164e-10, + "logits/chosen": -1.8886555433273315, + "logits/rejected": -1.8891160488128662, + "logps/chosen": -18.526094436645508, + "logps/rejected": -196.40521240234375, + "loss": 0.5471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3462093472480774, + "rewards/margins": 1.2659778594970703, + "rewards/rejected": -1.6121872663497925, + "step": 16690 + }, + { + "epoch": 0.97, + "learning_rate": 2.157019648764524e-10, + "logits/chosen": -1.8513693809509277, + "logits/rejected": -1.8539634943008423, + "logps/chosen": -0.20710471272468567, + "logps/rejected": -118.28844451904297, + "loss": 0.5986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002954824361950159, + "rewards/margins": 0.2608901560306549, + "rewards/rejected": -0.2579353451728821, + "step": 16691 + }, + { + "epoch": 0.97, + "learning_rate": 2.1482842020793533e-10, + "logits/chosen": -2.0006394386291504, + "logits/rejected": -1.9945566654205322, + "logps/chosen": -50.1427001953125, + "logps/rejected": -196.6614990234375, + "loss": 0.1998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7143413424491882, + "rewards/margins": 2.075559616088867, + "rewards/rejected": -1.3612183332443237, + "step": 16692 + }, + { + "epoch": 0.97, + "learning_rate": 2.1395664415223557e-10, + "logits/chosen": -1.650765299797058, + "logits/rejected": -1.6448129415512085, + "logps/chosen": -11.771570205688477, + "logps/rejected": -99.92366790771484, + "loss": 0.6161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016737842932343483, + "rewards/margins": 0.3384173512458801, + "rewards/rejected": -0.35515519976615906, + "step": 16693 + }, + { + "epoch": 0.97, + "learning_rate": 2.1308663674032278e-10, + "logits/chosen": -1.7357622385025024, + "logits/rejected": -1.731482744216919, + "logps/chosen": -206.8769989013672, + "logps/rejected": -229.35079956054688, + "loss": 0.2901, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1990982294082642, + "rewards/margins": 0.374542236328125, + "rewards/rejected": 0.8245559930801392, + "step": 16694 + }, + { + "epoch": 0.97, + "learning_rate": 2.1221839800310558e-10, + "logits/chosen": -1.9130675792694092, + "logits/rejected": -1.910239577293396, + "logps/chosen": -51.42095947265625, + "logps/rejected": -172.77471923828125, + "loss": 0.1452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6703681945800781, + "rewards/margins": 3.839728593826294, + "rewards/rejected": -3.169360399246216, + "step": 16695 + }, + { + "epoch": 0.97, + "learning_rate": 2.1135192797143153e-10, + "logits/chosen": -1.952204704284668, + "logits/rejected": -1.9124760627746582, + "logps/chosen": -74.47721862792969, + "logps/rejected": -236.31759643554688, + "loss": 0.1559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6022300720214844, + "rewards/margins": 5.257369518280029, + "rewards/rejected": -4.655139446258545, + "step": 16696 + }, + { + "epoch": 0.97, + "learning_rate": 2.1048722667608155e-10, + "logits/chosen": -1.7795374393463135, + "logits/rejected": -1.7860203981399536, + "logps/chosen": -20.937053680419922, + "logps/rejected": -207.14498901367188, + "loss": 0.1407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9033159613609314, + "rewards/margins": 3.861084222793579, + "rewards/rejected": -2.957768201828003, + "step": 16697 + }, + { + "epoch": 0.97, + "learning_rate": 2.0962429414776995e-10, + "logits/chosen": -1.6023108959197998, + "logits/rejected": -1.6123908758163452, + "logps/chosen": -346.70391845703125, + "logps/rejected": -589.8220825195312, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2756898403167725, + "rewards/margins": 8.182647705078125, + "rewards/rejected": -5.906958103179932, + "step": 16698 + }, + { + "epoch": 0.97, + "learning_rate": 2.0876313041715555e-10, + "logits/chosen": -1.8575650453567505, + "logits/rejected": -1.8573089838027954, + "logps/chosen": -238.65155029296875, + "logps/rejected": -397.3307800292969, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4278900623321533, + "rewards/margins": 4.671792984008789, + "rewards/rejected": -3.2439026832580566, + "step": 16699 + }, + { + "epoch": 0.97, + "learning_rate": 2.0790373551483054e-10, + "logits/chosen": -1.9644789695739746, + "logits/rejected": -1.937922477722168, + "logps/chosen": -151.23080444335938, + "logps/rejected": -541.4866333007812, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8750656843185425, + "rewards/margins": 5.008555889129639, + "rewards/rejected": -3.1334900856018066, + "step": 16700 + }, + { + "epoch": 0.97, + "learning_rate": 2.0704610947132606e-10, + "logits/chosen": -1.9554266929626465, + "logits/rejected": -1.9547202587127686, + "logps/chosen": -4.286396503448486, + "logps/rejected": -79.34194946289062, + "loss": 0.4252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08722062408924103, + "rewards/margins": 1.2912744283676147, + "rewards/rejected": -1.3784950971603394, + "step": 16701 + }, + { + "epoch": 0.97, + "learning_rate": 2.061902523171011e-10, + "logits/chosen": -1.8628309965133667, + "logits/rejected": -1.84835684299469, + "logps/chosen": -47.18901824951172, + "logps/rejected": -190.4273223876953, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31304168701171875, + "rewards/margins": 1.198164463043213, + "rewards/rejected": -0.8851227164268494, + "step": 16702 + }, + { + "epoch": 0.97, + "learning_rate": 2.0533616408257015e-10, + "logits/chosen": -1.8318809270858765, + "logits/rejected": -1.8326234817504883, + "logps/chosen": -21.508731842041016, + "logps/rejected": -84.39653778076172, + "loss": 0.3953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2813173234462738, + "rewards/margins": 1.3240854740142822, + "rewards/rejected": -1.042768120765686, + "step": 16703 + }, + { + "epoch": 0.97, + "learning_rate": 2.0448384479807567e-10, + "logits/chosen": -1.923580288887024, + "logits/rejected": -1.9245926141738892, + "logps/chosen": -9.07732105255127, + "logps/rejected": -31.276317596435547, + "loss": 0.596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12422046810388565, + "rewards/margins": 0.09702110290527344, + "rewards/rejected": 0.027199363335967064, + "step": 16704 + }, + { + "epoch": 0.97, + "learning_rate": 2.0363329449388232e-10, + "logits/chosen": -1.726521372795105, + "logits/rejected": -1.7304786443710327, + "logps/chosen": -221.90054321289062, + "logps/rejected": -487.747802734375, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9920517206192017, + "rewards/margins": 3.729997158050537, + "rewards/rejected": -1.737945556640625, + "step": 16705 + }, + { + "epoch": 0.97, + "learning_rate": 2.027845132002215e-10, + "logits/chosen": -1.7105095386505127, + "logits/rejected": -1.7706955671310425, + "logps/chosen": -224.14501953125, + "logps/rejected": -135.06423950195312, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2967621088027954, + "rewards/margins": 0.09591984748840332, + "rewards/rejected": 1.200842261314392, + "step": 16706 + }, + { + "epoch": 0.97, + "learning_rate": 2.0193750094723573e-10, + "logits/chosen": -1.885467767715454, + "logits/rejected": -1.888550877571106, + "logps/chosen": -0.0019588328432291746, + "logps/rejected": -31.30720329284668, + "loss": 0.6403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004700628574937582, + "rewards/margins": 0.15844647586345673, + "rewards/rejected": -0.1537458449602127, + "step": 16707 + }, + { + "epoch": 0.97, + "learning_rate": 2.0109225776501758e-10, + "logits/chosen": -1.8505868911743164, + "logits/rejected": -1.7856261730194092, + "logps/chosen": -223.89706420898438, + "logps/rejected": -437.3150329589844, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6386871337890625, + "rewards/margins": 3.996060371398926, + "rewards/rejected": -1.3573731184005737, + "step": 16708 + }, + { + "epoch": 0.97, + "learning_rate": 2.0024878368359866e-10, + "logits/chosen": -1.9906946420669556, + "logits/rejected": -1.9597615003585815, + "logps/chosen": -221.8656768798828, + "logps/rejected": -552.8770751953125, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2081360816955566, + "rewards/margins": 4.827459812164307, + "rewards/rejected": -2.61932373046875, + "step": 16709 + }, + { + "epoch": 0.97, + "learning_rate": 1.9940707873293826e-10, + "logits/chosen": -1.955310344696045, + "logits/rejected": -1.940936803817749, + "logps/chosen": -28.575380325317383, + "logps/rejected": -373.7057800292969, + "loss": 0.2397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4750005900859833, + "rewards/margins": 4.351144790649414, + "rewards/rejected": -3.8761444091796875, + "step": 16710 + }, + { + "epoch": 0.97, + "learning_rate": 1.985671429429403e-10, + "logits/chosen": -1.9664396047592163, + "logits/rejected": -1.967591404914856, + "logps/chosen": -10.215974807739258, + "logps/rejected": -324.52813720703125, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050479985773563385, + "rewards/margins": 8.395333290100098, + "rewards/rejected": -8.344853401184082, + "step": 16711 + }, + { + "epoch": 0.97, + "learning_rate": 1.9772897634344198e-10, + "logits/chosen": -1.7123433351516724, + "logits/rejected": -1.7255430221557617, + "logps/chosen": -272.9642028808594, + "logps/rejected": -412.3306579589844, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.360180616378784, + "rewards/margins": 3.1048247814178467, + "rewards/rejected": -0.7446441650390625, + "step": 16712 + }, + { + "epoch": 0.97, + "learning_rate": 1.9689257896421397e-10, + "logits/chosen": -1.7714698314666748, + "logits/rejected": -1.7441489696502686, + "logps/chosen": -146.13853454589844, + "logps/rejected": -340.9411926269531, + "loss": 0.0614, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1447372436523438, + "rewards/margins": 3.6523756980895996, + "rewards/rejected": -1.5076385736465454, + "step": 16713 + }, + { + "epoch": 0.97, + "learning_rate": 1.9605795083498244e-10, + "logits/chosen": -1.7900657653808594, + "logits/rejected": -1.7768902778625488, + "logps/chosen": -157.14285278320312, + "logps/rejected": -349.1153564453125, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9509766101837158, + "rewards/margins": 2.0959534645080566, + "rewards/rejected": -0.14497680962085724, + "step": 16714 + }, + { + "epoch": 0.97, + "learning_rate": 1.9522509198539038e-10, + "logits/chosen": -1.8630633354187012, + "logits/rejected": -1.8696101903915405, + "logps/chosen": -6.46215295791626, + "logps/rejected": -93.39933776855469, + "loss": 0.5852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19100776314735413, + "rewards/margins": 0.3717851936817169, + "rewards/rejected": -0.562792956829071, + "step": 16715 + }, + { + "epoch": 0.97, + "learning_rate": 1.943940024450197e-10, + "logits/chosen": -1.80594003200531, + "logits/rejected": -1.8211679458618164, + "logps/chosen": -324.1495666503906, + "logps/rejected": -375.933349609375, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6544647216796875, + "rewards/margins": 4.098605155944824, + "rewards/rejected": -2.444140672683716, + "step": 16716 + }, + { + "epoch": 0.97, + "learning_rate": 1.9356468224340227e-10, + "logits/chosen": -1.8897156715393066, + "logits/rejected": -1.8918421268463135, + "logps/chosen": -227.2069549560547, + "logps/rejected": -292.74432373046875, + "loss": 0.2536, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.537484884262085, + "rewards/margins": 0.4691803455352783, + "rewards/rejected": 2.0683045387268066, + "step": 16717 + }, + { + "epoch": 0.97, + "learning_rate": 1.9273713140999792e-10, + "logits/chosen": -1.9944216012954712, + "logits/rejected": -1.9889018535614014, + "logps/chosen": -64.03443145751953, + "logps/rejected": -249.0714874267578, + "loss": 0.1479, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4082863330841064, + "rewards/margins": 1.799074649810791, + "rewards/rejected": -0.3907882869243622, + "step": 16718 + }, + { + "epoch": 0.97, + "learning_rate": 1.9191134997419977e-10, + "logits/chosen": -2.09916353225708, + "logits/rejected": -2.0869827270507812, + "logps/chosen": -23.39841079711914, + "logps/rejected": -227.4213409423828, + "loss": 0.2654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4745826721191406, + "rewards/margins": 4.015956878662109, + "rewards/rejected": -3.5413742065429688, + "step": 16719 + }, + { + "epoch": 0.97, + "learning_rate": 1.9108733796535104e-10, + "logits/chosen": -1.9240422248840332, + "logits/rejected": -1.9272994995117188, + "logps/chosen": -28.767004013061523, + "logps/rejected": -134.94529724121094, + "loss": 0.441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3521791398525238, + "rewards/margins": 1.2496000528335571, + "rewards/rejected": -1.6017792224884033, + "step": 16720 + }, + { + "epoch": 0.97, + "learning_rate": 1.9026509541272272e-10, + "logits/chosen": -2.023388385772705, + "logits/rejected": -2.0269434452056885, + "logps/chosen": -0.0004390128015074879, + "logps/rejected": -99.85438537597656, + "loss": 0.5873, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.928599865816068e-06, + "rewards/margins": 0.48131483793258667, + "rewards/rejected": -0.4813247621059418, + "step": 16721 + }, + { + "epoch": 0.97, + "learning_rate": 1.8944462234552483e-10, + "logits/chosen": -2.00290846824646, + "logits/rejected": -2.0016229152679443, + "logps/chosen": -82.0768814086914, + "logps/rejected": -235.9424591064453, + "loss": 0.2123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6196670532226562, + "rewards/margins": 2.859654188156128, + "rewards/rejected": -2.2399871349334717, + "step": 16722 + }, + { + "epoch": 0.97, + "learning_rate": 1.8862591879290068e-10, + "logits/chosen": -1.9218556880950928, + "logits/rejected": -1.9089282751083374, + "logps/chosen": -92.5251235961914, + "logps/rejected": -380.4385681152344, + "loss": 0.2482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10845337063074112, + "rewards/margins": 4.610980033874512, + "rewards/rejected": -4.502526760101318, + "step": 16723 + }, + { + "epoch": 0.97, + "learning_rate": 1.8780898478393815e-10, + "logits/chosen": -1.7973612546920776, + "logits/rejected": -1.8010557889938354, + "logps/chosen": -215.23251342773438, + "logps/rejected": -472.21966552734375, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7927337884902954, + "rewards/margins": 5.363858222961426, + "rewards/rejected": -3.571124315261841, + "step": 16724 + }, + { + "epoch": 0.97, + "learning_rate": 1.8699382034765843e-10, + "logits/chosen": -1.7962579727172852, + "logits/rejected": -1.7990119457244873, + "logps/chosen": -27.299083709716797, + "logps/rejected": -105.03546142578125, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48899155855178833, + "rewards/margins": 1.200927734375, + "rewards/rejected": -0.7119361758232117, + "step": 16725 + }, + { + "epoch": 0.97, + "learning_rate": 1.861804255130217e-10, + "logits/chosen": -1.9912209510803223, + "logits/rejected": -1.9862910509109497, + "logps/chosen": -37.56330871582031, + "logps/rejected": -292.0238342285156, + "loss": 0.4079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23824577033519745, + "rewards/margins": 4.036052227020264, + "rewards/rejected": -4.274298191070557, + "step": 16726 + }, + { + "epoch": 0.97, + "learning_rate": 1.8536880030892154e-10, + "logits/chosen": -2.07558536529541, + "logits/rejected": -2.059009552001953, + "logps/chosen": -0.26907384395599365, + "logps/rejected": -270.2430725097656, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01646554470062256, + "rewards/margins": 5.027530193328857, + "rewards/rejected": -5.0439958572387695, + "step": 16727 + }, + { + "epoch": 0.97, + "learning_rate": 1.8455894476419044e-10, + "logits/chosen": -2.0954861640930176, + "logits/rejected": -2.1033518314361572, + "logps/chosen": -5.258277416229248, + "logps/rejected": -175.42709350585938, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09978609532117844, + "rewards/margins": 3.2746763229370117, + "rewards/rejected": -3.1748902797698975, + "step": 16728 + }, + { + "epoch": 0.97, + "learning_rate": 1.8375085890759979e-10, + "logits/chosen": -1.9842933416366577, + "logits/rejected": -1.9844355583190918, + "logps/chosen": -80.8985595703125, + "logps/rejected": -192.364990234375, + "loss": 0.2243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3402412533760071, + "rewards/margins": 3.0882728099823, + "rewards/rejected": -2.7480316162109375, + "step": 16729 + }, + { + "epoch": 0.97, + "learning_rate": 1.8294454276785443e-10, + "logits/chosen": -1.8970959186553955, + "logits/rejected": -1.8699870109558105, + "logps/chosen": -63.704620361328125, + "logps/rejected": -300.1184387207031, + "loss": 0.0579, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1195068359375, + "rewards/margins": 6.773922920227051, + "rewards/rejected": -5.654416084289551, + "step": 16730 + }, + { + "epoch": 0.97, + "learning_rate": 1.8213999637360367e-10, + "logits/chosen": -1.804128646850586, + "logits/rejected": -1.7970858812332153, + "logps/chosen": -5.75352668762207, + "logps/rejected": -266.356689453125, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18720737099647522, + "rewards/margins": 5.105606555938721, + "rewards/rejected": -4.918399333953857, + "step": 16731 + }, + { + "epoch": 0.97, + "learning_rate": 1.8133721975342463e-10, + "logits/chosen": -2.086806297302246, + "logits/rejected": -2.0416319370269775, + "logps/chosen": -0.00013124602264724672, + "logps/rejected": -533.7123413085938, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6357014323584735e-06, + "rewards/margins": 11.989070892333984, + "rewards/rejected": -11.98907470703125, + "step": 16732 + }, + { + "epoch": 0.97, + "learning_rate": 1.80536212935839e-10, + "logits/chosen": -1.9905039072036743, + "logits/rejected": -1.9886523485183716, + "logps/chosen": -9.337004661560059, + "logps/rejected": -218.16094970703125, + "loss": 0.2903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3175935745239258, + "rewards/margins": 2.775357246398926, + "rewards/rejected": -2.457763671875, + "step": 16733 + }, + { + "epoch": 0.97, + "learning_rate": 1.7973697594929616e-10, + "logits/chosen": -1.7839752435684204, + "logits/rejected": -1.7817339897155762, + "logps/chosen": -95.49386596679688, + "logps/rejected": -239.51071166992188, + "loss": 0.1696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9828934073448181, + "rewards/margins": 2.868830919265747, + "rewards/rejected": -1.8859375715255737, + "step": 16734 + }, + { + "epoch": 0.97, + "learning_rate": 1.7893950882219567e-10, + "logits/chosen": -1.7441444396972656, + "logits/rejected": -1.7461038827896118, + "logps/chosen": -22.544116973876953, + "logps/rejected": -191.64637756347656, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11769580841064453, + "rewards/margins": 3.0100603103637695, + "rewards/rejected": -2.892364501953125, + "step": 16735 + }, + { + "epoch": 0.97, + "learning_rate": 1.7814381158286483e-10, + "logits/chosen": -1.9728388786315918, + "logits/rejected": -1.973513126373291, + "logps/chosen": -173.1669158935547, + "logps/rejected": -313.84075927734375, + "loss": 0.2852, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.355992078781128, + "rewards/margins": 0.37886500358581543, + "rewards/rejected": 1.9771270751953125, + "step": 16736 + }, + { + "epoch": 0.97, + "learning_rate": 1.7734988425956444e-10, + "logits/chosen": -1.482703447341919, + "logits/rejected": -1.4552571773529053, + "logps/chosen": -235.29733276367188, + "logps/rejected": -484.4535217285156, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3396973609924316, + "rewards/margins": 4.457366943359375, + "rewards/rejected": -1.117669701576233, + "step": 16737 + }, + { + "epoch": 0.97, + "learning_rate": 1.765577268805163e-10, + "logits/chosen": -1.9491151571273804, + "logits/rejected": -1.9398647546768188, + "logps/chosen": -175.92636108398438, + "logps/rejected": -272.689208984375, + "loss": 0.1709, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2978286743164062, + "rewards/margins": 1.4074599742889404, + "rewards/rejected": -0.10963135212659836, + "step": 16738 + }, + { + "epoch": 0.97, + "learning_rate": 1.7576733947384237e-10, + "logits/chosen": -1.931929349899292, + "logits/rejected": -1.935005784034729, + "logps/chosen": -13.59338665008545, + "logps/rejected": -113.06130981445312, + "loss": 1.1744, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.963513970375061, + "rewards/margins": -0.6767212152481079, + "rewards/rejected": -0.2867927551269531, + "step": 16739 + }, + { + "epoch": 0.97, + "learning_rate": 1.7497872206763132e-10, + "logits/chosen": -1.9539437294006348, + "logits/rejected": -1.9528285264968872, + "logps/chosen": -50.217384338378906, + "logps/rejected": -202.87396240234375, + "loss": 0.2292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4860069453716278, + "rewards/margins": 1.9927818775177002, + "rewards/rejected": -1.50677490234375, + "step": 16740 + }, + { + "epoch": 0.97, + "learning_rate": 1.741918746898996e-10, + "logits/chosen": -1.9383021593093872, + "logits/rejected": -1.953287124633789, + "logps/chosen": -212.4317626953125, + "logps/rejected": -395.91796875, + "loss": 0.0515, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3114960193634033, + "rewards/margins": 2.935211181640625, + "rewards/rejected": -1.6237152814865112, + "step": 16741 + }, + { + "epoch": 0.97, + "learning_rate": 1.7340679736859153e-10, + "logits/chosen": -1.8400559425354004, + "logits/rejected": -1.8403842449188232, + "logps/chosen": -230.02279663085938, + "logps/rejected": -359.2358093261719, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9195693731307983, + "rewards/margins": 6.508699417114258, + "rewards/rejected": -4.58912992477417, + "step": 16742 + }, + { + "epoch": 0.97, + "learning_rate": 1.7262349013160705e-10, + "logits/chosen": -1.7811838388442993, + "logits/rejected": -1.778457760810852, + "logps/chosen": -276.8505859375, + "logps/rejected": -414.22222900390625, + "loss": 0.2188, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.087725877761841, + "rewards/margins": 0.6995605230331421, + "rewards/rejected": 1.3881653547286987, + "step": 16743 + }, + { + "epoch": 0.97, + "learning_rate": 1.7184195300676828e-10, + "logits/chosen": -1.8290727138519287, + "logits/rejected": -1.8596223592758179, + "logps/chosen": -137.53106689453125, + "logps/rejected": -167.767822265625, + "loss": 0.2501, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.414363145828247, + "rewards/margins": 0.5964905023574829, + "rewards/rejected": 0.8178726434707642, + "step": 16744 + }, + { + "epoch": 0.97, + "learning_rate": 1.7106218602183641e-10, + "logits/chosen": -1.9575310945510864, + "logits/rejected": -1.963572382926941, + "logps/chosen": -46.6347541809082, + "logps/rejected": -163.6234130859375, + "loss": 0.4856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2015388458967209, + "rewards/margins": 1.410195231437683, + "rewards/rejected": -1.6117340326309204, + "step": 16745 + }, + { + "epoch": 0.97, + "learning_rate": 1.7028418920451703e-10, + "logits/chosen": -1.9185383319854736, + "logits/rejected": -1.9173846244812012, + "logps/chosen": -22.076309204101562, + "logps/rejected": -78.30754852294922, + "loss": 0.2593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9855537414550781, + "rewards/margins": 1.4175742864608765, + "rewards/rejected": -0.4320205748081207, + "step": 16746 + }, + { + "epoch": 0.97, + "learning_rate": 1.695079625824436e-10, + "logits/chosen": -1.8451579809188843, + "logits/rejected": -1.8579813241958618, + "logps/chosen": -246.17446899414062, + "logps/rejected": -450.5406799316406, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8328033685684204, + "rewards/margins": 6.957733154296875, + "rewards/rejected": -5.124929904937744, + "step": 16747 + }, + { + "epoch": 0.97, + "learning_rate": 1.6873350618319958e-10, + "logits/chosen": -1.979557991027832, + "logits/rejected": -2.0096404552459717, + "logps/chosen": -143.28338623046875, + "logps/rejected": -262.59405517578125, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5258667469024658, + "rewards/margins": 1.5020813941955566, + "rewards/rejected": 0.02378540113568306, + "step": 16748 + }, + { + "epoch": 0.97, + "learning_rate": 1.6796082003428526e-10, + "logits/chosen": -2.1038661003112793, + "logits/rejected": -2.1022331714630127, + "logps/chosen": -0.00047138039371930063, + "logps/rejected": -75.89430236816406, + "loss": 0.761, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0009676424670033157, + "rewards/margins": -0.29678016901016235, + "rewards/rejected": 0.29774782061576843, + "step": 16749 + }, + { + "epoch": 0.97, + "learning_rate": 1.6718990416316747e-10, + "logits/chosen": -1.712928056716919, + "logits/rejected": -1.7132116556167603, + "logps/chosen": -41.67976379394531, + "logps/rejected": -191.0792694091797, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11954040825366974, + "rewards/margins": 1.1631699800491333, + "rewards/rejected": -1.04362952709198, + "step": 16750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6642075859721328e-10, + "logits/chosen": -1.9703378677368164, + "logits/rejected": -1.9567760229110718, + "logps/chosen": -49.50727081298828, + "logps/rejected": -392.4537353515625, + "loss": 0.0917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.285007119178772, + "rewards/margins": 9.030755996704102, + "rewards/rejected": -7.745748996734619, + "step": 16751 + }, + { + "epoch": 0.97, + "learning_rate": 1.656533833637619e-10, + "logits/chosen": -1.9418953657150269, + "logits/rejected": -1.928270936012268, + "logps/chosen": -275.20404052734375, + "logps/rejected": -463.0641784667969, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.49957275390625, + "rewards/margins": 4.642767429351807, + "rewards/rejected": -2.1431946754455566, + "step": 16752 + }, + { + "epoch": 0.97, + "learning_rate": 1.648877784900693e-10, + "logits/chosen": -1.7775952816009521, + "logits/rejected": -1.7791311740875244, + "logps/chosen": -47.8902473449707, + "logps/rejected": -218.03482055664062, + "loss": 0.2653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2733013331890106, + "rewards/margins": 4.616509437561035, + "rewards/rejected": -4.343208312988281, + "step": 16753 + }, + { + "epoch": 0.97, + "learning_rate": 1.641239440033304e-10, + "logits/chosen": -1.707339882850647, + "logits/rejected": -1.6274561882019043, + "logps/chosen": -171.25637817382812, + "logps/rejected": -431.00225830078125, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8677520751953125, + "rewards/margins": 2.802328586578369, + "rewards/rejected": -0.9345764517784119, + "step": 16754 + }, + { + "epoch": 0.98, + "learning_rate": 1.6336187993067908e-10, + "logits/chosen": -1.932476282119751, + "logits/rejected": -1.9456377029418945, + "logps/chosen": -173.44589233398438, + "logps/rejected": -306.33984375, + "loss": 0.4822, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.7352966666221619, + "rewards/margins": -0.26251524686813354, + "rewards/rejected": 0.9978119134902954, + "step": 16755 + }, + { + "epoch": 0.98, + "learning_rate": 1.6260158629919362e-10, + "logits/chosen": -2.0856411457061768, + "logits/rejected": -2.0700957775115967, + "logps/chosen": -23.9229679107666, + "logps/rejected": -141.72796630859375, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36439037322998047, + "rewards/margins": 2.0906317234039307, + "rewards/rejected": -1.7262413501739502, + "step": 16756 + }, + { + "epoch": 0.98, + "learning_rate": 1.6184306313588582e-10, + "logits/chosen": -2.0933685302734375, + "logits/rejected": -2.0816092491149902, + "logps/chosen": -22.003406524658203, + "logps/rejected": -242.28573608398438, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5770349502563477, + "rewards/margins": 1.0048593282699585, + "rewards/rejected": -0.4278244078159332, + "step": 16757 + }, + { + "epoch": 0.98, + "learning_rate": 1.610863104676896e-10, + "logits/chosen": -1.688273310661316, + "logits/rejected": -1.7500662803649902, + "logps/chosen": -232.25364685058594, + "logps/rejected": -464.7086486816406, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.316319227218628, + "rewards/margins": 8.153715133666992, + "rewards/rejected": -5.837396144866943, + "step": 16758 + }, + { + "epoch": 0.98, + "learning_rate": 1.603313283215002e-10, + "logits/chosen": -1.8915386199951172, + "logits/rejected": -1.878382921218872, + "logps/chosen": -154.05322265625, + "logps/rejected": -403.479248046875, + "loss": 0.057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3578414916992188, + "rewards/margins": 4.455946445465088, + "rewards/rejected": -3.098104953765869, + "step": 16759 + }, + { + "epoch": 0.98, + "learning_rate": 1.59578116724135e-10, + "logits/chosen": -1.9024416208267212, + "logits/rejected": -1.9040026664733887, + "logps/chosen": -263.69921875, + "logps/rejected": -427.9666748046875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7096221446990967, + "rewards/margins": 4.660458564758301, + "rewards/rejected": -1.950836181640625, + "step": 16760 + }, + { + "epoch": 0.98, + "learning_rate": 1.5882667570235043e-10, + "logits/chosen": -2.0281600952148438, + "logits/rejected": -2.0270164012908936, + "logps/chosen": -4.659768581390381, + "logps/rejected": -194.6295928955078, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13496819138526917, + "rewards/margins": 2.6787915229797363, + "rewards/rejected": -2.5438232421875, + "step": 16761 + }, + { + "epoch": 0.98, + "learning_rate": 1.5807700528284173e-10, + "logits/chosen": -1.9600601196289062, + "logits/rejected": -1.9488270282745361, + "logps/chosen": -57.300540924072266, + "logps/rejected": -361.53857421875, + "loss": 0.1714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8148441314697266, + "rewards/margins": 5.038818836212158, + "rewards/rejected": -4.223974704742432, + "step": 16762 + }, + { + "epoch": 0.98, + "learning_rate": 1.5732910549223766e-10, + "logits/chosen": -1.8232237100601196, + "logits/rejected": -1.8256685733795166, + "logps/chosen": -0.0004926258698105812, + "logps/rejected": -76.73118591308594, + "loss": 0.6117, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.950220035330858e-06, + "rewards/margins": 0.3399553596973419, + "rewards/rejected": -0.3399643003940582, + "step": 16763 + }, + { + "epoch": 0.98, + "learning_rate": 1.5658297635711137e-10, + "logits/chosen": -1.6436530351638794, + "logits/rejected": -1.6615229845046997, + "logps/chosen": -252.88087463378906, + "logps/rejected": -369.3682861328125, + "loss": 0.0446, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2799394130706787, + "rewards/margins": 2.6115739345550537, + "rewards/rejected": 0.668365478515625, + "step": 16764 + }, + { + "epoch": 0.98, + "learning_rate": 1.55838617903975e-10, + "logits/chosen": -1.9496220350265503, + "logits/rejected": -1.9477241039276123, + "logps/chosen": -0.0012160835321992636, + "logps/rejected": -195.30490112304688, + "loss": 0.3591, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.422712638392113e-05, + "rewards/margins": 3.414231300354004, + "rewards/rejected": -3.41428542137146, + "step": 16765 + }, + { + "epoch": 0.98, + "learning_rate": 1.5509603015925743e-10, + "logits/chosen": -1.73651921749115, + "logits/rejected": -1.6842938661575317, + "logps/chosen": -134.65499877929688, + "logps/rejected": -205.64132690429688, + "loss": 0.221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8058594465255737, + "rewards/margins": 1.105543613433838, + "rewards/rejected": 0.7003158926963806, + "step": 16766 + }, + { + "epoch": 0.98, + "learning_rate": 1.5435521314934862e-10, + "logits/chosen": -2.0085606575012207, + "logits/rejected": -2.0037500858306885, + "logps/chosen": -26.133438110351562, + "logps/rejected": -166.7935791015625, + "loss": 0.2128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.724505603313446, + "rewards/margins": 3.06376051902771, + "rewards/rejected": -2.339254856109619, + "step": 16767 + }, + { + "epoch": 0.98, + "learning_rate": 1.5361616690056645e-10, + "logits/chosen": -1.9029145240783691, + "logits/rejected": -1.9540437459945679, + "logps/chosen": -272.63165283203125, + "logps/rejected": -405.7601318359375, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.568267822265625, + "rewards/margins": 2.9962525367736816, + "rewards/rejected": -0.4279846251010895, + "step": 16768 + }, + { + "epoch": 0.98, + "learning_rate": 1.528788914391621e-10, + "logits/chosen": -2.0247058868408203, + "logits/rejected": -2.03143048286438, + "logps/chosen": -42.46127700805664, + "logps/rejected": -235.47647094726562, + "loss": 0.1288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2814476490020752, + "rewards/margins": 3.044053077697754, + "rewards/rejected": -1.7626053094863892, + "step": 16769 + }, + { + "epoch": 0.98, + "learning_rate": 1.5214338679132578e-10, + "logits/chosen": -1.865861415863037, + "logits/rejected": -1.868157148361206, + "logps/chosen": -0.0030612589325755835, + "logps/rejected": -119.55050659179688, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020951477927155793, + "rewards/margins": 3.5226054191589355, + "rewards/rejected": -3.522814989089966, + "step": 16770 + }, + { + "epoch": 0.98, + "learning_rate": 1.5140965298319207e-10, + "logits/chosen": -1.9449259042739868, + "logits/rejected": -1.9408992528915405, + "logps/chosen": -0.7582623362541199, + "logps/rejected": -204.26864624023438, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012268763966858387, + "rewards/margins": 4.406157970428467, + "rewards/rejected": -4.418426513671875, + "step": 16771 + }, + { + "epoch": 0.98, + "learning_rate": 1.5067769004082352e-10, + "logits/chosen": -1.6357147693634033, + "logits/rejected": -1.6084766387939453, + "logps/chosen": -139.2921142578125, + "logps/rejected": -212.1320037841797, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.356744408607483, + "rewards/margins": 0.44600069522857666, + "rewards/rejected": 0.9107437133789062, + "step": 16772 + }, + { + "epoch": 0.98, + "learning_rate": 1.4994749799022156e-10, + "logits/chosen": -1.710684895515442, + "logits/rejected": -1.747084617614746, + "logps/chosen": -237.2720947265625, + "logps/rejected": -323.0008544921875, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5599701404571533, + "rewards/margins": 3.8856873512268066, + "rewards/rejected": -1.3257172107696533, + "step": 16773 + }, + { + "epoch": 0.98, + "learning_rate": 1.4921907685732094e-10, + "logits/chosen": -1.8455240726470947, + "logits/rejected": -1.8590643405914307, + "logps/chosen": -1.8208073377609253, + "logps/rejected": -148.56414794921875, + "loss": 0.3842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05021942779421806, + "rewards/margins": 2.682572364807129, + "rewards/rejected": -2.7327919006347656, + "step": 16774 + }, + { + "epoch": 0.98, + "learning_rate": 1.4849242666801763e-10, + "logits/chosen": -1.8529012203216553, + "logits/rejected": -1.839436411857605, + "logps/chosen": -221.9871368408203, + "logps/rejected": -363.76605224609375, + "loss": 0.1165, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.984027147293091, + "rewards/margins": 1.3662842512130737, + "rewards/rejected": 1.617742896080017, + "step": 16775 + }, + { + "epoch": 0.98, + "learning_rate": 1.4776754744810215e-10, + "logits/chosen": -1.9596552848815918, + "logits/rejected": -1.961362600326538, + "logps/chosen": -0.021216433495283127, + "logps/rejected": -132.6168670654297, + "loss": 0.563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012881633592769504, + "rewards/margins": 0.6134762167930603, + "rewards/rejected": -0.614764392375946, + "step": 16776 + }, + { + "epoch": 0.98, + "learning_rate": 1.4704443922334275e-10, + "logits/chosen": -1.8891888856887817, + "logits/rejected": -1.8767503499984741, + "logps/chosen": -224.08116149902344, + "logps/rejected": -349.2040710449219, + "loss": 0.25, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9672164916992188, + "rewards/margins": 0.4581465721130371, + "rewards/rejected": 2.5090699195861816, + "step": 16777 + }, + { + "epoch": 0.98, + "learning_rate": 1.4632310201941888e-10, + "logits/chosen": -1.9761680364608765, + "logits/rejected": -1.9943240880966187, + "logps/chosen": -191.03253173828125, + "logps/rejected": -419.1091003417969, + "loss": 0.0487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6006485223770142, + "rewards/margins": 4.33767557144165, + "rewards/rejected": -2.7370269298553467, + "step": 16778 + }, + { + "epoch": 0.98, + "learning_rate": 1.456035358619656e-10, + "logits/chosen": -1.9900634288787842, + "logits/rejected": -1.992226004600525, + "logps/chosen": -35.73517990112305, + "logps/rejected": -191.60244750976562, + "loss": 0.2558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7419261932373047, + "rewards/margins": 1.9443508386611938, + "rewards/rejected": -1.2024246454238892, + "step": 16779 + }, + { + "epoch": 0.98, + "learning_rate": 1.448857407765347e-10, + "logits/chosen": -2.03875994682312, + "logits/rejected": -2.03108549118042, + "logps/chosen": -28.49765396118164, + "logps/rejected": -197.75596618652344, + "loss": 0.3209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1690666228532791, + "rewards/margins": 2.0228569507598877, + "rewards/rejected": -1.853790283203125, + "step": 16780 + }, + { + "epoch": 0.98, + "learning_rate": 1.44169716788628e-10, + "logits/chosen": -1.8361725807189941, + "logits/rejected": -1.8940478563308716, + "logps/chosen": -201.727783203125, + "logps/rejected": -372.00042724609375, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8368240594863892, + "rewards/margins": 2.807112216949463, + "rewards/rejected": -0.970288097858429, + "step": 16781 + }, + { + "epoch": 0.98, + "learning_rate": 1.4345546392368624e-10, + "logits/chosen": -1.8003039360046387, + "logits/rejected": -1.884353518486023, + "logps/chosen": -187.15432739257812, + "logps/rejected": -424.63543701171875, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8045333623886108, + "rewards/margins": 7.385032653808594, + "rewards/rejected": -5.580499172210693, + "step": 16782 + }, + { + "epoch": 0.98, + "learning_rate": 1.4274298220707804e-10, + "logits/chosen": -2.064603805541992, + "logits/rejected": -2.0593724250793457, + "logps/chosen": -42.8988151550293, + "logps/rejected": -290.8149719238281, + "loss": 0.1885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.496664434671402, + "rewards/margins": 2.5318238735198975, + "rewards/rejected": -2.0351593494415283, + "step": 16783 + }, + { + "epoch": 0.98, + "learning_rate": 1.42032271664122e-10, + "logits/chosen": -1.835618019104004, + "logits/rejected": -1.8374214172363281, + "logps/chosen": -0.6984941959381104, + "logps/rejected": -156.66482543945312, + "loss": 0.6154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010596501640975475, + "rewards/margins": 0.32561880350112915, + "rewards/rejected": -0.3150222897529602, + "step": 16784 + }, + { + "epoch": 0.98, + "learning_rate": 1.413233323200591e-10, + "logits/chosen": -1.7206329107284546, + "logits/rejected": -1.698157787322998, + "logps/chosen": -230.56222534179688, + "logps/rejected": -494.7370910644531, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.0111083984375, + "rewards/margins": 3.872357130050659, + "rewards/rejected": 0.13875122368335724, + "step": 16785 + }, + { + "epoch": 0.98, + "learning_rate": 1.4061616420008027e-10, + "logits/chosen": -2.039415121078491, + "logits/rejected": -2.0405526161193848, + "logps/chosen": -5.682636737823486, + "logps/rejected": -146.0852813720703, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17576384544372559, + "rewards/margins": 2.7037758827209473, + "rewards/rejected": -2.5280120372772217, + "step": 16786 + }, + { + "epoch": 0.98, + "learning_rate": 1.3991076732929874e-10, + "logits/chosen": -1.9267327785491943, + "logits/rejected": -1.9125535488128662, + "logps/chosen": -0.10963460803031921, + "logps/rejected": -161.42295837402344, + "loss": 0.3661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009390177205204964, + "rewards/margins": 5.663394927978516, + "rewards/rejected": -5.6540045738220215, + "step": 16787 + }, + { + "epoch": 0.98, + "learning_rate": 1.3920714173278336e-10, + "logits/chosen": -1.9627660512924194, + "logits/rejected": -1.9639359712600708, + "logps/chosen": -9.7365140914917, + "logps/rejected": -87.85330963134766, + "loss": 0.6055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06419544667005539, + "rewards/margins": 0.43831998109817505, + "rewards/rejected": -0.502515435218811, + "step": 16788 + }, + { + "epoch": 0.98, + "learning_rate": 1.385052874355197e-10, + "logits/chosen": -1.9375576972961426, + "logits/rejected": -1.9246361255645752, + "logps/chosen": -170.23348999023438, + "logps/rejected": -353.83685302734375, + "loss": 0.0801, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0102570056915283, + "rewards/margins": 2.403961181640625, + "rewards/rejected": -0.39370423555374146, + "step": 16789 + }, + { + "epoch": 0.98, + "learning_rate": 1.3780520446245446e-10, + "logits/chosen": -1.9531657695770264, + "logits/rejected": -1.9427082538604736, + "logps/chosen": -25.101154327392578, + "logps/rejected": -168.12347412109375, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33122560381889343, + "rewards/margins": 1.5772583484649658, + "rewards/rejected": -1.24603271484375, + "step": 16790 + }, + { + "epoch": 0.98, + "learning_rate": 1.3710689283844557e-10, + "logits/chosen": -1.9236717224121094, + "logits/rejected": -1.9230166673660278, + "logps/chosen": -7.045150414342061e-05, + "logps/rejected": -140.26873779296875, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 5.078073627373669e-06, + "rewards/margins": 2.8495969772338867, + "rewards/rejected": -2.8495919704437256, + "step": 16791 + }, + { + "epoch": 0.98, + "learning_rate": 1.3641035258830646e-10, + "logits/chosen": -1.793517827987671, + "logits/rejected": -1.7866162061691284, + "logps/chosen": -266.2706298828125, + "logps/rejected": -468.5697937011719, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.483135938644409, + "rewards/margins": 5.545114040374756, + "rewards/rejected": -3.0619781017303467, + "step": 16792 + }, + { + "epoch": 0.98, + "learning_rate": 1.357155837367785e-10, + "logits/chosen": -1.9811146259307861, + "logits/rejected": -1.983703851699829, + "logps/chosen": -0.2719676196575165, + "logps/rejected": -152.40725708007812, + "loss": 0.4471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010091752046719193, + "rewards/margins": 1.2944341897964478, + "rewards/rejected": -1.2934249639511108, + "step": 16793 + }, + { + "epoch": 0.98, + "learning_rate": 1.3502258630855302e-10, + "logits/chosen": -2.0376546382904053, + "logits/rejected": -2.027869939804077, + "logps/chosen": -253.48390197753906, + "logps/rejected": -442.03314208984375, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2975692749023438, + "rewards/margins": 3.914930820465088, + "rewards/rejected": -1.6173614263534546, + "step": 16794 + }, + { + "epoch": 0.98, + "learning_rate": 1.3433136032823256e-10, + "logits/chosen": -1.8821340799331665, + "logits/rejected": -1.8849573135375977, + "logps/chosen": -75.29303741455078, + "logps/rejected": -343.0068054199219, + "loss": 0.1903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.914538562297821, + "rewards/margins": 3.820892333984375, + "rewards/rejected": -2.906353712081909, + "step": 16795 + }, + { + "epoch": 0.98, + "learning_rate": 1.3364190582038638e-10, + "logits/chosen": -1.9315959215164185, + "logits/rejected": -1.9368139505386353, + "logps/chosen": -68.66702270507812, + "logps/rejected": -266.4449157714844, + "loss": 0.3255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7513870596885681, + "rewards/margins": 1.128804087638855, + "rewards/rejected": -0.3774169981479645, + "step": 16796 + }, + { + "epoch": 0.98, + "learning_rate": 1.3295422280950041e-10, + "logits/chosen": -1.9764550924301147, + "logits/rejected": -1.9686965942382812, + "logps/chosen": -6.874148368835449, + "logps/rejected": -205.1754913330078, + "loss": 0.1959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5817761421203613, + "rewards/margins": 5.11154317855835, + "rewards/rejected": -4.529767036437988, + "step": 16797 + }, + { + "epoch": 0.98, + "learning_rate": 1.322683113200107e-10, + "logits/chosen": -1.8946168422698975, + "logits/rejected": -1.8894758224487305, + "logps/chosen": -0.28701817989349365, + "logps/rejected": -117.35653686523438, + "loss": 0.5284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029037630185484886, + "rewards/margins": 0.6898530721664429, + "rewards/rejected": -0.660815417766571, + "step": 16798 + }, + { + "epoch": 0.98, + "learning_rate": 1.3158417137627553e-10, + "logits/chosen": -1.8540453910827637, + "logits/rejected": -1.8588554859161377, + "logps/chosen": -8.259228706359863, + "logps/rejected": -145.87269592285156, + "loss": 0.4327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14656153321266174, + "rewards/margins": 1.331122636795044, + "rewards/rejected": -1.1845611333847046, + "step": 16799 + }, + { + "epoch": 0.98, + "learning_rate": 1.3090180300260877e-10, + "logits/chosen": -1.8625290393829346, + "logits/rejected": -1.861412763595581, + "logps/chosen": -0.0004969360306859016, + "logps/rejected": -166.7784423828125, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.326785074226791e-06, + "rewards/margins": 4.273866653442383, + "rewards/rejected": -4.273862361907959, + "step": 16800 + }, + { + "epoch": 0.98, + "learning_rate": 1.3022120622324106e-10, + "logits/chosen": -2.0341036319732666, + "logits/rejected": -2.023120164871216, + "logps/chosen": -37.01253128051758, + "logps/rejected": -185.0682373046875, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2652919888496399, + "rewards/margins": 1.8024547100067139, + "rewards/rejected": -1.5371627807617188, + "step": 16801 + }, + { + "epoch": 0.98, + "learning_rate": 1.2954238106235305e-10, + "logits/chosen": -1.8592517375946045, + "logits/rejected": -1.8648879528045654, + "logps/chosen": -199.78439331054688, + "logps/rejected": -202.98187255859375, + "loss": 0.2807, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2167099714279175, + "rewards/margins": 0.8345276117324829, + "rewards/rejected": 0.3821823298931122, + "step": 16802 + }, + { + "epoch": 0.98, + "learning_rate": 1.2886532754406987e-10, + "logits/chosen": -1.9581760168075562, + "logits/rejected": -1.9530415534973145, + "logps/chosen": -32.77621078491211, + "logps/rejected": -169.42440795898438, + "loss": 0.2218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5687667727470398, + "rewards/margins": 2.6442368030548096, + "rewards/rejected": -2.075469970703125, + "step": 16803 + }, + { + "epoch": 0.98, + "learning_rate": 1.281900456924334e-10, + "logits/chosen": -1.837917685508728, + "logits/rejected": -1.8413902521133423, + "logps/chosen": -160.8300323486328, + "logps/rejected": -299.59423828125, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7523956298828125, + "rewards/margins": 3.01531982421875, + "rewards/rejected": -0.2629241943359375, + "step": 16804 + }, + { + "epoch": 0.98, + "learning_rate": 1.2751653553143559e-10, + "logits/chosen": -1.8947207927703857, + "logits/rejected": -1.8864084482192993, + "logps/chosen": -59.27943420410156, + "logps/rejected": -209.8873748779297, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.018566131591796875, + "rewards/margins": 3.6988182067871094, + "rewards/rejected": -3.6802520751953125, + "step": 16805 + }, + { + "epoch": 0.98, + "learning_rate": 1.2684479708500172e-10, + "logits/chosen": -1.9233895540237427, + "logits/rejected": -1.9282128810882568, + "logps/chosen": -0.0019354213727638125, + "logps/rejected": -69.99652862548828, + "loss": 0.5449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016468267131131142, + "rewards/margins": 0.5023102760314941, + "rewards/rejected": -0.5024749636650085, + "step": 16806 + }, + { + "epoch": 0.98, + "learning_rate": 1.2617483037699607e-10, + "logits/chosen": -1.9007612466812134, + "logits/rejected": -1.896690845489502, + "logps/chosen": -14.314804077148438, + "logps/rejected": -128.70181274414062, + "loss": 0.4441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10446586459875107, + "rewards/margins": 1.482492446899414, + "rewards/rejected": -1.3780266046524048, + "step": 16807 + }, + { + "epoch": 0.98, + "learning_rate": 1.2550663543122176e-10, + "logits/chosen": -1.8334311246871948, + "logits/rejected": -1.909128189086914, + "logps/chosen": -255.78250122070312, + "logps/rejected": -222.76377868652344, + "loss": 0.2297, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5267242193222046, + "rewards/margins": 0.6334884166717529, + "rewards/rejected": 0.8932358026504517, + "step": 16808 + }, + { + "epoch": 0.98, + "learning_rate": 1.2484021227141538e-10, + "logits/chosen": -1.8125529289245605, + "logits/rejected": -1.8254234790802002, + "logps/chosen": -152.47743225097656, + "logps/rejected": -423.90032958984375, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6179932355880737, + "rewards/margins": 3.452939033508301, + "rewards/rejected": -1.8349456787109375, + "step": 16809 + }, + { + "epoch": 0.98, + "learning_rate": 1.2417556092124692e-10, + "logits/chosen": -2.068702459335327, + "logits/rejected": -2.0687620639801025, + "logps/chosen": -53.03724670410156, + "logps/rejected": -158.83489990234375, + "loss": 0.4866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5522876977920532, + "rewards/margins": 2.88392972946167, + "rewards/rejected": -3.4362175464630127, + "step": 16810 + }, + { + "epoch": 0.98, + "learning_rate": 1.2351268140433634e-10, + "logits/chosen": -2.0168399810791016, + "logits/rejected": -2.0111894607543945, + "logps/chosen": -0.003785880282521248, + "logps/rejected": -77.53977966308594, + "loss": 0.4681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001759148552082479, + "rewards/margins": 1.280898928642273, + "rewards/rejected": -1.279139757156372, + "step": 16811 + }, + { + "epoch": 0.98, + "learning_rate": 1.2285157374422039e-10, + "logits/chosen": -1.616713047027588, + "logits/rejected": -1.6435102224349976, + "logps/chosen": -256.10455322265625, + "logps/rejected": -493.0452880859375, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.071447730064392, + "rewards/margins": 7.338068008422852, + "rewards/rejected": -6.26662015914917, + "step": 16812 + }, + { + "epoch": 0.98, + "learning_rate": 1.2219223796439694e-10, + "logits/chosen": -1.8043303489685059, + "logits/rejected": -1.7731820344924927, + "logps/chosen": -309.6831359863281, + "logps/rejected": -488.7789306640625, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.174673557281494, + "rewards/margins": 3.985409736633301, + "rewards/rejected": -1.810736060142517, + "step": 16813 + }, + { + "epoch": 0.98, + "learning_rate": 1.2153467408828611e-10, + "logits/chosen": -1.7916582822799683, + "logits/rejected": -1.7886019945144653, + "logps/chosen": -4.264204502105713, + "logps/rejected": -179.45904541015625, + "loss": 0.3122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14890213310718536, + "rewards/margins": 4.225784778594971, + "rewards/rejected": -4.076882839202881, + "step": 16814 + }, + { + "epoch": 0.98, + "learning_rate": 1.2087888213924147e-10, + "logits/chosen": -1.490287184715271, + "logits/rejected": -1.4838707447052002, + "logps/chosen": -26.53550910949707, + "logps/rejected": -212.77487182617188, + "loss": 0.096, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9958101511001587, + "rewards/margins": 3.9203357696533203, + "rewards/rejected": -2.924525499343872, + "step": 16815 + }, + { + "epoch": 0.98, + "learning_rate": 1.202248621405666e-10, + "logits/chosen": -1.9842524528503418, + "logits/rejected": -2.0578086376190186, + "logps/chosen": -151.9709014892578, + "logps/rejected": -367.0048522949219, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.055159091949463, + "rewards/margins": 5.018815994262695, + "rewards/rejected": -2.9636566638946533, + "step": 16816 + }, + { + "epoch": 0.98, + "learning_rate": 1.195726141154929e-10, + "logits/chosen": -1.805213451385498, + "logits/rejected": -1.8018982410430908, + "logps/chosen": -56.459625244140625, + "logps/rejected": -124.77369689941406, + "loss": 0.2418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7276546359062195, + "rewards/margins": 1.4143909215927124, + "rewards/rejected": -0.6867362856864929, + "step": 16817 + }, + { + "epoch": 0.98, + "learning_rate": 1.1892213808718522e-10, + "logits/chosen": -2.063108205795288, + "logits/rejected": -2.0626957416534424, + "logps/chosen": -0.39563482999801636, + "logps/rejected": -101.23995208740234, + "loss": 0.4091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003496763063594699, + "rewards/margins": 1.76918363571167, + "rewards/rejected": -1.7656868696212769, + "step": 16818 + }, + { + "epoch": 0.98, + "learning_rate": 1.1827343407876388e-10, + "logits/chosen": -2.0810060501098633, + "logits/rejected": -2.0829079151153564, + "logps/chosen": -27.02102279663086, + "logps/rejected": -117.14947509765625, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2432563751935959, + "rewards/margins": 1.6315727233886719, + "rewards/rejected": -1.3883163928985596, + "step": 16819 + }, + { + "epoch": 0.98, + "learning_rate": 1.1762650211326607e-10, + "logits/chosen": -1.788532018661499, + "logits/rejected": -1.782835841178894, + "logps/chosen": -12.429931640625, + "logps/rejected": -207.77886962890625, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05677442625164986, + "rewards/margins": 1.6570017337799072, + "rewards/rejected": -1.6002273559570312, + "step": 16820 + }, + { + "epoch": 0.98, + "learning_rate": 1.1698134221367896e-10, + "logits/chosen": -1.8399983644485474, + "logits/rejected": -1.8447047472000122, + "logps/chosen": -187.24087524414062, + "logps/rejected": -325.92279052734375, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9066742658615112, + "rewards/margins": 3.332284688949585, + "rewards/rejected": -1.4256104230880737, + "step": 16821 + }, + { + "epoch": 0.98, + "learning_rate": 1.163379544029175e-10, + "logits/chosen": -1.7287850379943848, + "logits/rejected": -1.7351961135864258, + "logps/chosen": -45.342342376708984, + "logps/rejected": -144.19027709960938, + "loss": 0.1328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1821552515029907, + "rewards/margins": 2.5029211044311523, + "rewards/rejected": -1.320765733718872, + "step": 16822 + }, + { + "epoch": 0.98, + "learning_rate": 1.1569633870384121e-10, + "logits/chosen": -1.9899024963378906, + "logits/rejected": -1.9806100130081177, + "logps/chosen": -5.3825578689575195, + "logps/rejected": -271.83612060546875, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10002269595861435, + "rewards/margins": 4.477378845214844, + "rewards/rejected": -4.577401638031006, + "step": 16823 + }, + { + "epoch": 0.98, + "learning_rate": 1.1505649513923743e-10, + "logits/chosen": -1.992175817489624, + "logits/rejected": -1.864012598991394, + "logps/chosen": -136.4915771484375, + "logps/rejected": -829.4671630859375, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.070443868637085, + "rewards/margins": 6.611227989196777, + "rewards/rejected": -4.540783882141113, + "step": 16824 + }, + { + "epoch": 0.98, + "learning_rate": 1.1441842373184352e-10, + "logits/chosen": -1.9607208967208862, + "logits/rejected": -1.959640622138977, + "logps/chosen": -185.6409149169922, + "logps/rejected": -349.45208740234375, + "loss": 0.0666, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.70450758934021, + "rewards/margins": 2.085629463195801, + "rewards/rejected": 0.618878185749054, + "step": 16825 + }, + { + "epoch": 0.98, + "learning_rate": 1.1378212450432467e-10, + "logits/chosen": -2.0328071117401123, + "logits/rejected": -2.0332930088043213, + "logps/chosen": -5.401937484741211, + "logps/rejected": -152.09197998046875, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2671675682067871, + "rewards/margins": 3.800525665283203, + "rewards/rejected": -3.533358097076416, + "step": 16826 + }, + { + "epoch": 0.98, + "learning_rate": 1.1314759747927949e-10, + "logits/chosen": -2.1405680179595947, + "logits/rejected": -2.1362063884735107, + "logps/chosen": -5.019115924835205, + "logps/rejected": -122.93805694580078, + "loss": 0.4608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07335128635168076, + "rewards/margins": 0.58746337890625, + "rewards/rejected": -0.514112114906311, + "step": 16827 + }, + { + "epoch": 0.98, + "learning_rate": 1.125148426792566e-10, + "logits/chosen": -2.019535779953003, + "logits/rejected": -2.060871124267578, + "logps/chosen": -240.06288146972656, + "logps/rejected": -312.1605224609375, + "loss": 0.0783, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5610947608947754, + "rewards/margins": 1.8920762538909912, + "rewards/rejected": 0.669018566608429, + "step": 16828 + }, + { + "epoch": 0.98, + "learning_rate": 1.1188386012673246e-10, + "logits/chosen": -2.0316197872161865, + "logits/rejected": -2.0320892333984375, + "logps/chosen": -46.53455352783203, + "logps/rejected": -191.28526306152344, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18176154792308807, + "rewards/margins": 3.5204105377197266, + "rewards/rejected": -3.338649034500122, + "step": 16829 + }, + { + "epoch": 0.98, + "learning_rate": 1.1125464984412802e-10, + "logits/chosen": -1.8147709369659424, + "logits/rejected": -1.8159691095352173, + "logps/chosen": -2.6959543228149414, + "logps/rejected": -168.7689971923828, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.047252487391233444, + "rewards/margins": 6.256433963775635, + "rewards/rejected": -6.209181308746338, + "step": 16830 + }, + { + "epoch": 0.98, + "learning_rate": 1.1062721185378654e-10, + "logits/chosen": -1.7781026363372803, + "logits/rejected": -1.7868478298187256, + "logps/chosen": -194.11453247070312, + "logps/rejected": -375.46630859375, + "loss": 0.0853, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0414626598358154, + "rewards/margins": 2.0380568504333496, + "rewards/rejected": 0.0034057616721838713, + "step": 16831 + }, + { + "epoch": 0.98, + "learning_rate": 1.1000154617800128e-10, + "logits/chosen": -2.1099188327789307, + "logits/rejected": -2.108349323272705, + "logps/chosen": -0.0010619862005114555, + "logps/rejected": -119.46935272216797, + "loss": 0.3779, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.890067874745e-06, + "rewards/margins": 2.7490031719207764, + "rewards/rejected": -2.749011993408203, + "step": 16832 + }, + { + "epoch": 0.98, + "learning_rate": 1.0937765283899891e-10, + "logits/chosen": -1.9656434059143066, + "logits/rejected": -1.9333980083465576, + "logps/chosen": -206.60467529296875, + "logps/rejected": -334.8359375, + "loss": 0.1991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6842437982559204, + "rewards/margins": 0.837048351764679, + "rewards/rejected": 0.8471954464912415, + "step": 16833 + }, + { + "epoch": 0.98, + "learning_rate": 1.0875553185894504e-10, + "logits/chosen": -1.944224238395691, + "logits/rejected": -2.0048208236694336, + "logps/chosen": -198.94630432128906, + "logps/rejected": -476.8050537109375, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9273117780685425, + "rewards/margins": 6.830607891082764, + "rewards/rejected": -4.903295993804932, + "step": 16834 + }, + { + "epoch": 0.98, + "learning_rate": 1.0813518325993864e-10, + "logits/chosen": -1.605578899383545, + "logits/rejected": -1.6088193655014038, + "logps/chosen": -103.8729248046875, + "logps/rejected": -399.30072021484375, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7234375476837158, + "rewards/margins": 3.4410247802734375, + "rewards/rejected": -1.7175873517990112, + "step": 16835 + }, + { + "epoch": 0.98, + "learning_rate": 1.0751660706401766e-10, + "logits/chosen": -1.8117058277130127, + "logits/rejected": -1.8135391473770142, + "logps/chosen": -267.54193115234375, + "logps/rejected": -421.076416015625, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.602923631668091, + "rewards/margins": 3.1971497535705566, + "rewards/rejected": -0.594226062297821, + "step": 16836 + }, + { + "epoch": 0.98, + "learning_rate": 1.0689980329315896e-10, + "logits/chosen": -1.7539767026901245, + "logits/rejected": -1.7593728303909302, + "logps/chosen": -181.9195556640625, + "logps/rejected": -224.49798583984375, + "loss": 0.0585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9919312000274658, + "rewards/margins": 2.981597900390625, + "rewards/rejected": -0.989666759967804, + "step": 16837 + }, + { + "epoch": 0.98, + "learning_rate": 1.0628477196927277e-10, + "logits/chosen": -1.918062448501587, + "logits/rejected": -1.9198967218399048, + "logps/chosen": -3.099421883234754e-05, + "logps/rejected": -141.77296447753906, + "loss": 0.3701, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07960657816875e-07, + "rewards/margins": 2.697132110595703, + "rewards/rejected": -2.6971328258514404, + "step": 16838 + }, + { + "epoch": 0.98, + "learning_rate": 1.056715131142083e-10, + "logits/chosen": -1.9023350477218628, + "logits/rejected": -1.9551163911819458, + "logps/chosen": -181.07247924804688, + "logps/rejected": -519.1001586914062, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7633025646209717, + "rewards/margins": 7.840243339538574, + "rewards/rejected": -5.076941013336182, + "step": 16839 + }, + { + "epoch": 0.98, + "learning_rate": 1.0506002674974812e-10, + "logits/chosen": -1.7100859880447388, + "logits/rejected": -1.7034881114959717, + "logps/chosen": -88.14537811279297, + "logps/rejected": -258.8323059082031, + "loss": 0.142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0408531427383423, + "rewards/margins": 2.160576820373535, + "rewards/rejected": -1.1197235584259033, + "step": 16840 + }, + { + "epoch": 0.98, + "learning_rate": 1.0445031289761929e-10, + "logits/chosen": -1.802404761314392, + "logits/rejected": -1.804512858390808, + "logps/chosen": -217.49795532226562, + "logps/rejected": -454.74700927734375, + "loss": 0.1219, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.947882056236267, + "rewards/margins": 1.540960669517517, + "rewards/rejected": 0.40692138671875, + "step": 16841 + }, + { + "epoch": 0.98, + "learning_rate": 1.0384237157948228e-10, + "logits/chosen": -2.035813331604004, + "logits/rejected": -2.0302491188049316, + "logps/chosen": -1.0714948177337646, + "logps/rejected": -187.45187377929688, + "loss": 0.3561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09398842602968216, + "rewards/margins": 4.2977705001831055, + "rewards/rejected": -4.391758918762207, + "step": 16842 + }, + { + "epoch": 0.98, + "learning_rate": 1.032362028169309e-10, + "logits/chosen": -1.8915045261383057, + "logits/rejected": -1.8944461345672607, + "logps/chosen": -69.47789001464844, + "logps/rejected": -292.0928955078125, + "loss": 0.1383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.962066650390625, + "rewards/margins": 4.698172092437744, + "rewards/rejected": -3.736105442047119, + "step": 16843 + }, + { + "epoch": 0.98, + "learning_rate": 1.026318066315035e-10, + "logits/chosen": -2.025759220123291, + "logits/rejected": -2.0196571350097656, + "logps/chosen": -26.115015029907227, + "logps/rejected": -234.07745361328125, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3335428237915039, + "rewards/margins": 2.1186366081237793, + "rewards/rejected": -1.7850936651229858, + "step": 16844 + }, + { + "epoch": 0.98, + "learning_rate": 1.0202918304466624e-10, + "logits/chosen": -2.134382486343384, + "logits/rejected": -2.1327500343322754, + "logps/chosen": -2.392735719680786, + "logps/rejected": -217.47427368164062, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13881151378154755, + "rewards/margins": 2.1196250915527344, + "rewards/rejected": -1.9808136224746704, + "step": 16845 + }, + { + "epoch": 0.98, + "learning_rate": 1.014283320778353e-10, + "logits/chosen": -1.9756107330322266, + "logits/rejected": -1.946729063987732, + "logps/chosen": -99.06734466552734, + "logps/rejected": -538.741455078125, + "loss": 0.21, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45382460951805115, + "rewards/margins": 15.34380054473877, + "rewards/rejected": -14.889975547790527, + "step": 16846 + }, + { + "epoch": 0.98, + "learning_rate": 1.0082925375234363e-10, + "logits/chosen": -1.9170531034469604, + "logits/rejected": -1.9061428308486938, + "logps/chosen": -89.00416564941406, + "logps/rejected": -387.50408935546875, + "loss": 0.2177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.590100109577179, + "rewards/margins": 6.140909194946289, + "rewards/rejected": -5.550808906555176, + "step": 16847 + }, + { + "epoch": 0.98, + "learning_rate": 1.0023194808947977e-10, + "logits/chosen": -2.004436492919922, + "logits/rejected": -1.9978137016296387, + "logps/chosen": -27.608617782592773, + "logps/rejected": -214.26052856445312, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35230883955955505, + "rewards/margins": 3.8013155460357666, + "rewards/rejected": -3.4490067958831787, + "step": 16848 + }, + { + "epoch": 0.98, + "learning_rate": 9.963641511047116e-11, + "logits/chosen": -1.832465410232544, + "logits/rejected": -1.8194924592971802, + "logps/chosen": -350.65838623046875, + "logps/rejected": -457.2012939453125, + "loss": 0.2072, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.003100633621216, + "rewards/margins": 0.7251770496368408, + "rewards/rejected": 1.277923583984375, + "step": 16849 + }, + { + "epoch": 0.98, + "learning_rate": 9.904265483645647e-11, + "logits/chosen": -1.8421870470046997, + "logits/rejected": -1.7739946842193604, + "logps/chosen": -298.4600830078125, + "logps/rejected": -378.3898010253906, + "loss": 0.0594, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5021119117736816, + "rewards/margins": 2.969259738922119, + "rewards/rejected": -0.4671478271484375, + "step": 16850 + }, + { + "epoch": 0.98, + "learning_rate": 9.845066728854656e-11, + "logits/chosen": -2.003758192062378, + "logits/rejected": -1.9997221231460571, + "logps/chosen": -4.684872692450881e-05, + "logps/rejected": -94.17186737060547, + "loss": 0.5766, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2635988468900905e-06, + "rewards/margins": 0.5345309376716614, + "rewards/rejected": -0.5345321893692017, + "step": 16851 + }, + { + "epoch": 0.98, + "learning_rate": 9.786045248775798e-11, + "logits/chosen": -1.8410959243774414, + "logits/rejected": -1.8088430166244507, + "logps/chosen": -215.96804809570312, + "logps/rejected": -400.0087890625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.752453565597534, + "rewards/margins": 5.583477973937988, + "rewards/rejected": -1.831024169921875, + "step": 16852 + }, + { + "epoch": 0.98, + "learning_rate": 9.727201045506839e-11, + "logits/chosen": -1.9590672254562378, + "logits/rejected": -1.9581589698791504, + "logps/chosen": -6.222593947313726e-05, + "logps/rejected": -210.9080352783203, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7311333471734542e-06, + "rewards/margins": 5.0709052085876465, + "rewards/rejected": -5.070909023284912, + "step": 16853 + }, + { + "epoch": 0.98, + "learning_rate": 9.668534121137773e-11, + "logits/chosen": -1.928358554840088, + "logits/rejected": -1.931666612625122, + "logps/chosen": -6.257749080657959, + "logps/rejected": -61.06830596923828, + "loss": 0.3642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3123999238014221, + "rewards/margins": 1.5761992931365967, + "rewards/rejected": -1.2637993097305298, + "step": 16854 + }, + { + "epoch": 0.98, + "learning_rate": 9.610044477752488e-11, + "logits/chosen": -1.6976581811904907, + "logits/rejected": -1.669679045677185, + "logps/chosen": -113.84820556640625, + "logps/rejected": -284.0521240234375, + "loss": 0.0512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4897583723068237, + "rewards/margins": 3.17718505859375, + "rewards/rejected": -1.6874268054962158, + "step": 16855 + }, + { + "epoch": 0.98, + "learning_rate": 9.551732117428767e-11, + "logits/chosen": -1.7065019607543945, + "logits/rejected": -1.6832308769226074, + "logps/chosen": -171.34095764160156, + "logps/rejected": -254.35263061523438, + "loss": 0.2482, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7992324829101562, + "rewards/margins": 0.9058273434638977, + "rewards/rejected": 0.8934051394462585, + "step": 16856 + }, + { + "epoch": 0.98, + "learning_rate": 9.493597042238843e-11, + "logits/chosen": -2.1517271995544434, + "logits/rejected": -2.150540351867676, + "logps/chosen": -6.701650142669678, + "logps/rejected": -174.8190155029297, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.051549434661865234, + "rewards/margins": 3.216531991958618, + "rewards/rejected": -3.164982557296753, + "step": 16857 + }, + { + "epoch": 0.98, + "learning_rate": 9.435639254247174e-11, + "logits/chosen": -1.8649003505706787, + "logits/rejected": -1.8659178018569946, + "logps/chosen": -79.5051040649414, + "logps/rejected": -398.1072998046875, + "loss": 0.1265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0564193725585938, + "rewards/margins": 6.013890266418457, + "rewards/rejected": -4.957470893859863, + "step": 16858 + }, + { + "epoch": 0.98, + "learning_rate": 9.377858755513779e-11, + "logits/chosen": -1.8715611696243286, + "logits/rejected": -1.9187743663787842, + "logps/chosen": -144.54664611816406, + "logps/rejected": -331.45928955078125, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6337722539901733, + "rewards/margins": 4.346440315246582, + "rewards/rejected": -2.712667942047119, + "step": 16859 + }, + { + "epoch": 0.98, + "learning_rate": 9.32025554809035e-11, + "logits/chosen": -2.1428256034851074, + "logits/rejected": -2.143866539001465, + "logps/chosen": -0.0003991959092672914, + "logps/rejected": -90.60528564453125, + "loss": 0.4785, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.510216503054835e-06, + "rewards/margins": 1.1146736145019531, + "rewards/rejected": -1.1146820783615112, + "step": 16860 + }, + { + "epoch": 0.98, + "learning_rate": 9.262829634023029e-11, + "logits/chosen": -1.9205385446548462, + "logits/rejected": -1.9148240089416504, + "logps/chosen": -224.16458129882812, + "logps/rejected": -360.066650390625, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6684799194335938, + "rewards/margins": 4.46377420425415, + "rewards/rejected": -1.795294165611267, + "step": 16861 + }, + { + "epoch": 0.98, + "learning_rate": 9.20558101535296e-11, + "logits/chosen": -1.876157522201538, + "logits/rejected": -1.801161289215088, + "logps/chosen": -325.8033752441406, + "logps/rejected": -533.4099731445312, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8147430419921875, + "rewards/margins": 2.5862090587615967, + "rewards/rejected": -0.771466076374054, + "step": 16862 + }, + { + "epoch": 0.98, + "learning_rate": 9.14850969411296e-11, + "logits/chosen": -1.9088385105133057, + "logits/rejected": -1.9345648288726807, + "logps/chosen": -159.81997680664062, + "logps/rejected": -211.78009033203125, + "loss": 0.1648, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5087647438049316, + "rewards/margins": 1.0305237770080566, + "rewards/rejected": 1.478240966796875, + "step": 16863 + }, + { + "epoch": 0.98, + "learning_rate": 9.091615672330854e-11, + "logits/chosen": -1.8875932693481445, + "logits/rejected": -1.8904352188110352, + "logps/chosen": -1.5545527935028076, + "logps/rejected": -83.67399597167969, + "loss": 0.6171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048810627311468124, + "rewards/margins": 0.14435353875160217, + "rewards/rejected": -0.09554290771484375, + "step": 16864 + }, + { + "epoch": 0.98, + "learning_rate": 9.034898952027248e-11, + "logits/chosen": -1.8788915872573853, + "logits/rejected": -1.8624037504196167, + "logps/chosen": -40.26601791381836, + "logps/rejected": -223.49752807617188, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39998742938041687, + "rewards/margins": 1.6636669635772705, + "rewards/rejected": -1.2636795043945312, + "step": 16865 + }, + { + "epoch": 0.98, + "learning_rate": 8.978359535218305e-11, + "logits/chosen": -1.9205743074417114, + "logits/rejected": -1.9225645065307617, + "logps/chosen": -18.958215713500977, + "logps/rejected": -304.81744384765625, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09331836551427841, + "rewards/margins": 4.8131608963012695, + "rewards/rejected": -4.719842433929443, + "step": 16866 + }, + { + "epoch": 0.98, + "learning_rate": 8.921997423911309e-11, + "logits/chosen": -1.988608956336975, + "logits/rejected": -1.982434630393982, + "logps/chosen": -45.39455032348633, + "logps/rejected": -232.69796752929688, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20836181938648224, + "rewards/margins": 3.0135467052459717, + "rewards/rejected": -3.2219085693359375, + "step": 16867 + }, + { + "epoch": 0.98, + "learning_rate": 8.865812620109103e-11, + "logits/chosen": -1.9131578207015991, + "logits/rejected": -1.9040619134902954, + "logps/chosen": -303.45135498046875, + "logps/rejected": -426.4914855957031, + "loss": 0.1227, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.980792284011841, + "rewards/margins": 1.3409637212753296, + "rewards/rejected": 1.6398285627365112, + "step": 16868 + }, + { + "epoch": 0.98, + "learning_rate": 8.809805125807313e-11, + "logits/chosen": -2.0112617015838623, + "logits/rejected": -1.971882939338684, + "logps/chosen": -181.03079223632812, + "logps/rejected": -411.9509582519531, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6590211391448975, + "rewards/margins": 2.9293367862701416, + "rewards/rejected": -0.270315557718277, + "step": 16869 + }, + { + "epoch": 0.98, + "learning_rate": 8.753974942996011e-11, + "logits/chosen": -1.9728035926818848, + "logits/rejected": -1.9831411838531494, + "logps/chosen": -135.93450927734375, + "logps/rejected": -229.5454864501953, + "loss": 0.1251, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9862335920333862, + "rewards/margins": 1.4654953479766846, + "rewards/rejected": 0.5207382440567017, + "step": 16870 + }, + { + "epoch": 0.98, + "learning_rate": 8.69832207365806e-11, + "logits/chosen": -1.7818617820739746, + "logits/rejected": -1.780329704284668, + "logps/chosen": -118.00544738769531, + "logps/rejected": -439.25927734375, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03337707743048668, + "rewards/margins": 4.631234645843506, + "rewards/rejected": -4.66461181640625, + "step": 16871 + }, + { + "epoch": 0.98, + "learning_rate": 8.642846519771318e-11, + "logits/chosen": -1.8965708017349243, + "logits/rejected": -1.8957759141921997, + "logps/chosen": -27.29267120361328, + "logps/rejected": -139.8291473388672, + "loss": 0.4144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10103931277990341, + "rewards/margins": 1.1470308303833008, + "rewards/rejected": -1.0459915399551392, + "step": 16872 + }, + { + "epoch": 0.98, + "learning_rate": 8.587548283305324e-11, + "logits/chosen": -1.7159900665283203, + "logits/rejected": -1.7257190942764282, + "logps/chosen": -34.180503845214844, + "logps/rejected": -168.33038330078125, + "loss": 0.2567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2045639753341675, + "rewards/margins": 0.9151062965393066, + "rewards/rejected": 0.2894577085971832, + "step": 16873 + }, + { + "epoch": 0.98, + "learning_rate": 8.532427366225725e-11, + "logits/chosen": -2.0704188346862793, + "logits/rejected": -2.0690317153930664, + "logps/chosen": -0.13078245520591736, + "logps/rejected": -221.36854553222656, + "loss": 0.4693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007528141140937805, + "rewards/margins": 1.335523009300232, + "rewards/rejected": -1.3347702026367188, + "step": 16874 + }, + { + "epoch": 0.98, + "learning_rate": 8.477483770490401e-11, + "logits/chosen": -1.864222764968872, + "logits/rejected": -1.86616051197052, + "logps/chosen": -263.05364990234375, + "logps/rejected": -459.73345947265625, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7165588736534119, + "rewards/margins": 4.330487251281738, + "rewards/rejected": -3.6139283180236816, + "step": 16875 + }, + { + "epoch": 0.98, + "learning_rate": 8.422717498050569e-11, + "logits/chosen": -1.961234211921692, + "logits/rejected": -1.9357789754867554, + "logps/chosen": -250.498046875, + "logps/rejected": -415.4814147949219, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.090319871902466, + "rewards/margins": 2.5951080322265625, + "rewards/rejected": 0.49521180987358093, + "step": 16876 + }, + { + "epoch": 0.98, + "learning_rate": 8.36812855085245e-11, + "logits/chosen": -2.0768227577209473, + "logits/rejected": -2.062364339828491, + "logps/chosen": -0.00041883712401613593, + "logps/rejected": -197.29998779296875, + "loss": 0.4332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00023638228594791144, + "rewards/margins": 1.6364973783493042, + "rewards/rejected": -1.636260986328125, + "step": 16877 + }, + { + "epoch": 0.98, + "learning_rate": 8.313716930835047e-11, + "logits/chosen": -1.729498028755188, + "logits/rejected": -1.7322412729263306, + "logps/chosen": -4.023373126983643, + "logps/rejected": -7.560746669769287, + "loss": 0.6631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013630724512040615, + "rewards/margins": 0.022822285071015358, + "rewards/rejected": -0.009191560558974743, + "step": 16878 + }, + { + "epoch": 0.98, + "learning_rate": 8.259482639931259e-11, + "logits/chosen": -1.942681908607483, + "logits/rejected": -1.9271916151046753, + "logps/chosen": -296.265380859375, + "logps/rejected": -348.87744140625, + "loss": 0.1855, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0489838123321533, + "rewards/margins": 1.1278901100158691, + "rewards/rejected": 0.921093761920929, + "step": 16879 + }, + { + "epoch": 0.98, + "learning_rate": 8.205425680068434e-11, + "logits/chosen": -1.7339856624603271, + "logits/rejected": -1.7042014598846436, + "logps/chosen": -211.50270080566406, + "logps/rejected": -319.7080993652344, + "loss": 0.0838, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6953811645507812, + "rewards/margins": 1.9358962774276733, + "rewards/rejected": 0.7594848871231079, + "step": 16880 + }, + { + "epoch": 0.98, + "learning_rate": 8.151546053166147e-11, + "logits/chosen": -2.0497043132781982, + "logits/rejected": -2.0478787422180176, + "logps/chosen": -49.059024810791016, + "logps/rejected": -258.6318664550781, + "loss": 0.2078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6553630828857422, + "rewards/margins": 2.5930066108703613, + "rewards/rejected": -1.9376434087753296, + "step": 16881 + }, + { + "epoch": 0.98, + "learning_rate": 8.097843761138423e-11, + "logits/chosen": -2.0145599842071533, + "logits/rejected": -2.01656174659729, + "logps/chosen": -24.642711639404297, + "logps/rejected": -158.18592834472656, + "loss": 0.55, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31463757157325745, + "rewards/margins": 0.28442516922950745, + "rewards/rejected": 0.03021240234375, + "step": 16882 + }, + { + "epoch": 0.98, + "learning_rate": 8.044318805893736e-11, + "logits/chosen": -1.930927038192749, + "logits/rejected": -1.9219352006912231, + "logps/chosen": -43.25798416137695, + "logps/rejected": -337.15618896484375, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2997452020645142, + "rewards/margins": 6.46139669418335, + "rewards/rejected": -5.161651611328125, + "step": 16883 + }, + { + "epoch": 0.98, + "learning_rate": 7.990971189332785e-11, + "logits/chosen": -1.7928463220596313, + "logits/rejected": -1.8169809579849243, + "logps/chosen": -244.6534881591797, + "logps/rejected": -374.67340087890625, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.538142442703247, + "rewards/margins": 4.554039001464844, + "rewards/rejected": -3.0158965587615967, + "step": 16884 + }, + { + "epoch": 0.98, + "learning_rate": 7.937800913351278e-11, + "logits/chosen": -1.8601139783859253, + "logits/rejected": -1.8665714263916016, + "logps/chosen": -6.164363861083984, + "logps/rejected": -129.25650024414062, + "loss": 1.0217, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2346695512533188, + "rewards/margins": -0.8903229832649231, + "rewards/rejected": 0.6556534171104431, + "step": 16885 + }, + { + "epoch": 0.98, + "learning_rate": 7.884807979837705e-11, + "logits/chosen": -1.9970142841339111, + "logits/rejected": -1.9875237941741943, + "logps/chosen": -167.8624267578125, + "logps/rejected": -369.865966796875, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6838210821151733, + "rewards/margins": 4.8531999588012695, + "rewards/rejected": -3.1693787574768066, + "step": 16886 + }, + { + "epoch": 0.98, + "learning_rate": 7.831992390675002e-11, + "logits/chosen": -1.8392423391342163, + "logits/rejected": -1.829850435256958, + "logps/chosen": -36.399566650390625, + "logps/rejected": -188.98385620117188, + "loss": 0.3202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1041080504655838, + "rewards/margins": 4.544999599456787, + "rewards/rejected": -4.649107456207275, + "step": 16887 + }, + { + "epoch": 0.98, + "learning_rate": 7.779354147738893e-11, + "logits/chosen": -1.871270775794983, + "logits/rejected": -1.873805046081543, + "logps/chosen": -10.78271770477295, + "logps/rejected": -59.730133056640625, + "loss": 0.7537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4253019392490387, + "rewards/margins": 0.28002646565437317, + "rewards/rejected": -0.7053284049034119, + "step": 16888 + }, + { + "epoch": 0.98, + "learning_rate": 7.726893252900101e-11, + "logits/chosen": -1.9890906810760498, + "logits/rejected": -1.9871981143951416, + "logps/chosen": -0.003255492774769664, + "logps/rejected": -77.21197509765625, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014150436618365347, + "rewards/margins": 1.3977723121643066, + "rewards/rejected": -1.3979138135910034, + "step": 16889 + }, + { + "epoch": 0.98, + "learning_rate": 7.674609708022139e-11, + "logits/chosen": -1.9636207818984985, + "logits/rejected": -1.9483541250228882, + "logps/chosen": -121.39537811279297, + "logps/rejected": -199.05738830566406, + "loss": 0.053, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.020231008529663, + "rewards/margins": 2.5922768115997314, + "rewards/rejected": 0.4279541075229645, + "step": 16890 + }, + { + "epoch": 0.98, + "learning_rate": 7.622503514961853e-11, + "logits/chosen": -1.9230594635009766, + "logits/rejected": -1.9233711957931519, + "logps/chosen": -36.651885986328125, + "logps/rejected": -209.21072387695312, + "loss": 0.2216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28081512451171875, + "rewards/margins": 2.957354784011841, + "rewards/rejected": -2.676539659500122, + "step": 16891 + }, + { + "epoch": 0.98, + "learning_rate": 7.57057467557054e-11, + "logits/chosen": -1.8363622426986694, + "logits/rejected": -1.8425978422164917, + "logps/chosen": -87.33746337890625, + "logps/rejected": -271.55670166015625, + "loss": 0.1175, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4851082563400269, + "rewards/margins": 3.208651065826416, + "rewards/rejected": -1.7235428094863892, + "step": 16892 + }, + { + "epoch": 0.98, + "learning_rate": 7.518823191692836e-11, + "logits/chosen": -1.9807909727096558, + "logits/rejected": -1.9850261211395264, + "logps/chosen": -33.562164306640625, + "logps/rejected": -255.79074096679688, + "loss": 0.1664, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9147918820381165, + "rewards/margins": 3.3294098377227783, + "rewards/rejected": -2.4146180152893066, + "step": 16893 + }, + { + "epoch": 0.98, + "learning_rate": 7.467249065167825e-11, + "logits/chosen": -1.9326756000518799, + "logits/rejected": -1.9379199743270874, + "logps/chosen": -24.151813507080078, + "logps/rejected": -187.2206573486328, + "loss": 0.2321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6060439944267273, + "rewards/margins": 2.878048896789551, + "rewards/rejected": -2.2720048427581787, + "step": 16894 + }, + { + "epoch": 0.98, + "learning_rate": 7.415852297827374e-11, + "logits/chosen": -2.003462791442871, + "logits/rejected": -2.0014355182647705, + "logps/chosen": -78.97087860107422, + "logps/rejected": -313.71875, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.931989312171936, + "rewards/margins": 1.0384072065353394, + "rewards/rejected": -0.10641784965991974, + "step": 16895 + }, + { + "epoch": 0.98, + "learning_rate": 7.364632891496691e-11, + "logits/chosen": -2.052722454071045, + "logits/rejected": -2.0642998218536377, + "logps/chosen": -20.436012268066406, + "logps/rejected": -184.15771484375, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32248058915138245, + "rewards/margins": 2.316779851913452, + "rewards/rejected": -1.994299292564392, + "step": 16896 + }, + { + "epoch": 0.98, + "learning_rate": 7.313590847995987e-11, + "logits/chosen": -1.7860898971557617, + "logits/rejected": -1.7265905141830444, + "logps/chosen": -238.0194549560547, + "logps/rejected": -383.15185546875, + "loss": 0.1921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7504456043243408, + "rewards/margins": 1.209069848060608, + "rewards/rejected": 0.5413757562637329, + "step": 16897 + }, + { + "epoch": 0.98, + "learning_rate": 7.262726169138811e-11, + "logits/chosen": -1.7864086627960205, + "logits/rejected": -1.7987397909164429, + "logps/chosen": -47.66054153442383, + "logps/rejected": -254.6730499267578, + "loss": 0.1055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2413166761398315, + "rewards/margins": 4.981868267059326, + "rewards/rejected": -3.740551710128784, + "step": 16898 + }, + { + "epoch": 0.98, + "learning_rate": 7.212038856731494e-11, + "logits/chosen": -1.9088870286941528, + "logits/rejected": -1.9133409261703491, + "logps/chosen": -21.029172897338867, + "logps/rejected": -180.2257843017578, + "loss": 0.1647, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0545427799224854, + "rewards/margins": 2.3105592727661133, + "rewards/rejected": -1.2560166120529175, + "step": 16899 + }, + { + "epoch": 0.98, + "learning_rate": 7.161528912574821e-11, + "logits/chosen": -2.0057826042175293, + "logits/rejected": -1.9875935316085815, + "logps/chosen": -51.685760498046875, + "logps/rejected": -243.836181640625, + "loss": 0.8794, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6148872375488281, + "rewards/margins": 1.7562339305877686, + "rewards/rejected": -3.3711211681365967, + "step": 16900 + }, + { + "epoch": 0.98, + "learning_rate": 7.11119633846291e-11, + "logits/chosen": -1.8786765336990356, + "logits/rejected": -1.7869162559509277, + "logps/chosen": -195.9186248779297, + "logps/rejected": -400.49761962890625, + "loss": 0.0498, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5373916625976562, + "rewards/margins": 3.124558925628662, + "rewards/rejected": -0.5871673822402954, + "step": 16901 + }, + { + "epoch": 0.98, + "learning_rate": 7.061041136184887e-11, + "logits/chosen": -1.8628813028335571, + "logits/rejected": -1.8956979513168335, + "logps/chosen": -227.17225646972656, + "logps/rejected": -253.29998779296875, + "loss": 0.3863, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.5769805908203125, + "rewards/margins": -0.0924224853515625, + "rewards/rejected": 2.669403076171875, + "step": 16902 + }, + { + "epoch": 0.98, + "learning_rate": 7.011063307520992e-11, + "logits/chosen": -2.0190131664276123, + "logits/rejected": -2.0151944160461426, + "logps/chosen": -0.03837663307785988, + "logps/rejected": -55.66493606567383, + "loss": 0.6017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010452262125909328, + "rewards/margins": 0.32959291338920593, + "rewards/rejected": -0.31914064288139343, + "step": 16903 + }, + { + "epoch": 0.98, + "learning_rate": 6.96126285424814e-11, + "logits/chosen": -1.7927716970443726, + "logits/rejected": -1.7971949577331543, + "logps/chosen": -220.43658447265625, + "logps/rejected": -398.7093200683594, + "loss": 0.1177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3026580810546875, + "rewards/margins": 6.693747043609619, + "rewards/rejected": -6.391088962554932, + "step": 16904 + }, + { + "epoch": 0.98, + "learning_rate": 6.91163977813436e-11, + "logits/chosen": -1.9256306886672974, + "logits/rejected": -1.9237651824951172, + "logps/chosen": -4.915292739868164, + "logps/rejected": -67.77323913574219, + "loss": 0.5914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039849378168582916, + "rewards/margins": 0.19878873229026794, + "rewards/rejected": -0.15893936157226562, + "step": 16905 + }, + { + "epoch": 0.98, + "learning_rate": 6.862194080943795e-11, + "logits/chosen": -2.1403164863586426, + "logits/rejected": -2.137995481491089, + "logps/chosen": -195.3292999267578, + "logps/rejected": -303.2547302246094, + "loss": 0.2165, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2481963634490967, + "rewards/margins": 0.6365721225738525, + "rewards/rejected": 2.611624240875244, + "step": 16906 + }, + { + "epoch": 0.98, + "learning_rate": 6.812925764431155e-11, + "logits/chosen": -1.8467260599136353, + "logits/rejected": -1.8246327638626099, + "logps/chosen": -70.3384017944336, + "logps/rejected": -244.47706604003906, + "loss": 0.0557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5085747241973877, + "rewards/margins": 4.88183069229126, + "rewards/rejected": -3.373255968093872, + "step": 16907 + }, + { + "epoch": 0.98, + "learning_rate": 6.763834830348369e-11, + "logits/chosen": -2.0705835819244385, + "logits/rejected": -2.060122489929199, + "logps/chosen": -58.36855697631836, + "logps/rejected": -232.4945068359375, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5183674097061157, + "rewards/margins": 2.147609233856201, + "rewards/rejected": -1.629241943359375, + "step": 16908 + }, + { + "epoch": 0.98, + "learning_rate": 6.71492128043849e-11, + "logits/chosen": -2.012326240539551, + "logits/rejected": -2.010850429534912, + "logps/chosen": -0.00010525840480113402, + "logps/rejected": -151.4115753173828, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.503230689399061e-06, + "rewards/margins": 4.511635780334473, + "rewards/rejected": -4.511638164520264, + "step": 16909 + }, + { + "epoch": 0.98, + "learning_rate": 6.66618511643957e-11, + "logits/chosen": -1.9857652187347412, + "logits/rejected": -1.9818631410598755, + "logps/chosen": -15.271418571472168, + "logps/rejected": -156.1282196044922, + "loss": 0.2614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2620368003845215, + "rewards/margins": 3.115887403488159, + "rewards/rejected": -2.8538506031036377, + "step": 16910 + }, + { + "epoch": 0.98, + "learning_rate": 6.61762634008245e-11, + "logits/chosen": -1.8568774461746216, + "logits/rejected": -1.8482197523117065, + "logps/chosen": -24.507373809814453, + "logps/rejected": -210.9364013671875, + "loss": 0.1824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6191987991333008, + "rewards/margins": 3.44040846824646, + "rewards/rejected": -2.821209669113159, + "step": 16911 + }, + { + "epoch": 0.98, + "learning_rate": 6.569244953092967e-11, + "logits/chosen": -1.890390157699585, + "logits/rejected": -1.8701515197753906, + "logps/chosen": -21.986473083496094, + "logps/rejected": -242.77850341796875, + "loss": 0.2456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4422096312046051, + "rewards/margins": 4.78118371963501, + "rewards/rejected": -4.3389739990234375, + "step": 16912 + }, + { + "epoch": 0.98, + "learning_rate": 6.52104095718864e-11, + "logits/chosen": -1.7957950830459595, + "logits/rejected": -1.7927875518798828, + "logps/chosen": -1.243617296218872, + "logps/rejected": -162.9131317138672, + "loss": 0.3578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031184827908873558, + "rewards/margins": 2.2095205783843994, + "rewards/rejected": -2.178335666656494, + "step": 16913 + }, + { + "epoch": 0.98, + "learning_rate": 6.47301435408365e-11, + "logits/chosen": -1.8058274984359741, + "logits/rejected": -1.8044192790985107, + "logps/chosen": -73.29701232910156, + "logps/rejected": -223.2229766845703, + "loss": 0.4024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2436821013689041, + "rewards/margins": 1.955996036529541, + "rewards/rejected": -2.1996781826019287, + "step": 16914 + }, + { + "epoch": 0.98, + "learning_rate": 6.425165145482747e-11, + "logits/chosen": -1.7602405548095703, + "logits/rejected": -1.7580525875091553, + "logps/chosen": -0.00026498385705053806, + "logps/rejected": -478.1031799316406, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.090245394967496e-06, + "rewards/margins": 9.002903938293457, + "rewards/rejected": -9.002908706665039, + "step": 16915 + }, + { + "epoch": 0.98, + "learning_rate": 6.377493333086237e-11, + "logits/chosen": -1.7522895336151123, + "logits/rejected": -1.7458299398422241, + "logps/chosen": -104.82050323486328, + "logps/rejected": -216.32237243652344, + "loss": 0.4801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3416580259799957, + "rewards/margins": 0.34336090087890625, + "rewards/rejected": -0.0017028808360919356, + "step": 16916 + }, + { + "epoch": 0.98, + "learning_rate": 6.329998918587209e-11, + "logits/chosen": -1.868794322013855, + "logits/rejected": -1.8990297317504883, + "logps/chosen": -206.0384521484375, + "logps/rejected": -501.82098388671875, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2902634143829346, + "rewards/margins": 4.3010358810424805, + "rewards/rejected": -2.010772705078125, + "step": 16917 + }, + { + "epoch": 0.98, + "learning_rate": 6.282681903673758e-11, + "logits/chosen": -1.840950608253479, + "logits/rejected": -1.7978895902633667, + "logps/chosen": -338.8556213378906, + "logps/rejected": -588.239013671875, + "loss": 0.104, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3585175275802612, + "rewards/margins": 6.660477161407471, + "rewards/rejected": -5.30195951461792, + "step": 16918 + }, + { + "epoch": 0.98, + "learning_rate": 6.235542290026208e-11, + "logits/chosen": -1.860904335975647, + "logits/rejected": -1.8598257303237915, + "logps/chosen": -13.543246269226074, + "logps/rejected": -192.19845581054688, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47696447372436523, + "rewards/margins": 3.570915937423706, + "rewards/rejected": -3.093951463699341, + "step": 16919 + }, + { + "epoch": 0.98, + "learning_rate": 6.188580079319327e-11, + "logits/chosen": -1.8662713766098022, + "logits/rejected": -1.8674107789993286, + "logps/chosen": -3.25980806350708, + "logps/rejected": -163.67282104492188, + "loss": 0.6513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014430880546569824, + "rewards/margins": 0.07017123699188232, + "rewards/rejected": -0.0557403564453125, + "step": 16920 + }, + { + "epoch": 0.98, + "learning_rate": 6.141795273221229e-11, + "logits/chosen": -1.8144868612289429, + "logits/rejected": -1.8186793327331543, + "logps/chosen": -18.90386962890625, + "logps/rejected": -120.34178161621094, + "loss": 0.8449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7264019250869751, + "rewards/margins": 0.08209419250488281, + "rewards/rejected": -0.8084961175918579, + "step": 16921 + }, + { + "epoch": 0.98, + "learning_rate": 6.09518787339447e-11, + "logits/chosen": -1.9605188369750977, + "logits/rejected": -1.959168791770935, + "logps/chosen": -0.7600847482681274, + "logps/rejected": -44.23991394042969, + "loss": 0.6523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029182160273194313, + "rewards/margins": 0.15740306675434113, + "rewards/rejected": -0.1865852326154709, + "step": 16922 + }, + { + "epoch": 0.98, + "learning_rate": 6.048757881494392e-11, + "logits/chosen": -1.9090183973312378, + "logits/rejected": -1.9036331176757812, + "logps/chosen": -49.28782272338867, + "logps/rejected": -364.1295471191406, + "loss": 0.2118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25128671526908875, + "rewards/margins": 6.066972732543945, + "rewards/rejected": -5.815686225891113, + "step": 16923 + }, + { + "epoch": 0.98, + "learning_rate": 6.002505299170791e-11, + "logits/chosen": -1.7425838708877563, + "logits/rejected": -1.7220615148544312, + "logps/chosen": -230.975341796875, + "logps/rejected": -496.556640625, + "loss": 0.0763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.939900279045105, + "rewards/margins": 3.0468950271606445, + "rewards/rejected": -1.10699462890625, + "step": 16924 + }, + { + "epoch": 0.98, + "learning_rate": 5.956430128066236e-11, + "logits/chosen": -1.5649468898773193, + "logits/rejected": -1.5693042278289795, + "logps/chosen": -214.88917541503906, + "logps/rejected": -342.14471435546875, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0731369256973267, + "rewards/margins": 0.15971529483795166, + "rewards/rejected": 0.913421630859375, + "step": 16925 + }, + { + "epoch": 0.98, + "learning_rate": 5.910532369817755e-11, + "logits/chosen": -1.8066489696502686, + "logits/rejected": -1.8144481182098389, + "logps/chosen": -132.52780151367188, + "logps/rejected": -257.0374755859375, + "loss": 0.0619, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1712234020233154, + "rewards/margins": 2.4334654808044434, + "rewards/rejected": -0.2622421383857727, + "step": 16926 + }, + { + "epoch": 0.99, + "learning_rate": 5.86481202605571e-11, + "logits/chosen": -1.8300895690917969, + "logits/rejected": -1.8072893619537354, + "logps/chosen": -243.70530700683594, + "logps/rejected": -338.447265625, + "loss": 0.3794, + "rewards/accuracies": 0.0, + "rewards/chosen": 2.038325548171997, + "rewards/margins": -0.05853128433227539, + "rewards/rejected": 2.0968568325042725, + "step": 16927 + }, + { + "epoch": 0.99, + "learning_rate": 5.819269098404911e-11, + "logits/chosen": -1.7470883131027222, + "logits/rejected": -1.7334359884262085, + "logps/chosen": -9.500825399300084e-05, + "logps/rejected": -394.9765319824219, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4199275685532484e-06, + "rewards/margins": 11.455951690673828, + "rewards/rejected": -11.455954551696777, + "step": 16928 + }, + { + "epoch": 0.99, + "learning_rate": 5.7739035884823984e-11, + "logits/chosen": -2.0366532802581787, + "logits/rejected": -2.034792184829712, + "logps/chosen": -45.66523361206055, + "logps/rejected": -304.6088562011719, + "loss": 0.1504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7880436182022095, + "rewards/margins": 3.5208592414855957, + "rewards/rejected": -2.7328155040740967, + "step": 16929 + }, + { + "epoch": 0.99, + "learning_rate": 5.728715497900771e-11, + "logits/chosen": -2.0002918243408203, + "logits/rejected": -1.99955415725708, + "logps/chosen": -1.6658666133880615, + "logps/rejected": -142.99234008789062, + "loss": 0.6436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012512469664216042, + "rewards/margins": 0.03587215021252632, + "rewards/rejected": -0.02335968054831028, + "step": 16930 + }, + { + "epoch": 0.99, + "learning_rate": 5.683704828264302e-11, + "logits/chosen": -2.1487202644348145, + "logits/rejected": -2.147365093231201, + "logps/chosen": -8.185306549072266, + "logps/rejected": -134.92929077148438, + "loss": 0.2532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2891322076320648, + "rewards/margins": 2.4638254642486572, + "rewards/rejected": -2.1746933460235596, + "step": 16931 + }, + { + "epoch": 0.99, + "learning_rate": 5.638871581172821e-11, + "logits/chosen": -1.942710518836975, + "logits/rejected": -1.930418610572815, + "logps/chosen": -22.046823501586914, + "logps/rejected": -114.08203125, + "loss": 0.4893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4184015393257141, + "rewards/margins": 0.3784761428833008, + "rewards/rejected": 0.039925385266542435, + "step": 16932 + }, + { + "epoch": 0.99, + "learning_rate": 5.59421575821839e-11, + "logits/chosen": -1.8055691719055176, + "logits/rejected": -1.807551622390747, + "logps/chosen": -42.926326751708984, + "logps/rejected": -94.85860443115234, + "loss": 0.4731, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4625408351421356, + "rewards/margins": 0.4351772367954254, + "rewards/rejected": 0.02736358717083931, + "step": 16933 + }, + { + "epoch": 0.99, + "learning_rate": 5.5497373609880716e-11, + "logits/chosen": -1.9420092105865479, + "logits/rejected": -1.9352220296859741, + "logps/chosen": -46.81330108642578, + "logps/rejected": -218.49993896484375, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8872520923614502, + "rewards/margins": 4.141031742095947, + "rewards/rejected": -2.253779649734497, + "step": 16934 + }, + { + "epoch": 0.99, + "learning_rate": 5.505436391061158e-11, + "logits/chosen": -1.8632187843322754, + "logits/rejected": -1.8589763641357422, + "logps/chosen": -367.2535400390625, + "logps/rejected": -449.6783447265625, + "loss": 0.2087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.795867919921875, + "rewards/margins": 0.9772888422012329, + "rewards/rejected": -0.18142090737819672, + "step": 16935 + }, + { + "epoch": 0.99, + "learning_rate": 5.4613128500119456e-11, + "logits/chosen": -1.9686650037765503, + "logits/rejected": -1.9614863395690918, + "logps/chosen": -27.088096618652344, + "logps/rejected": -236.27769470214844, + "loss": 0.3023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0001314163237111643, + "rewards/margins": 3.3525164127349854, + "rewards/rejected": -3.3523850440979004, + "step": 16936 + }, + { + "epoch": 0.99, + "learning_rate": 5.417366739408069e-11, + "logits/chosen": -1.9882045984268188, + "logits/rejected": -1.989543080329895, + "logps/chosen": -18.643850326538086, + "logps/rejected": -223.73196411132812, + "loss": 0.3811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021021461114287376, + "rewards/margins": 1.9211221933364868, + "rewards/rejected": -1.9001007080078125, + "step": 16937 + }, + { + "epoch": 0.99, + "learning_rate": 5.3735980608099474e-11, + "logits/chosen": -1.9112917184829712, + "logits/rejected": -1.9176225662231445, + "logps/chosen": -33.88578796386719, + "logps/rejected": -238.21054077148438, + "loss": 0.2391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15176354348659515, + "rewards/margins": 4.589709281921387, + "rewards/rejected": -4.43794584274292, + "step": 16938 + }, + { + "epoch": 0.99, + "learning_rate": 5.3300068157735575e-11, + "logits/chosen": -1.7964825630187988, + "logits/rejected": -1.782353401184082, + "logps/chosen": -195.43502807617188, + "logps/rejected": -331.56561279296875, + "loss": 0.1635, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.66253662109375, + "rewards/margins": 1.331756591796875, + "rewards/rejected": 0.330780029296875, + "step": 16939 + }, + { + "epoch": 0.99, + "learning_rate": 5.286593005846551e-11, + "logits/chosen": -1.874191164970398, + "logits/rejected": -1.9109973907470703, + "logps/chosen": -267.84197998046875, + "logps/rejected": -685.3468017578125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9902100563049316, + "rewards/margins": 9.067785263061523, + "rewards/rejected": -6.07757568359375, + "step": 16940 + }, + { + "epoch": 0.99, + "learning_rate": 5.243356632572138e-11, + "logits/chosen": -1.6840254068374634, + "logits/rejected": -1.6803796291351318, + "logps/chosen": -244.90396118164062, + "logps/rejected": -345.1897277832031, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7776123285293579, + "rewards/margins": 0.7067718505859375, + "rewards/rejected": 0.07084045559167862, + "step": 16941 + }, + { + "epoch": 0.99, + "learning_rate": 5.200297697485201e-11, + "logits/chosen": -1.901883840560913, + "logits/rejected": -1.8905363082885742, + "logps/chosen": -0.0007769085932523012, + "logps/rejected": -127.70751953125, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.9651211409363896e-05, + "rewards/margins": 3.396411895751953, + "rewards/rejected": -3.3963723182678223, + "step": 16942 + }, + { + "epoch": 0.99, + "learning_rate": 5.1574162021156276e-11, + "logits/chosen": -1.824072241783142, + "logits/rejected": -1.9558916091918945, + "logps/chosen": -179.07583618164062, + "logps/rejected": -181.27127075195312, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5853790044784546, + "rewards/margins": 2.276959180831909, + "rewards/rejected": -0.6915802359580994, + "step": 16943 + }, + { + "epoch": 0.99, + "learning_rate": 5.114712147987754e-11, + "logits/chosen": -2.008742332458496, + "logits/rejected": -2.008014678955078, + "logps/chosen": -87.28231048583984, + "logps/rejected": -212.775634765625, + "loss": 0.1788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7480812072753906, + "rewards/margins": 2.5686440467834473, + "rewards/rejected": -1.820562720298767, + "step": 16944 + }, + { + "epoch": 0.99, + "learning_rate": 5.0721855366175905e-11, + "logits/chosen": -1.9971073865890503, + "logits/rejected": -2.016347646713257, + "logps/chosen": -256.1015625, + "logps/rejected": -309.3326416015625, + "loss": 0.1063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.918878197669983, + "rewards/margins": 1.8162444829940796, + "rewards/rejected": 0.10263366997241974, + "step": 16945 + }, + { + "epoch": 0.99, + "learning_rate": 5.0298363695161497e-11, + "logits/chosen": -1.8765097856521606, + "logits/rejected": -1.8804689645767212, + "logps/chosen": -0.04863356053829193, + "logps/rejected": -168.4833984375, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034430832602083683, + "rewards/margins": 5.254713535308838, + "rewards/rejected": -5.258156776428223, + "step": 16946 + }, + { + "epoch": 0.99, + "learning_rate": 4.9876646481877836e-11, + "logits/chosen": -2.057546854019165, + "logits/rejected": -2.048142671585083, + "logps/chosen": -3.8330607414245605, + "logps/rejected": -141.09356689453125, + "loss": 0.4693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07492721080780029, + "rewards/margins": 1.4634959697723389, + "rewards/rejected": -1.5384231805801392, + "step": 16947 + }, + { + "epoch": 0.99, + "learning_rate": 4.9456703741307393e-11, + "logits/chosen": -1.8157439231872559, + "logits/rejected": -1.8079750537872314, + "logps/chosen": -30.796415328979492, + "logps/rejected": -177.91336059570312, + "loss": 0.2117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7291380167007446, + "rewards/margins": 3.2833266258239746, + "rewards/rejected": -2.5541884899139404, + "step": 16948 + }, + { + "epoch": 0.99, + "learning_rate": 4.903853548837711e-11, + "logits/chosen": -2.199453353881836, + "logits/rejected": -2.1842942237854004, + "logps/chosen": -28.97684097290039, + "logps/rejected": -71.80071258544922, + "loss": 0.2488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6639995574951172, + "rewards/margins": 1.4573017358779907, + "rewards/rejected": -0.7933021783828735, + "step": 16949 + }, + { + "epoch": 0.99, + "learning_rate": 4.8622141737925114e-11, + "logits/chosen": -1.9351719617843628, + "logits/rejected": -1.945748209953308, + "logps/chosen": -129.94000244140625, + "logps/rejected": -193.32176208496094, + "loss": 0.3281, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3415985107421875, + "rewards/margins": 0.6073028445243835, + "rewards/rejected": 0.734295666217804, + "step": 16950 + }, + { + "epoch": 0.99, + "learning_rate": 4.820752250476179e-11, + "logits/chosen": -2.107787847518921, + "logits/rejected": -2.091235876083374, + "logps/chosen": -0.058163825422525406, + "logps/rejected": -151.97158813476562, + "loss": 0.4514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010388459777459502, + "rewards/margins": 1.3044216632843018, + "rewards/rejected": -1.3033828735351562, + "step": 16951 + }, + { + "epoch": 0.99, + "learning_rate": 4.779467780360314e-11, + "logits/chosen": -1.8836435079574585, + "logits/rejected": -1.8834214210510254, + "logps/chosen": -24.556676864624023, + "logps/rejected": -131.07191467285156, + "loss": 0.4359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0930355116724968, + "rewards/margins": 1.318002700805664, + "rewards/rejected": -1.2249672412872314, + "step": 16952 + }, + { + "epoch": 0.99, + "learning_rate": 4.738360764912075e-11, + "logits/chosen": -1.772777795791626, + "logits/rejected": -1.803295612335205, + "logps/chosen": -229.116455078125, + "logps/rejected": -347.06414794921875, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.92047119140625, + "rewards/margins": 4.747564792633057, + "rewards/rejected": -2.8270936012268066, + "step": 16953 + }, + { + "epoch": 0.99, + "learning_rate": 4.697431205591407e-11, + "logits/chosen": -1.8183674812316895, + "logits/rejected": -1.7825485467910767, + "logps/chosen": -253.6817626953125, + "logps/rejected": -568.68505859375, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1943421363830566, + "rewards/margins": 3.676990032196045, + "rewards/rejected": -1.4826477766036987, + "step": 16954 + }, + { + "epoch": 0.99, + "learning_rate": 4.656679103853256e-11, + "logits/chosen": -1.8762940168380737, + "logits/rejected": -1.8548561334609985, + "logps/chosen": -0.004724636673927307, + "logps/rejected": -192.88790893554688, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034976276219822466, + "rewards/margins": 3.645693778991699, + "rewards/rejected": -3.646043539047241, + "step": 16955 + }, + { + "epoch": 0.99, + "learning_rate": 4.6161044611442435e-11, + "logits/chosen": -1.8077857494354248, + "logits/rejected": -1.816739797592163, + "logps/chosen": -124.02546691894531, + "logps/rejected": -218.68939208984375, + "loss": 0.1491, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3863372802734375, + "rewards/margins": 1.4303710460662842, + "rewards/rejected": 0.9559661746025085, + "step": 16956 + }, + { + "epoch": 0.99, + "learning_rate": 4.575707278906549e-11, + "logits/chosen": -1.9363194704055786, + "logits/rejected": -1.9337997436523438, + "logps/chosen": -41.492576599121094, + "logps/rejected": -112.58515930175781, + "loss": 0.6766, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.3471687436103821, + "rewards/margins": -0.44994431734085083, + "rewards/rejected": 0.7971130609512329, + "step": 16957 + }, + { + "epoch": 0.99, + "learning_rate": 4.535487558575135e-11, + "logits/chosen": -1.9475988149642944, + "logits/rejected": -1.9636037349700928, + "logps/chosen": -183.26187133789062, + "logps/rejected": -555.0733642578125, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3060548305511475, + "rewards/margins": 8.65393352508545, + "rewards/rejected": -6.347878932952881, + "step": 16958 + }, + { + "epoch": 0.99, + "learning_rate": 4.495445301578305e-11, + "logits/chosen": -1.911233901977539, + "logits/rejected": -1.9265198707580566, + "logps/chosen": -120.13976287841797, + "logps/rejected": -287.08544921875, + "loss": 0.1343, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4316643476486206, + "rewards/margins": 1.6666252613067627, + "rewards/rejected": -0.23496094346046448, + "step": 16959 + }, + { + "epoch": 0.99, + "learning_rate": 4.455580509339363e-11, + "logits/chosen": -1.9393484592437744, + "logits/rejected": -1.929543137550354, + "logps/chosen": -0.3269224762916565, + "logps/rejected": -194.15438842773438, + "loss": 0.4435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019016861915588379, + "rewards/margins": 1.100362777709961, + "rewards/rejected": -1.102264404296875, + "step": 16960 + }, + { + "epoch": 0.99, + "learning_rate": 4.415893183273289e-11, + "logits/chosen": -1.9103788137435913, + "logits/rejected": -1.9028922319412231, + "logps/chosen": -61.9690055847168, + "logps/rejected": -125.5478744506836, + "loss": 0.1454, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6009129285812378, + "rewards/margins": 1.5293408632278442, + "rewards/rejected": 0.07157211750745773, + "step": 16961 + }, + { + "epoch": 0.99, + "learning_rate": 4.3763833247911776e-11, + "logits/chosen": -1.8641059398651123, + "logits/rejected": -1.8568694591522217, + "logps/chosen": -89.82600402832031, + "logps/rejected": -469.91888427734375, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2995712757110596, + "rewards/margins": 10.579093933105469, + "rewards/rejected": -8.279522895812988, + "step": 16962 + }, + { + "epoch": 0.99, + "learning_rate": 4.337050935296349e-11, + "logits/chosen": -1.841279149055481, + "logits/rejected": -1.8418269157409668, + "logps/chosen": -197.71759033203125, + "logps/rejected": -367.46075439453125, + "loss": 0.1216, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2881715297698975, + "rewards/margins": 1.4182069301605225, + "rewards/rejected": 1.869964599609375, + "step": 16963 + }, + { + "epoch": 0.99, + "learning_rate": 4.29789601618602e-11, + "logits/chosen": -1.9712518453598022, + "logits/rejected": -1.9637000560760498, + "logps/chosen": -62.738563537597656, + "logps/rejected": -239.32752990722656, + "loss": 0.3154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010239792056381702, + "rewards/margins": 1.578206181526184, + "rewards/rejected": -1.588446021080017, + "step": 16964 + }, + { + "epoch": 0.99, + "learning_rate": 4.2589185688507447e-11, + "logits/chosen": -1.9802749156951904, + "logits/rejected": -1.9696398973464966, + "logps/chosen": -0.3762703239917755, + "logps/rejected": -155.14056396484375, + "loss": 0.3661, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004192751832306385, + "rewards/margins": 2.434553861618042, + "rewards/rejected": -2.438746690750122, + "step": 16965 + }, + { + "epoch": 0.99, + "learning_rate": 4.2201185946749705e-11, + "logits/chosen": -1.9250736236572266, + "logits/rejected": -1.9209668636322021, + "logps/chosen": -0.007645518518984318, + "logps/rejected": -106.32736206054688, + "loss": 0.6647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006619260529987514, + "rewards/margins": 0.10605499893426895, + "rewards/rejected": -0.10671692341566086, + "step": 16966 + }, + { + "epoch": 0.99, + "learning_rate": 4.1814960950387054e-11, + "logits/chosen": -1.833003282546997, + "logits/rejected": -1.832402229309082, + "logps/chosen": -197.42889404296875, + "logps/rejected": -308.3946533203125, + "loss": 0.39, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9424256086349487, + "rewards/margins": 0.06733095645904541, + "rewards/rejected": 1.8750946521759033, + "step": 16967 + }, + { + "epoch": 0.99, + "learning_rate": 4.143051071311965e-11, + "logits/chosen": -1.8793132305145264, + "logits/rejected": -1.873258113861084, + "logps/chosen": -13.466902732849121, + "logps/rejected": -344.9578857421875, + "loss": 0.2526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21130190789699554, + "rewards/margins": 6.043049335479736, + "rewards/rejected": -5.831747531890869, + "step": 16968 + }, + { + "epoch": 0.99, + "learning_rate": 4.104783524861988e-11, + "logits/chosen": -1.8192195892333984, + "logits/rejected": -1.8218011856079102, + "logps/chosen": -11.921002388000488, + "logps/rejected": -223.8193817138672, + "loss": 0.3472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031118394690565765, + "rewards/margins": 5.987278938293457, + "rewards/rejected": -5.987590312957764, + "step": 16969 + }, + { + "epoch": 0.99, + "learning_rate": 4.066693457047132e-11, + "logits/chosen": -1.876801609992981, + "logits/rejected": -1.8790724277496338, + "logps/chosen": -167.64541625976562, + "logps/rejected": -349.11383056640625, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9081268310546875, + "rewards/margins": 3.6250152587890625, + "rewards/rejected": -1.716888427734375, + "step": 16970 + }, + { + "epoch": 0.99, + "learning_rate": 4.0287808692213157e-11, + "logits/chosen": -1.9300601482391357, + "logits/rejected": -1.9068676233291626, + "logps/chosen": -26.723100662231445, + "logps/rejected": -282.2669677734375, + "loss": 0.2182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5264173746109009, + "rewards/margins": 2.834092617034912, + "rewards/rejected": -2.3076751232147217, + "step": 16971 + }, + { + "epoch": 0.99, + "learning_rate": 3.991045762731238e-11, + "logits/chosen": -1.7991892099380493, + "logits/rejected": -1.8001608848571777, + "logps/chosen": -19.500402450561523, + "logps/rejected": -172.80384826660156, + "loss": 0.2674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29247361421585083, + "rewards/margins": 2.5200371742248535, + "rewards/rejected": -2.2275636196136475, + "step": 16972 + }, + { + "epoch": 0.99, + "learning_rate": 3.953488138917493e-11, + "logits/chosen": -1.9051438570022583, + "logits/rejected": -1.90518319606781, + "logps/chosen": -184.3883056640625, + "logps/rejected": -446.02825927734375, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.745501756668091, + "rewards/margins": 5.224328994750977, + "rewards/rejected": -2.4788269996643066, + "step": 16973 + }, + { + "epoch": 0.99, + "learning_rate": 3.916107999114015e-11, + "logits/chosen": -1.759089469909668, + "logits/rejected": -1.7454653978347778, + "logps/chosen": -262.67791748046875, + "logps/rejected": -651.2662353515625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0819642543792725, + "rewards/margins": 12.455689430236816, + "rewards/rejected": -9.373724937438965, + "step": 16974 + }, + { + "epoch": 0.99, + "learning_rate": 3.878905344648631e-11, + "logits/chosen": -2.0299649238586426, + "logits/rejected": -2.0226049423217773, + "logps/chosen": -3.111096143722534, + "logps/rejected": -190.10586547851562, + "loss": 0.3939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18202097713947296, + "rewards/margins": 1.401604175567627, + "rewards/rejected": -1.2195831537246704, + "step": 16975 + }, + { + "epoch": 0.99, + "learning_rate": 3.8418801768436146e-11, + "logits/chosen": -1.828631043434143, + "logits/rejected": -1.8303332328796387, + "logps/chosen": -241.54763793945312, + "logps/rejected": -420.4900817871094, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0938966274261475, + "rewards/margins": 5.296741008758545, + "rewards/rejected": -3.2028443813323975, + "step": 16976 + }, + { + "epoch": 0.99, + "learning_rate": 3.8050324970134716e-11, + "logits/chosen": -1.9074087142944336, + "logits/rejected": -1.904197335243225, + "logps/chosen": -1.8428233861923218, + "logps/rejected": -92.32838439941406, + "loss": 0.7888, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.019447743892669678, + "rewards/margins": -0.36613574624061584, + "rewards/rejected": 0.3855834901332855, + "step": 16977 + }, + { + "epoch": 0.99, + "learning_rate": 3.7683623064677094e-11, + "logits/chosen": -1.7856051921844482, + "logits/rejected": -1.7759302854537964, + "logps/chosen": -11.781767845153809, + "logps/rejected": -192.24948120117188, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3432539105415344, + "rewards/margins": 4.726478576660156, + "rewards/rejected": -4.3832244873046875, + "step": 16978 + }, + { + "epoch": 0.99, + "learning_rate": 3.731869606509175e-11, + "logits/chosen": -1.7879300117492676, + "logits/rejected": -1.7591350078582764, + "logps/chosen": -222.0994873046875, + "logps/rejected": -379.19488525390625, + "loss": 0.0922, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.57145094871521, + "rewards/margins": 2.0145204067230225, + "rewards/rejected": 0.5569305419921875, + "step": 16979 + }, + { + "epoch": 0.99, + "learning_rate": 3.6955543984334983e-11, + "logits/chosen": -2.024116039276123, + "logits/rejected": -2.018872022628784, + "logps/chosen": -7.3202314376831055, + "logps/rejected": -82.61656188964844, + "loss": 0.4077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2120082825422287, + "rewards/margins": 0.7992610931396484, + "rewards/rejected": -0.5872527956962585, + "step": 16980 + }, + { + "epoch": 0.99, + "learning_rate": 3.659416683531869e-11, + "logits/chosen": -1.8340953588485718, + "logits/rejected": -1.8367162942886353, + "logps/chosen": -117.6741714477539, + "logps/rejected": -341.1539306640625, + "loss": 0.0878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1674308776855469, + "rewards/margins": 2.826174259185791, + "rewards/rejected": -1.6587432622909546, + "step": 16981 + }, + { + "epoch": 0.99, + "learning_rate": 3.623456463087149e-11, + "logits/chosen": -1.9531898498535156, + "logits/rejected": -1.9568960666656494, + "logps/chosen": -1.3827238082885742, + "logps/rejected": -226.1377716064453, + "loss": 0.2983, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31296226382255554, + "rewards/margins": 4.152596950531006, + "rewards/rejected": -3.839634656906128, + "step": 16982 + }, + { + "epoch": 0.99, + "learning_rate": 3.587673738377206e-11, + "logits/chosen": -2.0920679569244385, + "logits/rejected": -2.076735496520996, + "logps/chosen": -90.85350799560547, + "logps/rejected": -232.88522338867188, + "loss": 0.2029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2968361377716064, + "rewards/margins": 1.5799553394317627, + "rewards/rejected": -0.28311920166015625, + "step": 16983 + }, + { + "epoch": 0.99, + "learning_rate": 3.552068510673245e-11, + "logits/chosen": -2.0363121032714844, + "logits/rejected": -2.0316689014434814, + "logps/chosen": -2.239551305770874, + "logps/rejected": -239.8147430419922, + "loss": 0.3848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14793305099010468, + "rewards/margins": 5.641140460968018, + "rewards/rejected": -5.789073467254639, + "step": 16984 + }, + { + "epoch": 0.99, + "learning_rate": 3.516640781240365e-11, + "logits/chosen": -1.7500776052474976, + "logits/rejected": -1.7504160404205322, + "logps/chosen": -230.76324462890625, + "logps/rejected": -481.8330078125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.856771945953369, + "rewards/margins": 7.938238620758057, + "rewards/rejected": -4.0814666748046875, + "step": 16985 + }, + { + "epoch": 0.99, + "learning_rate": 3.4813905513364495e-11, + "logits/chosen": -1.8362630605697632, + "logits/rejected": -1.8600635528564453, + "logps/chosen": -221.06085205078125, + "logps/rejected": -486.7048034667969, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5886476039886475, + "rewards/margins": 7.524624824523926, + "rewards/rejected": -4.935977458953857, + "step": 16986 + }, + { + "epoch": 0.99, + "learning_rate": 3.446317822214384e-11, + "logits/chosen": -1.633441686630249, + "logits/rejected": -1.6378809213638306, + "logps/chosen": -38.92924499511719, + "logps/rejected": -204.37127685546875, + "loss": 0.2574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4327377378940582, + "rewards/margins": 2.958761692047119, + "rewards/rejected": -2.5260238647460938, + "step": 16987 + }, + { + "epoch": 0.99, + "learning_rate": 3.4114225951198395e-11, + "logits/chosen": -1.966153621673584, + "logits/rejected": -1.9911422729492188, + "logps/chosen": -114.58761596679688, + "logps/rejected": -285.0462646484375, + "loss": 0.1348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8480934500694275, + "rewards/margins": 1.5755013227462769, + "rewards/rejected": -0.7274078726768494, + "step": 16988 + }, + { + "epoch": 0.99, + "learning_rate": 3.3767048712923795e-11, + "logits/chosen": -1.5185465812683105, + "logits/rejected": -1.5218994617462158, + "logps/chosen": -47.82883834838867, + "logps/rejected": -122.64006805419922, + "loss": 0.3945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5370609164237976, + "rewards/margins": 0.6036716103553772, + "rewards/rejected": -0.06661071628332138, + "step": 16989 + }, + { + "epoch": 0.99, + "learning_rate": 3.3421646519660173e-11, + "logits/chosen": -1.844873309135437, + "logits/rejected": -1.8391902446746826, + "logps/chosen": -180.4173583984375, + "logps/rejected": -320.2474365234375, + "loss": 0.1147, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5670913457870483, + "rewards/margins": 1.6177383661270142, + "rewards/rejected": -0.05064697191119194, + "step": 16990 + }, + { + "epoch": 0.99, + "learning_rate": 3.307801938366439e-11, + "logits/chosen": -1.8213821649551392, + "logits/rejected": -1.8016144037246704, + "logps/chosen": -207.6002960205078, + "logps/rejected": -338.1204833984375, + "loss": 0.1097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7766677737236023, + "rewards/margins": 2.4212632179260254, + "rewards/rejected": -1.6445953845977783, + "step": 16991 + }, + { + "epoch": 0.99, + "learning_rate": 3.273616731716e-11, + "logits/chosen": -1.96851646900177, + "logits/rejected": -1.971064567565918, + "logps/chosen": -0.0017497208900749683, + "logps/rejected": -196.03369140625, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.000153992252307944, + "rewards/margins": 5.567140102386475, + "rewards/rejected": -5.566986083984375, + "step": 16992 + }, + { + "epoch": 0.99, + "learning_rate": 3.2396090332276194e-11, + "logits/chosen": -1.9244431257247925, + "logits/rejected": -1.9144810438156128, + "logps/chosen": -6.7514543533325195, + "logps/rejected": -329.95904541015625, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0499722957611084, + "rewards/margins": 7.389111518859863, + "rewards/rejected": -7.339138984680176, + "step": 16993 + }, + { + "epoch": 0.99, + "learning_rate": 3.2057788441103296e-11, + "logits/chosen": -2.0770492553710938, + "logits/rejected": -2.0763332843780518, + "logps/chosen": -13.682328224182129, + "logps/rejected": -159.04855346679688, + "loss": 0.4125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02355203591287136, + "rewards/margins": 1.5846161842346191, + "rewards/rejected": -1.5610641241073608, + "step": 16994 + }, + { + "epoch": 0.99, + "learning_rate": 3.172126165565947e-11, + "logits/chosen": -1.916881799697876, + "logits/rejected": -1.917318344116211, + "logps/chosen": -21.104841232299805, + "logps/rejected": -182.83544921875, + "loss": 0.2498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3891967833042145, + "rewards/margins": 3.4997103214263916, + "rewards/rejected": -3.11051344871521, + "step": 16995 + }, + { + "epoch": 0.99, + "learning_rate": 3.138650998789627e-11, + "logits/chosen": -1.8875776529312134, + "logits/rejected": -1.8256663084030151, + "logps/chosen": -183.3196258544922, + "logps/rejected": -563.1390380859375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.281877279281616, + "rewards/margins": 4.875651836395264, + "rewards/rejected": -2.5937745571136475, + "step": 16996 + }, + { + "epoch": 0.99, + "learning_rate": 3.105353344970973e-11, + "logits/chosen": -1.8383318185806274, + "logits/rejected": -1.8135977983474731, + "logps/chosen": -183.56610107421875, + "logps/rejected": -347.81463623046875, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5098297595977783, + "rewards/margins": 2.4150726795196533, + "rewards/rejected": 0.094757080078125, + "step": 16997 + }, + { + "epoch": 0.99, + "learning_rate": 3.072233205292374e-11, + "logits/chosen": -2.0005903244018555, + "logits/rejected": -1.9923568964004517, + "logps/chosen": -75.38475036621094, + "logps/rejected": -316.9504089355469, + "loss": 0.0642, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9147529602050781, + "rewards/margins": 3.4038186073303223, + "rewards/rejected": -1.4890655279159546, + "step": 16998 + }, + { + "epoch": 0.99, + "learning_rate": 3.039290580930664e-11, + "logits/chosen": -1.6324480772018433, + "logits/rejected": -1.6259263753890991, + "logps/chosen": -9.926950454711914, + "logps/rejected": -134.92474365234375, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22404222190380096, + "rewards/margins": 3.865715503692627, + "rewards/rejected": -3.6416733264923096, + "step": 16999 + }, + { + "epoch": 0.99, + "learning_rate": 3.006525473056021e-11, + "logits/chosen": -1.654361605644226, + "logits/rejected": -1.6553114652633667, + "logps/chosen": -13.847725868225098, + "logps/rejected": -92.74310302734375, + "loss": 0.5295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.284595787525177, + "rewards/margins": 0.405231773853302, + "rewards/rejected": -0.120635986328125, + "step": 17000 + }, + { + "epoch": 0.99, + "learning_rate": 2.973937882833066e-11, + "logits/chosen": -1.9572700262069702, + "logits/rejected": -1.948540449142456, + "logps/chosen": -0.3144870102405548, + "logps/rejected": -85.29879760742188, + "loss": 0.476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007262718863785267, + "rewards/margins": 1.163141131401062, + "rewards/rejected": -1.1558784246444702, + "step": 17001 + }, + { + "epoch": 0.99, + "learning_rate": 2.941527811418099e-11, + "logits/chosen": -1.6341580152511597, + "logits/rejected": -1.6147348880767822, + "logps/chosen": -282.4822998046875, + "logps/rejected": -433.67236328125, + "loss": 0.2155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4114502668380737, + "rewards/margins": 1.1980103254318237, + "rewards/rejected": 0.21343994140625, + "step": 17002 + }, + { + "epoch": 0.99, + "learning_rate": 2.909295259964084e-11, + "logits/chosen": -2.0186595916748047, + "logits/rejected": -2.026041030883789, + "logps/chosen": -12.191276550292969, + "logps/rejected": -128.53759765625, + "loss": 0.3725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08361158519983292, + "rewards/margins": 1.4759989976882935, + "rewards/rejected": -1.3923873901367188, + "step": 17003 + }, + { + "epoch": 0.99, + "learning_rate": 2.8772402296145528e-11, + "logits/chosen": -1.9585579633712769, + "logits/rejected": -1.9530683755874634, + "logps/chosen": -5.776119709014893, + "logps/rejected": -154.48385620117188, + "loss": 0.4496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03930783271789551, + "rewards/margins": 1.1005313396453857, + "rewards/rejected": -1.1398391723632812, + "step": 17004 + }, + { + "epoch": 0.99, + "learning_rate": 2.845362721509703e-11, + "logits/chosen": -1.799458384513855, + "logits/rejected": -1.7959893941879272, + "logps/chosen": -17.499095916748047, + "logps/rejected": -272.6164855957031, + "loss": 0.3953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23601503670215607, + "rewards/margins": 6.517375946044922, + "rewards/rejected": -6.753390789031982, + "step": 17005 + }, + { + "epoch": 0.99, + "learning_rate": 2.813662736780853e-11, + "logits/chosen": -2.0005948543548584, + "logits/rejected": -2.010340929031372, + "logps/chosen": -2.8127171993255615, + "logps/rejected": -157.70584106445312, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1824067384004593, + "rewards/margins": 2.546478271484375, + "rewards/rejected": -2.3640716075897217, + "step": 17006 + }, + { + "epoch": 0.99, + "learning_rate": 2.7821402765548786e-11, + "logits/chosen": -2.191028594970703, + "logits/rejected": -2.190577507019043, + "logps/chosen": -1.2192142009735107, + "logps/rejected": -52.77841567993164, + "loss": 0.4825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08679461479187012, + "rewards/margins": 0.7942599654197693, + "rewards/rejected": -0.7074653506278992, + "step": 17007 + }, + { + "epoch": 0.99, + "learning_rate": 2.7507953419514394e-11, + "logits/chosen": -1.733273983001709, + "logits/rejected": -1.578607201576233, + "logps/chosen": -142.91519165039062, + "logps/rejected": -456.59527587890625, + "loss": 0.124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.644067406654358, + "rewards/margins": 2.137554883956909, + "rewards/rejected": -0.49348756670951843, + "step": 17008 + }, + { + "epoch": 0.99, + "learning_rate": 2.719627934083535e-11, + "logits/chosen": -1.7725166082382202, + "logits/rejected": -1.7718820571899414, + "logps/chosen": -210.831787109375, + "logps/rejected": -475.3858947753906, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7233765125274658, + "rewards/margins": 4.778146743774414, + "rewards/rejected": -3.054769992828369, + "step": 17009 + }, + { + "epoch": 0.99, + "learning_rate": 2.688638054059167e-11, + "logits/chosen": -1.847383975982666, + "logits/rejected": -1.8397341966629028, + "logps/chosen": -233.79104614257812, + "logps/rejected": -416.0931396484375, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.093191623687744, + "rewards/margins": 4.037274360656738, + "rewards/rejected": -0.9440826773643494, + "step": 17010 + }, + { + "epoch": 0.99, + "learning_rate": 2.6578257029785668e-11, + "logits/chosen": -2.0229384899139404, + "logits/rejected": -2.016270637512207, + "logps/chosen": -31.234392166137695, + "logps/rejected": -232.27047729492188, + "loss": 0.2883, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4318174421787262, + "rewards/margins": 2.031341314315796, + "rewards/rejected": -1.599523901939392, + "step": 17011 + }, + { + "epoch": 0.99, + "learning_rate": 2.627190881936969e-11, + "logits/chosen": -1.8515409231185913, + "logits/rejected": -1.8433936834335327, + "logps/chosen": -290.97412109375, + "logps/rejected": -400.47894287109375, + "loss": 0.1451, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.221081495285034, + "rewards/margins": 1.1908080577850342, + "rewards/rejected": 1.0302734375, + "step": 17012 + }, + { + "epoch": 0.99, + "learning_rate": 2.596733592021838e-11, + "logits/chosen": -2.053192615509033, + "logits/rejected": -2.0761051177978516, + "logps/chosen": -176.08566284179688, + "logps/rejected": -276.85125732421875, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4531265497207642, + "rewards/margins": 3.648210048675537, + "rewards/rejected": -2.1950836181640625, + "step": 17013 + }, + { + "epoch": 0.99, + "learning_rate": 2.5664538343161956e-11, + "logits/chosen": -1.9083796739578247, + "logits/rejected": -1.8947558403015137, + "logps/chosen": -284.1585693359375, + "logps/rejected": -422.14801025390625, + "loss": 0.1968, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3689606189727783, + "rewards/margins": 0.8157440423965454, + "rewards/rejected": 1.553216576576233, + "step": 17014 + }, + { + "epoch": 0.99, + "learning_rate": 2.536351609894738e-11, + "logits/chosen": -1.9059727191925049, + "logits/rejected": -1.9136768579483032, + "logps/chosen": -154.44976806640625, + "logps/rejected": -321.55859375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.052067518234253, + "rewards/margins": 5.358757019042969, + "rewards/rejected": -2.306689500808716, + "step": 17015 + }, + { + "epoch": 0.99, + "learning_rate": 2.50642691982772e-11, + "logits/chosen": -1.9056757688522339, + "logits/rejected": -1.9021281003952026, + "logps/chosen": -20.495708465576172, + "logps/rejected": -207.87847900390625, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05235710367560387, + "rewards/margins": 4.505913257598877, + "rewards/rejected": -4.558270454406738, + "step": 17016 + }, + { + "epoch": 0.99, + "learning_rate": 2.476679765177625e-11, + "logits/chosen": -1.8505154848098755, + "logits/rejected": -1.8514610528945923, + "logps/chosen": -26.243806838989258, + "logps/rejected": -142.7020263671875, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4523216187953949, + "rewards/margins": 1.7710366249084473, + "rewards/rejected": -1.31871497631073, + "step": 17017 + }, + { + "epoch": 0.99, + "learning_rate": 2.4471101470013856e-11, + "logits/chosen": -1.6993865966796875, + "logits/rejected": -1.6953437328338623, + "logps/chosen": -45.84497833251953, + "logps/rejected": -256.82562255859375, + "loss": 0.1683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.229343056678772, + "rewards/margins": 1.9028935432434082, + "rewards/rejected": -0.6735504269599915, + "step": 17018 + }, + { + "epoch": 0.99, + "learning_rate": 2.4177180663492725e-11, + "logits/chosen": -1.4889609813690186, + "logits/rejected": -1.480717658996582, + "logps/chosen": -178.10308837890625, + "logps/rejected": -266.1403503417969, + "loss": 0.2702, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5995895862579346, + "rewards/margins": 0.8900619745254517, + "rewards/rejected": 0.7095276117324829, + "step": 17019 + }, + { + "epoch": 0.99, + "learning_rate": 2.3885035242660058e-11, + "logits/chosen": -1.8314034938812256, + "logits/rejected": -1.839267611503601, + "logps/chosen": -280.34423828125, + "logps/rejected": -557.0032958984375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.993487596511841, + "rewards/margins": 7.262237548828125, + "rewards/rejected": -4.268750190734863, + "step": 17020 + }, + { + "epoch": 0.99, + "learning_rate": 2.3594665217885336e-11, + "logits/chosen": -1.89307701587677, + "logits/rejected": -1.8752033710479736, + "logps/chosen": -227.8707275390625, + "logps/rejected": -345.913818359375, + "loss": 0.2082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9009552001953125, + "rewards/margins": 0.9627960324287415, + "rewards/rejected": -0.06184082105755806, + "step": 17021 + }, + { + "epoch": 0.99, + "learning_rate": 2.330607059949363e-11, + "logits/chosen": -1.957433819770813, + "logits/rejected": -1.958907961845398, + "logps/chosen": -7.402696792269126e-05, + "logps/rejected": -111.39718627929688, + "loss": 0.3785, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.213132787204813e-06, + "rewards/margins": 2.5728580951690674, + "rewards/rejected": -2.572849988937378, + "step": 17022 + }, + { + "epoch": 0.99, + "learning_rate": 2.301925139773231e-11, + "logits/chosen": -1.8444929122924805, + "logits/rejected": -1.8405803442001343, + "logps/chosen": -238.388671875, + "logps/rejected": -512.1397705078125, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.638070821762085, + "rewards/margins": 7.8367462158203125, + "rewards/rejected": -5.198675632476807, + "step": 17023 + }, + { + "epoch": 0.99, + "learning_rate": 2.273420762278766e-11, + "logits/chosen": -1.8959532976150513, + "logits/rejected": -1.90272855758667, + "logps/chosen": -109.72377014160156, + "logps/rejected": -275.08319091796875, + "loss": 0.0964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6490745544433594, + "rewards/margins": 3.058281898498535, + "rewards/rejected": -1.4092072248458862, + "step": 17024 + }, + { + "epoch": 0.99, + "learning_rate": 2.2450939284790472e-11, + "logits/chosen": -1.7029304504394531, + "logits/rejected": -1.7134320735931396, + "logps/chosen": -17.786386489868164, + "logps/rejected": -242.11972045898438, + "loss": 0.1953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6376360058784485, + "rewards/margins": 3.241482734680176, + "rewards/rejected": -2.603846788406372, + "step": 17025 + }, + { + "epoch": 0.99, + "learning_rate": 2.2169446393804915e-11, + "logits/chosen": -1.9266188144683838, + "logits/rejected": -1.9096699953079224, + "logps/chosen": -49.71678161621094, + "logps/rejected": -173.62759399414062, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7455295920372009, + "rewards/margins": 1.922595739364624, + "rewards/rejected": -1.1770660877227783, + "step": 17026 + }, + { + "epoch": 0.99, + "learning_rate": 2.1889728959828546e-11, + "logits/chosen": -2.1531195640563965, + "logits/rejected": -2.143214225769043, + "logps/chosen": -0.00028690736507996917, + "logps/rejected": -296.56182861328125, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.462511983798322e-07, + "rewards/margins": 6.773406028747559, + "rewards/rejected": -6.7734055519104, + "step": 17027 + }, + { + "epoch": 0.99, + "learning_rate": 2.1611786992797865e-11, + "logits/chosen": -1.904581904411316, + "logits/rejected": -1.9000898599624634, + "logps/chosen": -0.00028725931770168245, + "logps/rejected": -146.76283264160156, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.516159111110028e-05, + "rewards/margins": 2.4764716625213623, + "rewards/rejected": -2.476496934890747, + "step": 17028 + }, + { + "epoch": 0.99, + "learning_rate": 2.1335620502588302e-11, + "logits/chosen": -1.9633355140686035, + "logits/rejected": -1.9628093242645264, + "logps/chosen": -3.3805413246154785, + "logps/rejected": -34.12470245361328, + "loss": 0.379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14663086831569672, + "rewards/margins": 1.5839309692382812, + "rewards/rejected": -1.4373000860214233, + "step": 17029 + }, + { + "epoch": 0.99, + "learning_rate": 2.106122949900868e-11, + "logits/chosen": -1.874950647354126, + "logits/rejected": -1.8690656423568726, + "logps/chosen": -153.91802978515625, + "logps/rejected": -333.96600341796875, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8920563459396362, + "rewards/margins": 2.9846343994140625, + "rewards/rejected": -1.0925781726837158, + "step": 17030 + }, + { + "epoch": 0.99, + "learning_rate": 2.0788613991806758e-11, + "logits/chosen": -1.844347357749939, + "logits/rejected": -1.8433653116226196, + "logps/chosen": -195.59185791015625, + "logps/rejected": -376.42034912109375, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1604995727539062, + "rewards/margins": 3.4898147583007812, + "rewards/rejected": -1.329315185546875, + "step": 17031 + }, + { + "epoch": 0.99, + "learning_rate": 2.051777399066368e-11, + "logits/chosen": -1.7572996616363525, + "logits/rejected": -1.7589921951293945, + "logps/chosen": -78.97465515136719, + "logps/rejected": -398.5914611816406, + "loss": 0.2437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7384209036827087, + "rewards/margins": 2.7138450145721436, + "rewards/rejected": -1.9754241704940796, + "step": 17032 + }, + { + "epoch": 0.99, + "learning_rate": 2.0248709505210627e-11, + "logits/chosen": -1.7851351499557495, + "logits/rejected": -1.754045009613037, + "logps/chosen": -139.1062469482422, + "logps/rejected": -219.08026123046875, + "loss": 0.2661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9733414053916931, + "rewards/margins": 1.0668015480041504, + "rewards/rejected": -0.0934600830078125, + "step": 17033 + }, + { + "epoch": 0.99, + "learning_rate": 1.9981420545001072e-11, + "logits/chosen": -2.0696985721588135, + "logits/rejected": -2.075923442840576, + "logps/chosen": -18.958053588867188, + "logps/rejected": -187.6123809814453, + "loss": 0.2573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3770652711391449, + "rewards/margins": 3.074542999267578, + "rewards/rejected": -2.6974778175354004, + "step": 17034 + }, + { + "epoch": 0.99, + "learning_rate": 1.971590711952742e-11, + "logits/chosen": -1.938319444656372, + "logits/rejected": -1.9361107349395752, + "logps/chosen": -62.676612854003906, + "logps/rejected": -140.5869598388672, + "loss": 0.655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4225006103515625, + "rewards/margins": 0.4963333010673523, + "rewards/rejected": -0.9188339114189148, + "step": 17035 + }, + { + "epoch": 0.99, + "learning_rate": 1.945216923822657e-11, + "logits/chosen": -1.842495322227478, + "logits/rejected": -1.8076128959655762, + "logps/chosen": -159.05706787109375, + "logps/rejected": -307.05810546875, + "loss": 0.1946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.874104380607605, + "rewards/margins": 0.8578445911407471, + "rewards/rejected": 1.016259789466858, + "step": 17036 + }, + { + "epoch": 0.99, + "learning_rate": 1.9190206910468798e-11, + "logits/chosen": -2.140110969543457, + "logits/rejected": -2.1425416469573975, + "logps/chosen": -80.66708374023438, + "logps/rejected": -216.97523498535156, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.090126872062683, + "rewards/margins": 2.6965248584747314, + "rewards/rejected": -1.6063979864120483, + "step": 17037 + }, + { + "epoch": 0.99, + "learning_rate": 1.8930020145552227e-11, + "logits/chosen": -1.9062567949295044, + "logits/rejected": -1.906619906425476, + "logps/chosen": -2.891791582107544, + "logps/rejected": -45.84065628051758, + "loss": 0.591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05917999893426895, + "rewards/margins": 0.43126609921455383, + "rewards/rejected": -0.4904460906982422, + "step": 17038 + }, + { + "epoch": 0.99, + "learning_rate": 1.8671608952730567e-11, + "logits/chosen": -1.8677626848220825, + "logits/rejected": -1.8574281930923462, + "logps/chosen": -65.43889617919922, + "logps/rejected": -248.38546752929688, + "loss": 0.0836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6890891790390015, + "rewards/margins": 3.462172746658325, + "rewards/rejected": -1.7730835676193237, + "step": 17039 + }, + { + "epoch": 0.99, + "learning_rate": 1.8414973341174255e-11, + "logits/chosen": -1.9539319276809692, + "logits/rejected": -1.9614076614379883, + "logps/chosen": -7.933963298797607, + "logps/rejected": -230.4345245361328, + "loss": 0.2293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4392377436161041, + "rewards/margins": 2.9889373779296875, + "rewards/rejected": -2.549699544906616, + "step": 17040 + }, + { + "epoch": 0.99, + "learning_rate": 1.816011332000378e-11, + "logits/chosen": -1.9044100046157837, + "logits/rejected": -1.860351324081421, + "logps/chosen": -346.5435791015625, + "logps/rejected": -573.908935546875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.401153564453125, + "rewards/margins": 5.483325004577637, + "rewards/rejected": -1.0821716785430908, + "step": 17041 + }, + { + "epoch": 0.99, + "learning_rate": 1.7907028898278553e-11, + "logits/chosen": -1.86408269405365, + "logits/rejected": -1.8023302555084229, + "logps/chosen": -213.32496643066406, + "logps/rejected": -317.5035705566406, + "loss": 0.0651, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.547560214996338, + "rewards/margins": 2.846421957015991, + "rewards/rejected": -0.29886171221733093, + "step": 17042 + }, + { + "epoch": 0.99, + "learning_rate": 1.7655720084980285e-11, + "logits/chosen": -2.0019819736480713, + "logits/rejected": -2.012270450592041, + "logps/chosen": -360.8214416503906, + "logps/rejected": -382.83837890625, + "loss": 0.123, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.044665575027466, + "rewards/margins": 1.3937195539474487, + "rewards/rejected": 1.650946021080017, + "step": 17043 + }, + { + "epoch": 0.99, + "learning_rate": 1.740618688904072e-11, + "logits/chosen": -1.6859146356582642, + "logits/rejected": -1.6536474227905273, + "logps/chosen": -283.07879638671875, + "logps/rejected": -512.1939697265625, + "loss": 0.1854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6937103271484375, + "rewards/margins": 0.9492583870887756, + "rewards/rejected": 0.7444519400596619, + "step": 17044 + }, + { + "epoch": 0.99, + "learning_rate": 1.7158429319324984e-11, + "logits/chosen": -1.9042752981185913, + "logits/rejected": -1.9605858325958252, + "logps/chosen": -176.52301025390625, + "logps/rejected": -203.23373413085938, + "loss": 0.1089, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.691253662109375, + "rewards/margins": 1.512304663658142, + "rewards/rejected": 1.178948998451233, + "step": 17045 + }, + { + "epoch": 0.99, + "learning_rate": 1.6912447384637153e-11, + "logits/chosen": -1.8997212648391724, + "logits/rejected": -1.9042328596115112, + "logps/chosen": -95.50871276855469, + "logps/rejected": -371.70513916015625, + "loss": 0.1108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0932563543319702, + "rewards/margins": 6.666657447814941, + "rewards/rejected": -5.573400974273682, + "step": 17046 + }, + { + "epoch": 0.99, + "learning_rate": 1.6668241093709125e-11, + "logits/chosen": -1.7766896486282349, + "logits/rejected": -1.731540560722351, + "logps/chosen": -258.861083984375, + "logps/rejected": -456.4410095214844, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0183656215667725, + "rewards/margins": 4.458774089813232, + "rewards/rejected": -1.4404083490371704, + "step": 17047 + }, + { + "epoch": 0.99, + "learning_rate": 1.6425810455222843e-11, + "logits/chosen": -1.9462310075759888, + "logits/rejected": -1.932918667793274, + "logps/chosen": -29.451828002929688, + "logps/rejected": -351.280029296875, + "loss": 0.2636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20527534186840057, + "rewards/margins": 6.308287620544434, + "rewards/rejected": -6.1030120849609375, + "step": 17048 + }, + { + "epoch": 0.99, + "learning_rate": 1.6185155477788093e-11, + "logits/chosen": -1.6098252534866333, + "logits/rejected": -1.5818341970443726, + "logps/chosen": -176.81756591796875, + "logps/rejected": -286.4309997558594, + "loss": 0.2157, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4347763061523438, + "rewards/margins": 1.2459609508514404, + "rewards/rejected": 0.18881531059741974, + "step": 17049 + }, + { + "epoch": 0.99, + "learning_rate": 1.5946276169953588e-11, + "logits/chosen": -1.9869791269302368, + "logits/rejected": -1.981546401977539, + "logps/chosen": -5.331231594085693, + "logps/rejected": -203.57223510742188, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23095019161701202, + "rewards/margins": 3.926262617111206, + "rewards/rejected": -4.157212734222412, + "step": 17050 + }, + { + "epoch": 0.99, + "learning_rate": 1.570917254020143e-11, + "logits/chosen": -1.8779746294021606, + "logits/rejected": -1.875190258026123, + "logps/chosen": -40.11803436279297, + "logps/rejected": -302.0400390625, + "loss": 0.1451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7822708487510681, + "rewards/margins": 2.569471836090088, + "rewards/rejected": -1.787200927734375, + "step": 17051 + }, + { + "epoch": 0.99, + "learning_rate": 1.547384459696377e-11, + "logits/chosen": -1.9390369653701782, + "logits/rejected": -1.9221787452697754, + "logps/chosen": -155.76275634765625, + "logps/rejected": -221.60589599609375, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5529297590255737, + "rewards/margins": 1.2042908668518066, + "rewards/rejected": 0.3486389219760895, + "step": 17052 + }, + { + "epoch": 0.99, + "learning_rate": 1.524029234859503e-11, + "logits/chosen": -2.0679783821105957, + "logits/rejected": -2.0595316886901855, + "logps/chosen": -126.9364013671875, + "logps/rejected": -182.196533203125, + "loss": 0.085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.754547119140625, + "rewards/margins": 3.099557399749756, + "rewards/rejected": -1.3450103998184204, + "step": 17053 + }, + { + "epoch": 0.99, + "learning_rate": 1.5008515803394127e-11, + "logits/chosen": -2.1442489624023438, + "logits/rejected": -2.1293978691101074, + "logps/chosen": -90.90274047851562, + "logps/rejected": -290.7989196777344, + "loss": 0.1714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.771527111530304, + "rewards/margins": 4.635846138000488, + "rewards/rejected": -3.86431884765625, + "step": 17054 + }, + { + "epoch": 0.99, + "learning_rate": 1.477851496959337e-11, + "logits/chosen": -1.908652901649475, + "logits/rejected": -1.9056425094604492, + "logps/chosen": -0.0016886978410184383, + "logps/rejected": -84.97283172607422, + "loss": 0.5689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00079634616849944, + "rewards/margins": 0.594630241394043, + "rewards/rejected": -0.5938339233398438, + "step": 17055 + }, + { + "epoch": 0.99, + "learning_rate": 1.4550289855358444e-11, + "logits/chosen": -1.7393200397491455, + "logits/rejected": -1.7459717988967896, + "logps/chosen": -257.2630920410156, + "logps/rejected": -286.7630615234375, + "loss": 0.0841, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.424508810043335, + "rewards/margins": 1.8693207502365112, + "rewards/rejected": 1.5551880598068237, + "step": 17056 + }, + { + "epoch": 0.99, + "learning_rate": 1.4323840468810632e-11, + "logits/chosen": -1.9276386499404907, + "logits/rejected": -1.9503034353256226, + "logps/chosen": -224.87811279296875, + "logps/rejected": -364.0855407714844, + "loss": 0.1701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48043519258499146, + "rewards/margins": 2.592388868331909, + "rewards/rejected": -2.1119537353515625, + "step": 17057 + }, + { + "epoch": 0.99, + "learning_rate": 1.40991668179824e-11, + "logits/chosen": -1.745321273803711, + "logits/rejected": -1.7357691526412964, + "logps/chosen": -16.719390869140625, + "logps/rejected": -110.0234375, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13176803290843964, + "rewards/margins": 2.249005079269409, + "rewards/rejected": -2.117237091064453, + "step": 17058 + }, + { + "epoch": 0.99, + "learning_rate": 1.3876268910856248e-11, + "logits/chosen": -1.8188254833221436, + "logits/rejected": -1.79799222946167, + "logps/chosen": -130.40963745117188, + "logps/rejected": -183.52322387695312, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0669647455215454, + "rewards/margins": 0.921368420124054, + "rewards/rejected": 0.14559631049633026, + "step": 17059 + }, + { + "epoch": 0.99, + "learning_rate": 1.365514675535917e-11, + "logits/chosen": -2.0902204513549805, + "logits/rejected": -2.0814332962036133, + "logps/chosen": -0.3824988603591919, + "logps/rejected": -238.373291015625, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05533432960510254, + "rewards/margins": 3.4193389415740967, + "rewards/rejected": -3.364004611968994, + "step": 17060 + }, + { + "epoch": 0.99, + "learning_rate": 1.3435800359340443e-11, + "logits/chosen": -2.166121006011963, + "logits/rejected": -2.163567066192627, + "logps/chosen": -0.0003167123068124056, + "logps/rejected": -74.34307098388672, + "loss": 0.4365, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.34849930065684e-05, + "rewards/margins": 1.610834002494812, + "rewards/rejected": -1.6107704639434814, + "step": 17061 + }, + { + "epoch": 0.99, + "learning_rate": 1.3218229730588283e-11, + "logits/chosen": -1.9539904594421387, + "logits/rejected": -1.9518623352050781, + "logps/chosen": -57.13508987426758, + "logps/rejected": -154.45718383789062, + "loss": 0.6371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.334036260843277, + "rewards/margins": 0.7925674915313721, + "rewards/rejected": -1.1266037225723267, + "step": 17062 + }, + { + "epoch": 0.99, + "learning_rate": 1.300243487684094e-11, + "logits/chosen": -1.8172985315322876, + "logits/rejected": -1.819978952407837, + "logps/chosen": -5.145044803619385, + "logps/rejected": -66.54117584228516, + "loss": 0.8471, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.2481088638305664, + "rewards/margins": -0.7712980508804321, + "rewards/rejected": 1.0194069147109985, + "step": 17063 + }, + { + "epoch": 0.99, + "learning_rate": 1.2788415805758956e-11, + "logits/chosen": -1.9136600494384766, + "logits/rejected": -1.9130878448486328, + "logps/chosen": -21.11873435974121, + "logps/rejected": -125.906494140625, + "loss": 0.1844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8499231338500977, + "rewards/margins": 2.4723520278930664, + "rewards/rejected": -1.6224288940429688, + "step": 17064 + }, + { + "epoch": 0.99, + "learning_rate": 1.2576172524941808e-11, + "logits/chosen": -1.9853821992874146, + "logits/rejected": -1.9889556169509888, + "logps/chosen": -0.3688158690929413, + "logps/rejected": -85.39129638671875, + "loss": 0.4071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006792789790779352, + "rewards/margins": 1.8150173425674438, + "rewards/rejected": -1.8082245588302612, + "step": 17065 + }, + { + "epoch": 0.99, + "learning_rate": 1.2365705041939012e-11, + "logits/chosen": -1.9330744743347168, + "logits/rejected": -1.9327888488769531, + "logps/chosen": -42.996334075927734, + "logps/rejected": -93.37872314453125, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03814659267663956, + "rewards/margins": 1.909300684928894, + "rewards/rejected": -1.8711540699005127, + "step": 17066 + }, + { + "epoch": 0.99, + "learning_rate": 1.2157013364222369e-11, + "logits/chosen": -1.749829888343811, + "logits/rejected": -1.7799067497253418, + "logps/chosen": -237.69688415527344, + "logps/rejected": -286.6424560546875, + "loss": 0.4653, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.9511855840682983, + "rewards/margins": -0.346394419670105, + "rewards/rejected": 2.2975800037384033, + "step": 17067 + }, + { + "epoch": 0.99, + "learning_rate": 1.1950097499202617e-11, + "logits/chosen": -2.028064727783203, + "logits/rejected": -2.0105509757995605, + "logps/chosen": -122.21302795410156, + "logps/rejected": -325.3597412109375, + "loss": 0.1423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9861648678779602, + "rewards/margins": 2.1553757190704346, + "rewards/rejected": -1.1692107915878296, + "step": 17068 + }, + { + "epoch": 0.99, + "learning_rate": 1.1744957454234982e-11, + "logits/chosen": -1.921030044555664, + "logits/rejected": -1.9366222620010376, + "logps/chosen": -288.97906494140625, + "logps/rejected": -493.5335693359375, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6962342262268066, + "rewards/margins": 7.2571868896484375, + "rewards/rejected": -4.560952663421631, + "step": 17069 + }, + { + "epoch": 0.99, + "learning_rate": 1.1541593236608083e-11, + "logits/chosen": -1.8602851629257202, + "logits/rejected": -1.8477351665496826, + "logps/chosen": -58.880455017089844, + "logps/rejected": -301.16455078125, + "loss": 0.2444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4893341064453125, + "rewards/margins": 6.345202922821045, + "rewards/rejected": -5.855868816375732, + "step": 17070 + }, + { + "epoch": 0.99, + "learning_rate": 1.1340004853538365e-11, + "logits/chosen": -1.7131834030151367, + "logits/rejected": -1.7398674488067627, + "logps/chosen": -222.59507751464844, + "logps/rejected": -326.99188232421875, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3474502563476562, + "rewards/margins": 3.6075332164764404, + "rewards/rejected": -0.26008301973342896, + "step": 17071 + }, + { + "epoch": 0.99, + "learning_rate": 1.1140192312197871e-11, + "logits/chosen": -1.7395248413085938, + "logits/rejected": -1.757346510887146, + "logps/chosen": -325.13134765625, + "logps/rejected": -448.0087890625, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.601855516433716, + "rewards/margins": 3.5943970680236816, + "rewards/rejected": -0.992541491985321, + "step": 17072 + }, + { + "epoch": 0.99, + "learning_rate": 1.0942155619675375e-11, + "logits/chosen": -1.9166513681411743, + "logits/rejected": -1.9199914932250977, + "logps/chosen": -195.3426513671875, + "logps/rejected": -407.9944763183594, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5778473615646362, + "rewards/margins": 6.917877674102783, + "rewards/rejected": -5.340030193328857, + "step": 17073 + }, + { + "epoch": 0.99, + "learning_rate": 1.074589478300969e-11, + "logits/chosen": -1.8883264064788818, + "logits/rejected": -1.8857513666152954, + "logps/chosen": -20.026174545288086, + "logps/rejected": -166.89642333984375, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1219875812530518, + "rewards/margins": 0.9594387412071228, + "rewards/rejected": 0.16254882514476776, + "step": 17074 + }, + { + "epoch": 0.99, + "learning_rate": 1.0551409809178569e-11, + "logits/chosen": -2.0401577949523926, + "logits/rejected": -2.0391693115234375, + "logps/chosen": -15.165220260620117, + "logps/rejected": -212.1386260986328, + "loss": 0.3689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09657344967126846, + "rewards/margins": 2.330061912536621, + "rewards/rejected": -2.2334885597229004, + "step": 17075 + }, + { + "epoch": 0.99, + "learning_rate": 1.0358700705082046e-11, + "logits/chosen": -1.9480743408203125, + "logits/rejected": -1.9387646913528442, + "logps/chosen": -103.6358871459961, + "logps/rejected": -448.5650634765625, + "loss": 0.2182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5076789855957031, + "rewards/margins": 4.570136547088623, + "rewards/rejected": -4.06245756149292, + "step": 17076 + }, + { + "epoch": 0.99, + "learning_rate": 1.0167767477570199e-11, + "logits/chosen": -1.8299578428268433, + "logits/rejected": -1.8962613344192505, + "logps/chosen": -359.00372314453125, + "logps/rejected": -357.97906494140625, + "loss": 0.5164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.864758312702179, + "rewards/margins": 0.8780456185340881, + "rewards/rejected": -1.742803931236267, + "step": 17077 + }, + { + "epoch": 0.99, + "learning_rate": 9.978610133432042e-12, + "logits/chosen": -1.8099714517593384, + "logits/rejected": -1.7941772937774658, + "logps/chosen": -18.149301528930664, + "logps/rejected": -168.34661865234375, + "loss": 0.318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2399938553571701, + "rewards/margins": 2.707285165786743, + "rewards/rejected": -2.4672913551330566, + "step": 17078 + }, + { + "epoch": 0.99, + "learning_rate": 9.79122867937332e-12, + "logits/chosen": -1.9491122961044312, + "logits/rejected": -1.9373817443847656, + "logps/chosen": -3.707361611304805e-05, + "logps/rejected": -228.71034240722656, + "loss": 0.3637, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0993652444522013e-07, + "rewards/margins": 3.15663743019104, + "rewards/rejected": -3.156637668609619, + "step": 17079 + }, + { + "epoch": 0.99, + "learning_rate": 9.605623122066476e-12, + "logits/chosen": -1.8600012063980103, + "logits/rejected": -1.8617558479309082, + "logps/chosen": -3.9598450660705566, + "logps/rejected": -54.51856994628906, + "loss": 0.5258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14811335504055023, + "rewards/margins": 0.632732093334198, + "rewards/rejected": -0.48461875319480896, + "step": 17080 + }, + { + "epoch": 0.99, + "learning_rate": 9.421793468089578e-12, + "logits/chosen": -1.9388089179992676, + "logits/rejected": -1.940208911895752, + "logps/chosen": -8.170642852783203, + "logps/rejected": -221.5497589111328, + "loss": 0.336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006274604704231024, + "rewards/margins": 4.678491592407227, + "rewards/rejected": -4.672216892242432, + "step": 17081 + }, + { + "epoch": 0.99, + "learning_rate": 9.239739723987395e-12, + "logits/chosen": -1.87846839427948, + "logits/rejected": -1.9023959636688232, + "logps/chosen": -191.21559143066406, + "logps/rejected": -246.36753845214844, + "loss": 0.1867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.713581919670105, + "rewards/margins": 1.0077438354492188, + "rewards/rejected": 0.7058380246162415, + "step": 17082 + }, + { + "epoch": 0.99, + "learning_rate": 9.05946189621587e-12, + "logits/chosen": -1.9620575904846191, + "logits/rejected": -1.9929996728897095, + "logps/chosen": -225.6123809814453, + "logps/rejected": -249.7493896484375, + "loss": 0.0825, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0339341163635254, + "rewards/margins": 1.7873642444610596, + "rewards/rejected": 1.2465698719024658, + "step": 17083 + }, + { + "epoch": 0.99, + "learning_rate": 8.880959991186543e-12, + "logits/chosen": -1.8379483222961426, + "logits/rejected": -1.8664079904556274, + "logps/chosen": -212.01101684570312, + "logps/rejected": -411.76641845703125, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5797882080078125, + "rewards/margins": 4.099874973297119, + "rewards/rejected": -1.520086646080017, + "step": 17084 + }, + { + "epoch": 0.99, + "learning_rate": 8.704234015238788e-12, + "logits/chosen": -1.9391721487045288, + "logits/rejected": -1.939227819442749, + "logps/chosen": -46.87709045410156, + "logps/rejected": -219.48171997070312, + "loss": 0.7243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9771347045898438, + "rewards/margins": 1.9262070655822754, + "rewards/rejected": -2.903341770172119, + "step": 17085 + }, + { + "epoch": 0.99, + "learning_rate": 8.529283974650913e-12, + "logits/chosen": -1.934505581855774, + "logits/rejected": -1.9370787143707275, + "logps/chosen": -4.264191150665283, + "logps/rejected": -148.69747924804688, + "loss": 0.4609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17382942140102386, + "rewards/margins": 0.9842603802680969, + "rewards/rejected": -0.8104309439659119, + "step": 17086 + }, + { + "epoch": 0.99, + "learning_rate": 8.35610987563462e-12, + "logits/chosen": -1.9386487007141113, + "logits/rejected": -1.9291149377822876, + "logps/chosen": -179.4076690673828, + "logps/rejected": -266.0230407714844, + "loss": 0.2004, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4053360223770142, + "rewards/margins": 1.0389633178710938, + "rewards/rejected": 0.366372674703598, + "step": 17087 + }, + { + "epoch": 0.99, + "learning_rate": 8.184711724351645e-12, + "logits/chosen": -1.8476457595825195, + "logits/rejected": -1.803713083267212, + "logps/chosen": -177.263427734375, + "logps/rejected": -309.7770080566406, + "loss": 0.0625, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4780030250549316, + "rewards/margins": 2.864837646484375, + "rewards/rejected": -0.3868347108364105, + "step": 17088 + }, + { + "epoch": 0.99, + "learning_rate": 8.015089526880458e-12, + "logits/chosen": -1.7895095348358154, + "logits/rejected": -1.7832691669464111, + "logps/chosen": -37.68390655517578, + "logps/rejected": -248.25828552246094, + "loss": 0.1168, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0046478509902954, + "rewards/margins": 4.254739284515381, + "rewards/rejected": -3.250091552734375, + "step": 17089 + }, + { + "epoch": 0.99, + "learning_rate": 7.847243289249572e-12, + "logits/chosen": -1.9744800329208374, + "logits/rejected": -1.9694331884384155, + "logps/chosen": -9.595064163208008, + "logps/rejected": -240.8380584716797, + "loss": 0.2314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4800929129123688, + "rewards/margins": 2.8339900970458984, + "rewards/rejected": -2.3538970947265625, + "step": 17090 + }, + { + "epoch": 0.99, + "learning_rate": 7.681173017420884e-12, + "logits/chosen": -1.9199777841567993, + "logits/rejected": -1.9270941019058228, + "logps/chosen": -1.351557731628418, + "logps/rejected": -196.09063720703125, + "loss": 0.3803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12433748692274094, + "rewards/margins": 3.970651865005493, + "rewards/rejected": -4.09498929977417, + "step": 17091 + }, + { + "epoch": 0.99, + "learning_rate": 7.516878717295227e-12, + "logits/chosen": -1.7942861318588257, + "logits/rejected": -1.7402421236038208, + "logps/chosen": -170.15325927734375, + "logps/rejected": -355.826416015625, + "loss": 0.2437, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6559189558029175, + "rewards/margins": 0.9894852042198181, + "rewards/rejected": 0.6664337515830994, + "step": 17092 + }, + { + "epoch": 0.99, + "learning_rate": 7.354360394712378e-12, + "logits/chosen": -1.8509045839309692, + "logits/rejected": -1.801032304763794, + "logps/chosen": -177.66380310058594, + "logps/rejected": -334.072509765625, + "loss": 0.0839, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4823379516601562, + "rewards/margins": 2.1220107078552246, + "rewards/rejected": 0.3603271543979645, + "step": 17093 + }, + { + "epoch": 0.99, + "learning_rate": 7.193618055445494e-12, + "logits/chosen": -2.0280802249908447, + "logits/rejected": -2.017385959625244, + "logps/chosen": -5.630642414093018, + "logps/rejected": -108.48606872558594, + "loss": 0.4389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1996448040008545, + "rewards/margins": 1.1026195287704468, + "rewards/rejected": -0.9029747247695923, + "step": 17094 + }, + { + "epoch": 0.99, + "learning_rate": 7.0346517052011226e-12, + "logits/chosen": -2.035658359527588, + "logits/rejected": -2.020059108734131, + "logps/chosen": -3.230552465538494e-05, + "logps/rejected": -165.90817260742188, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 6.317856104942621e-07, + "rewards/margins": 3.2847297191619873, + "rewards/rejected": -3.28472900390625, + "step": 17095 + }, + { + "epoch": 0.99, + "learning_rate": 6.877461349624747e-12, + "logits/chosen": -1.9425171613693237, + "logits/rejected": -1.9358329772949219, + "logps/chosen": -26.8149471282959, + "logps/rejected": -157.47129821777344, + "loss": 0.4398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039101600646972656, + "rewards/margins": 1.2680047750473022, + "rewards/rejected": -1.2289031744003296, + "step": 17096 + }, + { + "epoch": 0.99, + "learning_rate": 6.722046994306341e-12, + "logits/chosen": -2.0005950927734375, + "logits/rejected": -1.9982819557189941, + "logps/chosen": -0.043740708380937576, + "logps/rejected": -71.92230987548828, + "loss": 0.5217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10971132665872574, + "rewards/margins": 0.6547499895095825, + "rewards/rejected": -0.545038640499115, + "step": 17097 + }, + { + "epoch": 0.99, + "learning_rate": 6.568408644763712e-12, + "logits/chosen": -1.8775385618209839, + "logits/rejected": -1.9386391639709473, + "logps/chosen": -169.1153564453125, + "logps/rejected": -614.1937866210938, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.651312232017517, + "rewards/margins": 11.974676132202148, + "rewards/rejected": -10.3233642578125, + "step": 17098 + }, + { + "epoch": 1.0, + "learning_rate": 6.416546306453607e-12, + "logits/chosen": -2.164212703704834, + "logits/rejected": -2.1548495292663574, + "logps/chosen": -11.73193073272705, + "logps/rejected": -117.1313705444336, + "loss": 0.465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023965930566191673, + "rewards/margins": 1.4099637269973755, + "rewards/rejected": -1.3859977722167969, + "step": 17099 + }, + { + "epoch": 1.0, + "learning_rate": 6.266459984777262e-12, + "logits/chosen": -1.5273505449295044, + "logits/rejected": -1.5299148559570312, + "logps/chosen": -95.81515502929688, + "logps/rejected": -230.106201171875, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.609578013420105, + "rewards/margins": 3.3880021572113037, + "rewards/rejected": -1.7784241437911987, + "step": 17100 + }, + { + "epoch": 1.0, + "learning_rate": 6.1181496850581935e-12, + "logits/chosen": -1.9470953941345215, + "logits/rejected": -1.941053032875061, + "logps/chosen": -0.1134846955537796, + "logps/rejected": -262.5782470703125, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009340420365333557, + "rewards/margins": 4.994514465332031, + "rewards/rejected": -4.995448589324951, + "step": 17101 + }, + { + "epoch": 1.0, + "learning_rate": 5.9716154125699635e-12, + "logits/chosen": -1.938889741897583, + "logits/rejected": -1.917837381362915, + "logps/chosen": -234.10623168945312, + "logps/rejected": -298.86163330078125, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13497619330883026, + "rewards/margins": 1.051782250404358, + "rewards/rejected": -0.9168060421943665, + "step": 17102 + }, + { + "epoch": 1.0, + "learning_rate": 5.826857172519517e-12, + "logits/chosen": -1.923600673675537, + "logits/rejected": -1.8865656852722168, + "logps/chosen": -140.39515686035156, + "logps/rejected": -305.19769287109375, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8936097621917725, + "rewards/margins": 3.0561158657073975, + "rewards/rejected": -0.162506103515625, + "step": 17103 + }, + { + "epoch": 1.0, + "learning_rate": 5.6838749700416356e-12, + "logits/chosen": -2.125457763671875, + "logits/rejected": -2.1183788776397705, + "logps/chosen": -0.0013003378408029675, + "logps/rejected": -148.6929931640625, + "loss": 0.371, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.743620461842511e-05, + "rewards/margins": 2.670670509338379, + "rewards/rejected": -2.6706979274749756, + "step": 17104 + }, + { + "epoch": 1.0, + "learning_rate": 5.542668810226692e-12, + "logits/chosen": -1.9266139268875122, + "logits/rejected": -1.8395640850067139, + "logps/chosen": -141.68429565429688, + "logps/rejected": -326.0915222167969, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2712295055389404, + "rewards/margins": 2.4397597312927246, + "rewards/rejected": -0.16853027045726776, + "step": 17105 + }, + { + "epoch": 1.0, + "learning_rate": 5.4032386980817915e-12, + "logits/chosen": -1.6062922477722168, + "logits/rejected": -1.5801022052764893, + "logps/chosen": -173.53915405273438, + "logps/rejected": -516.2101440429688, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.89129638671875, + "rewards/margins": 5.230389595031738, + "rewards/rejected": -3.339092969894409, + "step": 17106 + }, + { + "epoch": 1.0, + "learning_rate": 5.265584638569631e-12, + "logits/chosen": -2.0689210891723633, + "logits/rejected": -2.0522139072418213, + "logps/chosen": -184.63490295410156, + "logps/rejected": -235.6363525390625, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.367671251296997, + "rewards/margins": 1.6178604364395142, + "rewards/rejected": 1.749810814857483, + "step": 17107 + }, + { + "epoch": 1.0, + "learning_rate": 5.1297066365696416e-12, + "logits/chosen": -1.8493701219558716, + "logits/rejected": -1.8519821166992188, + "logps/chosen": -28.465255737304688, + "logps/rejected": -229.58169555664062, + "loss": 0.2662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3372839093208313, + "rewards/margins": 4.158823490142822, + "rewards/rejected": -3.8215394020080566, + "step": 17108 + }, + { + "epoch": 1.0, + "learning_rate": 4.995604696911293e-12, + "logits/chosen": -1.794059157371521, + "logits/rejected": -1.790557622909546, + "logps/chosen": -225.9810028076172, + "logps/rejected": -279.937255859375, + "loss": 0.0644, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.778181552886963, + "rewards/margins": 2.621019124984741, + "rewards/rejected": 0.15716247260570526, + "step": 17109 + }, + { + "epoch": 1.0, + "learning_rate": 4.863278824368544e-12, + "logits/chosen": -1.8151862621307373, + "logits/rejected": -1.80305016040802, + "logps/chosen": -237.2347412109375, + "logps/rejected": -411.1368408203125, + "loss": 0.2206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5876617431640625, + "rewards/margins": 0.7590575814247131, + "rewards/rejected": 0.8286041617393494, + "step": 17110 + }, + { + "epoch": 1.0, + "learning_rate": 4.732729023626536e-12, + "logits/chosen": -1.8200098276138306, + "logits/rejected": -1.802712082862854, + "logps/chosen": -0.017810432240366936, + "logps/rejected": -146.36764526367188, + "loss": 0.4707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008560508722439408, + "rewards/margins": 1.2242705821990967, + "rewards/rejected": -1.2251266241073608, + "step": 17111 + }, + { + "epoch": 1.0, + "learning_rate": 4.603955299337103e-12, + "logits/chosen": -1.6717547178268433, + "logits/rejected": -1.6651159524917603, + "logps/chosen": -13.380417823791504, + "logps/rejected": -225.44631958007812, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14165182411670685, + "rewards/margins": 6.53427267074585, + "rewards/rejected": -6.392621040344238, + "step": 17112 + }, + { + "epoch": 1.0, + "learning_rate": 4.4769576560688135e-12, + "logits/chosen": -1.8913624286651611, + "logits/rejected": -1.908023715019226, + "logps/chosen": -161.7244873046875, + "logps/rejected": -365.4564208984375, + "loss": 0.0558, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0812530517578125, + "rewards/margins": 4.266848564147949, + "rewards/rejected": -3.185595750808716, + "step": 17113 + }, + { + "epoch": 1.0, + "learning_rate": 4.351736098329173e-12, + "logits/chosen": -2.1029200553894043, + "logits/rejected": -2.1208229064941406, + "logps/chosen": -200.6207275390625, + "logps/rejected": -467.98443603515625, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5705291628837585, + "rewards/margins": 7.943215847015381, + "rewards/rejected": -7.372686862945557, + "step": 17114 + }, + { + "epoch": 1.0, + "learning_rate": 4.228290630575726e-12, + "logits/chosen": -1.9615402221679688, + "logits/rejected": -1.9582802057266235, + "logps/chosen": -21.086883544921875, + "logps/rejected": -134.95523071289062, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6128076910972595, + "rewards/margins": 1.4817171096801758, + "rewards/rejected": -0.868909478187561, + "step": 17115 + }, + { + "epoch": 1.0, + "learning_rate": 4.106621257188303e-12, + "logits/chosen": -1.619604468345642, + "logits/rejected": -1.6050236225128174, + "logps/chosen": -156.68603515625, + "logps/rejected": -300.01678466796875, + "loss": 0.1001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8970367908477783, + "rewards/margins": 2.3284270763397217, + "rewards/rejected": -0.4313903748989105, + "step": 17116 + }, + { + "epoch": 1.0, + "learning_rate": 3.9867279824856715e-12, + "logits/chosen": -1.7928922176361084, + "logits/rejected": -1.785852074623108, + "logps/chosen": -11.250848770141602, + "logps/rejected": -146.68649291992188, + "loss": 0.4907, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27505284547805786, + "rewards/margins": 0.6257212162017822, + "rewards/rejected": -0.350668340921402, + "step": 17117 + }, + { + "epoch": 1.0, + "learning_rate": 3.868610810736639e-12, + "logits/chosen": -2.0587682723999023, + "logits/rejected": -2.0449953079223633, + "logps/chosen": -3.959873676300049, + "logps/rejected": -207.57809448242188, + "loss": 0.365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024178743362426758, + "rewards/margins": 2.1353085041046143, + "rewards/rejected": -2.1111297607421875, + "step": 17118 + }, + { + "epoch": 1.0, + "learning_rate": 3.752269746132297e-12, + "logits/chosen": -1.7963417768478394, + "logits/rejected": -1.7983942031860352, + "logps/chosen": -10.730457305908203, + "logps/rejected": -91.20127868652344, + "loss": 0.4892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04276781156659126, + "rewards/margins": 0.9484850764274597, + "rewards/rejected": -0.9912528991699219, + "step": 17119 + }, + { + "epoch": 1.0, + "learning_rate": 3.637704792802676e-12, + "logits/chosen": -1.922075629234314, + "logits/rejected": -1.9176759719848633, + "logps/chosen": -1.0501431226730347, + "logps/rejected": -180.679931640625, + "loss": 0.3534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07157520204782486, + "rewards/margins": 2.1323158740997314, + "rewards/rejected": -2.0607407093048096, + "step": 17120 + }, + { + "epoch": 1.0, + "learning_rate": 3.524915954822294e-12, + "logits/chosen": -1.7057392597198486, + "logits/rejected": -1.6995456218719482, + "logps/chosen": -9.591752052307129, + "logps/rejected": -277.31744384765625, + "loss": 0.1761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5496720671653748, + "rewards/margins": 4.902245044708252, + "rewards/rejected": -4.352572917938232, + "step": 17121 + }, + { + "epoch": 1.0, + "learning_rate": 3.413903236193505e-12, + "logits/chosen": -1.926363229751587, + "logits/rejected": -1.916101098060608, + "logps/chosen": -78.32476806640625, + "logps/rejected": -180.07748413085938, + "loss": 0.1968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2610100507736206, + "rewards/margins": 1.5703942775726318, + "rewards/rejected": -0.30938416719436646, + "step": 17122 + }, + { + "epoch": 1.0, + "learning_rate": 3.304666640863152e-12, + "logits/chosen": -1.9173842668533325, + "logits/rejected": -1.905943512916565, + "logps/chosen": -74.77578735351562, + "logps/rejected": -350.7419128417969, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.583212375640869, + "rewards/margins": 8.0662202835083, + "rewards/rejected": -5.483007907867432, + "step": 17123 + }, + { + "epoch": 1.0, + "learning_rate": 3.197206172711464e-12, + "logits/chosen": -1.9412721395492554, + "logits/rejected": -1.926111102104187, + "logps/chosen": -0.001170734642073512, + "logps/rejected": -69.67404174804688, + "loss": 0.5213, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3390899514197372e-05, + "rewards/margins": 0.8593608140945435, + "rewards/rejected": -0.8593841791152954, + "step": 17124 + }, + { + "epoch": 1.0, + "learning_rate": 3.091521835557609e-12, + "logits/chosen": -1.846061110496521, + "logits/rejected": -1.8384003639221191, + "logps/chosen": -220.37290954589844, + "logps/rejected": -269.33587646484375, + "loss": 0.2527, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6485443115234375, + "rewards/margins": 0.5179625749588013, + "rewards/rejected": 1.1305817365646362, + "step": 17125 + }, + { + "epoch": 1.0, + "learning_rate": 2.98761363315414e-12, + "logits/chosen": -1.938578486442566, + "logits/rejected": -1.9478342533111572, + "logps/chosen": -114.75300598144531, + "logps/rejected": -355.9657287597656, + "loss": 0.247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3078903257846832, + "rewards/margins": 2.553715705871582, + "rewards/rejected": -2.2458252906799316, + "step": 17126 + }, + { + "epoch": 1.0, + "learning_rate": 2.8854815691925495e-12, + "logits/chosen": -1.8291678428649902, + "logits/rejected": -1.7459688186645508, + "logps/chosen": -195.5943603515625, + "logps/rejected": -371.498779296875, + "loss": 0.1292, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4463868141174316, + "rewards/margins": 1.5159242153167725, + "rewards/rejected": 0.930462658405304, + "step": 17127 + }, + { + "epoch": 1.0, + "learning_rate": 2.7851256472977146e-12, + "logits/chosen": -2.0209829807281494, + "logits/rejected": -2.0198845863342285, + "logps/chosen": -218.76698303222656, + "logps/rejected": -377.36676025390625, + "loss": 0.0905, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.411511182785034, + "rewards/margins": 1.7604339122772217, + "rewards/rejected": 0.6510772705078125, + "step": 17128 + }, + { + "epoch": 1.0, + "learning_rate": 2.6865458710390034e-12, + "logits/chosen": -1.8103495836257935, + "logits/rejected": -1.8259116411209106, + "logps/chosen": -196.25283813476562, + "logps/rejected": -507.90252685546875, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.530452013015747, + "rewards/margins": 4.717597961425781, + "rewards/rejected": -3.187145948410034, + "step": 17129 + }, + { + "epoch": 1.0, + "learning_rate": 2.5897422439191686e-12, + "logits/chosen": -1.8516114950180054, + "logits/rejected": -1.8328022956848145, + "logps/chosen": -117.23860168457031, + "logps/rejected": -257.2707214355469, + "loss": 0.2311, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3494614362716675, + "rewards/margins": 1.3449753522872925, + "rewards/rejected": 0.004486083984375, + "step": 17130 + }, + { + "epoch": 1.0, + "learning_rate": 2.494714769374351e-12, + "logits/chosen": -1.9126439094543457, + "logits/rejected": -1.9023175239562988, + "logps/chosen": -179.37942504882812, + "logps/rejected": -516.8153686523438, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4487550258636475, + "rewards/margins": 4.2529449462890625, + "rewards/rejected": -0.8041900992393494, + "step": 17131 + }, + { + "epoch": 1.0, + "learning_rate": 2.4014634507851795e-12, + "logits/chosen": -2.005000114440918, + "logits/rejected": -1.9751486778259277, + "logps/chosen": -92.26490020751953, + "logps/rejected": -183.94061279296875, + "loss": 0.3658, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2452659606933594, + "rewards/margins": 0.3137916326522827, + "rewards/rejected": 0.9314743280410767, + "step": 17132 + }, + { + "epoch": 1.0, + "learning_rate": 2.3099882914545677e-12, + "logits/chosen": -1.8875393867492676, + "logits/rejected": -1.8676036596298218, + "logps/chosen": -162.86427307128906, + "logps/rejected": -282.2569274902344, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2723495960235596, + "rewards/margins": 0.49439239501953125, + "rewards/rejected": 1.7779572010040283, + "step": 17133 + }, + { + "epoch": 1.0, + "learning_rate": 2.22028929464102e-12, + "logits/chosen": -1.6784892082214355, + "logits/rejected": -1.6651378870010376, + "logps/chosen": -168.33766174316406, + "logps/rejected": -287.4236755371094, + "loss": 0.1426, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1222426891326904, + "rewards/margins": 1.2357405424118042, + "rewards/rejected": 1.8865021467208862, + "step": 17134 + }, + { + "epoch": 1.0, + "learning_rate": 2.1323664635308768e-12, + "logits/chosen": -1.940723180770874, + "logits/rejected": -1.9310989379882812, + "logps/chosen": -4.264199733734131, + "logps/rejected": -252.47552490234375, + "loss": 0.3971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19139480590820312, + "rewards/margins": 6.746984004974365, + "rewards/rejected": -6.938378810882568, + "step": 17135 + }, + { + "epoch": 1.0, + "learning_rate": 2.0462198012438648e-12, + "logits/chosen": -2.1074984073638916, + "logits/rejected": -2.104174852371216, + "logps/chosen": -6.949772068765014e-05, + "logps/rejected": -58.176551818847656, + "loss": 0.4564, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3257326776947593e-06, + "rewards/margins": 1.2627151012420654, + "rewards/rejected": -1.262711763381958, + "step": 17136 + }, + { + "epoch": 1.0, + "learning_rate": 1.961849310838648e-12, + "logits/chosen": -2.0099756717681885, + "logits/rejected": -1.9968310594558716, + "logps/chosen": -10.071121215820312, + "logps/rejected": -131.1333465576172, + "loss": 0.6098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04326324537396431, + "rewards/margins": 0.28213807940483093, + "rewards/rejected": -0.23887482285499573, + "step": 17137 + }, + { + "epoch": 1.0, + "learning_rate": 1.87925499531838e-12, + "logits/chosen": -2.0727171897888184, + "logits/rejected": -2.0481762886047363, + "logps/chosen": -23.311710357666016, + "logps/rejected": -159.377197265625, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5751827359199524, + "rewards/margins": 1.6839168071746826, + "rewards/rejected": -1.108734130859375, + "step": 17138 + }, + { + "epoch": 1.0, + "learning_rate": 1.7984368576140497e-12, + "logits/chosen": -1.8446860313415527, + "logits/rejected": -1.8292244672775269, + "logps/chosen": -138.82797241210938, + "logps/rejected": -239.43789672851562, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.19438636302948, + "rewards/margins": 0.4476059675216675, + "rewards/rejected": 0.7467803955078125, + "step": 17139 + }, + { + "epoch": 1.0, + "learning_rate": 1.7193949005955832e-12, + "logits/chosen": -1.7534652948379517, + "logits/rejected": -1.7615370750427246, + "logps/chosen": -34.4830436706543, + "logps/rejected": -170.78689575195312, + "loss": 0.3453, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.019792914390564, + "rewards/margins": 0.6728614568710327, + "rewards/rejected": 0.34693145751953125, + "step": 17140 + }, + { + "epoch": 1.0, + "learning_rate": 1.6421291270718452e-12, + "logits/chosen": -1.8277111053466797, + "logits/rejected": -1.813095211982727, + "logps/chosen": -32.08231735229492, + "logps/rejected": -292.0372009277344, + "loss": 0.38, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3230135142803192, + "rewards/margins": 2.6393887996673584, + "rewards/rejected": -2.96240234375, + "step": 17141 + }, + { + "epoch": 1.0, + "learning_rate": 1.5666395397906374e-12, + "logits/chosen": -1.6980257034301758, + "logits/rejected": -1.7095427513122559, + "logps/chosen": -158.76800537109375, + "logps/rejected": -377.64434814453125, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.628308057785034, + "rewards/margins": 5.15899658203125, + "rewards/rejected": -2.530688524246216, + "step": 17142 + }, + { + "epoch": 1.0, + "learning_rate": 1.4929261414275973e-12, + "logits/chosen": -1.8075045347213745, + "logits/rejected": -1.8062270879745483, + "logps/chosen": -106.08578491210938, + "logps/rejected": -292.1529541015625, + "loss": 0.1427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7764236330986023, + "rewards/margins": 3.3461990356445312, + "rewards/rejected": -2.569775342941284, + "step": 17143 + }, + { + "epoch": 1.0, + "learning_rate": 1.4209889346084025e-12, + "logits/chosen": -1.8532590866088867, + "logits/rejected": -1.8364284038543701, + "logps/chosen": -80.1060562133789, + "logps/rejected": -327.5941467285156, + "loss": 0.4794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5587013363838196, + "rewards/margins": 4.7184319496154785, + "rewards/rejected": -5.277133464813232, + "step": 17144 + }, + { + "epoch": 1.0, + "learning_rate": 1.3508279218810148e-12, + "logits/chosen": -1.9740875959396362, + "logits/rejected": -2.0057730674743652, + "logps/chosen": -235.11630249023438, + "logps/rejected": -368.02020263671875, + "loss": 0.0906, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.307177782058716, + "rewards/margins": 1.766754150390625, + "rewards/rejected": 0.540423572063446, + "step": 17145 + }, + { + "epoch": 1.0, + "learning_rate": 1.282443105743436e-12, + "logits/chosen": -1.967456340789795, + "logits/rejected": -1.9616204500198364, + "logps/chosen": -0.005647989921271801, + "logps/rejected": -369.9551086425781, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.294058453524485e-05, + "rewards/margins": 8.143743515014648, + "rewards/rejected": -8.143786430358887, + "step": 17146 + }, + { + "epoch": 1.0, + "learning_rate": 1.2158344886270544e-12, + "logits/chosen": -1.6508457660675049, + "logits/rejected": -1.6390413045883179, + "logps/chosen": -45.200687408447266, + "logps/rejected": -160.39630126953125, + "loss": 0.2571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6322097778320312, + "rewards/margins": 1.7000092267990112, + "rewards/rejected": -1.06779944896698, + "step": 17147 + }, + { + "epoch": 1.0, + "learning_rate": 1.1510020728910942e-12, + "logits/chosen": -1.8523229360580444, + "logits/rejected": -1.8525481224060059, + "logps/chosen": -2.2435245513916016, + "logps/rejected": -120.3177719116211, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09963975101709366, + "rewards/margins": 1.5266082286834717, + "rewards/rejected": -1.4269684553146362, + "step": 17148 + }, + { + "epoch": 1.0, + "learning_rate": 1.087945860844819e-12, + "logits/chosen": -1.8548591136932373, + "logits/rejected": -1.8465112447738647, + "logps/chosen": -26.4005126953125, + "logps/rejected": -112.6400146484375, + "loss": 0.5281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5605350732803345, + "rewards/margins": 0.0925464928150177, + "rewards/rejected": 0.4679885804653168, + "step": 17149 + }, + { + "epoch": 1.0, + "learning_rate": 1.0266658547253282e-12, + "logits/chosen": -1.9645333290100098, + "logits/rejected": -1.9496535062789917, + "logps/chosen": -67.4437255859375, + "logps/rejected": -255.41714477539062, + "loss": 0.1272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7462486624717712, + "rewards/margins": 3.469641923904419, + "rewards/rejected": -2.723393201828003, + "step": 17150 + }, + { + "epoch": 1.0, + "learning_rate": 9.671620567086592e-13, + "logits/chosen": -1.7733519077301025, + "logits/rejected": -1.7767678499221802, + "logps/chosen": -162.60757446289062, + "logps/rejected": -259.21240234375, + "loss": 0.2241, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5081329345703125, + "rewards/margins": 0.8427978157997131, + "rewards/rejected": 0.6653351187705994, + "step": 17151 + }, + { + "epoch": 1.0, + "learning_rate": 9.094344689097866e-13, + "logits/chosen": -1.8970091342926025, + "logits/rejected": -1.8911913633346558, + "logps/chosen": -78.26030731201172, + "logps/rejected": -241.60543823242188, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.057512667030096054, + "rewards/margins": 2.8923590183258057, + "rewards/rejected": -2.949871778488159, + "step": 17152 + }, + { + "epoch": 1.0, + "learning_rate": 8.534830933826231e-13, + "logits/chosen": -1.8590353727340698, + "logits/rejected": -1.8558498620986938, + "logps/chosen": -54.72779083251953, + "logps/rejected": -223.15579223632812, + "loss": 0.464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49113160371780396, + "rewards/margins": 3.6817567348480225, + "rewards/rejected": -4.172888278961182, + "step": 17153 + }, + { + "epoch": 1.0, + "learning_rate": 7.993079321089169e-13, + "logits/chosen": -1.992640495300293, + "logits/rejected": -1.996193766593933, + "logps/chosen": -87.44432067871094, + "logps/rejected": -161.27883911132812, + "loss": 0.3714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.663116455078125, + "rewards/margins": 0.49341583251953125, + "rewards/rejected": 0.16970062255859375, + "step": 17154 + }, + { + "epoch": 1.0, + "learning_rate": 7.469089870204559e-13, + "logits/chosen": -1.9402655363082886, + "logits/rejected": -1.9346810579299927, + "logps/chosen": -61.99038314819336, + "logps/rejected": -215.1044158935547, + "loss": 0.4815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3865722715854645, + "rewards/margins": 2.0489699840545654, + "rewards/rejected": -2.435542345046997, + "step": 17155 + }, + { + "epoch": 1.0, + "learning_rate": 6.962862599713126e-13, + "logits/chosen": -1.652611255645752, + "logits/rejected": -1.6612319946289062, + "logps/chosen": -197.52651977539062, + "logps/rejected": -159.40667724609375, + "loss": 0.4866, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.8713531494140625, + "rewards/margins": -0.23196113109588623, + "rewards/rejected": 1.1033142805099487, + "step": 17156 + }, + { + "epoch": 1.0, + "learning_rate": 6.474397527600483e-13, + "logits/chosen": -1.7557486295700073, + "logits/rejected": -1.752726674079895, + "logps/chosen": -18.766254425048828, + "logps/rejected": -242.2388458251953, + "loss": 0.2693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005078697111457586, + "rewards/margins": 3.9528634548187256, + "rewards/rejected": -3.957942247390747, + "step": 17157 + }, + { + "epoch": 1.0, + "learning_rate": 6.003694671297132e-13, + "logits/chosen": -1.7868916988372803, + "logits/rejected": -1.7886241674423218, + "logps/chosen": -6.006041526794434, + "logps/rejected": -77.79867553710938, + "loss": 0.4395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029801607131958008, + "rewards/margins": 1.3886380195617676, + "rewards/rejected": -1.3588364124298096, + "step": 17158 + }, + { + "epoch": 1.0, + "learning_rate": 5.550754047456419e-13, + "logits/chosen": -1.8672285079956055, + "logits/rejected": -1.8664051294326782, + "logps/chosen": -87.30487060546875, + "logps/rejected": -204.670654296875, + "loss": 0.0678, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6203224658966064, + "rewards/margins": 3.4145288467407227, + "rewards/rejected": -1.7942062616348267, + "step": 17159 + }, + { + "epoch": 1.0, + "learning_rate": 5.115575672232087e-13, + "logits/chosen": -1.9532113075256348, + "logits/rejected": -1.932976484298706, + "logps/chosen": -152.41424560546875, + "logps/rejected": -283.3687438964844, + "loss": 0.1121, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4868714809417725, + "rewards/margins": 1.614425778388977, + "rewards/rejected": 0.8724457025527954, + "step": 17160 + }, + { + "epoch": 1.0, + "learning_rate": 4.698159561000725e-13, + "logits/chosen": -1.9196070432662964, + "logits/rejected": -1.9146474599838257, + "logps/chosen": -9.375242233276367, + "logps/rejected": -113.61911010742188, + "loss": 0.4191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3124869465827942, + "rewards/margins": 0.9386084675788879, + "rewards/rejected": -0.6261215209960938, + "step": 17161 + }, + { + "epoch": 1.0, + "learning_rate": 4.2985057286393235e-13, + "logits/chosen": -1.8724170923233032, + "logits/rejected": -1.8677345514297485, + "logps/chosen": -37.138580322265625, + "logps/rejected": -379.7200927734375, + "loss": 0.1221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6550998687744141, + "rewards/margins": 6.609955310821533, + "rewards/rejected": -5.954855442047119, + "step": 17162 + }, + { + "epoch": 1.0, + "learning_rate": 3.916614189358736e-13, + "logits/chosen": -1.7541099786758423, + "logits/rejected": -1.7284796237945557, + "logps/chosen": -229.17591857910156, + "logps/rejected": -336.41888427734375, + "loss": 0.2266, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0175873041152954, + "rewards/margins": 0.9117279052734375, + "rewards/rejected": 0.10585937649011612, + "step": 17163 + }, + { + "epoch": 1.0, + "learning_rate": 3.5524849566481716e-13, + "logits/chosen": -1.866700530052185, + "logits/rejected": -1.860024333000183, + "logps/chosen": -221.14419555664062, + "logps/rejected": -371.526611328125, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5886428356170654, + "rewards/margins": 2.7320785522460938, + "rewards/rejected": -0.14343567192554474, + "step": 17164 + }, + { + "epoch": 1.0, + "learning_rate": 3.206118043552752e-13, + "logits/chosen": -2.1137900352478027, + "logits/rejected": -2.1200528144836426, + "logps/chosen": -219.73683166503906, + "logps/rejected": -633.4819946289062, + "loss": 0.8444, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0596604347229004, + "rewards/margins": 14.402313232421875, + "rewards/rejected": -16.461973190307617, + "step": 17165 + }, + { + "epoch": 1.0, + "learning_rate": 2.8775134623404416e-13, + "logits/chosen": -1.7172025442123413, + "logits/rejected": -1.7319546937942505, + "logps/chosen": -305.604736328125, + "logps/rejected": -432.48370361328125, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4499664306640625, + "rewards/margins": 2.832598924636841, + "rewards/rejected": -0.38263246417045593, + "step": 17166 + }, + { + "epoch": 1.0, + "learning_rate": 2.566671224613071e-13, + "logits/chosen": -1.9327716827392578, + "logits/rejected": -1.8923640251159668, + "logps/chosen": -182.3778076171875, + "logps/rejected": -396.8358154296875, + "loss": 0.1449, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5601990222930908, + "rewards/margins": 1.6200134754180908, + "rewards/rejected": -0.059814453125, + "step": 17167 + }, + { + "epoch": 1.0, + "learning_rate": 2.2735913414728691e-13, + "logits/chosen": -1.655211329460144, + "logits/rejected": -1.6600372791290283, + "logps/chosen": -19.874967575073242, + "logps/rejected": -49.47254180908203, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5088363885879517, + "rewards/margins": 0.11040344834327698, + "rewards/rejected": 0.3984329402446747, + "step": 17168 + }, + { + "epoch": 1.0, + "learning_rate": 1.9982738233559337e-13, + "logits/chosen": -2.02368426322937, + "logits/rejected": -2.0084941387176514, + "logps/chosen": -2.200160503387451, + "logps/rejected": -125.47212219238281, + "loss": 0.6132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0076573374681174755, + "rewards/margins": 0.38024264574050903, + "rewards/rejected": -0.3725852966308594, + "step": 17169 + }, + { + "epoch": 1.0, + "learning_rate": 1.7407186800322272e-13, + "logits/chosen": -1.8320753574371338, + "logits/rejected": -1.8298593759536743, + "logps/chosen": -34.20273208618164, + "logps/rejected": -196.18072509765625, + "loss": 0.3967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33038750290870667, + "rewards/margins": 1.0055843591690063, + "rewards/rejected": -0.6751968264579773, + "step": 17170 + }, + { + "epoch": 1.0, + "learning_rate": 1.500925920605578e-13, + "logits/chosen": -1.7372218370437622, + "logits/rejected": -1.7170926332473755, + "logps/chosen": -135.70506286621094, + "logps/rejected": -433.95135498046875, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9213684797286987, + "rewards/margins": 4.1374664306640625, + "rewards/rejected": -2.2160980701446533, + "step": 17171 + }, + { + "epoch": 1.0, + "learning_rate": 1.2788955536247036e-13, + "logits/chosen": -2.0993940830230713, + "logits/rejected": -2.108764171600342, + "logps/chosen": -11.031434059143066, + "logps/rejected": -122.7625503540039, + "loss": 0.3934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30229875445365906, + "rewards/margins": 1.4674065113067627, + "rewards/rejected": -1.1651077270507812, + "step": 17172 + }, + { + "epoch": 1.0, + "learning_rate": 1.0746275869721877e-13, + "logits/chosen": -1.6460858583450317, + "logits/rejected": -1.6614667177200317, + "logps/chosen": -226.95333862304688, + "logps/rejected": -302.2797546386719, + "loss": 0.4846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8748688101768494, + "rewards/margins": 0.16217041015625, + "rewards/rejected": 0.7126984000205994, + "step": 17173 + }, + { + "epoch": 1.0, + "learning_rate": 8.881220279199908e-14, + "logits/chosen": -1.7532522678375244, + "logits/rejected": -1.7603018283843994, + "logps/chosen": -84.13995361328125, + "logps/rejected": -205.39181518554688, + "loss": 0.4813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10896225273609161, + "rewards/margins": 0.7954033017158508, + "rewards/rejected": -0.9043655395507812, + "step": 17174 + }, + { + "epoch": 1.0, + "learning_rate": 7.1937888307394e-14, + "logits/chosen": -1.7487547397613525, + "logits/rejected": -1.7543106079101562, + "logps/chosen": -139.14791870117188, + "logps/rejected": -268.10552978515625, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.558764696121216, + "rewards/margins": 3.207324266433716, + "rewards/rejected": -0.6485595703125, + "step": 17175 + }, + { + "epoch": 1.0, + "learning_rate": 5.683981584292397e-14, + "logits/chosen": -1.752670407295227, + "logits/rejected": -1.7566561698913574, + "logps/chosen": -0.03168619051575661, + "logps/rejected": -53.719600677490234, + "loss": 0.6155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017765183001756668, + "rewards/margins": 0.2994285523891449, + "rewards/rejected": -0.3012050688266754, + "step": 17176 + }, + { + "epoch": 1.0, + "learning_rate": 4.351798593704714e-14, + "logits/chosen": -1.876883625984192, + "logits/rejected": -1.8627824783325195, + "logps/chosen": -34.74162292480469, + "logps/rejected": -303.5035400390625, + "loss": 0.201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6479217410087585, + "rewards/margins": 2.618670701980591, + "rewards/rejected": -1.9707489013671875, + "step": 17177 + }, + { + "epoch": 1.0, + "learning_rate": 3.197239906160831e-14, + "logits/chosen": -1.9337506294250488, + "logits/rejected": -1.9297540187835693, + "logps/chosen": -204.68167114257812, + "logps/rejected": -475.66650390625, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.989222764968872, + "rewards/margins": 3.373915195465088, + "rewards/rejected": -0.38469240069389343, + "step": 17178 + }, + { + "epoch": 1.0, + "learning_rate": 2.220305562739e-14, + "logits/chosen": -1.9266176223754883, + "logits/rejected": -1.92220938205719, + "logps/chosen": -4.606322288513184, + "logps/rejected": -154.88064575195312, + "loss": 0.394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08442473411560059, + "rewards/margins": 1.7812875509262085, + "rewards/rejected": -1.696862816810608, + "step": 17179 + }, + { + "epoch": 1.0, + "learning_rate": 1.4209955978561339e-14, + "logits/chosen": -1.7438085079193115, + "logits/rejected": -1.745928406715393, + "logps/chosen": -57.90226745605469, + "logps/rejected": -133.61740112304688, + "loss": 0.2578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8299652338027954, + "rewards/margins": 1.6579452753067017, + "rewards/rejected": -0.8279800415039062, + "step": 17180 + }, + { + "epoch": 1.0, + "learning_rate": 7.993100403780317e-15, + "logits/chosen": -1.9405111074447632, + "logits/rejected": -1.9603999853134155, + "logps/chosen": -270.30426025390625, + "logps/rejected": -441.9033203125, + "loss": 0.0759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.022253394126892, + "rewards/margins": 4.451727390289307, + "rewards/rejected": -3.429473876953125, + "step": 17181 + }, + { + "epoch": 1.0, + "learning_rate": 3.552489119540425e-15, + "logits/chosen": -1.6911596059799194, + "logits/rejected": -1.6869293451309204, + "logps/chosen": -312.1789245605469, + "logps/rejected": -422.6761474609375, + "loss": 0.1066, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6265076398849487, + "rewards/margins": 1.6514954566955566, + "rewards/rejected": -0.02498779259622097, + "step": 17182 + }, + { + "epoch": 1.0, + "learning_rate": 8.881222868240001e-16, + "logits/chosen": -1.735417127609253, + "logits/rejected": -1.7343188524246216, + "logps/chosen": -31.087032318115234, + "logps/rejected": -322.38525390625, + "loss": 0.2139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3934799134731293, + "rewards/margins": 4.929490566253662, + "rewards/rejected": -4.5360107421875, + "step": 17183 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -1.9603831768035889, + "logits/rejected": -1.951076865196228, + "logps/chosen": -102.55958557128906, + "logps/rejected": -230.5373077392578, + "loss": 0.5022, + "rewards/accuracies": 0.0, + "rewards/chosen": 1.5056869983673096, + "rewards/margins": -0.45345306396484375, + "rewards/rejected": 1.9591400623321533, + "step": 17184 + }, + { + "epoch": 1.0, + "step": 17184, + "total_flos": 0.0, + "train_loss": 0.35127759474262366, + "train_runtime": 20700.9971, + "train_samples_per_second": 0.83, + "train_steps_per_second": 0.83 + } + ], + "max_steps": 17184, + "num_train_epochs": 1, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}